summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/netfs_library.rst23
-rw-r--r--MAINTAINERS21
-rw-r--r--arch/arm/configs/mxs_defconfig3
-rw-r--r--arch/csky/configs/defconfig3
-rw-r--r--arch/mips/configs/ip27_defconfig3
-rw-r--r--arch/mips/configs/lemote2f_defconfig3
-rw-r--r--arch/mips/configs/loongson3_defconfig3
-rw-r--r--arch/mips/configs/pic32mzda_defconfig3
-rw-r--r--arch/s390/configs/debug_defconfig3
-rw-r--r--arch/s390/configs/defconfig3
-rw-r--r--arch/sh/configs/sdk7786_defconfig3
-rw-r--r--fs/9p/v9fs_vfs.h1
-rw-r--r--fs/9p/vfs_addr.c353
-rw-r--r--fs/9p/vfs_file.c89
-rw-r--r--fs/9p/vfs_inode.c16
-rw-r--r--fs/9p/vfs_inode_dotl.c8
-rw-r--r--fs/9p/vfs_super.c14
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/Makefile1
-rw-r--r--fs/afs/dynroot.c2
-rw-r--r--fs/afs/file.c213
-rw-r--r--fs/afs/inode.c28
-rw-r--r--fs/afs/internal.h72
-rw-r--r--fs/afs/super.c2
-rw-r--r--fs/afs/write.c826
-rw-r--r--fs/cachefiles/Kconfig2
-rw-r--r--fs/cachefiles/internal.h2
-rw-r--r--fs/cachefiles/io.c34
-rw-r--r--fs/cachefiles/ondemand.c2
-rw-r--r--fs/ceph/addr.c25
-rw-r--r--fs/ceph/cache.h45
-rw-r--r--fs/ceph/inode.c4
-rw-r--r--fs/erofs/Kconfig7
-rw-r--r--fs/fs-writeback.c10
-rw-r--r--fs/fscache/Kconfig40
-rw-r--r--fs/fscache/Makefile16
-rw-r--r--fs/fscache/internal.h277
-rw-r--r--fs/netfs/Kconfig39
-rw-r--r--fs/netfs/Makefile22
-rw-r--r--fs/netfs/buffered_read.c229
-rw-r--r--fs/netfs/buffered_write.c1253
-rw-r--r--fs/netfs/direct_read.c125
-rw-r--r--fs/netfs/direct_write.c171
-rw-r--r--fs/netfs/fscache_cache.c (renamed from fs/fscache/cache.c)0
-rw-r--r--fs/netfs/fscache_cookie.c (renamed from fs/fscache/cookie.c)0
-rw-r--r--fs/netfs/fscache_internal.h14
-rw-r--r--fs/netfs/fscache_io.c (renamed from fs/fscache/io.c)42
-rw-r--r--fs/netfs/fscache_main.c (renamed from fs/fscache/main.c)25
-rw-r--r--fs/netfs/fscache_proc.c (renamed from fs/fscache/proc.c)23
-rw-r--r--fs/netfs/fscache_stats.c (renamed from fs/fscache/stats.c)13
-rw-r--r--fs/netfs/fscache_volume.c (renamed from fs/fscache/volume.c)0
-rw-r--r--fs/netfs/internal.h284
-rw-r--r--fs/netfs/io.c213
-rw-r--r--fs/netfs/iterator.c97
-rw-r--r--fs/netfs/locking.c216
-rw-r--r--fs/netfs/main.c109
-rw-r--r--fs/netfs/misc.c260
-rw-r--r--fs/netfs/objects.c59
-rw-r--r--fs/netfs/output.c478
-rw-r--r--fs/netfs/stats.c42
-rw-r--r--fs/nfs/Kconfig4
-rw-r--r--fs/nfs/fscache.c7
-rw-r--r--fs/nfs/fscache.h2
-rw-r--r--fs/smb/client/cifsfs.c9
-rw-r--r--fs/smb/client/file.c18
-rw-r--r--fs/smb/client/fscache.c2
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/fscache-cache.h3
-rw-r--r--include/linux/fscache.h45
-rw-r--r--include/linux/netfs.h181
-rw-r--r--include/linux/writeback.h2
-rw-r--r--include/trace/events/afs.h31
-rw-r--r--include/trace/events/netfs.h155
-rw-r--r--mm/filemap.c2
74 files changed, 4158 insertions, 2180 deletions
diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst
index 48b95d04f72d..4cc657d743f7 100644
--- a/Documentation/filesystems/netfs_library.rst
+++ b/Documentation/filesystems/netfs_library.rst
@@ -295,7 +295,6 @@ through which it can issue requests and negotiate::
struct netfs_request_ops {
void (*init_request)(struct netfs_io_request *rreq, struct file *file);
void (*free_request)(struct netfs_io_request *rreq);
- int (*begin_cache_operation)(struct netfs_io_request *rreq);
void (*expand_readahead)(struct netfs_io_request *rreq);
bool (*clamp_length)(struct netfs_io_subrequest *subreq);
void (*issue_read)(struct netfs_io_subrequest *subreq);
@@ -317,20 +316,6 @@ The operations are as follows:
[Optional] This is called as the request is being deallocated so that the
filesystem can clean up any state it has attached there.
- * ``begin_cache_operation()``
-
- [Optional] This is called to ask the network filesystem to call into the
- cache (if present) to initialise the caching state for this read. The netfs
- library module cannot access the cache directly, so the cache should call
- something like fscache_begin_read_operation() to do this.
-
- The cache gets to store its state in ->cache_resources and must set a table
- of operations of its own there (though of a different type).
-
- This should return 0 on success and an error code otherwise. If an error is
- reported, the operation may proceed anyway, just without local caching (only
- out of memory and interruption errors cause failure here).
-
* ``expand_readahead()``
[Optional] This is called to allow the filesystem to expand the size of a
@@ -460,14 +445,14 @@ When implementing a local cache to be used by the read helpers, two things are
required: some way for the network filesystem to initialise the caching for a
read request and a table of operations for the helpers to call.
-The network filesystem's ->begin_cache_operation() method is called to set up a
-cache and this must call into the cache to do the work. If using fscache, for
-example, the cache would call::
+To begin a cache operation on an fscache object, the following function is
+called::
int fscache_begin_read_operation(struct netfs_io_request *rreq,
struct fscache_cookie *cookie);
-passing in the request pointer and the cookie corresponding to the file.
+passing in the request pointer and the cookie corresponding to the file. This
+fills in the cache resources mentioned below.
The netfs_io_request object contains a place for the cache to hang its
state::
diff --git a/MAINTAINERS b/MAINTAINERS
index 8709c7cd3656..2fb78ede9625 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8214,6 +8214,19 @@ S: Supported
F: fs/iomap/
F: include/linux/iomap.h
+FILESYSTEMS [NETFS LIBRARY]
+M: David Howells <dhowells@redhat.com>
+L: linux-cachefs@redhat.com (moderated for non-subscribers)
+L: linux-fsdevel@vger.kernel.org
+S: Supported
+F: Documentation/filesystems/caching/
+F: Documentation/filesystems/netfs_library.rst
+F: fs/netfs/
+F: include/linux/fscache*.h
+F: include/linux/netfs.h
+F: include/trace/events/fscache.h
+F: include/trace/events/netfs.h
+
FILESYSTEMS [STACKABLE]
M: Miklos Szeredi <miklos@szeredi.hu>
M: Amir Goldstein <amir73il@gmail.com>
@@ -8659,14 +8672,6 @@ F: Documentation/power/freezing-of-tasks.rst
F: include/linux/freezer.h
F: kernel/freezer.c
-FS-CACHE: LOCAL CACHING FOR NETWORK FILESYSTEMS
-M: David Howells <dhowells@redhat.com>
-L: linux-cachefs@redhat.com (moderated for non-subscribers)
-S: Supported
-F: Documentation/filesystems/caching/
-F: fs/fscache/
-F: include/linux/fscache*.h
-
FSCRYPT: FILE SYSTEM LEVEL ENCRYPTION SUPPORT
M: Eric Biggers <ebiggers@kernel.org>
M: Theodore Y. Ts'o <tytso@mit.edu>
diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig
index feb38a94c1a7..43bc1255a5db 100644
--- a/arch/arm/configs/mxs_defconfig
+++ b/arch/arm/configs/mxs_defconfig
@@ -138,7 +138,8 @@ CONFIG_PWM_MXS=y
CONFIG_NVMEM_MXS_OCOTP=y
CONFIG_EXT4_FS=y
# CONFIG_DNOTIFY is not set
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
CONFIG_FSCACHE_STATS=y
CONFIG_CACHEFILES=m
CONFIG_VFAT_FS=y
diff --git a/arch/csky/configs/defconfig b/arch/csky/configs/defconfig
index af722e4dfb47..ff559e5162aa 100644
--- a/arch/csky/configs/defconfig
+++ b/arch/csky/configs/defconfig
@@ -34,7 +34,8 @@ CONFIG_GENERIC_PHY=y
CONFIG_EXT4_FS=y
CONFIG_FANOTIFY=y
CONFIG_QUOTA=y
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
CONFIG_FSCACHE_STATS=y
CONFIG_CACHEFILES=m
CONFIG_MSDOS_FS=y
diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index b51f738a39a0..4714074c8bd7 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -287,7 +287,8 @@ CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
CONFIG_FSCACHE_STATS=y
CONFIG_CACHEFILES=m
CONFIG_PROC_KCORE=y
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index 38f17b658421..3389e6e885d9 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -238,7 +238,8 @@ CONFIG_BTRFS_FS=m
CONFIG_QUOTA=y
CONFIG_QFMT_V2=m
CONFIG_AUTOFS_FS=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index 07839a4b397e..78f498752066 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig
@@ -356,7 +356,8 @@ CONFIG_QFMT_V2=m
CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=m
CONFIG_VIRTIO_FS=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_MSDOS_FS=m
diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig
index 166d2ad372d1..54774f90c23e 100644
--- a/arch/mips/configs/pic32mzda_defconfig
+++ b/arch/mips/configs/pic32mzda_defconfig
@@ -68,7 +68,8 @@ CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 67ba0157fbdb..cae2dd34fbb4 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -637,8 +637,9 @@ CONFIG_FUSE_FS=y
CONFIG_CUSE=m
CONFIG_VIRTIO_FS=m
CONFIG_OVERLAY_FS=m
+CONFIG_NETFS_SUPPORT=m
CONFIG_NETFS_STATS=y
-CONFIG_FSCACHE=m
+CONFIG_FSCACHE=y
CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 4c2650c1fbdd..42b988873e54 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -622,8 +622,9 @@ CONFIG_FUSE_FS=y
CONFIG_CUSE=m
CONFIG_VIRTIO_FS=m
CONFIG_OVERLAY_FS=m
+CONFIG_NETFS_SUPPORT=m
CONFIG_NETFS_STATS=y
-CONFIG_FSCACHE=m
+CONFIG_FSCACHE=y
CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
index cf59b98446e4..7b427c17fbfe 100644
--- a/arch/sh/configs/sdk7786_defconfig
+++ b/arch/sh/configs/sdk7786_defconfig
@@ -171,7 +171,8 @@ CONFIG_BTRFS_FS=y
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=y
CONFIG_CUSE=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 731e3d14b67d..0e8418066a48 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -42,6 +42,7 @@ struct inode *v9fs_alloc_inode(struct super_block *sb);
void v9fs_free_inode(struct inode *inode);
struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode,
dev_t rdev);
+void v9fs_set_netfs_context(struct inode *inode);
int v9fs_init_inode(struct v9fs_session_info *v9ses,
struct inode *inode, umode_t mode, dev_t rdev);
void v9fs_evict_inode(struct inode *inode);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 8a635999a7d6..047855033d32 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -19,12 +19,45 @@
#include <linux/netfs.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
+#include <trace/events/netfs.h>
#include "v9fs.h"
#include "v9fs_vfs.h"
#include "cache.h"
#include "fid.h"
+static void v9fs_upload_to_server(struct netfs_io_subrequest *subreq)
+{
+ struct p9_fid *fid = subreq->rreq->netfs_priv;
+ int err, len;
+
+ trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+ len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err);
+ netfs_write_subrequest_terminated(subreq, len ?: err, false);
+}
+
+static void v9fs_upload_to_server_worker(struct work_struct *work)
+{
+ struct netfs_io_subrequest *subreq =
+ container_of(work, struct netfs_io_subrequest, work);
+
+ v9fs_upload_to_server(subreq);
+}
+
+/*
+ * Set up write requests for a writeback slice. We need to add a write request
+ * for each write we want to make.
+ */
+static void v9fs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len)
+{
+ struct netfs_io_subrequest *subreq;
+
+ subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER,
+ start, len, v9fs_upload_to_server_worker);
+ if (subreq)
+ netfs_queue_write_request(subreq);
+}
+
/**
* v9fs_issue_read - Issue a read from 9P
* @subreq: The read to make
@@ -33,14 +66,10 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
struct p9_fid *fid = rreq->netfs_priv;
- struct iov_iter to;
- loff_t pos = subreq->start + subreq->transferred;
- size_t len = subreq->len - subreq->transferred;
int total, err;
- iov_iter_xarray(&to, ITER_DEST, &rreq->mapping->i_pages, pos, len);
-
- total = p9_client_read(fid, pos, &to, &err);
+ total = p9_client_read(fid, subreq->start + subreq->transferred,
+ &subreq->io_iter, &err);
/* if we just extended the file size, any portion not in
* cache won't be on server and is zeroes */
@@ -50,25 +79,42 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
}
/**
- * v9fs_init_request - Initialise a read request
+ * v9fs_init_request - Initialise a request
* @rreq: The read request
* @file: The file being read from
*/
static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
{
- struct p9_fid *fid = file->private_data;
-
- BUG_ON(!fid);
+ struct p9_fid *fid;
+ bool writing = (rreq->origin == NETFS_READ_FOR_WRITE ||
+ rreq->origin == NETFS_WRITEBACK ||
+ rreq->origin == NETFS_WRITETHROUGH ||
+ rreq->origin == NETFS_LAUNDER_WRITE ||
+ rreq->origin == NETFS_UNBUFFERED_WRITE ||
+ rreq->origin == NETFS_DIO_WRITE);
+
+ if (file) {
+ fid = file->private_data;
+ if (!fid)
+ goto no_fid;
+ p9_fid_get(fid);
+ } else {
+ fid = v9fs_fid_find_inode(rreq->inode, writing, INVALID_UID, true);
+ if (!fid)
+ goto no_fid;
+ }
/* we might need to read from a fid that was opened write-only
* for read-modify-write of page cache, use the writeback fid
* for that */
- WARN_ON(rreq->origin == NETFS_READ_FOR_WRITE &&
- !(fid->mode & P9_ORDWR));
-
- p9_fid_get(fid);
+ WARN_ON(rreq->origin == NETFS_READ_FOR_WRITE && !(fid->mode & P9_ORDWR));
rreq->netfs_priv = fid;
return 0;
+
+no_fid:
+ WARN_ONCE(1, "folio expected an open fid inode->i_ino=%lx\n",
+ rreq->inode->i_ino);
+ return -EINVAL;
}
/**
@@ -82,281 +128,20 @@ static void v9fs_free_request(struct netfs_io_request *rreq)
p9_fid_put(fid);
}
-/**
- * v9fs_begin_cache_operation - Begin a cache operation for a read
- * @rreq: The read request
- */
-static int v9fs_begin_cache_operation(struct netfs_io_request *rreq)
-{
-#ifdef CONFIG_9P_FSCACHE
- struct fscache_cookie *cookie = v9fs_inode_cookie(V9FS_I(rreq->inode));
-
- return fscache_begin_read_operation(&rreq->cache_resources, cookie);
-#else
- return -ENOBUFS;
-#endif
-}
-
const struct netfs_request_ops v9fs_req_ops = {
.init_request = v9fs_init_request,
.free_request = v9fs_free_request,
- .begin_cache_operation = v9fs_begin_cache_operation,
.issue_read = v9fs_issue_read,
+ .create_write_requests = v9fs_create_write_requests,
};
-/**
- * v9fs_release_folio - release the private state associated with a folio
- * @folio: The folio to be released
- * @gfp: The caller's allocation restrictions
- *
- * Returns true if the page can be released, false otherwise.
- */
-
-static bool v9fs_release_folio(struct folio *folio, gfp_t gfp)
-{
- if (folio_test_private(folio))
- return false;
-#ifdef CONFIG_9P_FSCACHE
- if (folio_test_fscache(folio)) {
- if (current_is_kswapd() || !(gfp & __GFP_FS))
- return false;
- folio_wait_fscache(folio);
- }
- fscache_note_page_release(v9fs_inode_cookie(V9FS_I(folio_inode(folio))));
-#endif
- return true;
-}
-
-static void v9fs_invalidate_folio(struct folio *folio, size_t offset,
- size_t length)
-{
- folio_wait_fscache(folio);
-}
-
-#ifdef CONFIG_9P_FSCACHE
-static void v9fs_write_to_cache_done(void *priv, ssize_t transferred_or_error,
- bool was_async)
-{
- struct v9fs_inode *v9inode = priv;
- __le32 version;
-
- if (IS_ERR_VALUE(transferred_or_error) &&
- transferred_or_error != -ENOBUFS) {
- version = cpu_to_le32(v9inode->qid.version);
- fscache_invalidate(v9fs_inode_cookie(v9inode), &version,
- i_size_read(&v9inode->netfs.inode), 0);
- }
-}
-#endif
-
-static int v9fs_vfs_write_folio_locked(struct folio *folio)
-{
- struct inode *inode = folio_inode(folio);
- loff_t start = folio_pos(folio);
- loff_t i_size = i_size_read(inode);
- struct iov_iter from;
- size_t len = folio_size(folio);
- struct p9_fid *writeback_fid;
- int err;
- struct v9fs_inode __maybe_unused *v9inode = V9FS_I(inode);
- struct fscache_cookie __maybe_unused *cookie = v9fs_inode_cookie(v9inode);
-
- if (start >= i_size)
- return 0; /* Simultaneous truncation occurred */
-
- len = min_t(loff_t, i_size - start, len);
-
- iov_iter_xarray(&from, ITER_SOURCE, &folio_mapping(folio)->i_pages, start, len);
-
- writeback_fid = v9fs_fid_find_inode(inode, true, INVALID_UID, true);
- if (!writeback_fid) {
- WARN_ONCE(1, "folio expected an open fid inode->i_private=%p\n",
- inode->i_private);
- return -EINVAL;
- }
-
- folio_wait_fscache(folio);
- folio_start_writeback(folio);
-
- p9_client_write(writeback_fid, start, &from, &err);
-
-#ifdef CONFIG_9P_FSCACHE
- if (err == 0 &&
- fscache_cookie_enabled(cookie) &&
- test_bit(FSCACHE_COOKIE_IS_CACHING, &cookie->flags)) {
- folio_start_fscache(folio);
- fscache_write_to_cache(v9fs_inode_cookie(v9inode),
- folio_mapping(folio), start, len, i_size,
- v9fs_write_to_cache_done, v9inode,
- true);
- }
-#endif
-
- folio_end_writeback(folio);
- p9_fid_put(writeback_fid);
-
- return err;
-}
-
-static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
-{
- struct folio *folio = page_folio(page);
- int retval;
-
- p9_debug(P9_DEBUG_VFS, "folio %p\n", folio);
-
- retval = v9fs_vfs_write_folio_locked(folio);
- if (retval < 0) {
- if (retval == -EAGAIN) {
- folio_redirty_for_writepage(wbc, folio);
- retval = 0;
- } else {
- mapping_set_error(folio_mapping(folio), retval);
- }
- } else
- retval = 0;
-
- folio_unlock(folio);
- return retval;
-}
-
-static int v9fs_launder_folio(struct folio *folio)
-{
- int retval;
-
- if (folio_clear_dirty_for_io(folio)) {
- retval = v9fs_vfs_write_folio_locked(folio);
- if (retval)
- return retval;
- }
- folio_wait_fscache(folio);
- return 0;
-}
-
-/**
- * v9fs_direct_IO - 9P address space operation for direct I/O
- * @iocb: target I/O control block
- * @iter: The data/buffer to use
- *
- * The presence of v9fs_direct_IO() in the address space ops vector
- * allowes open() O_DIRECT flags which would have failed otherwise.
- *
- * In the non-cached mode, we shunt off direct read and write requests before
- * the VFS gets them, so this method should never be called.
- *
- * Direct IO is not 'yet' supported in the cached mode. Hence when
- * this routine is called through generic_file_aio_read(), the read/write fails
- * with an error.
- *
- */
-static ssize_t
-v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
- struct file *file = iocb->ki_filp;
- loff_t pos = iocb->ki_pos;
- ssize_t n;
- int err = 0;
-
- if (iov_iter_rw(iter) == WRITE) {
- n = p9_client_write(file->private_data, pos, iter, &err);
- if (n) {
- struct inode *inode = file_inode(file);
- loff_t i_size = i_size_read(inode);
-
- if (pos + n > i_size)
- inode_add_bytes(inode, pos + n - i_size);
- }
- } else {
- n = p9_client_read(file->private_data, pos, iter, &err);
- }
- return n ? n : err;
-}
-
-static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
- loff_t pos, unsigned int len,
- struct page **subpagep, void **fsdata)
-{
- int retval;
- struct folio *folio;
- struct v9fs_inode *v9inode = V9FS_I(mapping->host);
-
- p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
-
- /* Prefetch area to be written into the cache if we're caching this
- * file. We need to do this before we get a lock on the page in case
- * there's more than one writer competing for the same cache block.
- */
- retval = netfs_write_begin(&v9inode->netfs, filp, mapping, pos, len, &folio, fsdata);
- if (retval < 0)
- return retval;
-
- *subpagep = &folio->page;
- return retval;
-}
-
-static int v9fs_write_end(struct file *filp, struct address_space *mapping,
- loff_t pos, unsigned int len, unsigned int copied,
- struct page *subpage, void *fsdata)
-{
- loff_t last_pos = pos + copied;
- struct folio *folio = page_folio(subpage);
- struct inode *inode = mapping->host;
-
- p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
-
- if (!folio_test_uptodate(folio)) {
- if (unlikely(copied < len)) {
- copied = 0;
- goto out;
- }
-
- folio_mark_uptodate(folio);
- }
-
- /*
- * No need to use i_size_read() here, the i_size
- * cannot change under us because we hold the i_mutex.
- */
- if (last_pos > inode->i_size) {
- inode_add_bytes(inode, last_pos - inode->i_size);
- i_size_write(inode, last_pos);
-#ifdef CONFIG_9P_FSCACHE
- fscache_update_cookie(v9fs_inode_cookie(V9FS_I(inode)), NULL,
- &last_pos);
-#endif
- }
- folio_mark_dirty(folio);
-out:
- folio_unlock(folio);
- folio_put(folio);
-
- return copied;
-}
-
-#ifdef CONFIG_9P_FSCACHE
-/*
- * Mark a page as having been made dirty and thus needing writeback. We also
- * need to pin the cache object to write back to.
- */
-static bool v9fs_dirty_folio(struct address_space *mapping, struct folio *folio)
-{
- struct v9fs_inode *v9inode = V9FS_I(mapping->host);
-
- return fscache_dirty_folio(mapping, folio, v9fs_inode_cookie(v9inode));
-}
-#else
-#define v9fs_dirty_folio filemap_dirty_folio
-#endif
-
const struct address_space_operations v9fs_addr_operations = {
- .read_folio = netfs_read_folio,
- .readahead = netfs_readahead,
- .dirty_folio = v9fs_dirty_folio,
- .writepage = v9fs_vfs_writepage,
- .write_begin = v9fs_write_begin,
- .write_end = v9fs_write_end,
- .release_folio = v9fs_release_folio,
- .invalidate_folio = v9fs_invalidate_folio,
- .launder_folio = v9fs_launder_folio,
- .direct_IO = v9fs_direct_IO,
+ .read_folio = netfs_read_folio,
+ .readahead = netfs_readahead,
+ .dirty_folio = netfs_dirty_folio,
+ .release_folio = netfs_release_folio,
+ .invalidate_folio = netfs_invalidate_folio,
+ .launder_folio = netfs_launder_folio,
+ .direct_IO = noop_direct_IO,
+ .writepages = netfs_writepages,
};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 11cd8d23f6f2..bae330c2f0cf 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -353,25 +353,15 @@ static ssize_t
v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct p9_fid *fid = iocb->ki_filp->private_data;
- int ret, err = 0;
p9_debug(P9_DEBUG_VFS, "fid %d count %zu offset %lld\n",
fid->fid, iov_iter_count(to), iocb->ki_pos);
- if (!(fid->mode & P9L_DIRECT)) {
- p9_debug(P9_DEBUG_VFS, "(cached)\n");
- return generic_file_read_iter(iocb, to);
- }
-
- if (iocb->ki_filp->f_flags & O_NONBLOCK)
- ret = p9_client_read_once(fid, iocb->ki_pos, to, &err);
- else
- ret = p9_client_read(fid, iocb->ki_pos, to, &err);
- if (!ret)
- return err;
+ if (fid->mode & P9L_DIRECT)
+ return netfs_unbuffered_read_iter(iocb, to);
- iocb->ki_pos += ret;
- return ret;
+ p9_debug(P9_DEBUG_VFS, "(cached)\n");
+ return netfs_file_read_iter(iocb, to);
}
/*
@@ -407,46 +397,14 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct p9_fid *fid = file->private_data;
- ssize_t retval;
- loff_t origin;
- int err = 0;
p9_debug(P9_DEBUG_VFS, "fid %d\n", fid->fid);
- if (!(fid->mode & (P9L_DIRECT | P9L_NOWRITECACHE))) {
- p9_debug(P9_DEBUG_CACHE, "(cached)\n");
- return generic_file_write_iter(iocb, from);
- }
+ if (fid->mode & (P9L_DIRECT | P9L_NOWRITECACHE))
+ return netfs_unbuffered_write_iter(iocb, from);
- retval = generic_write_checks(iocb, from);
- if (retval <= 0)
- return retval;
-
- origin = iocb->ki_pos;
- retval = p9_client_write(file->private_data, iocb->ki_pos, from, &err);
- if (retval > 0) {
- struct inode *inode = file_inode(file);
- loff_t i_size;
- unsigned long pg_start, pg_end;
-
- pg_start = origin >> PAGE_SHIFT;
- pg_end = (origin + retval - 1) >> PAGE_SHIFT;
- if (inode->i_mapping && inode->i_mapping->nrpages)
- invalidate_inode_pages2_range(inode->i_mapping,
- pg_start, pg_end);
- iocb->ki_pos += retval;
- i_size = i_size_read(inode);
- if (iocb->ki_pos > i_size) {
- inode_add_bytes(inode, iocb->ki_pos - i_size);
- /*
- * Need to serialize against i_size_write() in
- * v9fs_stat2inode()
- */
- v9fs_i_size_write(inode, iocb->ki_pos);
- }
- return retval;
- }
- return err;
+ p9_debug(P9_DEBUG_CACHE, "(cached)\n");
+ return netfs_file_write_iter(iocb, from);
}
static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
@@ -519,36 +477,7 @@ v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma)
static vm_fault_t
v9fs_vm_page_mkwrite(struct vm_fault *vmf)
{
- struct folio *folio = page_folio(vmf->page);
- struct file *filp = vmf->vma->vm_file;
- struct inode *inode = file_inode(filp);
-
-
- p9_debug(P9_DEBUG_VFS, "folio %p fid %lx\n",
- folio, (unsigned long)filp->private_data);
-
- /* Wait for the page to be written to the cache before we allow it to
- * be modified. We then assume the entire page will need writing back.
- */
-#ifdef CONFIG_9P_FSCACHE
- if (folio_test_fscache(folio) &&
- folio_wait_fscache_killable(folio) < 0)
- return VM_FAULT_NOPAGE;
-#endif
-
- /* Update file times before taking page lock */
- file_update_time(filp);
-
- if (folio_lock_killable(folio) < 0)
- return VM_FAULT_RETRY;
- if (folio_mapping(folio) != inode->i_mapping)
- goto out_unlock;
- folio_wait_stable(folio);
-
- return VM_FAULT_LOCKED;
-out_unlock:
- folio_unlock(folio);
- return VM_FAULT_NOPAGE;
+ return netfs_page_mkwrite(vmf, NULL);
}
static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index b845ee18a80b..32572982f72e 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -246,10 +246,10 @@ void v9fs_free_inode(struct inode *inode)
/*
* Set parameters for the netfs library
*/
-static void v9fs_set_netfs_context(struct inode *inode)
+void v9fs_set_netfs_context(struct inode *inode)
{
struct v9fs_inode *v9inode = V9FS_I(inode);
- netfs_inode_init(&v9inode->netfs, &v9fs_req_ops);
+ netfs_inode_init(&v9inode->netfs, &v9fs_req_ops, true);
}
int v9fs_init_inode(struct v9fs_session_info *v9ses,
@@ -326,8 +326,6 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
err = -EINVAL;
goto error;
}
-
- v9fs_set_netfs_context(inode);
error:
return err;
@@ -359,6 +357,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t rdev)
iput(inode);
return ERR_PTR(err);
}
+ v9fs_set_netfs_context(inode);
return inode;
}
@@ -374,11 +373,8 @@ void v9fs_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
-#ifdef CONFIG_9P_FSCACHE
version = cpu_to_le32(v9inode->qid.version);
- fscache_clear_inode_writeback(v9fs_inode_cookie(v9inode), inode,
- &version);
-#endif
+ netfs_clear_inode_writeback(inode, &version);
clear_inode(inode);
filemap_fdatawrite(&inode->i_data);
@@ -464,6 +460,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
goto error;
v9fs_stat2inode(st, inode, sb, 0);
+ v9fs_set_netfs_context(inode);
v9fs_cache_inode_get_cookie(inode);
unlock_new_inode(inode);
return inode;
@@ -1113,7 +1110,7 @@ static int v9fs_vfs_setattr(struct mnt_idmap *idmap,
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
- truncate_pagecache(inode, iattr->ia_size);
+ netfs_resize_file(netfs_inode(inode), iattr->ia_size, true);
#ifdef CONFIG_9P_FSCACHE
if (v9ses->cache & CACHE_FSCACHE) {
@@ -1181,6 +1178,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
mode |= inode->i_mode & ~S_IALLUGO;
inode->i_mode = mode;
+ v9inode->netfs.remote_i_size = stat->length;
if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
v9fs_i_size_write(inode, stat->length);
/* not real number of blocks, but 512 byte ones ... */
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index c7319af2f471..3505227e1704 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -128,6 +128,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
goto error;
v9fs_stat2inode_dotl(st, inode, 0);
+ v9fs_set_netfs_context(inode);
v9fs_cache_inode_get_cookie(inode);
retval = v9fs_get_acl(inode, fid);
if (retval)
@@ -598,7 +599,7 @@ int v9fs_vfs_setattr_dotl(struct mnt_idmap *idmap,
if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size !=
i_size_read(inode)) {
truncate_setsize(inode, iattr->ia_size);
- truncate_pagecache(inode, iattr->ia_size);
+ netfs_resize_file(netfs_inode(inode), iattr->ia_size, true);
#ifdef CONFIG_9P_FSCACHE
if (v9ses->cache & CACHE_FSCACHE)
@@ -655,6 +656,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
mode |= inode->i_mode & ~S_IALLUGO;
inode->i_mode = mode;
+ v9inode->netfs.remote_i_size = stat->st_size;
if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
v9fs_i_size_write(inode, stat->st_size);
inode->i_blocks = stat->st_blocks;
@@ -683,8 +685,10 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
inode->i_mode = mode;
}
if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) &&
- stat->st_result_mask & P9_STATS_SIZE)
+ stat->st_result_mask & P9_STATS_SIZE) {
+ v9inode->netfs.remote_i_size = stat->st_size;
v9fs_i_size_write(inode, stat->st_size);
+ }
if (stat->st_result_mask & P9_STATS_BLOCKS)
inode->i_blocks = stat->st_blocks;
}
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 73db55c050bf..941f7d0e0bfa 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -289,31 +289,21 @@ static int v9fs_drop_inode(struct inode *inode)
static int v9fs_write_inode(struct inode *inode,
struct writeback_control *wbc)
{
- struct v9fs_inode *v9inode;
-
/*
* send an fsync request to server irrespective of
* wbc->sync_mode.
*/
p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
-
- v9inode = V9FS_I(inode);
- fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode));
-
- return 0;
+ return netfs_unpin_writeback(inode, wbc);
}
static int v9fs_write_inode_dotl(struct inode *inode,
struct writeback_control *wbc)
{
- struct v9fs_inode *v9inode;
- v9inode = V9FS_I(inode);
p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
- fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode));
-
- return 0;
+ return netfs_unpin_writeback(inode, wbc);
}
static const struct super_operations v9fs_super_ops = {
diff --git a/fs/Kconfig b/fs/Kconfig
index a3159831ba98..89fdbefd1075 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -144,7 +144,6 @@ source "fs/overlayfs/Kconfig"
menu "Caches"
source "fs/netfs/Kconfig"
-source "fs/fscache/Kconfig"
source "fs/cachefiles/Kconfig"
endmenu
diff --git a/fs/Makefile b/fs/Makefile
index a6962c588962..c09016257f05 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -61,7 +61,6 @@ obj-$(CONFIG_DLM) += dlm/
# Do not add any filesystems before this line
obj-$(CONFIG_NETFS_SUPPORT) += netfs/
-obj-$(CONFIG_FSCACHE) += fscache/
obj-$(CONFIG_REISERFS_FS) += reiserfs/
obj-$(CONFIG_EXT4_FS) += ext4/
# We place ext4 before ext2 so that clean ext3 root fs's do NOT mount using the
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index 2cd40ba601f1..d3bc4a2d7085 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -76,7 +76,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
/* there shouldn't be an existing inode */
BUG_ON(!(inode->i_state & I_NEW));
- netfs_inode_init(&vnode->netfs, NULL);
+ netfs_inode_init(&vnode->netfs, NULL, false);
inode->i_size = 0;
inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
if (root) {
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 30914e0d9cb2..3d33b221d9ca 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -20,9 +20,6 @@
static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
static int afs_symlink_read_folio(struct file *file, struct folio *folio);
-static void afs_invalidate_folio(struct folio *folio, size_t offset,
- size_t length);
-static bool afs_release_folio(struct folio *folio, gfp_t gfp_flags);
static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
@@ -37,7 +34,7 @@ const struct file_operations afs_file_operations = {
.release = afs_release,
.llseek = generic_file_llseek,
.read_iter = afs_file_read_iter,
- .write_iter = afs_file_write,
+ .write_iter = netfs_file_write_iter,
.mmap = afs_file_mmap,
.splice_read = afs_file_splice_read,
.splice_write = iter_file_splice_write,
@@ -53,22 +50,21 @@ const struct inode_operations afs_file_inode_operations = {
};
const struct address_space_operations afs_file_aops = {
+ .direct_IO = noop_direct_IO,
.read_folio = netfs_read_folio,
.readahead = netfs_readahead,
- .dirty_folio = afs_dirty_folio,
- .launder_folio = afs_launder_folio,
- .release_folio = afs_release_folio,
- .invalidate_folio = afs_invalidate_folio,
- .write_begin = afs_write_begin,
- .write_end = afs_write_end,
- .writepages = afs_writepages,
+ .dirty_folio = netfs_dirty_folio,
+ .launder_folio = netfs_launder_folio,
+ .release_folio = netfs_release_folio,
+ .invalidate_folio = netfs_invalidate_folio,
.migrate_folio = filemap_migrate_folio,
+ .writepages = afs_writepages,
};
const struct address_space_operations afs_symlink_aops = {
.read_folio = afs_symlink_read_folio,
- .release_folio = afs_release_folio,
- .invalidate_folio = afs_invalidate_folio,
+ .release_folio = netfs_release_folio,
+ .invalidate_folio = netfs_invalidate_folio,
.migrate_folio = filemap_migrate_folio,
};
@@ -323,11 +319,7 @@ static void afs_issue_read(struct netfs_io_subrequest *subreq)
fsreq->len = subreq->len - subreq->transferred;
fsreq->key = key_get(subreq->rreq->netfs_priv);
fsreq->vnode = vnode;
- fsreq->iter = &fsreq->def_iter;
-
- iov_iter_xarray(&fsreq->def_iter, ITER_DEST,
- &fsreq->vnode->netfs.inode.i_mapping->i_pages,
- fsreq->pos, fsreq->len);
+ fsreq->iter = &subreq->io_iter;
afs_fetch_data(fsreq->vnode, fsreq);
afs_put_read(fsreq);
@@ -359,22 +351,13 @@ static int afs_symlink_read_folio(struct file *file, struct folio *folio)
static int afs_init_request(struct netfs_io_request *rreq, struct file *file)
{
- rreq->netfs_priv = key_get(afs_file_key(file));
+ if (file)
+ rreq->netfs_priv = key_get(afs_file_key(file));
+ rreq->rsize = 256 * 1024;
+ rreq->wsize = 256 * 1024;
return 0;
}
-static int afs_begin_cache_operation(struct netfs_io_request *rreq)
-{
-#ifdef CONFIG_AFS_FSCACHE
- struct afs_vnode *vnode = AFS_FS_I(rreq->inode);
-
- return fscache_begin_read_operation(&rreq->cache_resources,
- afs_vnode_cache(vnode));
-#else
- return -ENOBUFS;
-#endif
-}
-
static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
struct folio **foliop, void **_fsdata)
{
@@ -388,128 +371,37 @@ static void afs_free_request(struct netfs_io_request *rreq)
key_put(rreq->netfs_priv);
}
-const struct netfs_request_ops afs_req_ops = {
- .init_request = afs_init_request,
- .free_request = afs_free_request,
- .begin_cache_operation = afs_begin_cache_operation,
- .check_write_begin = afs_check_write_begin,
- .issue_read = afs_issue_read,
-};
-
-int afs_write_inode(struct inode *inode, struct writeback_control *wbc)
+static void afs_update_i_size(struct inode *inode, loff_t new_i_size)
{
- fscache_unpin_writeback(wbc, afs_vnode_cache(AFS_FS_I(inode)));
- return 0;
-}
-
-/*
- * Adjust the dirty region of the page on truncation or full invalidation,
- * getting rid of the markers altogether if the region is entirely invalidated.
- */
-static void afs_invalidate_dirty(struct folio *folio, size_t offset,
- size_t length)
-{
- struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
- unsigned long priv;
- unsigned int f, t, end = offset + length;
-
- priv = (unsigned long)folio_get_private(folio);
-
- /* we clean up only if the entire page is being invalidated */
- if (offset == 0 && length == folio_size(folio))
- goto full_invalidate;
-
- /* If the page was dirtied by page_mkwrite(), the PTE stays writable
- * and we don't get another notification to tell us to expand it
- * again.
- */
- if (afs_is_folio_dirty_mmapped(priv))
- return;
-
- /* We may need to shorten the dirty region */
- f = afs_folio_dirty_from(folio, priv);
- t = afs_folio_dirty_to(folio, priv);
-
- if (t <= offset || f >= end)
- return; /* Doesn't overlap */
-
- if (f < offset && t > end)
- return; /* Splits the dirty region - just absorb it */
-
- if (f >= offset && t <= end)
- goto undirty;
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ loff_t i_size;
- if (f < offset)
- t = offset;
- else
- f = end;
- if (f == t)
- goto undirty;
-
- priv = afs_folio_dirty(folio, f, t);
- folio_change_private(folio, (void *)priv);
- trace_afs_folio_dirty(vnode, tracepoint_string("trunc"), folio);
- return;
-
-undirty:
- trace_afs_folio_dirty(vnode, tracepoint_string("undirty"), folio);
- folio_clear_dirty_for_io(folio);
-full_invalidate:
- trace_afs_folio_dirty(vnode, tracepoint_string("inval"), folio);
- folio_detach_private(folio);
+ write_seqlock(&vnode->cb_lock);
+ i_size = i_size_read(&vnode->netfs.inode);
+ if (new_i_size > i_size) {
+ i_size_write(&vnode->netfs.inode, new_i_size);
+ inode_set_bytes(&vnode->netfs.inode, new_i_size);
+ }
+ write_sequnlock(&vnode->cb_lock);
+ fscache_update_cookie(afs_vnode_cache(vnode), NULL, &new_i_size);
}
-/*
- * invalidate part or all of a page
- * - release a page and clean up its private data if offset is 0 (indicating
- * the entire page)
- */
-static void afs_invalidate_folio(struct folio *folio, size_t offset,
- size_t length)
+static void afs_netfs_invalidate_cache(struct netfs_io_request *wreq)
{
- _enter("{%lu},%zu,%zu", folio->index, offset, length);
-
- BUG_ON(!folio_test_locked(folio));
+ struct afs_vnode *vnode = AFS_FS_I(wreq->inode);
- if (folio_get_private(folio))
- afs_invalidate_dirty(folio, offset, length);
-
- folio_wait_fscache(folio);
- _leave("");
+ afs_invalidate_cache(vnode, 0);
}
-/*
- * release a page and clean up its private state if it's not busy
- * - return true if the page can now be released, false if not
- */
-static bool afs_release_folio(struct folio *folio, gfp_t gfp)
-{
- struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
-
- _enter("{{%llx:%llu}[%lu],%lx},%x",
- vnode->fid.vid, vnode->fid.vnode, folio_index(folio), folio->flags,
- gfp);
-
- /* deny if folio is being written to the cache and the caller hasn't
- * elected to wait */
-#ifdef CONFIG_AFS_FSCACHE
- if (folio_test_fscache(folio)) {
- if (current_is_kswapd() || !(gfp & __GFP_FS))
- return false;
- folio_wait_fscache(folio);
- }
- fscache_note_page_release(afs_vnode_cache(vnode));
-#endif
-
- if (folio_test_private(folio)) {
- trace_afs_folio_dirty(vnode, tracepoint_string("rel"), folio);
- folio_detach_private(folio);
- }
-
- /* Indicate that the folio can be released */
- _leave(" = T");
- return true;
-}
+const struct netfs_request_ops afs_req_ops = {
+ .init_request = afs_init_request,
+ .free_request = afs_free_request,
+ .check_write_begin = afs_check_write_begin,
+ .issue_read = afs_issue_read,
+ .update_i_size = afs_update_i_size,
+ .invalidate_cache = afs_netfs_invalidate_cache,
+ .create_write_requests = afs_create_write_requests,
+};
static void afs_add_open_mmap(struct afs_vnode *vnode)
{
@@ -576,28 +468,39 @@ static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pg
static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
- struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
struct afs_file *af = iocb->ki_filp->private_data;
- int ret;
+ ssize_t ret;
- ret = afs_validate(vnode, af->key);
+ if (iocb->ki_flags & IOCB_DIRECT)
+ return netfs_unbuffered_read_iter(iocb, iter);
+
+ ret = netfs_start_io_read(inode);
if (ret < 0)
return ret;
-
- return generic_file_read_iter(iocb, iter);
+ ret = afs_validate(vnode, af->key);
+ if (ret == 0)
+ ret = filemap_read(iocb, iter, 0);
+ netfs_end_io_read(inode);
+ return ret;
}
static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags)
{
- struct afs_vnode *vnode = AFS_FS_I(file_inode(in));
+ struct inode *inode = file_inode(in);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
struct afs_file *af = in->private_data;
- int ret;
+ ssize_t ret;
- ret = afs_validate(vnode, af->key);
+ ret = netfs_start_io_read(inode);
if (ret < 0)
return ret;
-
- return filemap_splice_read(in, ppos, pipe, len, flags);
+ ret = afs_validate(vnode, af->key);
+ if (ret == 0)
+ ret = filemap_splice_read(in, ppos, pipe, len, flags);
+ netfs_end_io_read(inode);
+ return ret;
}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 4f04f6f33f46..94fc049aff58 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -58,7 +58,7 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren
*/
static void afs_set_netfs_context(struct afs_vnode *vnode)
{
- netfs_inode_init(&vnode->netfs, &afs_req_ops);
+ netfs_inode_init(&vnode->netfs, &afs_req_ops, true);
}
/*
@@ -166,6 +166,7 @@ static void afs_apply_status(struct afs_operation *op,
struct inode *inode = &vnode->netfs.inode;
struct timespec64 t;
umode_t mode;
+ bool unexpected_jump = false;
bool data_changed = false;
bool change_size = vp->set_size;
@@ -230,6 +231,7 @@ static void afs_apply_status(struct afs_operation *op,
}
change_size = true;
data_changed = true;
+ unexpected_jump = true;
} else if (vnode->status.type == AFS_FTYPE_DIR) {
/* Expected directory change is handled elsewhere so
* that we can locally edit the directory and save on a
@@ -249,8 +251,10 @@ static void afs_apply_status(struct afs_operation *op,
* what's on the server.
*/
vnode->netfs.remote_i_size = status->size;
- if (change_size) {
+ if (change_size || status->size > i_size_read(inode)) {
afs_set_i_size(vnode, status->size);
+ if (unexpected_jump)
+ vnode->netfs.zero_point = status->size;
inode_set_ctime_to_ts(inode, t);
inode_set_atime_to_ts(inode, t);
}
@@ -647,7 +651,7 @@ void afs_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
afs_set_cache_aux(vnode, &aux);
- fscache_clear_inode_writeback(afs_vnode_cache(vnode), inode, &aux);
+ netfs_clear_inode_writeback(inode, &aux);
clear_inode(inode);
while (!list_empty(&vnode->wb_keys)) {
@@ -689,17 +693,17 @@ static void afs_setattr_success(struct afs_operation *op)
static void afs_setattr_edit_file(struct afs_operation *op)
{
struct afs_vnode_param *vp = &op->file[0];
- struct inode *inode = &vp->vnode->netfs.inode;
+ struct afs_vnode *vnode = vp->vnode;
if (op->setattr.attr->ia_valid & ATTR_SIZE) {
loff_t size = op->setattr.attr->ia_size;
loff_t i_size = op->setattr.old_i_size;
- if (size < i_size)
- truncate_pagecache(inode, size);
- if (size != i_size)
- fscache_resize_cookie(afs_vnode_cache(vp->vnode),
- vp->scb.status.size);
+ if (size != i_size) {
+ truncate_setsize(&vnode->netfs.inode, size);
+ netfs_resize_file(&vnode->netfs, size, true);
+ fscache_resize_cookie(afs_vnode_cache(vnode), size);
+ }
}
}
@@ -767,11 +771,11 @@ int afs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
*/
if (!(attr->ia_valid & (supported & ~ATTR_SIZE & ~ATTR_MTIME)) &&
attr->ia_size < i_size &&
- attr->ia_size > vnode->status.size) {
- truncate_pagecache(inode, attr->ia_size);
+ attr->ia_size > vnode->netfs.remote_i_size) {
+ truncate_setsize(inode, attr->ia_size);
+ netfs_resize_file(&vnode->netfs, size, false);
fscache_resize_cookie(afs_vnode_cache(vnode),
attr->ia_size);
- i_size_write(inode, attr->ia_size);
ret = 0;
goto out_unlock;
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 55aa0679d8ce..9c03fcf7ffaa 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -985,62 +985,6 @@ static inline void afs_invalidate_cache(struct afs_vnode *vnode, unsigned int fl
i_size_read(&vnode->netfs.inode), flags);
}
-/*
- * We use folio->private to hold the amount of the folio that we've written to,
- * splitting the field into two parts. However, we need to represent a range
- * 0...FOLIO_SIZE, so we reduce the resolution if the size of the folio
- * exceeds what we can encode.
- */
-#ifdef CONFIG_64BIT
-#define __AFS_FOLIO_PRIV_MASK 0x7fffffffUL
-#define __AFS_FOLIO_PRIV_SHIFT 32
-#define __AFS_FOLIO_PRIV_MMAPPED 0x80000000UL
-#else
-#define __AFS_FOLIO_PRIV_MASK 0x7fffUL
-#define __AFS_FOLIO_PRIV_SHIFT 16
-#define __AFS_FOLIO_PRIV_MMAPPED 0x8000UL
-#endif
-
-static inline unsigned int afs_folio_dirty_resolution(struct folio *folio)
-{
- int shift = folio_shift(folio) - (__AFS_FOLIO_PRIV_SHIFT - 1);
- return (shift > 0) ? shift : 0;
-}
-
-static inline size_t afs_folio_dirty_from(struct folio *folio, unsigned long priv)
-{
- unsigned long x = priv & __AFS_FOLIO_PRIV_MASK;
-
- /* The lower bound is inclusive */
- return x << afs_folio_dirty_resolution(folio);
-}
-
-static inline size_t afs_folio_dirty_to(struct folio *folio, unsigned long priv)
-{
- unsigned long x = (priv >> __AFS_FOLIO_PRIV_SHIFT) & __AFS_FOLIO_PRIV_MASK;
-
- /* The upper bound is immediately beyond the region */
- return (x + 1) << afs_folio_dirty_resolution(folio);
-}
-
-static inline unsigned long afs_folio_dirty(struct folio *folio, size_t from, size_t to)
-{
- unsigned int res = afs_folio_dirty_resolution(folio);
- from >>= res;
- to = (to - 1) >> res;
- return (to << __AFS_FOLIO_PRIV_SHIFT) | from;
-}
-
-static inline unsigned long afs_folio_dirty_mmapped(unsigned long priv)
-{
- return priv | __AFS_FOLIO_PRIV_MMAPPED;
-}
-
-static inline bool afs_is_folio_dirty_mmapped(unsigned long priv)
-{
- return priv & __AFS_FOLIO_PRIV_MMAPPED;
-}
-
#include <trace/events/afs.h>
/*****************************************************************************/
@@ -1167,7 +1111,6 @@ extern int afs_release(struct inode *, struct file *);
extern int afs_fetch_data(struct afs_vnode *, struct afs_read *);
extern struct afs_read *afs_alloc_read(gfp_t);
extern void afs_put_read(struct afs_read *);
-extern int afs_write_inode(struct inode *, struct writeback_control *);
static inline struct afs_read *afs_get_read(struct afs_read *req)
{
@@ -1658,24 +1601,11 @@ extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *);
/*
* write.c
*/
-#ifdef CONFIG_AFS_FSCACHE
-bool afs_dirty_folio(struct address_space *, struct folio *);
-#else
-#define afs_dirty_folio filemap_dirty_folio
-#endif
-extern int afs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct page **pagep, void **fsdata);
-extern int afs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata);
-extern int afs_writepage(struct page *, struct writeback_control *);
extern int afs_writepages(struct address_space *, struct writeback_control *);
-extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
extern int afs_fsync(struct file *, loff_t, loff_t, int);
extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf);
extern void afs_prune_wb_keys(struct afs_vnode *);
-int afs_launder_folio(struct folio *);
+void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len);
/*
* xattr.c
diff --git a/fs/afs/super.c b/fs/afs/super.c
index ae2d66a52add..f3ba1c3e72f5 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -55,7 +55,7 @@ int afs_net_id;
static const struct super_operations afs_super_ops = {
.statfs = afs_statfs,
.alloc_inode = afs_alloc_inode,
- .write_inode = afs_write_inode,
+ .write_inode = netfs_unpin_writeback,
.drop_inode = afs_drop_inode,
.destroy_inode = afs_destroy_inode,
.free_inode = afs_free_inode,
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 61d34ad2ca7d..74402d95a884 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -12,309 +12,17 @@
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/netfs.h>
+#include <trace/events/netfs.h>
#include "internal.h"
-static int afs_writepages_region(struct address_space *mapping,
- struct writeback_control *wbc,
- loff_t start, loff_t end, loff_t *_next,
- bool max_one_loop);
-
-static void afs_write_to_cache(struct afs_vnode *vnode, loff_t start, size_t len,
- loff_t i_size, bool caching);
-
-#ifdef CONFIG_AFS_FSCACHE
-/*
- * Mark a page as having been made dirty and thus needing writeback. We also
- * need to pin the cache object to write back to.
- */
-bool afs_dirty_folio(struct address_space *mapping, struct folio *folio)
-{
- return fscache_dirty_folio(mapping, folio,
- afs_vnode_cache(AFS_FS_I(mapping->host)));
-}
-static void afs_folio_start_fscache(bool caching, struct folio *folio)
-{
- if (caching)
- folio_start_fscache(folio);
-}
-#else
-static void afs_folio_start_fscache(bool caching, struct folio *folio)
-{
-}
-#endif
-
-/*
- * Flush out a conflicting write. This may extend the write to the surrounding
- * pages if also dirty and contiguous to the conflicting region..
- */
-static int afs_flush_conflicting_write(struct address_space *mapping,
- struct folio *folio)
-{
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = LONG_MAX,
- .range_start = folio_pos(folio),
- .range_end = LLONG_MAX,
- };
- loff_t next;
-
- return afs_writepages_region(mapping, &wbc, folio_pos(folio), LLONG_MAX,
- &next, true);
-}
-
-/*
- * prepare to perform part of a write to a page
- */
-int afs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct page **_page, void **fsdata)
-{
- struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
- struct folio *folio;
- unsigned long priv;
- unsigned f, from;
- unsigned t, to;
- pgoff_t index;
- int ret;
-
- _enter("{%llx:%llu},%llx,%x",
- vnode->fid.vid, vnode->fid.vnode, pos, len);
-
- /* Prefetch area to be written into the cache if we're caching this
- * file. We need to do this before we get a lock on the page in case
- * there's more than one writer competing for the same cache block.
- */
- ret = netfs_write_begin(&vnode->netfs, file, mapping, pos, len, &folio, fsdata);
- if (ret < 0)
- return ret;
-
- index = folio_index(folio);
- from = pos - index * PAGE_SIZE;
- to = from + len;
-
-try_again:
- /* See if this page is already partially written in a way that we can
- * merge the new write with.
- */
- if (folio_test_private(folio)) {
- priv = (unsigned long)folio_get_private(folio);
- f = afs_folio_dirty_from(folio, priv);
- t = afs_folio_dirty_to(folio, priv);
- ASSERTCMP(f, <=, t);
-
- if (folio_test_writeback(folio)) {
- trace_afs_folio_dirty(vnode, tracepoint_string("alrdy"), folio);
- folio_unlock(folio);
- goto wait_for_writeback;
- }
- /* If the file is being filled locally, allow inter-write
- * spaces to be merged into writes. If it's not, only write
- * back what the user gives us.
- */
- if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) &&
- (to < f || from > t))
- goto flush_conflicting_write;
- }
-
- *_page = folio_file_page(folio, pos / PAGE_SIZE);
- _leave(" = 0");
- return 0;
-
- /* The previous write and this write aren't adjacent or overlapping, so
- * flush the page out.
- */
-flush_conflicting_write:
- trace_afs_folio_dirty(vnode, tracepoint_string("confl"), folio);
- folio_unlock(folio);
-
- ret = afs_flush_conflicting_write(mapping, folio);
- if (ret < 0)
- goto error;
-
-wait_for_writeback:
- ret = folio_wait_writeback_killable(folio);
- if (ret < 0)
- goto error;
-
- ret = folio_lock_killable(folio);
- if (ret < 0)
- goto error;
- goto try_again;
-
-error:
- folio_put(folio);
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
- * finalise part of a write to a page
- */
-int afs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *subpage, void *fsdata)
-{
- struct folio *folio = page_folio(subpage);
- struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
- unsigned long priv;
- unsigned int f, from = offset_in_folio(folio, pos);
- unsigned int t, to = from + copied;
- loff_t i_size, write_end_pos;
-
- _enter("{%llx:%llu},{%lx}",
- vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
-
- if (!folio_test_uptodate(folio)) {
- if (copied < len) {
- copied = 0;
- goto out;
- }
-
- folio_mark_uptodate(folio);
- }
-
- if (copied == 0)
- goto out;
-
- write_end_pos = pos + copied;
-
- i_size = i_size_read(&vnode->netfs.inode);
- if (write_end_pos > i_size) {
- write_seqlock(&vnode->cb_lock);
- i_size = i_size_read(&vnode->netfs.inode);
- if (write_end_pos > i_size)
- afs_set_i_size(vnode, write_end_pos);
- write_sequnlock(&vnode->cb_lock);
- fscache_update_cookie(afs_vnode_cache(vnode), NULL, &write_end_pos);
- }
-
- if (folio_test_private(folio)) {
- priv = (unsigned long)folio_get_private(folio);
- f = afs_folio_dirty_from(folio, priv);
- t = afs_folio_dirty_to(folio, priv);
- if (from < f)
- f = from;
- if (to > t)
- t = to;
- priv = afs_folio_dirty(folio, f, t);
- folio_change_private(folio, (void *)priv);
- trace_afs_folio_dirty(vnode, tracepoint_string("dirty+"), folio);
- } else {
- priv = afs_folio_dirty(folio, from, to);
- folio_attach_private(folio, (void *)priv);
- trace_afs_folio_dirty(vnode, tracepoint_string("dirty"), folio);
- }
-
- if (folio_mark_dirty(folio))
- _debug("dirtied %lx", folio_index(folio));
-
-out:
- folio_unlock(folio);
- folio_put(folio);
- return copied;
-}
-
-/*
- * kill all the pages in the given range
- */
-static void afs_kill_pages(struct address_space *mapping,
- loff_t start, loff_t len)
-{
- struct afs_vnode *vnode = AFS_FS_I(mapping->host);
- struct folio *folio;
- pgoff_t index = start / PAGE_SIZE;
- pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
-
- _enter("{%llx:%llu},%llx @%llx",
- vnode->fid.vid, vnode->fid.vnode, len, start);
-
- do {
- _debug("kill %lx (to %lx)", index, last);
-
- folio = filemap_get_folio(mapping, index);
- if (IS_ERR(folio)) {
- next = index + 1;
- continue;
- }
-
- next = folio_next_index(folio);
-
- folio_clear_uptodate(folio);
- folio_end_writeback(folio);
- folio_lock(folio);
- generic_error_remove_folio(mapping, folio);
- folio_unlock(folio);
- folio_put(folio);
-
- } while (index = next, index <= last);
-
- _leave("");
-}
-
-/*
- * Redirty all the pages in a given range.
- */
-static void afs_redirty_pages(struct writeback_control *wbc,
- struct address_space *mapping,
- loff_t start, loff_t len)
-{
- struct afs_vnode *vnode = AFS_FS_I(mapping->host);
- struct folio *folio;
- pgoff_t index = start / PAGE_SIZE;
- pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
-
- _enter("{%llx:%llu},%llx @%llx",
- vnode->fid.vid, vnode->fid.vnode, len, start);
-
- do {
- _debug("redirty %llx @%llx", len, start);
-
- folio = filemap_get_folio(mapping, index);
- if (IS_ERR(folio)) {
- next = index + 1;
- continue;
- }
-
- next = index + folio_nr_pages(folio);
- folio_redirty_for_writepage(wbc, folio);
- folio_end_writeback(folio);
- folio_put(folio);
- } while (index = next, index <= last);
-
- _leave("");
-}
-
/*
* completion of write to server
*/
static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len)
{
- struct address_space *mapping = vnode->netfs.inode.i_mapping;
- struct folio *folio;
- pgoff_t end;
-
- XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
-
_enter("{%llx:%llu},{%x @%llx}",
vnode->fid.vid, vnode->fid.vnode, len, start);
- rcu_read_lock();
-
- end = (start + len - 1) / PAGE_SIZE;
- xas_for_each(&xas, folio, end) {
- if (!folio_test_writeback(folio)) {
- kdebug("bad %x @%llx page %lx %lx",
- len, start, folio_index(folio), end);
- ASSERT(folio_test_writeback(folio));
- }
-
- trace_afs_folio_dirty(vnode, tracepoint_string("clear"), folio);
- folio_detach_private(folio);
- folio_end_writeback(folio);
- }
-
- rcu_read_unlock();
-
afs_prune_wb_keys(vnode);
_leave("");
}
@@ -451,363 +159,53 @@ try_next_key:
return afs_put_operation(op);
}
-/*
- * Extend the region to be written back to include subsequent contiguously
- * dirty pages if possible, but don't sleep while doing so.
- *
- * If this page holds new content, then we can include filler zeros in the
- * writeback.
- */
-static void afs_extend_writeback(struct address_space *mapping,
- struct afs_vnode *vnode,
- long *_count,
- loff_t start,
- loff_t max_len,
- bool new_content,
- bool caching,
- unsigned int *_len)
+static void afs_upload_to_server(struct netfs_io_subrequest *subreq)
{
- struct folio_batch fbatch;
- struct folio *folio;
- unsigned long priv;
- unsigned int psize, filler = 0;
- unsigned int f, t;
- loff_t len = *_len;
- pgoff_t index = (start + len) / PAGE_SIZE;
- bool stop = true;
- unsigned int i;
-
- XA_STATE(xas, &mapping->i_pages, index);
- folio_batch_init(&fbatch);
-
- do {
- /* Firstly, we gather up a batch of contiguous dirty pages
- * under the RCU read lock - but we can't clear the dirty flags
- * there if any of those pages are mapped.
- */
- rcu_read_lock();
-
- xas_for_each(&xas, folio, ULONG_MAX) {
- stop = true;
- if (xas_retry(&xas, folio))
- continue;
- if (xa_is_value(folio))
- break;
- if (folio_index(folio) != index)
- break;
-
- if (!folio_try_get_rcu(folio)) {
- xas_reset(&xas);
- continue;
- }
-
- /* Has the page moved or been split? */
- if (unlikely(folio != xas_reload(&xas))) {
- folio_put(folio);
- break;
- }
-
- if (!folio_trylock(folio)) {
- folio_put(folio);
- break;
- }
- if (!folio_test_dirty(folio) ||
- folio_test_writeback(folio) ||
- folio_test_fscache(folio)) {
- folio_unlock(folio);
- folio_put(folio);
- break;
- }
-
- psize = folio_size(folio);
- priv = (unsigned long)folio_get_private(folio);
- f = afs_folio_dirty_from(folio, priv);
- t = afs_folio_dirty_to(folio, priv);
- if (f != 0 && !new_content) {
- folio_unlock(folio);
- folio_put(folio);
- break;
- }
-
- len += filler + t;
- filler = psize - t;
- if (len >= max_len || *_count <= 0)
- stop = true;
- else if (t == psize || new_content)
- stop = false;
-
- index += folio_nr_pages(folio);
- if (!folio_batch_add(&fbatch, folio))
- break;
- if (stop)
- break;
- }
-
- if (!stop)
- xas_pause(&xas);
- rcu_read_unlock();
-
- /* Now, if we obtained any folios, we can shift them to being
- * writable and mark them for caching.
- */
- if (!folio_batch_count(&fbatch))
- break;
-
- for (i = 0; i < folio_batch_count(&fbatch); i++) {
- folio = fbatch.folios[i];
- trace_afs_folio_dirty(vnode, tracepoint_string("store+"), folio);
-
- if (!folio_clear_dirty_for_io(folio))
- BUG();
- folio_start_writeback(folio);
- afs_folio_start_fscache(caching, folio);
-
- *_count -= folio_nr_pages(folio);
- folio_unlock(folio);
- }
+ struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
+ ssize_t ret;
- folio_batch_release(&fbatch);
- cond_resched();
- } while (!stop);
+ _enter("%x[%x],%zx",
+ subreq->rreq->debug_id, subreq->debug_index, subreq->io_iter.count);
- *_len = len;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+ ret = afs_store_data(vnode, &subreq->io_iter, subreq->start,
+ subreq->rreq->origin == NETFS_LAUNDER_WRITE);
+ netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len,
+ false);
}
-/*
- * Synchronously write back the locked page and any subsequent non-locked dirty
- * pages.
- */
-static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
- struct writeback_control *wbc,
- struct folio *folio,
- loff_t start, loff_t end)
+static void afs_upload_to_server_worker(struct work_struct *work)
{
- struct afs_vnode *vnode = AFS_FS_I(mapping->host);
- struct iov_iter iter;
- unsigned long priv;
- unsigned int offset, to, len, max_len;
- loff_t i_size = i_size_read(&vnode->netfs.inode);
- bool new_content = test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
- bool caching = fscache_cookie_enabled(afs_vnode_cache(vnode));
- long count = wbc->nr_to_write;
- int ret;
-
- _enter(",%lx,%llx-%llx", folio_index(folio), start, end);
-
- folio_start_writeback(folio);
- afs_folio_start_fscache(caching, folio);
-
- count -= folio_nr_pages(folio);
-
- /* Find all consecutive lockable dirty pages that have contiguous
- * written regions, stopping when we find a page that is not
- * immediately lockable, is not dirty or is missing, or we reach the
- * end of the range.
- */
- priv = (unsigned long)folio_get_private(folio);
- offset = afs_folio_dirty_from(folio, priv);
- to = afs_folio_dirty_to(folio, priv);
- trace_afs_folio_dirty(vnode, tracepoint_string("store"), folio);
-
- len = to - offset;
- start += offset;
- if (start < i_size) {
- /* Trim the write to the EOF; the extra data is ignored. Also
- * put an upper limit on the size of a single storedata op.
- */
- max_len = 65536 * 4096;
- max_len = min_t(unsigned long long, max_len, end - start + 1);
- max_len = min_t(unsigned long long, max_len, i_size - start);
-
- if (len < max_len &&
- (to == folio_size(folio) || new_content))
- afs_extend_writeback(mapping, vnode, &count,
- start, max_len, new_content,
- caching, &len);
- len = min_t(loff_t, len, max_len);
- }
-
- /* We now have a contiguous set of dirty pages, each with writeback
- * set; the first page is still locked at this point, but all the rest
- * have been unlocked.
- */
- folio_unlock(folio);
-
- if (start < i_size) {
- _debug("write back %x @%llx [%llx]", len, start, i_size);
-
- /* Speculatively write to the cache. We have to fix this up
- * later if the store fails.
- */
- afs_write_to_cache(vnode, start, len, i_size, caching);
-
- iov_iter_xarray(&iter, ITER_SOURCE, &mapping->i_pages, start, len);
- ret = afs_store_data(vnode, &iter, start, false);
- } else {
- _debug("write discard %x @%llx [%llx]", len, start, i_size);
-
- /* The dirty region was entirely beyond the EOF. */
- fscache_clear_page_bits(mapping, start, len, caching);
- afs_pages_written_back(vnode, start, len);
- ret = 0;
- }
-
- switch (ret) {
- case 0:
- wbc->nr_to_write = count;
- ret = len;
- break;
+ struct netfs_io_subrequest *subreq =
+ container_of(work, struct netfs_io_subrequest, work);
- default:
- pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret);
- fallthrough;
- case -EACCES:
- case -EPERM:
- case -ENOKEY:
- case -EKEYEXPIRED:
- case -EKEYREJECTED:
- case -EKEYREVOKED:
- case -ENETRESET:
- afs_redirty_pages(wbc, mapping, start, len);
- mapping_set_error(mapping, ret);
- break;
-
- case -EDQUOT:
- case -ENOSPC:
- afs_redirty_pages(wbc, mapping, start, len);
- mapping_set_error(mapping, -ENOSPC);
- break;
-
- case -EROFS:
- case -EIO:
- case -EREMOTEIO:
- case -EFBIG:
- case -ENOENT:
- case -ENOMEDIUM:
- case -ENXIO:
- trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail);
- afs_kill_pages(mapping, start, len);
- mapping_set_error(mapping, ret);
- break;
- }
-
- _leave(" = %d", ret);
- return ret;
+ afs_upload_to_server(subreq);
}
/*
- * write a region of pages back to the server
+ * Set up write requests for a writeback slice. We need to add a write request
+ * for each write we want to make.
*/
-static int afs_writepages_region(struct address_space *mapping,
- struct writeback_control *wbc,
- loff_t start, loff_t end, loff_t *_next,
- bool max_one_loop)
+void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len)
{
- struct folio *folio;
- struct folio_batch fbatch;
- ssize_t ret;
- unsigned int i;
- int n, skips = 0;
-
- _enter("%llx,%llx,", start, end);
- folio_batch_init(&fbatch);
-
- do {
- pgoff_t index = start / PAGE_SIZE;
-
- n = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
- PAGECACHE_TAG_DIRTY, &fbatch);
-
- if (!n)
- break;
- for (i = 0; i < n; i++) {
- folio = fbatch.folios[i];
- start = folio_pos(folio); /* May regress with THPs */
-
- _debug("wback %lx", folio_index(folio));
-
- /* At this point we hold neither the i_pages lock nor the
- * page lock: the page may be truncated or invalidated
- * (changing page->mapping to NULL), or even swizzled
- * back from swapper_space to tmpfs file mapping
- */
-try_again:
- if (wbc->sync_mode != WB_SYNC_NONE) {
- ret = folio_lock_killable(folio);
- if (ret < 0) {
- folio_batch_release(&fbatch);
- return ret;
- }
- } else {
- if (!folio_trylock(folio))
- continue;
- }
-
- if (folio->mapping != mapping ||
- !folio_test_dirty(folio)) {
- start += folio_size(folio);
- folio_unlock(folio);
- continue;
- }
-
- if (folio_test_writeback(folio) ||
- folio_test_fscache(folio)) {
- folio_unlock(folio);
- if (wbc->sync_mode != WB_SYNC_NONE) {
- folio_wait_writeback(folio);
-#ifdef CONFIG_AFS_FSCACHE
- folio_wait_fscache(folio);
-#endif
- goto try_again;
- }
-
- start += folio_size(folio);
- if (wbc->sync_mode == WB_SYNC_NONE) {
- if (skips >= 5 || need_resched()) {
- *_next = start;
- folio_batch_release(&fbatch);
- _leave(" = 0 [%llx]", *_next);
- return 0;
- }
- skips++;
- }
- continue;
- }
-
- if (!folio_clear_dirty_for_io(folio))
- BUG();
- ret = afs_write_back_from_locked_folio(mapping, wbc,
- folio, start, end);
- if (ret < 0) {
- _leave(" = %zd", ret);
- folio_batch_release(&fbatch);
- return ret;
- }
-
- start += ret;
- }
+ struct netfs_io_subrequest *subreq;
- folio_batch_release(&fbatch);
- cond_resched();
- } while (wbc->nr_to_write > 0);
+ _enter("%x,%llx-%llx", wreq->debug_id, start, start + len);
- *_next = start;
- _leave(" = 0 [%llx]", *_next);
- return 0;
+ subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER,
+ start, len, afs_upload_to_server_worker);
+ if (subreq)
+ netfs_queue_write_request(subreq);
}
/*
* write some of the pending data back to the server
*/
-int afs_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
+int afs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
struct afs_vnode *vnode = AFS_FS_I(mapping->host);
- loff_t start, next;
int ret;
- _enter("");
-
/* We have to be careful as we can end up racing with setattr()
* truncating the pagecache since the caller doesn't take a lock here
* to prevent it.
@@ -817,69 +215,12 @@ int afs_writepages(struct address_space *mapping,
else if (!down_read_trylock(&vnode->validate_lock))
return 0;
- if (wbc->range_cyclic) {
- start = mapping->writeback_index * PAGE_SIZE;
- ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX,
- &next, false);
- if (ret == 0) {
- mapping->writeback_index = next / PAGE_SIZE;
- if (start > 0 && wbc->nr_to_write > 0) {
- ret = afs_writepages_region(mapping, wbc, 0,
- start, &next, false);
- if (ret == 0)
- mapping->writeback_index =
- next / PAGE_SIZE;
- }
- }
- } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
- ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX,
- &next, false);
- if (wbc->nr_to_write > 0 && ret == 0)
- mapping->writeback_index = next / PAGE_SIZE;
- } else {
- ret = afs_writepages_region(mapping, wbc,
- wbc->range_start, wbc->range_end,
- &next, false);
- }
-
+ ret = netfs_writepages(mapping, wbc);
up_read(&vnode->validate_lock);
- _leave(" = %d", ret);
return ret;
}
/*
- * write to an AFS file
- */
-ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
-{
- struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
- struct afs_file *af = iocb->ki_filp->private_data;
- ssize_t result;
- size_t count = iov_iter_count(from);
-
- _enter("{%llx:%llu},{%zu},",
- vnode->fid.vid, vnode->fid.vnode, count);
-
- if (IS_SWAPFILE(&vnode->netfs.inode)) {
- printk(KERN_INFO
- "AFS: Attempt to write to active swap file!\n");
- return -EBUSY;
- }
-
- if (!count)
- return 0;
-
- result = afs_validate(vnode, af->key);
- if (result < 0)
- return result;
-
- result = generic_file_write_iter(iocb, from);
-
- _leave(" = %zd", result);
- return result;
-}
-
-/*
* flush any dirty pages for this process, and check for write errors.
* - the return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
@@ -907,59 +248,11 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
*/
vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
{
- struct folio *folio = page_folio(vmf->page);
struct file *file = vmf->vma->vm_file;
- struct inode *inode = file_inode(file);
- struct afs_vnode *vnode = AFS_FS_I(inode);
- struct afs_file *af = file->private_data;
- unsigned long priv;
- vm_fault_t ret = VM_FAULT_RETRY;
-
- _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
-
- afs_validate(vnode, af->key);
- sb_start_pagefault(inode->i_sb);
-
- /* Wait for the page to be written to the cache before we allow it to
- * be modified. We then assume the entire page will need writing back.
- */
-#ifdef CONFIG_AFS_FSCACHE
- if (folio_test_fscache(folio) &&
- folio_wait_fscache_killable(folio) < 0)
- goto out;
-#endif
-
- if (folio_wait_writeback_killable(folio))
- goto out;
-
- if (folio_lock_killable(folio) < 0)
- goto out;
-
- /* We mustn't change folio->private until writeback is complete as that
- * details the portion of the page we need to write back and we might
- * need to redirty the page if there's a problem.
- */
- if (folio_wait_writeback_killable(folio) < 0) {
- folio_unlock(folio);
- goto out;
- }
-
- priv = afs_folio_dirty(folio, 0, folio_size(folio));
- priv = afs_folio_dirty_mmapped(priv);
- if (folio_test_private(folio)) {
- folio_change_private(folio, (void *)priv);
- trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite+"), folio);
- } else {
- folio_attach_private(folio, (void *)priv);
- trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite"), folio);
- }
- file_update_time(file);
-
- ret = VM_FAULT_LOCKED;
-out:
- sb_end_pagefault(inode->i_sb);
- return ret;
+ if (afs_validate(AFS_FS_I(file_inode(file)), afs_file_key(file)) < 0)
+ return VM_FAULT_SIGBUS;
+ return netfs_page_mkwrite(vmf, NULL);
}
/*
@@ -989,64 +282,3 @@ void afs_prune_wb_keys(struct afs_vnode *vnode)
afs_put_wb_key(wbk);
}
}
-
-/*
- * Clean up a page during invalidation.
- */
-int afs_launder_folio(struct folio *folio)
-{
- struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
- struct iov_iter iter;
- struct bio_vec bv;
- unsigned long priv;
- unsigned int f, t;
- int ret = 0;
-
- _enter("{%lx}", folio->index);
-
- priv = (unsigned long)folio_get_private(folio);
- if (folio_clear_dirty_for_io(folio)) {
- f = 0;
- t = folio_size(folio);
- if (folio_test_private(folio)) {
- f = afs_folio_dirty_from(folio, priv);
- t = afs_folio_dirty_to(folio, priv);
- }
-
- bvec_set_folio(&bv, folio, t - f, f);
- iov_iter_bvec(&iter, ITER_SOURCE, &bv, 1, bv.bv_len);
-
- trace_afs_folio_dirty(vnode, tracepoint_string("launder"), folio);
- ret = afs_store_data(vnode, &iter, folio_pos(folio) + f, true);
- }
-
- trace_afs_folio_dirty(vnode, tracepoint_string("laundered"), folio);
- folio_detach_private(folio);
- folio_wait_fscache(folio);
- return ret;
-}
-
-/*
- * Deal with the completion of writing the data to the cache.
- */
-static void afs_write_to_cache_done(void *priv, ssize_t transferred_or_error,
- bool was_async)
-{
- struct afs_vnode *vnode = priv;
-
- if (IS_ERR_VALUE(transferred_or_error) &&
- transferred_or_error != -ENOBUFS)
- afs_invalidate_cache(vnode, 0);
-}
-
-/*
- * Save the write to the cache also.
- */
-static void afs_write_to_cache(struct afs_vnode *vnode,
- loff_t start, size_t len, loff_t i_size,
- bool caching)
-{
- fscache_write_to_cache(afs_vnode_cache(vnode),
- vnode->netfs.inode.i_mapping, start, len, i_size,
- afs_write_to_cache_done, vnode, caching);
-}
diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig
index 8df715640a48..c5a070550ee3 100644
--- a/fs/cachefiles/Kconfig
+++ b/fs/cachefiles/Kconfig
@@ -2,7 +2,7 @@
config CACHEFILES
tristate "Filesystem caching on files"
- depends on FSCACHE && BLOCK
+ depends on NETFS_SUPPORT && FSCACHE && BLOCK
help
This permits use of a mounted filesystem as a cache for other
filesystems - primarily networking filesystems - thus allowing fast
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 4a87c9d714a9..d33169f0018b 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -246,7 +246,7 @@ extern bool cachefiles_begin_operation(struct netfs_cache_resources *cres,
enum fscache_want_state want_state);
extern int __cachefiles_prepare_write(struct cachefiles_object *object,
struct file *file,
- loff_t *_start, size_t *_len,
+ loff_t *_start, size_t *_len, size_t upper_len,
bool no_space_allocated_yet);
extern int __cachefiles_write(struct cachefiles_object *object,
struct file *file,
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 5857241c5918..1d685357e67f 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -517,18 +517,26 @@ cachefiles_prepare_ondemand_read(struct netfs_cache_resources *cres,
*/
int __cachefiles_prepare_write(struct cachefiles_object *object,
struct file *file,
- loff_t *_start, size_t *_len,
+ loff_t *_start, size_t *_len, size_t upper_len,
bool no_space_allocated_yet)
{
struct cachefiles_cache *cache = object->volume->cache;
loff_t start = *_start, pos;
- size_t len = *_len, down;
+ size_t len = *_len;
int ret;
/* Round to DIO size */
- down = start - round_down(start, PAGE_SIZE);
- *_start = start - down;
- *_len = round_up(down + len, PAGE_SIZE);
+ start = round_down(*_start, PAGE_SIZE);
+ if (start != *_start || *_len > upper_len) {
+ /* Probably asked to cache a streaming write written into the
+ * pagecache when the cookie was temporarily out of service due to
+ * culling.
+ */
+ fscache_count_dio_misfit();
+ return -ENOBUFS;
+ }
+
+ *_len = round_up(len, PAGE_SIZE);
/* We need to work out whether there's sufficient disk space to perform
* the write - but we can skip that check if we have space already
@@ -539,7 +547,7 @@ int __cachefiles_prepare_write(struct cachefiles_object *object,
pos = cachefiles_inject_read_error();
if (pos == 0)
- pos = vfs_llseek(file, *_start, SEEK_DATA);
+ pos = vfs_llseek(file, start, SEEK_DATA);
if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
if (pos == -ENXIO)
goto check_space; /* Unallocated tail */
@@ -547,7 +555,7 @@ int __cachefiles_prepare_write(struct cachefiles_object *object,
cachefiles_trace_seek_error);
return pos;
}
- if ((u64)pos >= (u64)*_start + *_len)
+ if ((u64)pos >= (u64)start + *_len)
goto check_space; /* Unallocated region */
/* We have a block that's at least partially filled - if we're low on
@@ -560,13 +568,13 @@ int __cachefiles_prepare_write(struct cachefiles_object *object,
pos = cachefiles_inject_read_error();
if (pos == 0)
- pos = vfs_llseek(file, *_start, SEEK_HOLE);
+ pos = vfs_llseek(file, start, SEEK_HOLE);
if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
trace_cachefiles_io_error(object, file_inode(file), pos,
cachefiles_trace_seek_error);
return pos;
}
- if ((u64)pos >= (u64)*_start + *_len)
+ if ((u64)pos >= (u64)start + *_len)
return 0; /* Fully allocated */
/* Partially allocated, but insufficient space: cull. */
@@ -574,7 +582,7 @@ int __cachefiles_prepare_write(struct cachefiles_object *object,
ret = cachefiles_inject_remove_error();
if (ret == 0)
ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- *_start, *_len);
+ start, *_len);
if (ret < 0) {
trace_cachefiles_io_error(object, file_inode(file), ret,
cachefiles_trace_fallocate_error);
@@ -591,8 +599,8 @@ check_space:
}
static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
- loff_t *_start, size_t *_len, loff_t i_size,
- bool no_space_allocated_yet)
+ loff_t *_start, size_t *_len, size_t upper_len,
+ loff_t i_size, bool no_space_allocated_yet)
{
struct cachefiles_object *object = cachefiles_cres_object(cres);
struct cachefiles_cache *cache = object->volume->cache;
@@ -608,7 +616,7 @@ static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
cachefiles_begin_secure(cache, &saved_cred);
ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
- _start, _len,
+ _start, _len, upper_len,
no_space_allocated_yet);
cachefiles_end_secure(cache, saved_cred);
return ret;
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
index b8fbbb1961bb..5fd74ec60bef 100644
--- a/fs/cachefiles/ondemand.c
+++ b/fs/cachefiles/ondemand.c
@@ -50,7 +50,7 @@ static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb,
return -ENOBUFS;
cachefiles_begin_secure(cache, &saved_cred);
- ret = __cachefiles_prepare_write(object, file, &pos, &len, true);
+ ret = __cachefiles_prepare_write(object, file, &pos, &len, len, true);
cachefiles_end_secure(cache, saved_cred);
if (ret < 0)
return ret;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 13af429ab030..500a87b68a9a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -159,27 +159,7 @@ static void ceph_invalidate_folio(struct folio *folio, size_t offset,
ceph_put_snap_context(snapc);
}
- folio_wait_fscache(folio);
-}
-
-static bool ceph_release_folio(struct folio *folio, gfp_t gfp)
-{
- struct inode *inode = folio->mapping->host;
- struct ceph_client *cl = ceph_inode_to_client(inode);
-
- doutc(cl, "%llx.%llx idx %lu (%sdirty)\n", ceph_vinop(inode),
- folio->index, folio_test_dirty(folio) ? "" : "not ");
-
- if (folio_test_private(folio))
- return false;
-
- if (folio_test_fscache(folio)) {
- if (current_is_kswapd() || !(gfp & __GFP_FS))
- return false;
- folio_wait_fscache(folio);
- }
- ceph_fscache_note_page_release(inode);
- return true;
+ netfs_invalidate_folio(folio, offset, length);
}
static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
@@ -509,7 +489,6 @@ static void ceph_netfs_free_request(struct netfs_io_request *rreq)
const struct netfs_request_ops ceph_netfs_ops = {
.init_request = ceph_init_request,
.free_request = ceph_netfs_free_request,
- .begin_cache_operation = ceph_begin_cache_operation,
.issue_read = ceph_netfs_issue_read,
.expand_readahead = ceph_netfs_expand_readahead,
.clamp_length = ceph_netfs_clamp_length,
@@ -1586,7 +1565,7 @@ const struct address_space_operations ceph_aops = {
.write_end = ceph_write_end,
.dirty_folio = ceph_dirty_folio,
.invalidate_folio = ceph_invalidate_folio,
- .release_folio = ceph_release_folio,
+ .release_folio = netfs_release_folio,
.direct_IO = noop_direct_IO,
};
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
index dc502daac49a..20efac020394 100644
--- a/fs/ceph/cache.h
+++ b/fs/ceph/cache.h
@@ -43,38 +43,19 @@ static inline void ceph_fscache_resize(struct inode *inode, loff_t to)
}
}
-static inline void ceph_fscache_unpin_writeback(struct inode *inode,
+static inline int ceph_fscache_unpin_writeback(struct inode *inode,
struct writeback_control *wbc)
{
- fscache_unpin_writeback(wbc, ceph_fscache_cookie(ceph_inode(inode)));
+ return netfs_unpin_writeback(inode, wbc);
}
-static inline int ceph_fscache_dirty_folio(struct address_space *mapping,
- struct folio *folio)
-{
- struct ceph_inode_info *ci = ceph_inode(mapping->host);
-
- return fscache_dirty_folio(mapping, folio, ceph_fscache_cookie(ci));
-}
-
-static inline int ceph_begin_cache_operation(struct netfs_io_request *rreq)
-{
- struct fscache_cookie *cookie = ceph_fscache_cookie(ceph_inode(rreq->inode));
-
- return fscache_begin_read_operation(&rreq->cache_resources, cookie);
-}
+#define ceph_fscache_dirty_folio netfs_dirty_folio
static inline bool ceph_is_cache_enabled(struct inode *inode)
{
return fscache_cookie_enabled(ceph_fscache_cookie(ceph_inode(inode)));
}
-static inline void ceph_fscache_note_page_release(struct inode *inode)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
-
- fscache_note_page_release(ceph_fscache_cookie(ci));
-}
#else /* CONFIG_CEPH_FSCACHE */
static inline int ceph_fscache_register_fs(struct ceph_fs_client* fsc,
struct fs_context *fc)
@@ -119,30 +100,18 @@ static inline void ceph_fscache_resize(struct inode *inode, loff_t to)
{
}
-static inline void ceph_fscache_unpin_writeback(struct inode *inode,
- struct writeback_control *wbc)
+static inline int ceph_fscache_unpin_writeback(struct inode *inode,
+ struct writeback_control *wbc)
{
+ return 0;
}
-static inline int ceph_fscache_dirty_folio(struct address_space *mapping,
- struct folio *folio)
-{
- return filemap_dirty_folio(mapping, folio);
-}
+#define ceph_fscache_dirty_folio filemap_dirty_folio
static inline bool ceph_is_cache_enabled(struct inode *inode)
{
return false;
}
-
-static inline int ceph_begin_cache_operation(struct netfs_io_request *rreq)
-{
- return -ENOBUFS;
-}
-
-static inline void ceph_fscache_note_page_release(struct inode *inode)
-{
-}
#endif /* CONFIG_CEPH_FSCACHE */
#endif
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 0679240f06db..0c25d326afc4 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -574,7 +574,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
doutc(fsc->client, "%p\n", &ci->netfs.inode);
/* Set parameters for the netfs library */
- netfs_inode_init(&ci->netfs, &ceph_netfs_ops);
+ netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false);
spin_lock_init(&ci->i_ceph_lock);
@@ -694,7 +694,7 @@ void ceph_evict_inode(struct inode *inode)
percpu_counter_dec(&mdsc->metric.total_inodes);
truncate_inode_pages_final(&inode->i_data);
- if (inode->i_state & I_PINNING_FSCACHE_WB)
+ if (inode->i_state & I_PINNING_NETFS_WB)
ceph_fscache_unuse_cookie(inode, true);
clear_inode(inode);
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index 1d318f85232d..fffd3919343e 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -114,8 +114,11 @@ config EROFS_FS_ZIP_DEFLATE
config EROFS_FS_ONDEMAND
bool "EROFS fscache-based on-demand read support"
- depends on CACHEFILES_ONDEMAND && (EROFS_FS=m && FSCACHE || EROFS_FS=y && FSCACHE=y)
- default n
+ depends on EROFS_FS
+ select NETFS_SUPPORT
+ select FSCACHE
+ select CACHEFILES
+ select CACHEFILES_ONDEMAND
help
This permits EROFS to use fscache-backed data blobs with on-demand
read support.
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1767493dffda..3d84fcc471c6 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1675,11 +1675,11 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
inode->i_state |= I_DIRTY_PAGES;
- else if (unlikely(inode->i_state & I_PINNING_FSCACHE_WB)) {
+ else if (unlikely(inode->i_state & I_PINNING_NETFS_WB)) {
if (!(inode->i_state & I_DIRTY_PAGES)) {
- inode->i_state &= ~I_PINNING_FSCACHE_WB;
- wbc->unpinned_fscache_wb = true;
- dirty |= I_PINNING_FSCACHE_WB; /* Cause write_inode */
+ inode->i_state &= ~I_PINNING_NETFS_WB;
+ wbc->unpinned_netfs_wb = true;
+ dirty |= I_PINNING_NETFS_WB; /* Cause write_inode */
}
}
@@ -1691,7 +1691,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
if (ret == 0)
ret = err;
}
- wbc->unpinned_fscache_wb = false;
+ wbc->unpinned_netfs_wb = false;
trace_writeback_single_inode(inode, wbc, nr_to_write);
return ret;
}
diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig
deleted file mode 100644
index b313a978ae0a..000000000000
--- a/fs/fscache/Kconfig
+++ /dev/null
@@ -1,40 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-config FSCACHE
- tristate "General filesystem local caching manager"
- select NETFS_SUPPORT
- help
- This option enables a generic filesystem caching manager that can be
- used by various network and other filesystems to cache data locally.
- Different sorts of caches can be plugged in, depending on the
- resources available.
-
- See Documentation/filesystems/caching/fscache.rst for more information.
-
-config FSCACHE_STATS
- bool "Gather statistical information on local caching"
- depends on FSCACHE && PROC_FS
- select NETFS_STATS
- help
- This option causes statistical information to be gathered on local
- caching and exported through file:
-
- /proc/fs/fscache/stats
-
- The gathering of statistics adds a certain amount of overhead to
- execution as there are a quite a few stats gathered, and on a
- multi-CPU system these may be on cachelines that keep bouncing
- between CPUs. On the other hand, the stats are very useful for
- debugging purposes. Saying 'Y' here is recommended.
-
- See Documentation/filesystems/caching/fscache.rst for more information.
-
-config FSCACHE_DEBUG
- bool "Debug FS-Cache"
- depends on FSCACHE
- help
- This permits debugging to be dynamically enabled in the local caching
- management module. If this is set, the debugging output may be
- enabled by setting bits in /sys/modules/fscache/parameter/debug.
-
- See Documentation/filesystems/caching/fscache.rst for more information.
diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile
deleted file mode 100644
index afb090ea16c4..000000000000
--- a/fs/fscache/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for general filesystem caching code
-#
-
-fscache-y := \
- cache.o \
- cookie.o \
- io.o \
- main.o \
- volume.o
-
-fscache-$(CONFIG_PROC_FS) += proc.o
-fscache-$(CONFIG_FSCACHE_STATS) += stats.o
-
-obj-$(CONFIG_FSCACHE) := fscache.o
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
deleted file mode 100644
index 1336f517e9b1..000000000000
--- a/fs/fscache/internal.h
+++ /dev/null
@@ -1,277 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* Internal definitions for FS-Cache
- *
- * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- */
-
-#ifdef pr_fmt
-#undef pr_fmt
-#endif
-
-#define pr_fmt(fmt) "FS-Cache: " fmt
-
-#include <linux/slab.h>
-#include <linux/fscache-cache.h>
-#include <trace/events/fscache.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-
-/*
- * cache.c
- */
-#ifdef CONFIG_PROC_FS
-extern const struct seq_operations fscache_caches_seq_ops;
-#endif
-bool fscache_begin_cache_access(struct fscache_cache *cache, enum fscache_access_trace why);
-void fscache_end_cache_access(struct fscache_cache *cache, enum fscache_access_trace why);
-struct fscache_cache *fscache_lookup_cache(const char *name, bool is_cache);
-void fscache_put_cache(struct fscache_cache *cache, enum fscache_cache_trace where);
-
-static inline enum fscache_cache_state fscache_cache_state(const struct fscache_cache *cache)
-{
- return smp_load_acquire(&cache->state);
-}
-
-static inline bool fscache_cache_is_live(const struct fscache_cache *cache)
-{
- return fscache_cache_state(cache) == FSCACHE_CACHE_IS_ACTIVE;
-}
-
-static inline void fscache_set_cache_state(struct fscache_cache *cache,
- enum fscache_cache_state new_state)
-{
- smp_store_release(&cache->state, new_state);
-
-}
-
-static inline bool fscache_set_cache_state_maybe(struct fscache_cache *cache,
- enum fscache_cache_state old_state,
- enum fscache_cache_state new_state)
-{
- return try_cmpxchg_release(&cache->state, &old_state, new_state);
-}
-
-/*
- * cookie.c
- */
-extern struct kmem_cache *fscache_cookie_jar;
-#ifdef CONFIG_PROC_FS
-extern const struct seq_operations fscache_cookies_seq_ops;
-#endif
-extern struct timer_list fscache_cookie_lru_timer;
-
-extern void fscache_print_cookie(struct fscache_cookie *cookie, char prefix);
-extern bool fscache_begin_cookie_access(struct fscache_cookie *cookie,
- enum fscache_access_trace why);
-
-static inline void fscache_see_cookie(struct fscache_cookie *cookie,
- enum fscache_cookie_trace where)
-{
- trace_fscache_cookie(cookie->debug_id, refcount_read(&cookie->ref),
- where);
-}
-
-/*
- * main.c
- */
-extern unsigned fscache_debug;
-
-extern unsigned int fscache_hash(unsigned int salt, const void *data, size_t len);
-
-/*
- * proc.c
- */
-#ifdef CONFIG_PROC_FS
-extern int __init fscache_proc_init(void);
-extern void fscache_proc_cleanup(void);
-#else
-#define fscache_proc_init() (0)
-#define fscache_proc_cleanup() do {} while (0)
-#endif
-
-/*
- * stats.c
- */
-#ifdef CONFIG_FSCACHE_STATS
-extern atomic_t fscache_n_volumes;
-extern atomic_t fscache_n_volumes_collision;
-extern atomic_t fscache_n_volumes_nomem;
-extern atomic_t fscache_n_cookies;
-extern atomic_t fscache_n_cookies_lru;
-extern atomic_t fscache_n_cookies_lru_expired;
-extern atomic_t fscache_n_cookies_lru_removed;
-extern atomic_t fscache_n_cookies_lru_dropped;
-
-extern atomic_t fscache_n_acquires;
-extern atomic_t fscache_n_acquires_ok;
-extern atomic_t fscache_n_acquires_oom;
-
-extern atomic_t fscache_n_invalidates;
-
-extern atomic_t fscache_n_relinquishes;
-extern atomic_t fscache_n_relinquishes_retire;
-extern atomic_t fscache_n_relinquishes_dropped;
-
-extern atomic_t fscache_n_resizes;
-extern atomic_t fscache_n_resizes_null;
-
-static inline void fscache_stat(atomic_t *stat)
-{
- atomic_inc(stat);
-}
-
-static inline void fscache_stat_d(atomic_t *stat)
-{
- atomic_dec(stat);
-}
-
-#define __fscache_stat(stat) (stat)
-
-int fscache_stats_show(struct seq_file *m, void *v);
-#else
-
-#define __fscache_stat(stat) (NULL)
-#define fscache_stat(stat) do {} while (0)
-#define fscache_stat_d(stat) do {} while (0)
-#endif
-
-/*
- * volume.c
- */
-#ifdef CONFIG_PROC_FS
-extern const struct seq_operations fscache_volumes_seq_ops;
-#endif
-
-struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
- enum fscache_volume_trace where);
-void fscache_put_volume(struct fscache_volume *volume,
- enum fscache_volume_trace where);
-bool fscache_begin_volume_access(struct fscache_volume *volume,
- struct fscache_cookie *cookie,
- enum fscache_access_trace why);
-void fscache_create_volume(struct fscache_volume *volume, bool wait);
-
-
-/*****************************************************************************/
-/*
- * debug tracing
- */
-#define dbgprintk(FMT, ...) \
- printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
-
-#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
-#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
-#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)
-
-#define kjournal(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
-
-#ifdef __KDEBUG
-#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
-#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
-#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)
-
-#elif defined(CONFIG_FSCACHE_DEBUG)
-#define _enter(FMT, ...) \
-do { \
- if (__do_kdebug(ENTER)) \
- kenter(FMT, ##__VA_ARGS__); \
-} while (0)
-
-#define _leave(FMT, ...) \
-do { \
- if (__do_kdebug(LEAVE)) \
- kleave(FMT, ##__VA_ARGS__); \
-} while (0)
-
-#define _debug(FMT, ...) \
-do { \
- if (__do_kdebug(DEBUG)) \
- kdebug(FMT, ##__VA_ARGS__); \
-} while (0)
-
-#else
-#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__)
-#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
-#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
-#endif
-
-/*
- * determine whether a particular optional debugging point should be logged
- * - we need to go through three steps to persuade cpp to correctly join the
- * shorthand in FSCACHE_DEBUG_LEVEL with its prefix
- */
-#define ____do_kdebug(LEVEL, POINT) \
- unlikely((fscache_debug & \
- (FSCACHE_POINT_##POINT << (FSCACHE_DEBUG_ ## LEVEL * 3))))
-#define ___do_kdebug(LEVEL, POINT) \
- ____do_kdebug(LEVEL, POINT)
-#define __do_kdebug(POINT) \
- ___do_kdebug(FSCACHE_DEBUG_LEVEL, POINT)
-
-#define FSCACHE_DEBUG_CACHE 0
-#define FSCACHE_DEBUG_COOKIE 1
-#define FSCACHE_DEBUG_OBJECT 2
-#define FSCACHE_DEBUG_OPERATION 3
-
-#define FSCACHE_POINT_ENTER 1
-#define FSCACHE_POINT_LEAVE 2
-#define FSCACHE_POINT_DEBUG 4
-
-#ifndef FSCACHE_DEBUG_LEVEL
-#define FSCACHE_DEBUG_LEVEL CACHE
-#endif
-
-/*
- * assertions
- */
-#if 1 /* defined(__KDEBUGALL) */
-
-#define ASSERT(X) \
-do { \
- if (unlikely(!(X))) { \
- pr_err("\n"); \
- pr_err("Assertion failed\n"); \
- BUG(); \
- } \
-} while (0)
-
-#define ASSERTCMP(X, OP, Y) \
-do { \
- if (unlikely(!((X) OP (Y)))) { \
- pr_err("\n"); \
- pr_err("Assertion failed\n"); \
- pr_err("%lx " #OP " %lx is false\n", \
- (unsigned long)(X), (unsigned long)(Y)); \
- BUG(); \
- } \
-} while (0)
-
-#define ASSERTIF(C, X) \
-do { \
- if (unlikely((C) && !(X))) { \
- pr_err("\n"); \
- pr_err("Assertion failed\n"); \
- BUG(); \
- } \
-} while (0)
-
-#define ASSERTIFCMP(C, X, OP, Y) \
-do { \
- if (unlikely((C) && !((X) OP (Y)))) { \
- pr_err("\n"); \
- pr_err("Assertion failed\n"); \
- pr_err("%lx " #OP " %lx is false\n", \
- (unsigned long)(X), (unsigned long)(Y)); \
- BUG(); \
- } \
-} while (0)
-
-#else
-
-#define ASSERT(X) do {} while (0)
-#define ASSERTCMP(X, OP, Y) do {} while (0)
-#define ASSERTIF(C, X) do {} while (0)
-#define ASSERTIFCMP(C, X, OP, Y) do {} while (0)
-
-#endif /* assert or not */
diff --git a/fs/netfs/Kconfig b/fs/netfs/Kconfig
index b4db21022cb4..bec805e0c44c 100644
--- a/fs/netfs/Kconfig
+++ b/fs/netfs/Kconfig
@@ -21,3 +21,42 @@ config NETFS_STATS
multi-CPU system these may be on cachelines that keep bouncing
between CPUs. On the other hand, the stats are very useful for
debugging purposes. Saying 'Y' here is recommended.
+
+config FSCACHE
+ bool "General filesystem local caching manager"
+ depends on NETFS_SUPPORT
+ help
+ This option enables a generic filesystem caching manager that can be
+ used by various network and other filesystems to cache data locally.
+ Different sorts of caches can be plugged in, depending on the
+ resources available.
+
+ See Documentation/filesystems/caching/fscache.rst for more information.
+
+config FSCACHE_STATS
+ bool "Gather statistical information on local caching"
+ depends on FSCACHE && PROC_FS
+ select NETFS_STATS
+ help
+ This option causes statistical information to be gathered on local
+ caching and exported through file:
+
+ /proc/fs/fscache/stats
+
+ The gathering of statistics adds a certain amount of overhead to
+ execution as there are quite a few stats gathered, and on a
+ multi-CPU system these may be on cachelines that keep bouncing
+ between CPUs. On the other hand, the stats are very useful for
+ debugging purposes. Saying 'Y' here is recommended.
+
+ See Documentation/filesystems/caching/fscache.rst for more information.
+
+config FSCACHE_DEBUG
+ bool "Debug FS-Cache"
+ depends on FSCACHE
+ help
+ This permits debugging to be dynamically enabled in the local caching
+ management module. If this is set, the debugging output may be
+ enabled by setting bits in /sys/modules/fscache/parameter/debug.
+
+ See Documentation/filesystems/caching/fscache.rst for more information.
diff --git a/fs/netfs/Makefile b/fs/netfs/Makefile
index 386d6fb92793..d4d1d799819e 100644
--- a/fs/netfs/Makefile
+++ b/fs/netfs/Makefile
@@ -2,11 +2,29 @@
netfs-y := \
buffered_read.o \
+ buffered_write.o \
+ direct_read.o \
+ direct_write.o \
io.o \
iterator.o \
+ locking.o \
main.o \
- objects.o
+ misc.o \
+ objects.o \
+ output.o
netfs-$(CONFIG_NETFS_STATS) += stats.o
-obj-$(CONFIG_NETFS_SUPPORT) := netfs.o
+netfs-$(CONFIG_FSCACHE) += \
+ fscache_cache.o \
+ fscache_cookie.o \
+ fscache_io.o \
+ fscache_main.o \
+ fscache_volume.o
+
+ifeq ($(CONFIG_PROC_FS),y)
+netfs-$(CONFIG_FSCACHE) += fscache_proc.o
+endif
+netfs-$(CONFIG_FSCACHE_STATS) += fscache_stats.o
+
+obj-$(CONFIG_NETFS_SUPPORT) += netfs.o
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 2cd3ccf4c439..a59e7b2edaac 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -16,6 +16,7 @@
void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
{
struct netfs_io_subrequest *subreq;
+ struct netfs_folio *finfo;
struct folio *folio;
pgoff_t start_page = rreq->start / PAGE_SIZE;
pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
@@ -63,6 +64,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
break;
}
if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
+ trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
folio_start_fscache(folio);
folio_started = true;
}
@@ -86,6 +88,15 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
if (!pg_failed) {
flush_dcache_folio(folio);
+ finfo = netfs_folio_info(folio);
+ if (finfo) {
+ trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
+ if (finfo->netfs_group)
+ folio_change_private(folio, finfo->netfs_group);
+ else
+ folio_detach_private(folio);
+ kfree(finfo);
+ }
folio_mark_uptodate(folio);
}
@@ -147,6 +158,15 @@ static void netfs_rreq_expand(struct netfs_io_request *rreq,
}
}
+/*
+ * Begin a cache read operation on @rreq's cache resources, using the cookie
+ * from netfs_i_cookie(@ctx).  Returns fscache_begin_read_operation()'s result.
+ */
+static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
+{
+ return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
+}
+
/**
* netfs_readahead - Helper to manage a read request
* @ractl: The description of the readahead request
@@ -180,11 +200,9 @@ void netfs_readahead(struct readahead_control *ractl)
if (IS_ERR(rreq))
return;
- if (ctx->ops->begin_cache_operation) {
- ret = ctx->ops->begin_cache_operation(rreq);
- if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
- goto cleanup_free;
- }
+ ret = netfs_begin_cache_read(rreq, ctx);
+ if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
+ goto cleanup_free;
netfs_stat(&netfs_n_rh_readahead);
trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
@@ -192,6 +210,10 @@ void netfs_readahead(struct readahead_control *ractl)
netfs_rreq_expand(rreq, ractl);
+ /* Set up the output buffer */
+ iov_iter_xarray(&rreq->iter, ITER_DEST, &ractl->mapping->i_pages,
+ rreq->start, rreq->len);
+
/* Drop the refs on the folios here rather than in the cache or
* filesystem. The locks will be dropped in netfs_rreq_unlock().
*/
@@ -199,6 +221,7 @@ void netfs_readahead(struct readahead_control *ractl)
;
netfs_begin_read(rreq, false);
+ netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
return;
cleanup_free:
@@ -226,6 +249,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
struct address_space *mapping = folio_file_mapping(folio);
struct netfs_io_request *rreq;
struct netfs_inode *ctx = netfs_inode(mapping->host);
+ struct folio *sink = NULL;
int ret;
_enter("%lx", folio_index(folio));
@@ -238,15 +262,64 @@ int netfs_read_folio(struct file *file, struct folio *folio)
goto alloc_error;
}
- if (ctx->ops->begin_cache_operation) {
- ret = ctx->ops->begin_cache_operation(rreq);
- if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
- goto discard;
- }
+ ret = netfs_begin_cache_read(rreq, ctx);
+ if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
+ goto discard;
netfs_stat(&netfs_n_rh_readpage);
trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
- return netfs_begin_read(rreq, true);
+
+ /* Set up the output buffer */
+ if (folio_test_dirty(folio)) {
+ /* Handle someone trying to read from an unflushed streaming
+ * write. We fiddle the buffer so that a gap at the beginning
+ * and/or a gap at the end get copied to, but the middle is
+ * discarded.
+ */
+ struct netfs_folio *finfo = netfs_folio_info(folio);
+ struct bio_vec *bvec;
+ unsigned int from = finfo->dirty_offset;
+ unsigned int to = from + finfo->dirty_len;
+ unsigned int off = 0, i = 0;
+ size_t flen = folio_size(folio);
+ size_t nr_bvec = flen / PAGE_SIZE + 2;
+ size_t part;
+
+ ret = -ENOMEM;
+ bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
+ if (!bvec)
+ goto discard;
+
+ sink = folio_alloc(GFP_KERNEL, 0);
+ if (!sink)
+ goto discard;
+
+ trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
+
+ rreq->direct_bv = bvec;
+ rreq->direct_bv_count = nr_bvec;
+ if (from > 0) {
+ bvec_set_folio(&bvec[i++], folio, from, 0);
+ off = from;
+ }
+ while (off < to) {
+ part = min_t(size_t, to - off, PAGE_SIZE);
+ bvec_set_folio(&bvec[i++], sink, part, 0);
+ off += part;
+ }
+ if (to < flen)
+ bvec_set_folio(&bvec[i++], folio, flen - to, to);
+ iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
+ } else {
+ iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
+ rreq->start, rreq->len);
+ }
+
+ ret = netfs_begin_read(rreq, true);
+ if (sink)
+ folio_put(sink);
+ netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ return ret < 0 ? ret : 0;
discard:
netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
@@ -390,11 +463,9 @@ retry:
rreq->no_unlock_folio = folio_index(folio);
__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
- if (ctx->ops->begin_cache_operation) {
- ret = ctx->ops->begin_cache_operation(rreq);
- if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
- goto error_put;
- }
+ ret = netfs_begin_cache_read(rreq, ctx);
+ if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
+ goto error_put;
netfs_stat(&netfs_n_rh_write_begin);
trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
@@ -405,6 +476,10 @@ retry:
ractl._nr_pages = folio_nr_pages(folio);
netfs_rreq_expand(rreq, &ractl);
+ /* Set up the output buffer */
+ iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
+ rreq->start, rreq->len);
+
/* We hold the folio locks, so we can drop the references */
folio_get(folio);
while (readahead_folio(&ractl))
@@ -413,6 +488,7 @@ retry:
ret = netfs_begin_read(rreq, true);
if (ret < 0)
goto error;
+ netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
have_folio:
ret = folio_wait_fscache_killable(folio);
@@ -434,3 +510,124 @@ error:
return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
+
+/*
+ * Preload the data into a page we're proposing to write into.
+ */
+int netfs_prefetch_for_write(struct file *file, struct folio *folio,
+ size_t offset, size_t len)
+{
+ struct netfs_io_request *rreq;
+ struct address_space *mapping = folio_file_mapping(folio);
+ struct netfs_inode *ctx = netfs_inode(mapping->host);
+ unsigned long long start = folio_pos(folio);
+ size_t flen = folio_size(folio);
+ int ret;
+
+ _enter("%zx @%llx", flen, start);
+
+ ret = -ENOMEM;
+
+ rreq = netfs_alloc_request(mapping, file, start, flen,
+ NETFS_READ_FOR_WRITE);
+ if (IS_ERR(rreq)) {
+ ret = PTR_ERR(rreq);
+ goto error;
+ }
+
+ rreq->no_unlock_folio = folio_index(folio);
+ __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
+ ret = netfs_begin_cache_read(rreq, ctx);
+ if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
+ goto error_put;
+
+ netfs_stat(&netfs_n_rh_write_begin);
+ trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);
+
+ /* Set up the output buffer */
+ iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
+ rreq->start, rreq->len);
+
+ ret = netfs_begin_read(rreq, true);
+ netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ return ret;
+
+error_put:
+ netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/**
+ * netfs_buffered_read_iter - Filesystem buffered I/O read routine
+ * @iocb: kernel I/O control block
+ * @iter: destination for the data read
+ *
+ * This is the ->read_iter() routine for all filesystems that can use the page
+ * cache directly.
+ *
+ * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
+ * returned when no data can be read without waiting for I/O requests to
+ * complete; it doesn't prevent readahead.
+ *
+ * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
+ * shall be made for the read or for readahead. When no data can be read,
+ * -EAGAIN shall be returned. When readahead would be triggered, a partial,
+ * possibly empty read shall be returned.
+ *
+ * Return:
+ * * number of bytes copied, even for partial reads
+ * * negative error code (or 0 if IOCB_NOIO) if nothing was read
+ */
+ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct netfs_inode *ictx = netfs_inode(inode);
+ ssize_t ret;
+
+ if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
+ test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
+ return -EINVAL;
+
+ ret = netfs_start_io_read(inode);
+ if (ret == 0) {
+ ret = filemap_read(iocb, iter, 0);
+ netfs_end_io_read(inode);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(netfs_buffered_read_iter);
+
+/**
+ * netfs_file_read_iter - Generic filesystem read routine
+ * @iocb: kernel I/O control block
+ * @iter: destination for the data read
+ *
+ * This is the ->read_iter() routine for all filesystems that can use the page
+ * cache directly.
+ *
+ * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
+ * returned when no data can be read without waiting for I/O requests to
+ * complete; it doesn't prevent readahead.
+ *
+ * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
+ * shall be made for the read or for readahead. When no data can be read,
+ * -EAGAIN shall be returned. When readahead would be triggered, a partial,
+ * possibly empty read shall be returned.
+ *
+ * Return:
+ * * number of bytes copied, even for partial reads
+ * * negative error code (or 0 if IOCB_NOIO) if nothing was read
+ */
+ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);
+
+ if ((iocb->ki_flags & IOCB_DIRECT) ||
+ test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
+ return netfs_unbuffered_read_iter(iocb, iter);
+
+ return netfs_buffered_read_iter(iocb, iter);
+}
+EXPORT_SYMBOL(netfs_file_read_iter);
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
new file mode 100644
index 000000000000..93dc76f34e39
--- /dev/null
+++ b/fs/netfs/buffered_write.c
@@ -0,0 +1,1253 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Network filesystem high-level write support.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/pagevec.h>
+#include "internal.h"
+
+/*
+ * Determined write method. Adjust netfs_folio_traces if this is changed.
+ */
+enum netfs_how_to_modify {
+ NETFS_FOLIO_IS_UPTODATE, /* Folio is uptodate already */
+ NETFS_JUST_PREFETCH, /* We have to read the folio anyway */
+ NETFS_WHOLE_FOLIO_MODIFY, /* We're going to overwrite the whole folio */
+ NETFS_MODIFY_AND_CLEAR, /* We can assume there is no data to be downloaded. */
+ NETFS_STREAMING_WRITE, /* Store incomplete data in non-uptodate page. */
+ NETFS_STREAMING_WRITE_CONT, /* Continue streaming write. */
+ NETFS_FLUSH_CONTENT, /* Flush incompatible content. */
+};
+
+static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq);
+
+static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
+{
+ if (netfs_group && !folio_get_private(folio))
+ folio_attach_private(folio, netfs_get_group(netfs_group));
+}
+
+#if IS_ENABLED(CONFIG_FSCACHE)
+static void netfs_folio_start_fscache(bool caching, struct folio *folio)
+{
+ if (caching)
+ folio_start_fscache(folio);
+}
+#else
+static void netfs_folio_start_fscache(bool caching, struct folio *folio)
+{
+}
+#endif
+
+/*
+ * Decide how we should modify a folio. We might be attempting to do
+ * write-streaming, in which case we don't want to do a local RMW cycle if we can
+ * avoid it. If we're doing local caching or content crypto, we award that
+ * priority over avoiding RMW. If the file is open readably, then we also
+ * assume that we may want to read what we wrote.
+ */
+static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
+ struct file *file,
+ struct folio *folio,
+ void *netfs_group,
+ size_t flen,
+ size_t offset,
+ size_t len,
+ bool maybe_trouble)
+{
+ struct netfs_folio *finfo = netfs_folio_info(folio);
+ loff_t pos = folio_file_pos(folio);
+
+ _enter("");
+
+ if (netfs_folio_group(folio) != netfs_group)
+ return NETFS_FLUSH_CONTENT;
+
+ if (folio_test_uptodate(folio))
+ return NETFS_FOLIO_IS_UPTODATE;
+
+ if (pos >= ctx->zero_point)
+ return NETFS_MODIFY_AND_CLEAR;
+
+ if (!maybe_trouble && offset == 0 && len >= flen)
+ return NETFS_WHOLE_FOLIO_MODIFY;
+
+ if (file->f_mode & FMODE_READ)
+ goto no_write_streaming;
+ if (test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags))
+ goto no_write_streaming;
+
+ if (netfs_is_cache_enabled(ctx)) {
+ /* We don't want to get a streaming write on a file that loses
+ * caching service temporarily because the backing store got
+ * culled.
+ */
+ if (!test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags))
+ set_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags);
+ goto no_write_streaming;
+ }
+
+ if (!finfo)
+ return NETFS_STREAMING_WRITE;
+
+ /* We can continue a streaming write only if it continues on from the
+ * previous. If it overlaps, we must flush lest we suffer a partial
+ * copy and disjoint dirty regions.
+ */
+ if (offset == finfo->dirty_offset + finfo->dirty_len)
+ return NETFS_STREAMING_WRITE_CONT;
+ return NETFS_FLUSH_CONTENT;
+
+no_write_streaming:
+ if (finfo) {
+ netfs_stat(&netfs_n_wh_wstream_conflict);
+ return NETFS_FLUSH_CONTENT;
+ }
+ return NETFS_JUST_PREFETCH;
+}
+
+/*
+ * Grab a folio for writing and lock it. Attempt to allocate as large a folio
+ * as possible to hold as much of the remaining length as possible in one go.
+ */
+static struct folio *netfs_grab_folio_for_write(struct address_space *mapping,
+ loff_t pos, size_t part)
+{
+ pgoff_t index = pos / PAGE_SIZE;
+ fgf_t fgp_flags = FGP_WRITEBEGIN;
+
+ if (mapping_large_folio_support(mapping))
+ fgp_flags |= fgf_set_order(pos % PAGE_SIZE + part);
+
+ return __filemap_get_folio(mapping, index, fgp_flags,
+ mapping_gfp_mask(mapping));
+}
+
+/**
+ * netfs_perform_write - Copy data into the pagecache.
+ * @iocb: The operation parameters
+ * @iter: The source buffer
+ * @netfs_group: Grouping for dirty pages (eg. ceph snaps).
+ *
+ * Copy data into pagecache pages attached to the inode specified by @iocb.
+ * The caller must hold appropriate inode locks.
+ *
+ * Dirty pages are tagged with a netfs_folio struct if they're not up to date
+ * to indicate the range modified.  Dirty pages may also be tagged with a
+ * netfs-specific grouping such that data from an old group gets flushed before
+ * a new one is started.
+ *
+ * Return: The number of bytes copied if any were, otherwise the error (or the
+ * writethrough completion status) that stopped the copy.  -EIOCBQUEUED from
+ * the writethrough request is passed straight through.
+ */
+ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
+			    struct netfs_group *netfs_group)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file_inode(file);
+	struct address_space *mapping = inode->i_mapping;
+	struct netfs_inode *ctx = netfs_inode(inode);
+	struct writeback_control wbc = {
+		.sync_mode	= WB_SYNC_NONE,
+		.for_sync	= true,
+		.nr_to_write	= LONG_MAX,
+		.range_start	= iocb->ki_pos,
+		.range_end	= iocb->ki_pos + iter->count,
+	};
+	struct netfs_io_request *wreq = NULL;
+	struct netfs_folio *finfo;
+	struct folio *folio;
+	enum netfs_how_to_modify howto;
+	enum netfs_folio_trace trace;
+	/* BDP_ASYNC asks the dirty throttler not to sleep, which is what a
+	 * non-blocking (IOCB_NOWAIT) write wants; it has nothing to do with
+	 * whether the write is O_SYNC.
+	 */
+	unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
+	ssize_t written = 0, ret;
+	loff_t i_size, pos = iocb->ki_pos, from, to;
+	size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
+	bool maybe_trouble = false;
+
+	if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) ||
+		     iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC))
+	    ) {
+		if (pos < i_size_read(inode)) {
+			ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count);
+			if (ret < 0)
+				goto out;
+		}
+
+		wbc_attach_fdatawrite_inode(&wbc, mapping->host);
+
+		wreq = netfs_begin_writethrough(iocb, iter->count);
+		if (IS_ERR(wreq)) {
+			wbc_detach_inode(&wbc);
+			ret = PTR_ERR(wreq);
+			wreq = NULL;
+			goto out;
+		}
+		if (!is_sync_kiocb(iocb))
+			wreq->iocb = iocb;
+		wreq->cleanup = netfs_cleanup_buffered_write;
+	}
+
+	do {
+		size_t flen;
+		size_t offset;	/* Offset into pagecache folio */
+		size_t part;	/* Bytes to write to folio */
+		size_t copied;	/* Bytes copied from user */
+
+		ret = balance_dirty_pages_ratelimited_flags(mapping, bdp_flags);
+		if (unlikely(ret < 0))
+			break;
+
+		offset = pos & (max_chunk - 1);
+		part = min(max_chunk - offset, iov_iter_count(iter));
+
+		/* Bring in the user pages that we will copy from _first_ lest
+		 * we hit a nasty deadlock on copying from the same page as
+		 * we're writing to, without it being marked uptodate.
+		 *
+		 * Not only is this an optimisation, but it is also required to
+		 * check that the address is actually valid, when atomic
+		 * usercopies are used below.
+		 *
+		 * We rely on the page being held onto long enough by the LRU
+		 * that we can grab it below if this causes it to be read.
+		 */
+		ret = -EFAULT;
+		if (unlikely(fault_in_iov_iter_readable(iter, part) == part))
+			break;
+
+		/* __filemap_get_folio() reports failure with an ERR_PTR, never
+		 * NULL, so the error must be unpacked rather than assumed.
+		 */
+		folio = netfs_grab_folio_for_write(mapping, pos, part);
+		if (IS_ERR(folio)) {
+			ret = PTR_ERR(folio);
+			break;
+		}
+
+		flen = folio_size(folio);
+		offset = pos & (flen - 1);
+		part = min_t(size_t, flen - offset, part);
+
+		if (signal_pending(current)) {
+			ret = written ? -EINTR : -ERESTARTSYS;
+			goto error_folio_unlock;
+		}
+
+		/* See if we need to prefetch the area we're going to modify.
+		 * We need to do this before we get a lock on the folio in case
+		 * there's more than one writer competing for the same cache
+		 * block.
+		 */
+		howto = netfs_how_to_modify(ctx, file, folio, netfs_group,
+					    flen, offset, part, maybe_trouble);
+		_debug("howto %u", howto);
+		switch (howto) {
+		case NETFS_JUST_PREFETCH:
+			ret = netfs_prefetch_for_write(file, folio, offset, part);
+			if (ret < 0) {
+				_debug("prefetch = %zd", ret);
+				goto error_folio_unlock;
+			}
+			break;
+		case NETFS_FOLIO_IS_UPTODATE:
+		case NETFS_WHOLE_FOLIO_MODIFY:
+		case NETFS_STREAMING_WRITE_CONT:
+			break;
+		case NETFS_MODIFY_AND_CLEAR:
+			zero_user_segment(&folio->page, 0, offset);
+			break;
+		case NETFS_STREAMING_WRITE:
+			ret = -EIO;
+			if (WARN_ON(folio_get_private(folio)))
+				goto error_folio_unlock;
+			break;
+		case NETFS_FLUSH_CONTENT:
+			trace_netfs_folio(folio, netfs_flush_content);
+			from = folio_pos(folio);
+			to = from + folio_size(folio) - 1;
+			folio_unlock(folio);
+			folio_put(folio);
+			ret = filemap_write_and_wait_range(mapping, from, to);
+			/* The folio has already been unlocked and released
+			 * above, so bypass error_folio_unlock on failure.
+			 */
+			if (ret < 0)
+				goto out;
+			continue;
+		}
+
+		if (mapping_writably_mapped(mapping))
+			flush_dcache_folio(folio);
+
+		copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
+
+		flush_dcache_folio(folio);
+
+		/* Deal with a (partially) failed copy */
+		if (copied == 0) {
+			ret = -EFAULT;
+			goto error_folio_unlock;
+		}
+
+		trace = (enum netfs_folio_trace)howto;
+		switch (howto) {
+		case NETFS_FOLIO_IS_UPTODATE:
+		case NETFS_JUST_PREFETCH:
+			netfs_set_group(folio, netfs_group);
+			break;
+		case NETFS_MODIFY_AND_CLEAR:
+			zero_user_segment(&folio->page, offset + copied, flen);
+			netfs_set_group(folio, netfs_group);
+			folio_mark_uptodate(folio);
+			break;
+		case NETFS_WHOLE_FOLIO_MODIFY:
+			if (unlikely(copied < part)) {
+				/* Short copy: undo it and go round again with
+				 * maybe_trouble set so that a whole-folio
+				 * overwrite isn't assumed next time.
+				 */
+				maybe_trouble = true;
+				iov_iter_revert(iter, copied);
+				copied = 0;
+				goto retry;
+			}
+			netfs_set_group(folio, netfs_group);
+			folio_mark_uptodate(folio);
+			break;
+		case NETFS_STREAMING_WRITE:
+			if (offset == 0 && copied == flen) {
+				netfs_set_group(folio, netfs_group);
+				folio_mark_uptodate(folio);
+				trace = netfs_streaming_filled_page;
+				break;
+			}
+			finfo = kzalloc(sizeof(*finfo), GFP_KERNEL);
+			if (!finfo) {
+				iov_iter_revert(iter, copied);
+				ret = -ENOMEM;
+				goto error_folio_unlock;
+			}
+			finfo->netfs_group = netfs_get_group(netfs_group);
+			finfo->dirty_offset = offset;
+			finfo->dirty_len = copied;
+			folio_attach_private(folio, (void *)((unsigned long)finfo |
+							     NETFS_FOLIO_INFO));
+			break;
+		case NETFS_STREAMING_WRITE_CONT:
+			finfo = netfs_folio_info(folio);
+			finfo->dirty_len += copied;
+			if (finfo->dirty_offset == 0 && finfo->dirty_len == flen) {
+				if (finfo->netfs_group)
+					folio_change_private(folio, finfo->netfs_group);
+				else
+					folio_detach_private(folio);
+				folio_mark_uptodate(folio);
+				kfree(finfo);
+				trace = netfs_streaming_cont_filled_page;
+			}
+			break;
+		default:
+			WARN(true, "Unexpected modify type %u ix=%lx\n",
+			     howto, folio_index(folio));
+			ret = -EIO;
+			goto error_folio_unlock;
+		}
+
+		trace_netfs_folio(folio, trace);
+
+		/* Update the inode size if we moved the EOF marker */
+		i_size = i_size_read(inode);
+		pos += copied;
+		if (pos > i_size) {
+			if (ctx->ops->update_i_size) {
+				ctx->ops->update_i_size(inode, pos);
+			} else {
+				i_size_write(inode, pos);
+#if IS_ENABLED(CONFIG_FSCACHE)
+				fscache_update_cookie(ctx->cache, NULL, &pos);
+#endif
+			}
+		}
+		written += copied;
+
+		if (likely(!wreq)) {
+			folio_mark_dirty(folio);
+		} else {
+			if (folio_test_dirty(folio))
+				/* Sigh.  mmap. */
+				folio_clear_dirty_for_io(folio);
+			/* We make multiple writes to the folio... */
+			if (!folio_test_writeback(folio)) {
+				folio_wait_fscache(folio);
+				folio_start_writeback(folio);
+				folio_start_fscache(folio);
+				if (wreq->iter.count == 0)
+					trace_netfs_folio(folio, netfs_folio_trace_wthru);
+				else
+					trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
+			}
+			netfs_advance_writethrough(wreq, copied,
+						   offset + copied == flen);
+		}
+	retry:
+		folio_unlock(folio);
+		folio_put(folio);
+		folio = NULL;
+
+		cond_resched();
+	} while (iov_iter_count(iter));
+
+out:
+	if (unlikely(wreq)) {
+		ret = netfs_end_writethrough(wreq, iocb);
+		wbc_detach_inode(&wbc);
+		if (ret == -EIOCBQUEUED)
+			return ret;
+	}
+
+	iocb->ki_pos += written;
+	_leave(" = %zd [%zd]", written, ret);
+	return written ? written : ret;
+
+error_folio_unlock:
+	folio_unlock(folio);
+	folio_put(folio);
+	goto out;
+}
+EXPORT_SYMBOL(netfs_perform_write);
+
+/**
+ * netfs_buffered_write_iter_locked - write data to a file
+ * @iocb: IO state structure (file, offset, etc.)
+ * @from: iov_iter with data to write
+ * @netfs_group: Grouping for dirty pages (eg. ceph snaps).
+ *
+ * This function does all the work needed for actually writing data to a
+ * file. It does all basic checks, removes SUID from the file, updates
+ * modification times and calls proper subroutines depending on whether we
+ * do direct IO or a standard buffered write.
+ *
+ * The caller must hold appropriate locks around this function and have called
+ * generic_write_checks() already. The caller is also responsible for doing
+ * any necessary syncing afterwards.
+ *
+ * This function does *not* take care of syncing data in case of O_SYNC write.
+ * A caller has to handle it. This is mainly due to the fact that we want to
+ * avoid syncing under i_rwsem.
+ *
+ * Return:
+ * * number of bytes written, even for truncated writes
+ * * negative error code if no data has been written at all
+ */
+ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from,
+ struct netfs_group *netfs_group)
+{
+ struct file *file = iocb->ki_filp;
+ ssize_t ret;
+
+ trace_netfs_write_iter(iocb, from);
+
+ ret = file_remove_privs(file);
+ if (ret)
+ return ret;
+
+ ret = file_update_time(file);
+ if (ret)
+ return ret;
+
+ return netfs_perform_write(iocb, from, netfs_group);
+}
+EXPORT_SYMBOL(netfs_buffered_write_iter_locked);
+
+/**
+ * netfs_file_write_iter - write data to a file
+ * @iocb: IO state structure
+ * @from: iov_iter with data to write
+ *
+ * Perform a write to a file, writing into the pagecache if possible and doing
+ * an unbuffered write instead if not.
+ *
+ * Return:
+ * * Negative error code if no data has been written at all or
+ * vfs_fsync_range() failed for a synchronous write
+ * * Number of bytes written, even for truncated writes
+ */
+ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ struct netfs_inode *ictx = netfs_inode(inode);
+ ssize_t ret;
+
+ _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
+
+ if ((iocb->ki_flags & IOCB_DIRECT) ||
+ test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
+ return netfs_unbuffered_write_iter(iocb, from);
+
+ ret = netfs_start_io_write(inode);
+ if (ret < 0)
+ return ret;
+
+ ret = generic_write_checks(iocb, from);
+ if (ret > 0)
+ ret = netfs_buffered_write_iter_locked(iocb, from, NULL);
+ netfs_end_io_write(inode);
+ if (ret > 0)
+ ret = generic_write_sync(iocb, ret);
+ return ret;
+}
+EXPORT_SYMBOL(netfs_file_write_iter);
+
+/*
+ * Notification that a previously read-only page is about to become writable.
+ * Note that the caller indicates a single page of a multipage folio.
+ */
+vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group)
+{
+ struct folio *folio = page_folio(vmf->page);
+ struct file *file = vmf->vma->vm_file;
+ struct inode *inode = file_inode(file);
+ vm_fault_t ret = VM_FAULT_RETRY;
+ int err;
+
+ _enter("%lx", folio->index);
+
+ sb_start_pagefault(inode->i_sb);
+
+ if (folio_wait_writeback_killable(folio))
+ goto out;
+
+ if (folio_lock_killable(folio) < 0)
+ goto out;
+
+ /* Can we see a streaming write here? */
+ if (WARN_ON(!folio_test_uptodate(folio))) {
+ ret = VM_FAULT_SIGBUS | VM_FAULT_LOCKED;
+ goto out;
+ }
+
+ if (netfs_folio_group(folio) != netfs_group) {
+ folio_unlock(folio);
+ err = filemap_fdatawait_range(inode->i_mapping,
+ folio_pos(folio),
+ folio_pos(folio) + folio_size(folio));
+ switch (err) {
+ case 0:
+ ret = VM_FAULT_RETRY;
+ goto out;
+ case -ENOMEM:
+ ret = VM_FAULT_OOM;
+ goto out;
+ default:
+ ret = VM_FAULT_SIGBUS;
+ goto out;
+ }
+ }
+
+ if (folio_test_dirty(folio))
+ trace_netfs_folio(folio, netfs_folio_trace_mkwrite_plus);
+ else
+ trace_netfs_folio(folio, netfs_folio_trace_mkwrite);
+ netfs_set_group(folio, netfs_group);
+ file_update_time(file);
+ ret = VM_FAULT_LOCKED;
+out:
+ sb_end_pagefault(inode->i_sb);
+ return ret;
+}
+EXPORT_SYMBOL(netfs_page_mkwrite);
+
+/*
+ * Kill all the pages in the given range
+ *
+ * Walks the folios of @mapping covering bytes start to start + len - 1. Each
+ * folio found has its uptodate flag cleared, its fscache mark (if set) and its
+ * writeback state ended, and is then passed to generic_error_remove_folio()
+ * with the folio lock held. Indices with no folio present are stepped over.
+ */
+static void netfs_kill_pages(struct address_space *mapping,
+ loff_t start, loff_t len)
+{
+ struct folio *folio;
+ pgoff_t index = start / PAGE_SIZE;
+ pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
+
+ _enter("%llx-%llx", start, start + len - 1);
+
+ do {
+ _debug("kill %lx (to %lx)", index, last);
+
+ folio = filemap_get_folio(mapping, index);
+ if (IS_ERR(folio)) {
+ /* No folio at this index; "continue" evaluates the
+ * loop condition, which moves index on to next.
+ */
+ next = index + 1;
+ continue;
+ }
+
+ next = folio_next_index(folio);
+
+ trace_netfs_folio(folio, netfs_folio_trace_kill);
+ folio_clear_uptodate(folio);
+ if (folio_test_fscache(folio))
+ folio_end_fscache(folio);
+ folio_end_writeback(folio);
+ folio_lock(folio);
+ generic_error_remove_folio(mapping, folio);
+ folio_unlock(folio);
+ /* Drop the reference obtained by filemap_get_folio() */
+ folio_put(folio);
+
+ } while (index = next, index <= last);
+
+ _leave("");
+}
+
+/*
+ * Redirty all the pages in a given range.
+ *
+ * Walks the folios of @mapping covering bytes start to start + len - 1. Each
+ * folio found is marked dirty again with filemap_dirty_folio() and then has
+ * its fscache mark (if set) and its writeback state ended. Indices with no
+ * folio present are stepped over. Dirty throttling is applied once for the
+ * mapping after the walk.
+ */
+static void netfs_redirty_pages(struct address_space *mapping,
+ loff_t start, loff_t len)
+{
+ struct folio *folio;
+ pgoff_t index = start / PAGE_SIZE;
+ pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
+
+ _enter("%llx-%llx", start, start + len - 1);
+
+ do {
+ _debug("redirty %llx @%llx", len, start);
+
+ folio = filemap_get_folio(mapping, index);
+ if (IS_ERR(folio)) {
+ /* No folio at this index; "continue" evaluates the
+ * loop condition, which moves index on to next.
+ */
+ next = index + 1;
+ continue;
+ }
+
+ next = folio_next_index(folio);
+ trace_netfs_folio(folio, netfs_folio_trace_redirty);
+ filemap_dirty_folio(mapping, folio);
+ if (folio_test_fscache(folio))
+ folio_end_fscache(folio);
+ folio_end_writeback(folio);
+ /* Drop the reference obtained by filemap_get_folio() */
+ folio_put(folio);
+ } while (index = next, index <= last);
+
+ balance_dirty_pages_ratelimited(mapping);
+
+ _leave("");
+}
+
+/*
+ * Completion of write to server
+ */
+static void netfs_pages_written_back(struct netfs_io_request *wreq)
+{
+ struct address_space *mapping = wreq->mapping;
+ struct netfs_folio *finfo;
+ struct netfs_group *group = NULL;
+ struct folio *folio;
+ pgoff_t last;
+ int gcount = 0;
+
+ XA_STATE(xas, &mapping->i_pages, wreq->start / PAGE_SIZE);
+
+ _enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
+
+ rcu_read_lock();
+
+ last = (wreq->start + wreq->len - 1) / PAGE_SIZE;
+ xas_for_each(&xas, folio, last) {
+ WARN(!folio_test_writeback(folio),
+ "bad %zx @%llx page %lx %lx\n",
+ wreq->len, wreq->start, folio_index(folio), last);
+
+ if ((finfo = netfs_folio_info(folio))) {
+ /* Streaming writes cannot be redirtied whilst under
+ * writeback, so discard the streaming record.
+ */
+ folio_detach_private(folio);
+ group = finfo->netfs_group;
+ gcount++;
+ trace_netfs_folio(folio, netfs_folio_trace_clear_s);
+ kfree(finfo);
+ } else if ((group = netfs_folio_group(folio))) {
+ /* Need to detach the group pointer if the page didn't
+ * get redirtied. If it has been redirtied, then it
+ * must be within the same group.
+ */
+ if (folio_test_dirty(folio)) {
+ trace_netfs_folio(folio, netfs_folio_trace_redirtied);
+ goto end_wb;
+ }
+ if (folio_trylock(folio)) {
+ if (!folio_test_dirty(folio)) {
+ folio_detach_private(folio);
+ gcount++;
+ trace_netfs_folio(folio, netfs_folio_trace_clear_g);
+ } else {
+ trace_netfs_folio(folio, netfs_folio_trace_redirtied);
+ }
+ folio_unlock(folio);
+ goto end_wb;
+ }
+
+ xas_pause(&xas);
+ rcu_read_unlock();
+ folio_lock(folio);
+ if (!folio_test_dirty(folio)) {
+ folio_detach_private(folio);
+ gcount++;
+ trace_netfs_folio(folio, netfs_folio_trace_clear_g);
+ } else {
+ trace_netfs_folio(folio, netfs_folio_trace_redirtied);
+ }
+ folio_unlock(folio);
+ rcu_read_lock();
+ } else {
+ trace_netfs_folio(folio, netfs_folio_trace_clear);
+ }
+ end_wb:
+ if (folio_test_fscache(folio))
+ folio_end_fscache(folio);
+ xas_advance(&xas, folio_next_index(folio) - 1);
+ folio_end_writeback(folio);
+ }
+
+ rcu_read_unlock();
+ netfs_put_group_many(group, gcount);
+ _leave("");
+}
+
+/*
+ * Deal with the disposition of the folios that are under writeback to close
+ * out the operation.
+ *
+ * The disposition is chosen from wreq->error:
+ * - 0: the folios are finished off by netfs_pages_written_back().
+ * - the access/key/quota/space errors listed below: the folios are redirtied.
+ * - the -EROFS/-EIO group listed below: the folios are killed.
+ * - any other error: a notice is logged and the folios are redirtied.
+ *
+ * A non-zero error is also recorded on the mapping, and the filesystem's
+ * ->done() hook is called if one is provided.
+ */
+static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq)
+{
+ struct address_space *mapping = wreq->mapping;
+
+ _enter("");
+
+ switch (wreq->error) {
+ case 0:
+ netfs_pages_written_back(wreq);
+ break;
+
+ default:
+ /* Unrecognised error: log it, then treat it like the redirty
+ * group that follows.
+ */
+ pr_notice("R=%08x Unexpected error %d\n", wreq->debug_id, wreq->error);
+ fallthrough;
+ case -EACCES:
+ case -EPERM:
+ case -ENOKEY:
+ case -EKEYEXPIRED:
+ case -EKEYREJECTED:
+ case -EKEYREVOKED:
+ case -ENETRESET:
+ case -EDQUOT:
+ case -ENOSPC:
+ netfs_redirty_pages(mapping, wreq->start, wreq->len);
+ break;
+
+ case -EROFS:
+ case -EIO:
+ case -EREMOTEIO:
+ case -EFBIG:
+ case -ENOENT:
+ case -ENOMEDIUM:
+ case -ENXIO:
+ netfs_kill_pages(mapping, wreq->start, wreq->len);
+ break;
+ }
+
+ if (wreq->error)
+ mapping_set_error(mapping, wreq->error);
+ if (wreq->netfs_ops->done)
+ wreq->netfs_ops->done(wreq);
+}
+
+/*
+ * Extend the region to be written back to include subsequent contiguously
+ * dirty pages if possible, but don't sleep while doing so.
+ *
+ * If this page holds new content, then we can include filler zeros in the
+ * writeback.
+ */
+static void netfs_extend_writeback(struct address_space *mapping,
+ struct netfs_group *group,
+ struct xa_state *xas,
+ long *_count,
+ loff_t start,
+ loff_t max_len,
+ bool caching,
+ size_t *_len,
+ size_t *_top)
+{
+ struct netfs_folio *finfo;
+ struct folio_batch fbatch;
+ struct folio *folio;
+ unsigned int i;
+ pgoff_t index = (start + *_len) / PAGE_SIZE;
+ size_t len;
+ void *priv;
+ bool stop = true;
+
+ folio_batch_init(&fbatch);
+
+ do {
+ /* Firstly, we gather up a batch of contiguous dirty pages
+ * under the RCU read lock - but we can't clear the dirty flags
+ * there if any of those pages are mapped.
+ */
+ rcu_read_lock();
+
+ xas_for_each(xas, folio, ULONG_MAX) {
+ stop = true;
+ if (xas_retry(xas, folio))
+ continue;
+ if (xa_is_value(folio))
+ break;
+ if (folio_index(folio) != index) {
+ xas_reset(xas);
+ break;
+ }
+
+ if (!folio_try_get_rcu(folio)) {
+ xas_reset(xas);
+ continue;
+ }
+
+ /* Has the folio moved or been split? */
+ if (unlikely(folio != xas_reload(xas))) {
+ folio_put(folio);
+ xas_reset(xas);
+ break;
+ }
+
+ if (!folio_trylock(folio)) {
+ folio_put(folio);
+ xas_reset(xas);
+ break;
+ }
+ if (!folio_test_dirty(folio) ||
+ folio_test_writeback(folio) ||
+ folio_test_fscache(folio)) {
+ folio_unlock(folio);
+ folio_put(folio);
+ xas_reset(xas);
+ break;
+ }
+
+ stop = false;
+ len = folio_size(folio);
+ priv = folio_get_private(folio);
+ if ((const struct netfs_group *)priv != group) {
+ stop = true;
+ finfo = netfs_folio_info(folio);
+ if (finfo->netfs_group != group ||
+ finfo->dirty_offset > 0) {
+ folio_unlock(folio);
+ folio_put(folio);
+ xas_reset(xas);
+ break;
+ }
+ len = finfo->dirty_len;
+ }
+
+ *_top += folio_size(folio);
+ index += folio_nr_pages(folio);
+ *_count -= folio_nr_pages(folio);
+ *_len += len;
+ if (*_len >= max_len || *_count <= 0)
+ stop = true;
+
+ if (!folio_batch_add(&fbatch, folio))
+ break;
+ if (stop)
+ break;
+ }
+
+ xas_pause(xas);
+ rcu_read_unlock();
+
+ /* Now, if we obtained any folios, we can shift them to being
+ * writable and mark them for caching.
+ */
+ if (!folio_batch_count(&fbatch))
+ break;
+
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ folio = fbatch.folios[i];
+ trace_netfs_folio(folio, netfs_folio_trace_store_plus);
+
+ if (!folio_clear_dirty_for_io(folio))
+ BUG();
+ folio_start_writeback(folio);
+ netfs_folio_start_fscache(caching, folio);
+ folio_unlock(folio);
+ }
+
+ folio_batch_release(&fbatch);
+ cond_resched();
+ } while (!stop);
+}
+
+/*
+ * Synchronously write back the locked page and any subsequent non-locked dirty
+ * pages.
+ *
+ * A write request is allocated for @folio, which is switched from dirty to
+ * writeback; the span may then be grown over following dirty folios by
+ * netfs_extend_writeback(). If the span starts below i_size it is handed to
+ * netfs_begin_write(); otherwise the folios are finished off as though they
+ * had been written. @folio is unlocked before returning on every path.
+ *
+ * Returns the error from netfs_alloc_request() if that fails, otherwise 1.
+ * NOTE(review): the result of netfs_begin_write() only gates the adjustment of
+ * wbc->nr_to_write and is not returned to the caller - confirm this is intended.
+ */
+static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct netfs_group *group,
+ struct xa_state *xas,
+ struct folio *folio,
+ unsigned long long start,
+ unsigned long long end)
+{
+ struct netfs_io_request *wreq;
+ struct netfs_folio *finfo;
+ struct netfs_inode *ctx = netfs_inode(mapping->host);
+ unsigned long long i_size = i_size_read(&ctx->inode);
+ size_t len, max_len;
+ bool caching = netfs_is_cache_enabled(ctx);
+ long count = wbc->nr_to_write;
+ int ret;
+
+ _enter(",%lx,%llx-%llx,%u", folio_index(folio), start, end, caching);
+
+ wreq = netfs_alloc_request(mapping, NULL, start, folio_size(folio),
+ NETFS_WRITEBACK);
+ if (IS_ERR(wreq)) {
+ folio_unlock(folio);
+ return PTR_ERR(wreq);
+ }
+
+ if (!folio_clear_dirty_for_io(folio))
+ BUG();
+ folio_start_writeback(folio);
+ netfs_folio_start_fscache(caching, folio);
+
+ count -= folio_nr_pages(folio);
+
+ /* Find all consecutive lockable dirty pages that have contiguous
+ * written regions, stopping when we find a page that is not
+ * immediately lockable, is not dirty or is missing, or we reach the
+ * end of the range.
+ */
+ trace_netfs_folio(folio, netfs_folio_trace_store);
+
+ len = wreq->len;
+ finfo = netfs_folio_info(folio);
+ if (finfo) {
+ /* Streaming-write folio: only the recorded dirty span is written,
+ * and the span can't be extended unless it runs to the end of the
+ * folio.
+ */
+ start += finfo->dirty_offset;
+ if (finfo->dirty_offset + finfo->dirty_len != len) {
+ len = finfo->dirty_len;
+ goto cant_expand;
+ }
+ len = finfo->dirty_len;
+ }
+
+ if (start < i_size) {
+ /* Trim the write to the EOF; the extra data is ignored. Also
+ * put an upper limit on the size of a single storedata op.
+ */
+ max_len = 65536 * 4096;
+ max_len = min_t(unsigned long long, max_len, end - start + 1);
+ max_len = min_t(unsigned long long, max_len, i_size - start);
+
+ if (len < max_len)
+ netfs_extend_writeback(mapping, group, xas, &count, start,
+ max_len, caching, &len, &wreq->upper_len);
+ }
+
+cant_expand:
+ len = min_t(unsigned long long, len, i_size - start);
+
+ /* We now have a contiguous set of dirty pages, each with writeback
+ * set; the first page is still locked at this point, but all the rest
+ * have been unlocked.
+ */
+ folio_unlock(folio);
+ wreq->start = start;
+ wreq->len = len;
+
+ if (start < i_size) {
+ _debug("write back %zx @%llx [%llx]", len, start, i_size);
+
+ /* Speculatively write to the cache. We have to fix this up
+ * later if the store fails.
+ */
+ wreq->cleanup = netfs_cleanup_buffered_write;
+
+ iov_iter_xarray(&wreq->iter, ITER_SOURCE, &mapping->i_pages, start,
+ wreq->upper_len);
+ __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
+ ret = netfs_begin_write(wreq, true, netfs_write_trace_writeback);
+ if (ret == 0 || ret == -EIOCBQUEUED)
+ wbc->nr_to_write -= len / PAGE_SIZE;
+ } else {
+ _debug("write discard %zx @%llx [%llx]", len, start, i_size);
+
+ /* The dirty region was entirely beyond the EOF. */
+ fscache_clear_page_bits(mapping, start, len, caching);
+ netfs_pages_written_back(wreq);
+ ret = 0;
+ }
+
+ netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ _leave(" = 1");
+ return 1;
+}
+
+/*
+ * Find the next dirty folio belonging to @group at or after the position held
+ * in @xas (and no further than @end), lock it and hand it to
+ * netfs_write_back_from_locked_folio().
+ *
+ * Returns 0 if no suitable folio was found or the pass was abandoned, a
+ * negative error code if waiting for the folio lock was interrupted, and
+ * otherwise whatever netfs_write_back_from_locked_folio() returned. *_start
+ * is only advanced when the result is positive.
+ */
+static ssize_t netfs_writepages_begin(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct netfs_group *group,
+ struct xa_state *xas,
+ unsigned long long *_start,
+ unsigned long long end)
+{
+ const struct netfs_folio *finfo;
+ struct folio *folio;
+ unsigned long long start = *_start;
+ ssize_t ret;
+ void *priv;
+ int skips = 0;
+
+ _enter("%llx,%llx,", start, end);
+
+search_again:
+ /* Find the first dirty page in the group. */
+ rcu_read_lock();
+
+ for (;;) {
+ folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
+ if (xas_retry(xas, folio) || xa_is_value(folio))
+ continue;
+ if (!folio)
+ break;
+
+ if (!folio_try_get_rcu(folio)) {
+ xas_reset(xas);
+ continue;
+ }
+
+ if (unlikely(folio != xas_reload(xas))) {
+ folio_put(folio);
+ xas_reset(xas);
+ continue;
+ }
+
+ /* Skip any dirty folio that's not in the group of interest.
+ * netfs_folio_info() can return NULL (the other users in this
+ * file check for that); a folio whose private pointer isn't
+ * @group and that has no record naming @group is skipped
+ * rather than dereferencing a NULL pointer.
+ */
+ priv = folio_get_private(folio);
+ if ((const struct netfs_group *)priv != group) {
+ finfo = netfs_folio_info(folio);
+ if (!finfo || finfo->netfs_group != group) {
+ folio_put(folio);
+ continue;
+ }
+ }
+
+ xas_pause(xas);
+ break;
+ }
+ rcu_read_unlock();
+ if (!folio)
+ return 0;
+
+ /* NOTE(review): the ref taken by folio_try_get_rcu() above is not
+ * dropped anywhere in this function once the search loop has been
+ * left - confirm where it is released.
+ */
+
+ start = folio_pos(folio); /* May regress with THPs */
+
+ _debug("wback %lx", folio_index(folio));
+
+ /* At this point we hold neither the i_pages lock nor the page lock:
+ * the page may be truncated or invalidated (changing page->mapping to
+ * NULL), or even swizzled back from swapper_space to tmpfs file
+ * mapping
+ */
+lock_again:
+ /* Data-integrity writeback waits for the lock; background writeback
+ * just moves on to look for another folio.
+ */
+ if (wbc->sync_mode != WB_SYNC_NONE) {
+ ret = folio_lock_killable(folio);
+ if (ret < 0)
+ return ret;
+ } else {
+ if (!folio_trylock(folio))
+ goto search_again;
+ }
+
+ /* Recheck under the lock: the folio may have been removed from the
+ * mapping or cleaned in the meantime.
+ */
+ if (folio->mapping != mapping ||
+ !folio_test_dirty(folio)) {
+ start += folio_size(folio);
+ folio_unlock(folio);
+ goto search_again;
+ }
+
+ if (folio_test_writeback(folio) ||
+ folio_test_fscache(folio)) {
+ folio_unlock(folio);
+ if (wbc->sync_mode != WB_SYNC_NONE) {
+ folio_wait_writeback(folio);
+#ifdef CONFIG_FSCACHE
+ folio_wait_fscache(folio);
+#endif
+ goto lock_again;
+ }
+
+ /* Background writeback: skip busy folios, but give up after a
+ * few skips or if a reschedule is needed.
+ */
+ start += folio_size(folio);
+ if (wbc->sync_mode == WB_SYNC_NONE) {
+ if (skips >= 5 || need_resched()) {
+ ret = 0;
+ goto out;
+ }
+ skips++;
+ }
+ goto search_again;
+ }
+
+ ret = netfs_write_back_from_locked_folio(mapping, wbc, group, xas,
+ folio, start, end);
+out:
+ if (ret > 0)
+ *_start = start + ret;
+ _leave(" = %zd [%llx]", ret, *_start);
+ return ret;
+}
+
+/*
+ * Repeatedly write back runs of dirty folios lying between *_start and end
+ * until the search comes up empty, an error occurs or the quota in
+ * wbc->nr_to_write is used up. Returns 0 or a negative error code.
+ */
+static int netfs_writepages_region(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct netfs_group *group,
+ unsigned long long *_start,
+ unsigned long long end)
+{
+ ssize_t n;
+
+ XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
+
+ for (;;) {
+ n = netfs_writepages_begin(mapping, wbc, group, &xas,
+ _start, end);
+ if (n <= 0 || wbc->nr_to_write <= 0)
+ break;
+
+ /* More to do: let other tasks in before the next pass. */
+ cond_resched();
+ }
+
+ /* A positive count just means progress was made. */
+ if (n > 0)
+ return 0;
+ return n;
+}
+
+/*
+ * Write some of the pending dirty data in a mapping back to the server.
+ *
+ * For cyclic writeback with a saved position, the file is swept from that
+ * position to the end and then, if the quota in wbc->nr_to_write has not been
+ * used up, from the beginning up to that position, with
+ * mapping->writeback_index recording where to resume. Otherwise either the
+ * whole file or just the range given in @wbc is written.
+ */
+int netfs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct netfs_group *group = NULL;
+ loff_t pos, limit;
+ int ret;
+
+ _enter("");
+
+ /* Beware: the caller takes no lock that would stop setattr() from
+ * truncating the pagecache, so we may race with truncation here.
+ */
+
+ if (wbc->range_cyclic && mapping->writeback_index) {
+ /* First leg: from the saved index to the end of the file. */
+ pos = mapping->writeback_index * PAGE_SIZE;
+ ret = netfs_writepages_region(mapping, wbc, group,
+ &pos, LLONG_MAX);
+ if (ret >= 0) {
+ if (wbc->nr_to_write <= 0) {
+ /* Quota used up: remember where we got to. */
+ mapping->writeback_index = pos / PAGE_SIZE;
+ } else {
+ /* Second leg: wrap round to the start of the file. */
+ pos = 0;
+ limit = mapping->writeback_index * PAGE_SIZE;
+ mapping->writeback_index = 0;
+ ret = netfs_writepages_region(mapping, wbc, group,
+ &pos, limit);
+ if (ret == 0)
+ mapping->writeback_index = pos / PAGE_SIZE;
+ }
+ }
+ } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
+ /* Whole-file writeback. */
+ pos = 0;
+ ret = netfs_writepages_region(mapping, wbc, group,
+ &pos, LLONG_MAX);
+ if (ret == 0 && wbc->nr_to_write > 0)
+ mapping->writeback_index = pos / PAGE_SIZE;
+ } else {
+ /* Writeback of an explicit byte range. */
+ pos = wbc->range_start;
+ ret = netfs_writepages_region(mapping, wbc, group,
+ &pos, wbc->range_end);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+EXPORT_SYMBOL(netfs_writepages);
+
+/*
+ * Completion handler for a launder write: if the request failed, log the
+ * error and record it against the mapping.
+ */
+static void netfs_cleanup_launder_folio(struct netfs_io_request *wreq)
+{
+ if (!wreq->error)
+ return;
+
+ pr_notice("R=%08x Laundering error %d\n", wreq->debug_id, wreq->error);
+ mapping_set_error(wreq->mapping, wreq->error);
+}
+
+/**
+ * netfs_launder_folio - Clean up a dirty folio that's being invalidated
+ * @folio: The folio to clean
+ *
+ * This is called to write back a folio that's being invalidated when an inode
+ * is getting torn down. Ideally, writepages would be used instead.
+ *
+ * Return: 0, or a negative error code from allocating the request or from
+ * netfs_begin_write(). 0 is also returned if the folio turns out not to be
+ * dirty.
+ */
+int netfs_launder_folio(struct folio *folio)
+{
+ struct netfs_io_request *wreq;
+ struct address_space *mapping = folio->mapping;
+ struct netfs_folio *finfo = netfs_folio_info(folio);
+ struct netfs_group *group = netfs_folio_group(folio);
+ struct bio_vec bvec;
+ unsigned long long i_size = i_size_read(mapping->host);
+ unsigned long long start = folio_pos(folio);
+ size_t offset = 0, len;
+ int ret = 0;
+
+ /* If the folio has a netfs_folio record, only the dirty region that
+ * it describes is written; otherwise the whole folio is. Either way,
+ * the length is capped using the file size.
+ */
+ if (finfo) {
+ offset = finfo->dirty_offset;
+ start += offset;
+ len = finfo->dirty_len;
+ } else {
+ len = folio_size(folio);
+ }
+ len = min_t(unsigned long long, len, i_size - start);
+
+ wreq = netfs_alloc_request(mapping, NULL, start, len, NETFS_LAUNDER_WRITE);
+ if (IS_ERR(wreq)) {
+ ret = PTR_ERR(wreq);
+ goto out;
+ }
+
+ /* Nothing to write if the folio isn't actually dirty, but the private
+ * data is still discarded below.
+ */
+ if (!folio_clear_dirty_for_io(folio))
+ goto out_put;
+
+ trace_netfs_folio(folio, netfs_folio_trace_launder);
+
+ _debug("launder %llx-%llx", start, start + len - 1);
+
+ /* Speculatively write to the cache. We have to fix this up later if
+ * the store fails.
+ */
+ wreq->cleanup = netfs_cleanup_launder_folio;
+
+ /* NOTE(review): bvec is a local variable, so this relies on
+ * netfs_begin_write() (called with may_wait = true) being finished
+ * with the iterator before it returns - confirm.
+ */
+ bvec_set_folio(&bvec, folio, len, offset);
+ iov_iter_bvec(&wreq->iter, ITER_SOURCE, &bvec, 1, len);
+ __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
+ ret = netfs_begin_write(wreq, true, netfs_write_trace_launder);
+
+out_put:
+ /* Detach the private data from the folio and dispose of the group ref
+ * and the netfs_folio record that were looked up on entry.
+ */
+ folio_detach_private(folio);
+ netfs_put_group(group);
+ kfree(finfo);
+ netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+out:
+ folio_wait_fscache(folio);
+ _leave(" = %d", ret);
+ return ret;
+}
+EXPORT_SYMBOL(netfs_launder_folio);
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
new file mode 100644
index 000000000000..ad4370b3935d
--- /dev/null
+++ b/fs/netfs/direct_read.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Direct I/O support.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+#include <linux/sched/mm.h>
+#include <linux/task_io_accounting_ops.h>
+#include <linux/netfs.h>
+#include "internal.h"
+
+/**
+ * netfs_unbuffered_read_iter_locked - Perform an unbuffered or direct I/O read
+ * @iocb: The I/O control descriptor describing the read
+ * @iter: The output buffer (also specifies read length)
+ *
+ * Perform an unbuffered I/O or direct I/O from the file in @iocb to the
+ * output buffer. No use is made of the pagecache.
+ *
+ * The caller must hold any appropriate locks.
+ *
+ * Return: 0 if @iter is empty, the number of bytes transferred for a
+ * synchronous read, or a negative error code (which may be -EIOCBQUEUED).
+ */
+static ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct netfs_io_request *rreq;
+ ssize_t ret;
+ size_t orig_count = iov_iter_count(iter);
+ bool async = !is_sync_kiocb(iocb);
+
+ _enter("");
+
+ if (!orig_count)
+ return 0; /* Don't update atime */
+
+ /* Write back and wait on the range being read before going to the
+ * server for it.
+ */
+ ret = kiocb_write_and_wait(iocb, orig_count);
+ if (ret < 0)
+ return ret;
+ file_accessed(iocb->ki_filp);
+
+ rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
+ iocb->ki_pos, orig_count,
+ NETFS_DIO_READ);
+ if (IS_ERR(rreq))
+ return PTR_ERR(rreq);
+
+ netfs_stat(&netfs_n_rh_dio_read);
+ trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_dio_read);
+
+ /* If this is an async op, we have to keep track of the destination
+ * buffer for ourselves as the caller's iterator will be trashed when
+ * we return.
+ *
+ * In such a case, extract an iterator to represent as much of the
+ * output buffer as we can manage. Note that the extraction might not
+ * be able to allocate a sufficiently large bvec array and may shorten
+ * the request.
+ *
+ * NOTE(review): the test below is on user_backed_iter(), not on
+ * async, so the extraction is done for any user-backed iterator.
+ */
+ if (user_backed_iter(iter)) {
+ ret = netfs_extract_user_iter(iter, rreq->len, &rreq->iter, 0);
+ if (ret < 0)
+ goto out;
+ rreq->direct_bv = (struct bio_vec *)rreq->iter.bvec;
+ rreq->direct_bv_count = ret;
+ rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
+ rreq->len = iov_iter_count(&rreq->iter);
+ } else {
+ /* Use a copy of the caller's iterator and mark the whole of
+ * the caller's buffer as consumed.
+ */
+ rreq->iter = *iter;
+ rreq->len = orig_count;
+ rreq->direct_bv_unpin = false;
+ iov_iter_advance(iter, orig_count);
+ }
+
+ // TODO: Set up bounce buffer if needed
+
+ if (async)
+ rreq->iocb = iocb;
+
+ ret = netfs_begin_read(rreq, is_sync_kiocb(iocb));
+ if (ret < 0)
+ goto out; /* May be -EIOCBQUEUED */
+ if (!async) {
+ // TODO: Copy from bounce buffer
+ iocb->ki_pos += rreq->transferred;
+ ret = rreq->transferred;
+ }
+
+out:
+ netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ /* Unless the op was queued, wind the caller's iterator back so that
+ * what remains in it is the original count less any bytes
+ * transferred.
+ */
+ if (ret > 0)
+ orig_count -= ret;
+ if (ret != -EIOCBQUEUED)
+ iov_iter_revert(iter, orig_count - iov_iter_count(iter));
+ return ret;
+}
+
+/**
+ * netfs_unbuffered_read_iter - Perform an unbuffered or direct I/O read
+ * @iocb: The I/O control descriptor describing the read
+ * @iter: The output buffer (also specifies read length)
+ *
+ * Read from the file in @iocb straight into the output buffer without going
+ * through the pagecache. The read is bracketed by netfs_start_io_direct()
+ * and netfs_end_io_direct() on the inode.
+ */
+ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ ssize_t ret;
+
+ if (iter->count == 0)
+ return 0; /* Don't update atime */
+
+ ret = netfs_start_io_direct(inode);
+ if (ret != 0)
+ return ret;
+
+ ret = netfs_unbuffered_read_iter_locked(iocb, iter);
+ netfs_end_io_direct(inode);
+ return ret;
+}
+EXPORT_SYMBOL(netfs_unbuffered_read_iter);
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
new file mode 100644
index 000000000000..60a40d293c87
--- /dev/null
+++ b/fs/netfs/direct_write.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Unbuffered and direct write support.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/export.h>
+#include <linux/uio.h>
+#include "internal.h"
+
+/*
+ * Completion handler for an unbuffered/DIO write: if the write succeeded and
+ * ran past the current file size, move the file size up to the end of the
+ * write, using the filesystem's update_i_size op if it has one.
+ */
+static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
+{
+ struct inode *inode = wreq->inode;
+ unsigned long long new_size = wreq->start + wreq->len;
+
+ if (wreq->error)
+ return;
+ if (i_size_read(inode) >= new_size)
+ return;
+
+ if (wreq->netfs_ops->update_i_size)
+ wreq->netfs_ops->update_i_size(inode, new_size);
+ else
+ i_size_write(inode, new_size);
+}
+
+/*
+ * Perform an unbuffered write where we may have to do an RMW operation on an
+ * encrypted file. This can also be used for direct I/O writes.
+ *
+ * Returns the number of bytes transferred for a synchronous write,
+ * -EIOCBQUEUED for an asynchronous one that has been dispatched, or a
+ * negative error code.
+ */
+static ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *iter,
+ struct netfs_group *netfs_group)
+{
+ struct netfs_io_request *wreq;
+ unsigned long long start = iocb->ki_pos;
+ unsigned long long end = start + iov_iter_count(iter);
+ ssize_t ret, n;
+ bool async = !is_sync_kiocb(iocb);
+
+ _enter("");
+
+ /* We're going to need a bounce buffer if what we transmit is going to
+ * be different in some way to the source buffer, e.g. because it gets
+ * encrypted/compressed or because it needs expanding to a block size.
+ */
+ // TODO
+
+ _debug("uw %llx-%llx", start, end);
+
+ wreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
+ start, end - start,
+ iocb->ki_flags & IOCB_DIRECT ?
+ NETFS_DIO_WRITE : NETFS_UNBUFFERED_WRITE);
+ if (IS_ERR(wreq))
+ return PTR_ERR(wreq);
+
+ {
+ /* If this is an async op and we're not using a bounce buffer,
+ * we have to save the source buffer as the iterator is only
+ * good until we return. In such a case, extract an iterator
+ * to represent as much of the source buffer as we can
+ * manage. Note that the extraction might not be able to
+ * allocate a sufficiently large bvec array and may shorten the
+ * request.
+ *
+ * NOTE(review): the extraction is also done for synchronous
+ * ops on a user-backed iterator, and for async ops on
+ * iterators that are not user-backed - confirm that
+ * netfs_extract_user_iter() copes with the latter.
+ */
+ if (async || user_backed_iter(iter)) {
+ n = netfs_extract_user_iter(iter, wreq->len, &wreq->iter, 0);
+ if (n < 0) {
+ ret = n;
+ goto out;
+ }
+ wreq->direct_bv = (struct bio_vec *)wreq->iter.bvec;
+ wreq->direct_bv_count = n;
+ wreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
+ wreq->len = iov_iter_count(&wreq->iter);
+ } else {
+ wreq->iter = *iter;
+ }
+
+ wreq->io_iter = wreq->iter;
+ }
+
+ /* Copy the data into the bounce buffer and encrypt it. */
+ // TODO
+
+ /* Dispatch the write. */
+ __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
+ if (async)
+ wreq->iocb = iocb;
+ wreq->cleanup = netfs_cleanup_dio_write;
+ ret = netfs_begin_write(wreq, is_sync_kiocb(iocb),
+ iocb->ki_flags & IOCB_DIRECT ?
+ netfs_write_trace_dio_write :
+ netfs_write_trace_unbuffered_write);
+ if (ret < 0) {
+ _debug("begin = %zd", ret);
+ goto out;
+ }
+
+ if (!async) {
+ /* Synchronous: sleep until NETFS_RREQ_IN_PROGRESS is cleared,
+ * then report either the error or the number of bytes
+ * transferred, advancing the file position by the latter.
+ */
+ trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip);
+ wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
+ TASK_UNINTERRUPTIBLE);
+
+ ret = wreq->error;
+ _debug("waited = %zd", ret);
+ if (ret == 0) {
+ ret = wreq->transferred;
+ iocb->ki_pos += ret;
+ }
+ } else {
+ ret = -EIOCBQUEUED;
+ }
+
+out:
+ netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ return ret;
+}
+
+/**
+ * netfs_unbuffered_write_iter - Unbuffered write to a file
+ * @iocb: IO state structure
+ * @from: iov_iter with data to write
+ *
+ * Do an unbuffered write to a file, writing the data directly to the server
+ * and not lodging the data in the pagecache.
+ *
+ * Return:
+ * * Negative error code if no data has been written at all or
+ * vfs_fsync_range() failed for a synchronous write
+ * * Number of bytes written, even for truncated writes
+ * * 0 if generic_write_checks() finds that there is nothing to write
+ */
+ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ struct netfs_inode *ictx = netfs_inode(inode);
+ unsigned long long end;
+ ssize_t ret;
+
+ _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
+
+ trace_netfs_write_iter(iocb, from);
+ netfs_stat(&netfs_n_rh_dio_write);
+
+ ret = netfs_start_io_direct(inode);
+ if (ret < 0)
+ return ret;
+ /* generic_write_checks() returns the number of bytes that may be
+ * written; 0 means there's nothing to do, so bail out rather than
+ * setting up and dispatching a zero-length request.
+ */
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto out;
+ ret = file_remove_privs(file);
+ if (ret < 0)
+ goto out;
+ ret = file_update_time(file);
+ if (ret < 0)
+ goto out;
+ /* Get rid of any pagecache that overlaps the region to be written. */
+ ret = kiocb_invalidate_pages(iocb, iov_iter_count(from));
+ if (ret < 0)
+ goto out;
+ /* Move the inode's zero_point up to the end of the write if the write
+ * extends beyond it.
+ */
+ end = iocb->ki_pos + iov_iter_count(from);
+ if (end > ictx->zero_point)
+ ictx->zero_point = end;
+
+ fscache_invalidate(netfs_i_cookie(ictx), NULL, i_size_read(inode),
+ FSCACHE_INVAL_DIO_WRITE);
+ ret = netfs_unbuffered_write_iter_locked(iocb, from, NULL);
+out:
+ netfs_end_io_direct(inode);
+ return ret;
+}
+EXPORT_SYMBOL(netfs_unbuffered_write_iter);
diff --git a/fs/fscache/cache.c b/fs/netfs/fscache_cache.c
index d645f8b302a2..d645f8b302a2 100644
--- a/fs/fscache/cache.c
+++ b/fs/netfs/fscache_cache.c
diff --git a/fs/fscache/cookie.c b/fs/netfs/fscache_cookie.c
index bce2492186d0..bce2492186d0 100644
--- a/fs/fscache/cookie.c
+++ b/fs/netfs/fscache_cookie.c
diff --git a/fs/netfs/fscache_internal.h b/fs/netfs/fscache_internal.h
new file mode 100644
index 000000000000..a09b948fcef2
--- /dev/null
+++ b/fs/netfs/fscache_internal.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Internal definitions for FS-Cache
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include "internal.h"
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) "FS-Cache: " fmt
diff --git a/fs/fscache/io.c b/fs/netfs/fscache_io.c
index 0d2b8dec8f82..ad572f7ee897 100644
--- a/fs/fscache/io.c
+++ b/fs/netfs/fscache_io.c
@@ -158,46 +158,6 @@ int __fscache_begin_write_operation(struct netfs_cache_resources *cres,
}
EXPORT_SYMBOL(__fscache_begin_write_operation);
-/**
- * fscache_dirty_folio - Mark folio dirty and pin a cache object for writeback
- * @mapping: The mapping the folio belongs to.
- * @folio: The folio being dirtied.
- * @cookie: The cookie referring to the cache object
- *
- * Set the dirty flag on a folio and pin an in-use cache object in memory
- * so that writeback can later write to it. This is intended
- * to be called from the filesystem's ->dirty_folio() method.
- *
- * Return: true if the dirty flag was set on the folio, false otherwise.
- */
-bool fscache_dirty_folio(struct address_space *mapping, struct folio *folio,
- struct fscache_cookie *cookie)
-{
- struct inode *inode = mapping->host;
- bool need_use = false;
-
- _enter("");
-
- if (!filemap_dirty_folio(mapping, folio))
- return false;
- if (!fscache_cookie_valid(cookie))
- return true;
-
- if (!(inode->i_state & I_PINNING_FSCACHE_WB)) {
- spin_lock(&inode->i_lock);
- if (!(inode->i_state & I_PINNING_FSCACHE_WB)) {
- inode->i_state |= I_PINNING_FSCACHE_WB;
- need_use = true;
- }
- spin_unlock(&inode->i_lock);
-
- if (need_use)
- fscache_use_cookie(cookie, true);
- }
- return true;
-}
-EXPORT_SYMBOL(fscache_dirty_folio);
-
struct fscache_write_request {
struct netfs_cache_resources cache_resources;
struct address_space *mapping;
@@ -277,7 +237,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
fscache_access_io_write) < 0)
goto abandon_free;
- ret = cres->ops->prepare_write(cres, &start, &len, i_size, false);
+ ret = cres->ops->prepare_write(cres, &start, &len, len, i_size, false);
if (ret < 0)
goto abandon_end;
diff --git a/fs/fscache/main.c b/fs/netfs/fscache_main.c
index dad85fd84f6f..42e98bb523e3 100644
--- a/fs/fscache/main.c
+++ b/fs/netfs/fscache_main.c
@@ -8,18 +8,9 @@
#define FSCACHE_DEBUG_LEVEL CACHE
#include <linux/module.h>
#include <linux/init.h>
-#define CREATE_TRACE_POINTS
#include "internal.h"
-
-MODULE_DESCRIPTION("FS Cache Manager");
-MODULE_AUTHOR("Red Hat, Inc.");
-MODULE_LICENSE("GPL");
-
-unsigned fscache_debug;
-module_param_named(debug, fscache_debug, uint,
- S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(fscache_debug,
- "FS-Cache debugging mask");
+#define CREATE_TRACE_POINTS
+#include <trace/events/fscache.h>
EXPORT_TRACEPOINT_SYMBOL(fscache_access_cache);
EXPORT_TRACEPOINT_SYMBOL(fscache_access_volume);
@@ -71,7 +62,7 @@ unsigned int fscache_hash(unsigned int salt, const void *data, size_t len)
/*
* initialise the fs caching module
*/
-static int __init fscache_init(void)
+int __init fscache_init(void)
{
int ret = -ENOMEM;
@@ -92,7 +83,7 @@ static int __init fscache_init(void)
goto error_cookie_jar;
}
- pr_notice("Loaded\n");
+ pr_notice("FS-Cache loaded\n");
return 0;
error_cookie_jar:
@@ -103,19 +94,15 @@ error_wq:
return ret;
}
-fs_initcall(fscache_init);
-
/*
* clean up on module removal
*/
-static void __exit fscache_exit(void)
+void __exit fscache_exit(void)
{
_enter("");
kmem_cache_destroy(fscache_cookie_jar);
fscache_proc_cleanup();
destroy_workqueue(fscache_wq);
- pr_notice("Unloaded\n");
+ pr_notice("FS-Cache unloaded\n");
}
-
-module_exit(fscache_exit);
diff --git a/fs/fscache/proc.c b/fs/netfs/fscache_proc.c
index dc3b0e9c8cce..874d951bc390 100644
--- a/fs/fscache/proc.c
+++ b/fs/netfs/fscache_proc.c
@@ -12,41 +12,34 @@
#include "internal.h"
/*
- * initialise the /proc/fs/fscache/ directory
+ * Add files to /proc/fs/netfs/.
*/
int __init fscache_proc_init(void)
{
- if (!proc_mkdir("fs/fscache", NULL))
- goto error_dir;
+ if (!proc_symlink("fs/fscache", NULL, "netfs"))
+ goto error_sym;
- if (!proc_create_seq("fs/fscache/caches", S_IFREG | 0444, NULL,
+ if (!proc_create_seq("fs/netfs/caches", S_IFREG | 0444, NULL,
&fscache_caches_seq_ops))
goto error;
- if (!proc_create_seq("fs/fscache/volumes", S_IFREG | 0444, NULL,
+ if (!proc_create_seq("fs/netfs/volumes", S_IFREG | 0444, NULL,
&fscache_volumes_seq_ops))
goto error;
- if (!proc_create_seq("fs/fscache/cookies", S_IFREG | 0444, NULL,
+ if (!proc_create_seq("fs/netfs/cookies", S_IFREG | 0444, NULL,
&fscache_cookies_seq_ops))
goto error;
-
-#ifdef CONFIG_FSCACHE_STATS
- if (!proc_create_single("fs/fscache/stats", S_IFREG | 0444, NULL,
- fscache_stats_show))
- goto error;
-#endif
-
return 0;
error:
remove_proc_entry("fs/fscache", NULL);
-error_dir:
+error_sym:
return -ENOMEM;
}
/*
- * clean up the /proc/fs/fscache/ directory
+ * Clean up the /proc/fs/fscache symlink.
*/
void fscache_proc_cleanup(void)
{
diff --git a/fs/fscache/stats.c b/fs/netfs/fscache_stats.c
index fc94e5e79f1c..add21abdf713 100644
--- a/fs/fscache/stats.c
+++ b/fs/netfs/fscache_stats.c
@@ -48,13 +48,15 @@ atomic_t fscache_n_no_create_space;
EXPORT_SYMBOL(fscache_n_no_create_space);
atomic_t fscache_n_culled;
EXPORT_SYMBOL(fscache_n_culled);
+atomic_t fscache_n_dio_misfit;
+EXPORT_SYMBOL(fscache_n_dio_misfit);
/*
* display the general statistics
*/
-int fscache_stats_show(struct seq_file *m, void *v)
+int fscache_stats_show(struct seq_file *m)
{
- seq_puts(m, "FS-Cache statistics\n");
+ seq_puts(m, "-- FS-Cache statistics --\n");
seq_printf(m, "Cookies: n=%d v=%d vcol=%u voom=%u\n",
atomic_read(&fscache_n_cookies),
atomic_read(&fscache_n_volumes),
@@ -93,10 +95,9 @@ int fscache_stats_show(struct seq_file *m, void *v)
atomic_read(&fscache_n_no_create_space),
atomic_read(&fscache_n_culled));
- seq_printf(m, "IO : rd=%u wr=%u\n",
+ seq_printf(m, "IO : rd=%u wr=%u mis=%u\n",
atomic_read(&fscache_n_read),
- atomic_read(&fscache_n_write));
-
- netfs_stats_show(m);
+ atomic_read(&fscache_n_write),
+ atomic_read(&fscache_n_dio_misfit));
return 0;
}
diff --git a/fs/fscache/volume.c b/fs/netfs/fscache_volume.c
index cdf991bdd9de..cdf991bdd9de 100644
--- a/fs/fscache/volume.c
+++ b/fs/netfs/fscache_volume.c
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index 43fac1b14e40..ec7045d24400 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -5,9 +5,13 @@
* Written by David Howells (dhowells@redhat.com)
*/
+#include <linux/slab.h>
+#include <linux/seq_file.h>
#include <linux/netfs.h>
#include <linux/fscache.h>
+#include <linux/fscache-cache.h>
#include <trace/events/netfs.h>
+#include <trace/events/fscache.h>
#ifdef pr_fmt
#undef pr_fmt
@@ -19,6 +23,8 @@
* buffered_read.c
*/
void netfs_rreq_unlock_folios(struct netfs_io_request *rreq);
+int netfs_prefetch_for_write(struct file *file, struct folio *folio,
+ size_t offset, size_t len);
/*
* io.c
@@ -29,6 +35,41 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync);
* main.c
*/
extern unsigned int netfs_debug;
+extern struct list_head netfs_io_requests;
+extern spinlock_t netfs_proc_lock;
+
+#ifdef CONFIG_PROC_FS
+/* Append a request to the netfs_io_requests list under netfs_proc_lock. */
+static inline void netfs_proc_add_rreq(struct netfs_io_request *rreq)
+{
+ struct list_head *link = &rreq->proc_link;
+
+ spin_lock(&netfs_proc_lock);
+ list_add_tail_rcu(link, &netfs_io_requests);
+ spin_unlock(&netfs_proc_lock);
+}
+
+/* Unlink a request from the list, provided that its link is not empty. */
+static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq)
+{
+ struct list_head *link = &rreq->proc_link;
+
+ if (list_empty(link))
+ return;
+
+ spin_lock(&netfs_proc_lock);
+ list_del_rcu(link);
+ spin_unlock(&netfs_proc_lock);
+}
+#else
+/* Without CONFIG_PROC_FS these do nothing. */
+static inline void netfs_proc_add_rreq(struct netfs_io_request *rreq) {}
+static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {}
+#endif
+
+/*
+ * misc.c
+ */
+#define NETFS_FLAG_PUT_MARK BIT(0)
+#define NETFS_FLAG_PAGECACHE_MARK BIT(1)
+int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index,
+ struct folio *folio, unsigned int flags,
+ gfp_t gfp_mask);
+int netfs_add_folios_to_buffer(struct xarray *buffer,
+ struct address_space *mapping,
+ pgoff_t index, pgoff_t to, gfp_t gfp_mask);
+void netfs_clear_buffer(struct xarray *buffer);
/*
* objects.c
@@ -50,9 +91,20 @@ static inline void netfs_see_request(struct netfs_io_request *rreq,
}
/*
+ * output.c
+ */
+int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
+ enum netfs_write_trace what);
+struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len);
+int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end);
+int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb);
+
+/*
* stats.c
*/
#ifdef CONFIG_NETFS_STATS
+extern atomic_t netfs_n_rh_dio_read;
+extern atomic_t netfs_n_rh_dio_write;
extern atomic_t netfs_n_rh_readahead;
extern atomic_t netfs_n_rh_readpage;
extern atomic_t netfs_n_rh_rreq;
@@ -71,7 +123,15 @@ extern atomic_t netfs_n_rh_write_begin;
extern atomic_t netfs_n_rh_write_done;
extern atomic_t netfs_n_rh_write_failed;
extern atomic_t netfs_n_rh_write_zskip;
+extern atomic_t netfs_n_wh_wstream_conflict;
+extern atomic_t netfs_n_wh_upload;
+extern atomic_t netfs_n_wh_upload_done;
+extern atomic_t netfs_n_wh_upload_failed;
+extern atomic_t netfs_n_wh_write;
+extern atomic_t netfs_n_wh_write_done;
+extern atomic_t netfs_n_wh_write_failed;
+int netfs_stats_show(struct seq_file *m, void *v);
static inline void netfs_stat(atomic_t *stat)
{
@@ -103,6 +163,176 @@ static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx)
#endif
}
+/*
+ * Take a ref on a netfs group attached to a dirty page (e.g. a ceph snap).
+ * A NULL group is passed straight through.
+ */
+static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group)
+{
+ if (!netfs_group)
+ return NULL;
+
+ refcount_inc(&netfs_group->ref);
+ return netfs_group;
+}
+
+/*
+ * Drop a ref on a netfs group attached to a dirty page (e.g. a ceph snap),
+ * calling the group's ->free() op when the last ref goes. NULL is ignored.
+ */
+static inline void netfs_put_group(struct netfs_group *netfs_group)
+{
+ if (!netfs_group)
+ return;
+
+ if (refcount_dec_and_test(&netfs_group->ref))
+ netfs_group->free(netfs_group);
+}
+
+/*
+ * Drop @nr refs at once on a netfs group attached to dirty pages (e.g. a ceph
+ * snap), calling the group's ->free() op if that removes the last ref. NULL
+ * is ignored.
+ */
+static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
+{
+ if (!netfs_group)
+ return;
+
+ if (refcount_sub_and_test(nr, &netfs_group->ref))
+ netfs_group->free(netfs_group);
+}
+
+/*
+ * fscache-cache.c
+ */
+#ifdef CONFIG_PROC_FS
+extern const struct seq_operations fscache_caches_seq_ops;
+#endif
+bool fscache_begin_cache_access(struct fscache_cache *cache, enum fscache_access_trace why);
+void fscache_end_cache_access(struct fscache_cache *cache, enum fscache_access_trace why);
+struct fscache_cache *fscache_lookup_cache(const char *name, bool is_cache);
+void fscache_put_cache(struct fscache_cache *cache, enum fscache_cache_trace where);
+
+/* Read the state of a cache with acquire semantics. */
+static inline enum fscache_cache_state fscache_cache_state(const struct fscache_cache *cache)
+{
+ enum fscache_cache_state state = smp_load_acquire(&cache->state);
+
+ return state;
+}
+
+/* Report whether a cache is in the FSCACHE_CACHE_IS_ACTIVE state. */
+static inline bool fscache_cache_is_live(const struct fscache_cache *cache)
+{
+ enum fscache_cache_state state = fscache_cache_state(cache);
+
+ return state == FSCACHE_CACHE_IS_ACTIVE;
+}
+
+/* Set the state of a cache with release semantics. */
+static inline void fscache_set_cache_state(struct fscache_cache *cache,
+ enum fscache_cache_state new_state)
+{
+ smp_store_release(&cache->state, new_state);
+}
+
+/*
+ * Switch a cache from @old_state to @new_state, but only if it is currently
+ * in @old_state. Returns true if the switch was made.
+ */
+static inline bool fscache_set_cache_state_maybe(struct fscache_cache *cache,
+ enum fscache_cache_state old_state,
+ enum fscache_cache_state new_state)
+{
+ bool changed = try_cmpxchg_release(&cache->state, &old_state, new_state);
+
+ return changed;
+}
+
+/*
+ * fscache-cookie.c
+ */
+extern struct kmem_cache *fscache_cookie_jar;
+#ifdef CONFIG_PROC_FS
+extern const struct seq_operations fscache_cookies_seq_ops;
+#endif
+extern struct timer_list fscache_cookie_lru_timer;
+
+extern void fscache_print_cookie(struct fscache_cookie *cookie, char prefix);
+extern bool fscache_begin_cookie_access(struct fscache_cookie *cookie,
+ enum fscache_access_trace why);
+
+/* Emit a cookie tracepoint noting the cookie's current ref count. */
+static inline void fscache_see_cookie(struct fscache_cookie *cookie,
+ enum fscache_cookie_trace where)
+{
+ int ref = refcount_read(&cookie->ref);
+
+ trace_fscache_cookie(cookie->debug_id, ref, where);
+}
+
+/*
+ * fscache-main.c
+ */
+extern unsigned int fscache_hash(unsigned int salt, const void *data, size_t len);
+#ifdef CONFIG_FSCACHE
+int __init fscache_init(void);
+void __exit fscache_exit(void);
+#else
+static inline int fscache_init(void) { return 0; }
+static inline void fscache_exit(void) {}
+#endif
+
+/*
+ * fscache-proc.c
+ */
+#ifdef CONFIG_PROC_FS
+extern int __init fscache_proc_init(void);
+extern void fscache_proc_cleanup(void);
+#else
+#define fscache_proc_init() (0)
+#define fscache_proc_cleanup() do {} while (0)
+#endif
+
+/*
+ * fscache-stats.c
+ */
+#ifdef CONFIG_FSCACHE_STATS
+extern atomic_t fscache_n_volumes;
+extern atomic_t fscache_n_volumes_collision;
+extern atomic_t fscache_n_volumes_nomem;
+extern atomic_t fscache_n_cookies;
+extern atomic_t fscache_n_cookies_lru;
+extern atomic_t fscache_n_cookies_lru_expired;
+extern atomic_t fscache_n_cookies_lru_removed;
+extern atomic_t fscache_n_cookies_lru_dropped;
+
+extern atomic_t fscache_n_acquires;
+extern atomic_t fscache_n_acquires_ok;
+extern atomic_t fscache_n_acquires_oom;
+
+extern atomic_t fscache_n_invalidates;
+
+extern atomic_t fscache_n_relinquishes;
+extern atomic_t fscache_n_relinquishes_retire;
+extern atomic_t fscache_n_relinquishes_dropped;
+
+extern atomic_t fscache_n_resizes;
+extern atomic_t fscache_n_resizes_null;
+
+/* Increment a statistics counter. */
+static inline void fscache_stat(atomic_t *stat)
+{
+ atomic_inc(stat);
+}
+
+/* Decrement a statistics counter. */
+static inline void fscache_stat_d(atomic_t *stat)
+{
+ atomic_dec(stat);
+}
+
+#define __fscache_stat(stat) (stat)
+
+int fscache_stats_show(struct seq_file *m);
+#else
+
+#define __fscache_stat(stat) (NULL)
+#define fscache_stat(stat) do {} while (0)
+#define fscache_stat_d(stat) do {} while (0)
+
+static inline int fscache_stats_show(struct seq_file *m) { return 0; }
+#endif
+
+/*
+ * fscache-volume.c
+ */
+#ifdef CONFIG_PROC_FS
+extern const struct seq_operations fscache_volumes_seq_ops;
+#endif
+
+struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
+ enum fscache_volume_trace where);
+void fscache_put_volume(struct fscache_volume *volume,
+ enum fscache_volume_trace where);
+bool fscache_begin_volume_access(struct fscache_volume *volume,
+ struct fscache_cookie *cookie,
+ enum fscache_access_trace why);
+void fscache_create_volume(struct fscache_volume *volume, bool wait);
+
/*****************************************************************************/
/*
* debug tracing
@@ -143,3 +373,57 @@ do { \
#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
#endif
+
+/*
+ * assertions
+ *
+ * Each macro checks a condition and, if it does not hold, logs
+ * "Assertion failed" with pr_err() and then calls BUG().  The "#if 1" below
+ * keeps the checking versions permanently enabled; the #else branch supplies
+ * no-op versions with the same names.
+ *
+ * Note that the *CMP variants expand X and Y a second time when reporting a
+ * failure, so arguments with side effects should be avoided.
+ */
+#if 1 /* defined(__KDEBUGALL) */
+
+/* Assert that X is true. */
+#define ASSERT(X) \
+do { \
+ if (unlikely(!(X))) { \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
+ BUG(); \
+ } \
+} while (0)
+
+/* Assert that "X OP Y" is true; both operands are printed in hex on failure. */
+#define ASSERTCMP(X, OP, Y) \
+do { \
+ if (unlikely(!((X) OP (Y)))) { \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
+ pr_err("%lx " #OP " %lx is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ BUG(); \
+ } \
+} while (0)
+
+/* Assert that X is true, but only when precondition C holds. */
+#define ASSERTIF(C, X) \
+do { \
+ if (unlikely((C) && !(X))) { \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
+ BUG(); \
+ } \
+} while (0)
+
+/* Assert that "X OP Y" is true, but only when precondition C holds. */
+#define ASSERTIFCMP(C, X, OP, Y) \
+do { \
+ if (unlikely((C) && !((X) OP (Y)))) { \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
+ pr_err("%lx " #OP " %lx is false\n", \
+ (unsigned long)(X), (unsigned long)(Y)); \
+ BUG(); \
+ } \
+} while (0)
+
+#else
+
+/* Assertions compiled out: the arguments are not evaluated at all. */
+#define ASSERT(X) do {} while (0)
+#define ASSERTCMP(X, OP, Y) do {} while (0)
+#define ASSERTIF(C, X) do {} while (0)
+#define ASSERTIFCMP(C, X, OP, Y) do {} while (0)
+
+#endif /* assert or not */
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index 7f753380e047..4309edf33862 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -21,12 +21,7 @@
*/
static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
- struct iov_iter iter;
-
- iov_iter_xarray(&iter, ITER_DEST, &subreq->rreq->mapping->i_pages,
- subreq->start + subreq->transferred,
- subreq->len - subreq->transferred);
- iov_iter_zero(iov_iter_count(&iter), &iter);
+ iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
}
static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
@@ -46,14 +41,9 @@ static void netfs_read_from_cache(struct netfs_io_request *rreq,
enum netfs_read_from_hole read_hole)
{
struct netfs_cache_resources *cres = &rreq->cache_resources;
- struct iov_iter iter;
netfs_stat(&netfs_n_rh_read);
- iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages,
- subreq->start + subreq->transferred,
- subreq->len - subreq->transferred);
-
- cres->ops->read(cres, subreq->start, &iter, read_hole,
+ cres->ops->read(cres, subreq->start, &subreq->io_iter, read_hole,
netfs_cache_read_terminated, subreq);
}
@@ -88,6 +78,13 @@ static void netfs_read_from_server(struct netfs_io_request *rreq,
struct netfs_io_subrequest *subreq)
{
netfs_stat(&netfs_n_rh_download);
+
+ if (rreq->origin != NETFS_DIO_READ &&
+ iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
+ pr_warn("R=%08x[%u] ITER PRE-MISMATCH %zx != %zx-%zx %lx\n",
+ rreq->debug_id, subreq->debug_index,
+ iov_iter_count(&subreq->io_iter), subreq->len,
+ subreq->transferred, subreq->flags);
rreq->netfs_ops->issue_read(subreq);
}
@@ -129,7 +126,8 @@ static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq,
*/
if (have_unlocked && folio_index(folio) <= unlocked)
continue;
- unlocked = folio_index(folio);
+ unlocked = folio_next_index(folio) - 1;
+ trace_netfs_folio(folio, netfs_folio_trace_end_copy);
folio_end_fscache(folio);
have_unlocked = true;
}
@@ -201,7 +199,7 @@ static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq)
}
ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
- rreq->i_size, true);
+ subreq->len, rreq->i_size, true);
if (ret < 0) {
trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
@@ -260,6 +258,30 @@ static void netfs_rreq_short_read(struct netfs_io_request *rreq,
}
/*
+ * Reset the subrequest iterator prior to resubmission.
+ */
+static void netfs_reset_subreq_iter(struct netfs_io_request *rreq,
+				    struct netfs_io_subrequest *subreq)
+{
+	struct iov_iter *iter = &subreq->io_iter;
+	size_t want = subreq->len - subreq->transferred;
+	size_t have = iov_iter_count(iter);
+
+	/* Only touch the iterator if it no longer spans what is left to do. */
+	if (have != want) {
+		_debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
+		       rreq->debug_id, subreq->debug_index,
+		       iov_iter_count(iter), subreq->transferred,
+		       subreq->len, rreq->i_size,
+		       iter->iter_type);
+
+		if (have > want)
+			/* Too much left in the iterator: skip the surplus. */
+			iov_iter_advance(iter, have - want);
+		else
+			/* Iterator ran ahead of the transfer: wind it back. */
+			iov_iter_revert(iter, want - have);
+	}
+}
+
+/*
* Resubmit any short or failed operations. Returns true if we got the rreq
* ref back.
*/
@@ -287,6 +309,7 @@ static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq)
trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead);
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
atomic_inc(&rreq->nr_outstanding);
+ netfs_reset_subreq_iter(rreq, subreq);
netfs_read_from_server(rreq, subreq);
} else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) {
netfs_rreq_short_read(rreq, subreq);
@@ -321,6 +344,43 @@ static void netfs_rreq_is_still_valid(struct netfs_io_request *rreq)
}
/*
+ * Determine how much we can admit to having read from a DIO read.
+ */
+static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
+{
+	struct netfs_io_subrequest *subreq;
+	unsigned int i;
+	size_t transferred = 0;
+
+	/* Flush the destination pages once.  Nothing below writes to them, so
+	 * a second flush pass after the accounting loop would be redundant.
+	 */
+	for (i = 0; i < rreq->direct_bv_count; i++)
+		flush_dcache_page(rreq->direct_bv[i].bv_page);
+
+	/* Add up the contiguous run of data from the head of the subrequest
+	 * list, stopping at the first subrequest that failed, transferred
+	 * nothing, or came up short.
+	 */
+	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
+		if (subreq->error || subreq->transferred == 0)
+			break;
+		transferred += subreq->transferred;
+		if (subreq->transferred < subreq->len)
+			break;
+	}
+
+	rreq->transferred = transferred;
+	task_io_account_read(transferred);
+
+	if (rreq->iocb) {
+		/* Advance the file position and, if a completion handler is
+		 * set on the iocb, report the error or the byte count to it.
+		 */
+		rreq->iocb->ki_pos += transferred;
+		if (rreq->iocb->ki_complete)
+			rreq->iocb->ki_complete(
+				rreq->iocb, rreq->error ? rreq->error : transferred);
+	}
+	if (rreq->netfs_ops->done)
+		rreq->netfs_ops->done(rreq);
+	inode_dio_end(rreq->inode);
+}
+
+/*
* Assess the state of a read request and decide what to do next.
*
* Note that we could be in an ordinary kernel thread, on a workqueue or in
@@ -340,8 +400,12 @@ again:
return;
}
- netfs_rreq_unlock_folios(rreq);
+ if (rreq->origin != NETFS_DIO_READ)
+ netfs_rreq_unlock_folios(rreq);
+ else
+ netfs_rreq_assess_dio(rreq);
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);
@@ -399,9 +463,9 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
struct netfs_io_request *rreq = subreq->rreq;
int u;
- _enter("[%u]{%llx,%lx},%zd",
- subreq->debug_index, subreq->start, subreq->flags,
- transferred_or_error);
+ _enter("R=%x[%x]{%llx,%lx},%zd",
+ rreq->debug_id, subreq->debug_index,
+ subreq->start, subreq->flags, transferred_or_error);
switch (subreq->source) {
case NETFS_READ_FROM_CACHE:
@@ -501,15 +565,20 @@ static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_subrequest
*/
static enum netfs_io_source
netfs_rreq_prepare_read(struct netfs_io_request *rreq,
- struct netfs_io_subrequest *subreq)
+ struct netfs_io_subrequest *subreq,
+ struct iov_iter *io_iter)
{
- enum netfs_io_source source;
+ enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;
+ struct netfs_inode *ictx = netfs_inode(rreq->inode);
+ size_t lsize;
_enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
- source = netfs_cache_prepare_read(subreq, rreq->i_size);
- if (source == NETFS_INVALID_READ)
- goto out;
+ if (rreq->origin != NETFS_DIO_READ) {
+ source = netfs_cache_prepare_read(subreq, rreq->i_size);
+ if (source == NETFS_INVALID_READ)
+ goto out;
+ }
if (source == NETFS_DOWNLOAD_FROM_SERVER) {
/* Call out to the netfs to let it shrink the request to fit
@@ -518,19 +587,52 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
* to make serial calls, it can indicate a short read and then
* we will call it again.
*/
+ if (rreq->origin != NETFS_DIO_READ) {
+ if (subreq->start >= ictx->zero_point) {
+ source = NETFS_FILL_WITH_ZEROES;
+ goto set;
+ }
+ if (subreq->len > ictx->zero_point - subreq->start)
+ subreq->len = ictx->zero_point - subreq->start;
+ }
if (subreq->len > rreq->i_size - subreq->start)
subreq->len = rreq->i_size - subreq->start;
+ if (rreq->rsize && subreq->len > rreq->rsize)
+ subreq->len = rreq->rsize;
if (rreq->netfs_ops->clamp_length &&
!rreq->netfs_ops->clamp_length(subreq)) {
source = NETFS_INVALID_READ;
goto out;
}
+
+ if (subreq->max_nr_segs) {
+ lsize = netfs_limit_iter(io_iter, 0, subreq->len,
+ subreq->max_nr_segs);
+ if (subreq->len > lsize) {
+ subreq->len = lsize;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
+ }
+ }
}
- if (WARN_ON(subreq->len == 0))
+set:
+ if (subreq->len > rreq->len)
+ pr_warn("R=%08x[%u] SREQ>RREQ %zx > %zx\n",
+ rreq->debug_id, subreq->debug_index,
+ subreq->len, rreq->len);
+
+ if (WARN_ON(subreq->len == 0)) {
source = NETFS_INVALID_READ;
+ goto out;
+ }
+ subreq->source = source;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
+
+ subreq->io_iter = *io_iter;
+ iov_iter_truncate(&subreq->io_iter, subreq->len);
+ iov_iter_advance(io_iter, subreq->len);
out:
subreq->source = source;
trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
@@ -541,6 +643,7 @@ out:
* Slice off a piece of a read request and submit an I/O request for it.
*/
static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
+ struct iov_iter *io_iter,
unsigned int *_debug_index)
{
struct netfs_io_subrequest *subreq;
@@ -552,7 +655,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
subreq->debug_index = (*_debug_index)++;
subreq->start = rreq->start + rreq->submitted;
- subreq->len = rreq->len - rreq->submitted;
+ subreq->len = io_iter->count;
_debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
@@ -565,7 +668,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
* (the starts must coincide), in which case, we go around the loop
* again and ask it to download the next piece.
*/
- source = netfs_rreq_prepare_read(rreq, subreq);
+ source = netfs_rreq_prepare_read(rreq, subreq, io_iter);
if (source == NETFS_INVALID_READ)
goto subreq_failed;
@@ -603,6 +706,7 @@ subreq_failed:
*/
int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
{
+ struct iov_iter io_iter;
unsigned int debug_index = 0;
int ret;
@@ -611,50 +715,71 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
if (rreq->len == 0) {
pr_err("Zero-sized read [R=%x]\n", rreq->debug_id);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_zero_len);
return -EIO;
}
- INIT_WORK(&rreq->work, netfs_rreq_work);
+ if (rreq->origin == NETFS_DIO_READ)
+ inode_dio_begin(rreq->inode);
- if (sync)
- netfs_get_request(rreq, netfs_rreq_trace_get_hold);
+ // TODO: Use bounce buffer if requested
+ rreq->io_iter = rreq->iter;
+
+ INIT_WORK(&rreq->work, netfs_rreq_work);
/* Chop the read into slices according to what the cache and the netfs
* want and submit each one.
*/
+ netfs_get_request(rreq, netfs_rreq_trace_get_for_outstanding);
atomic_set(&rreq->nr_outstanding, 1);
+ io_iter = rreq->io_iter;
do {
- if (!netfs_rreq_submit_slice(rreq, &debug_index))
+ _debug("submit %llx + %zx >= %llx",
+ rreq->start, rreq->submitted, rreq->i_size);
+ if (rreq->origin == NETFS_DIO_READ &&
+ rreq->start + rreq->submitted >= rreq->i_size)
+ break;
+ if (!netfs_rreq_submit_slice(rreq, &io_iter, &debug_index))
+ break;
+ if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
+ test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
break;
} while (rreq->submitted < rreq->len);
+ if (!rreq->submitted) {
+ netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
+ ret = 0;
+ goto out;
+ }
+
if (sync) {
- /* Keep nr_outstanding incremented so that the ref always belongs to
- * us, and the service code isn't punted off to a random thread pool to
- * process.
+ /* Keep nr_outstanding incremented so that the ref always
+ * belongs to us, and the service code isn't punted off to a
+ * random thread pool to process. Note that this might start
+ * further work, such as writing to the cache.
*/
- for (;;) {
- wait_var_event(&rreq->nr_outstanding,
- atomic_read(&rreq->nr_outstanding) == 1);
+ wait_var_event(&rreq->nr_outstanding,
+ atomic_read(&rreq->nr_outstanding) == 1);
+ if (atomic_dec_and_test(&rreq->nr_outstanding))
netfs_rreq_assess(rreq, false);
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
- break;
- cond_resched();
- }
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
+ wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS,
+ TASK_UNINTERRUPTIBLE);
ret = rreq->error;
- if (ret == 0 && rreq->submitted < rreq->len) {
+ if (ret == 0 && rreq->submitted < rreq->len &&
+ rreq->origin != NETFS_DIO_READ) {
trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
ret = -EIO;
}
- netfs_put_request(rreq, false, netfs_rreq_trace_put_hold);
} else {
/* If we decrement nr_outstanding to 0, the ref belongs to us. */
if (atomic_dec_and_test(&rreq->nr_outstanding))
netfs_rreq_assess(rreq, false);
- ret = 0;
+ ret = -EIOCBQUEUED;
}
+
+out:
return ret;
}
diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c
index 2ff07ba655a0..b781bbbf1d8d 100644
--- a/fs/netfs/iterator.c
+++ b/fs/netfs/iterator.c
@@ -101,3 +101,100 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
return npages;
}
EXPORT_SYMBOL_GPL(netfs_extract_user_iter);
+
+/*
+ * Select the span of a bvec iterator we're going to use. Limit it by both maximum
+ * size and maximum number of segments. Returns the size of the span in bytes.
+ *
+ * @start_offset is counted from the iterator's current position (it is added
+ * to iter->iov_offset).  Returns 0 if the iterator is not a bvec iterator, if
+ * @start_offset exceeds the iterator's count, or if the iterator is empty.
+ * The iterator itself is not modified.
+ */
+static size_t netfs_limit_bvec(const struct iov_iter *iter, size_t start_offset,
+ size_t max_size, size_t max_segs)
+{
+ const struct bio_vec *bvecs = iter->bvec;
+ unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
+ size_t len, span = 0, n = iter->count;
+ size_t skip = iter->iov_offset + start_offset;
+
+ if (WARN_ON(!iov_iter_is_bvec(iter)) ||
+ WARN_ON(start_offset > n) ||
+ n == 0)
+ return 0;
+
+ /* Step over whole segments that lie entirely before the start point. */
+ while (n && ix < nbv && skip) {
+ len = bvecs[ix].bv_len;
+ if (skip < len)
+ break;
+ skip -= len;
+ n -= len;
+ ix++;
+ }
+
+ /* Accumulate segments until the size or segment-count limit is hit.
+  * Only the first segment counted can have a non-zero skip.
+  */
+ while (n && ix < nbv) {
+ len = min3(n, bvecs[ix].bv_len - skip, max_size);
+ span += len;
+ nsegs++;
+ ix++;
+ if (span >= max_size || nsegs >= max_segs)
+ break;
+ skip = 0;
+ n -= len;
+ }
+
+ return min(span, max_size);
+}
+
+/*
+ * Select the span of an xarray iterator we're going to use. Limit it by both
+ * maximum size and maximum number of segments. It is assumed that segments
+ * can be larger than a page in size, provided they're physically contiguous.
+ * Returns the size of the span in bytes.
+ *
+ * Each folio found in the xarray counts as one segment.  Returns 0 if the
+ * iterator is not an xarray iterator, if @start_offset exceeds the iterator's
+ * count, or if the iterator is empty.  The walk is done under the RCU read
+ * lock and the iterator itself is not modified.
+ *
+ * NOTE(review): pos is computed once from xarray_start + iov_offset and is
+ * never advanced in the loop, and start_offset is only used to clamp
+ * max_size, not to move the starting position - confirm this is intended.
+ */
+static size_t netfs_limit_xarray(const struct iov_iter *iter, size_t start_offset,
+ size_t max_size, size_t max_segs)
+{
+ struct folio *folio;
+ unsigned int nsegs = 0;
+ loff_t pos = iter->xarray_start + iter->iov_offset;
+ pgoff_t index = pos / PAGE_SIZE;
+ size_t span = 0, n = iter->count;
+
+ XA_STATE(xas, iter->xarray, index);
+
+ if (WARN_ON(!iov_iter_is_xarray(iter)) ||
+ WARN_ON(start_offset > n) ||
+ n == 0)
+ return 0;
+ /* Never report more than remains in the iterator past start_offset. */
+ max_size = min(max_size, n - start_offset);
+
+ rcu_read_lock();
+ xas_for_each(&xas, folio, ULONG_MAX) {
+ size_t offset, flen, len;
+ if (xas_retry(&xas, folio))
+ continue;
+ /* Value entries and hugetlb folios are not expected here. */
+ if (WARN_ON(xa_is_value(folio)))
+ break;
+ if (WARN_ON(folio_test_hugetlb(folio)))
+ break;
+
+ flen = folio_size(folio);
+ offset = offset_in_folio(folio, pos);
+ len = min(max_size, flen - offset);
+ span += len;
+ nsegs++;
+ if (span >= max_size || nsegs >= max_segs)
+ break;
+ }
+
+ rcu_read_unlock();
+ return min(span, max_size);
+}
+
+/*
+ * Work out how much of an iterator may be used, subject to a byte limit and a
+ * segment-count limit.  Only bvec and xarray iterators are handled; any other
+ * iterator type hits BUG().
+ */
+size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
+			size_t max_size, size_t max_segs)
+{
+	size_t span;
+
+	if (iov_iter_is_bvec(iter))
+		span = netfs_limit_bvec(iter, start_offset, max_size, max_segs);
+	else if (iov_iter_is_xarray(iter))
+		span = netfs_limit_xarray(iter, start_offset, max_size, max_segs);
+	else
+		BUG();
+
+	return span;
+}
+EXPORT_SYMBOL(netfs_limit_iter);
diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c
new file mode 100644
index 000000000000..75dc52a49b3a
--- /dev/null
+++ b/fs/netfs/locking.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * I/O and data path helper functionality.
+ *
+ * Borrowed from NFS Copyright (c) 2016 Trond Myklebust
+ */
+
+#include <linux/kernel.h>
+#include <linux/netfs.h>
+#include "internal.h"
+
+/*
+ * inode_dio_wait_interruptible - wait for outstanding DIO requests to finish
+ * @inode: inode to wait for
+ *
+ * Waits for all pending direct I/O requests to finish so that we can
+ * proceed with a truncate or equivalent operation.
+ *
+ * Must be called under a lock that serializes taking new references
+ * to i_dio_count, usually by inode->i_mutex.
+ *
+ * Returns 0 once i_dio_count is seen to be zero, or -ERESTARTSYS if a signal
+ * became pending and i_dio_count is still non-zero when the wait is abandoned.
+ */
+static int inode_dio_wait_interruptible(struct inode *inode)
+{
+ /* Fast path: nothing in flight, so no need to touch the waitqueue. */
+ if (!atomic_read(&inode->i_dio_count))
+ return 0;
+
+ wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
+ DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
+
+ for (;;) {
+ /* Queue ourselves before re-checking the count. */
+ prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE);
+ if (!atomic_read(&inode->i_dio_count))
+ break;
+ if (signal_pending(current))
+ break;
+ schedule();
+ }
+ finish_wait(wq, &q.wq_entry);
+
+ /* Re-read the count: DIO may have drained even if a signal arrived. */
+ return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0;
+}
+
+/*
+ * Leave direct-I/O mode: if NETFS_ICTX_ODIRECT is set, clear it and wait
+ * interruptibly for the inode's outstanding direct I/O to drain.  Returns 0 or
+ * the error from the wait.
+ *
+ * Call with exclusively locked inode->i_rwsem.
+ */
+static int netfs_block_o_direct(struct netfs_inode *ictx)
+{
+	int ret = 0;
+
+	if (test_bit(NETFS_ICTX_ODIRECT, &ictx->flags)) {
+		clear_bit(NETFS_ICTX_ODIRECT, &ictx->flags);
+		ret = inode_dio_wait_interruptible(&ictx->inode);
+	}
+	return ret;
+}
+
+/**
+ * netfs_start_io_read - declare the file is being used for buffered reads
+ * @inode: file inode
+ *
+ * Declare that a buffered read operation is about to start, and ensure
+ * that we block all direct I/O.
+ * On exit, the function ensures that the NETFS_ICTX_ODIRECT flag is unset,
+ * and holds a shared lock on inode->i_rwsem to ensure that the flag
+ * cannot be changed.
+ * In practice, this means that buffered read operations are allowed to
+ * execute in parallel, thanks to the shared lock, whereas direct I/O
+ * operations need to wait to grab an exclusive lock in order to set
+ * NETFS_ICTX_ODIRECT.
+ * Note that buffered writes and truncates both take a write lock on
+ * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
+ *
+ * Return: 0 with inode->i_rwsem held shared, or -ERESTARTSYS with the lock
+ * not held if taking the lock or waiting for direct I/O was interrupted.
+ */
+int netfs_start_io_read(struct inode *inode)
+ __acquires(inode->i_rwsem)
+{
+ struct netfs_inode *ictx = netfs_inode(inode);
+
+ /* Be an optimist! */
+ if (down_read_interruptible(&inode->i_rwsem) < 0)
+ return -ERESTARTSYS;
+ if (test_bit(NETFS_ICTX_ODIRECT, &ictx->flags) == 0)
+ return 0;
+ up_read(&inode->i_rwsem);
+
+ /* Slow path.... */
+ /* The flag can only be cleared under the exclusive lock. */
+ if (down_write_killable(&inode->i_rwsem) < 0)
+ return -ERESTARTSYS;
+ if (netfs_block_o_direct(ictx) < 0) {
+ up_write(&inode->i_rwsem);
+ return -ERESTARTSYS;
+ }
+ /* Drop back to a shared lock so other buffered readers can run. */
+ downgrade_write(&inode->i_rwsem);
+ return 0;
+}
+EXPORT_SYMBOL(netfs_start_io_read);
+
+/**
+ * netfs_end_io_read - declare that the buffered read operation is done
+ * @inode: file inode
+ *
+ * Declare that a buffered read operation is done, and release the shared
+ * lock on inode->i_rwsem.
+ *
+ * This undoes the shared lock left held by a successful call to
+ * netfs_start_io_read().
+ */
+void netfs_end_io_read(struct inode *inode)
+ __releases(inode->i_rwsem)
+{
+ up_read(&inode->i_rwsem);
+}
+EXPORT_SYMBOL(netfs_end_io_read);
+
+/**
+ * netfs_start_io_write - declare the file is being used for buffered writes
+ * @inode: file inode
+ *
+ * Declare that a buffered write operation is about to start, and ensure
+ * that we block all direct I/O.
+ *
+ * Return: 0 with inode->i_rwsem held exclusively, or -ERESTARTSYS with the
+ * lock not held if taking the lock or waiting for direct I/O was interrupted.
+ */
+int netfs_start_io_write(struct inode *inode)
+ __acquires(inode->i_rwsem)
+{
+ struct netfs_inode *ictx = netfs_inode(inode);
+
+ if (down_write_killable(&inode->i_rwsem) < 0)
+ return -ERESTARTSYS;
+ if (netfs_block_o_direct(ictx) < 0) {
+ up_write(&inode->i_rwsem);
+ return -ERESTARTSYS;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(netfs_start_io_write);
+
+/**
+ * netfs_end_io_write - declare that the buffered write operation is done
+ * @inode: file inode
+ *
+ * Declare that a buffered write operation is done, and release the
+ * lock on inode->i_rwsem.
+ *
+ * This undoes the exclusive lock left held by a successful call to
+ * netfs_start_io_write().
+ */
+void netfs_end_io_write(struct inode *inode)
+ __releases(inode->i_rwsem)
+{
+ up_write(&inode->i_rwsem);
+}
+EXPORT_SYMBOL(netfs_end_io_write);
+
+/*
+ * Enter direct-I/O mode: if NETFS_ICTX_ODIRECT is not yet set, set it and, if
+ * the mapping holds any pages, unmap them and wait for writeback on them.  On
+ * a wait failure the flag is cleared again and the error returned.
+ *
+ * Call with exclusively locked inode->i_rwsem.
+ */
+static int netfs_block_buffered(struct inode *inode)
+{
+	struct netfs_inode *ictx = netfs_inode(inode);
+	int ret;
+
+	/* Already in direct-I/O mode: nothing to do. */
+	if (test_bit(NETFS_ICTX_ODIRECT, &ictx->flags))
+		return 0;
+
+	set_bit(NETFS_ICTX_ODIRECT, &ictx->flags);
+	if (inode->i_mapping->nrpages == 0)
+		return 0;
+
+	unmap_mapping_range(inode->i_mapping, 0, 0, 0);
+	ret = filemap_fdatawait(inode->i_mapping);
+	if (ret >= 0)
+		return 0;
+
+	clear_bit(NETFS_ICTX_ODIRECT, &ictx->flags);
+	return ret;
+}
+
+/**
+ * netfs_start_io_direct - declare the file is being used for direct i/o
+ * @inode: file inode
+ *
+ * Declare that a direct I/O operation is about to start, and ensure
+ * that we block all buffered I/O.
+ * On exit, the function ensures that the NETFS_ICTX_ODIRECT flag is set,
+ * and holds a shared lock on inode->i_rwsem to ensure that the flag
+ * cannot be changed.
+ * In practice, this means that direct I/O operations are allowed to
+ * execute in parallel, thanks to the shared lock, whereas buffered I/O
+ * operations need to wait to grab an exclusive lock in order to clear
+ * NETFS_ICTX_ODIRECT.
+ * Note that buffered writes and truncates both take a write lock on
+ * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
+ *
+ * Return: 0 with inode->i_rwsem held shared; otherwise the lock is not held
+ * and the result is -ERESTARTSYS if taking the lock was interrupted, or the
+ * error returned by netfs_block_buffered().
+ */
+int netfs_start_io_direct(struct inode *inode)
+ __acquires(inode->i_rwsem)
+{
+ struct netfs_inode *ictx = netfs_inode(inode);
+ int ret;
+
+ /* Be an optimist! */
+ if (down_read_interruptible(&inode->i_rwsem) < 0)
+ return -ERESTARTSYS;
+ if (test_bit(NETFS_ICTX_ODIRECT, &ictx->flags) != 0)
+ return 0;
+ up_read(&inode->i_rwsem);
+
+ /* Slow path.... */
+ /* The flag can only be set under the exclusive lock. */
+ if (down_write_killable(&inode->i_rwsem) < 0)
+ return -ERESTARTSYS;
+ ret = netfs_block_buffered(inode);
+ if (ret < 0) {
+ up_write(&inode->i_rwsem);
+ return ret;
+ }
+ /* Drop back to a shared lock so other direct I/O can run. */
+ downgrade_write(&inode->i_rwsem);
+ return 0;
+}
+EXPORT_SYMBOL(netfs_start_io_direct);
+
+/**
+ * netfs_end_io_direct - declare that the direct i/o operation is done
+ * @inode: file inode
+ *
+ * Declare that a direct I/O operation is done, and release the shared
+ * lock on inode->i_rwsem.
+ *
+ * This undoes the shared lock left held by a successful call to
+ * netfs_start_io_direct().
+ */
+void netfs_end_io_direct(struct inode *inode)
+ __releases(inode->i_rwsem)
+{
+ up_read(&inode->i_rwsem);
+}
+EXPORT_SYMBOL(netfs_end_io_direct);
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 068568702957..5e77618a7940 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -7,6 +7,8 @@
#include <linux/module.h>
#include <linux/export.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/netfs.h>
@@ -15,6 +17,113 @@ MODULE_DESCRIPTION("Network fs support");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");
+EXPORT_TRACEPOINT_SYMBOL(netfs_sreq);
+
unsigned netfs_debug;
module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");
+
+#ifdef CONFIG_PROC_FS
+LIST_HEAD(netfs_io_requests);
+DEFINE_SPINLOCK(netfs_proc_lock);
+
+/*
+ * Two-letter tags for each request origin, indexed by the origin value and
+ * printed in the "OR" column of /proc/fs/netfs/requests.
+ */
+static const char *netfs_origins[nr__netfs_io_origin] = {
+ [NETFS_READAHEAD] = "RA",
+ [NETFS_READPAGE] = "RP",
+ [NETFS_READ_FOR_WRITE] = "RW",
+ [NETFS_WRITEBACK] = "WB",
+ [NETFS_WRITETHROUGH] = "WT",
+ [NETFS_LAUNDER_WRITE] = "LW",
+ [NETFS_UNBUFFERED_WRITE] = "UW",
+ [NETFS_DIO_READ] = "DR",
+ [NETFS_DIO_WRITE] = "DW",
+};
+
+/*
+ * Generate a list of I/O requests in /proc/fs/netfs/requests
+ *
+ * Emits the column headers when handed the list head itself, otherwise one
+ * line describing the request that @v points into via its proc_link member.
+ * Always returns 0.
+ */
+static int netfs_requests_seq_show(struct seq_file *m, void *v)
+{
+ struct netfs_io_request *rreq;
+
+ /* The iterator starts at the list head; use that slot for the header. */
+ if (v == &netfs_io_requests) {
+ seq_puts(m,
+ "REQUEST OR REF FL ERR OPS COVERAGE\n"
+ "======== == === == ==== === =========\n"
+ );
+ return 0;
+ }
+
+ rreq = list_entry(v, struct netfs_io_request, proc_link);
+ /* Columns: debug id, origin tag, refcount, flags, error, outstanding
+  * op count, then start offset and submitted/total length.
+  */
+ seq_printf(m,
+ "%08x %s %3d %2lx %4d %3d @%04llx %zx/%zx",
+ rreq->debug_id,
+ netfs_origins[rreq->origin],
+ refcount_read(&rreq->ref),
+ rreq->flags,
+ rreq->error,
+ atomic_read(&rreq->nr_outstanding),
+ rreq->start, rreq->submitted, rreq->len);
+ seq_putc(m, '\n');
+ return 0;
+}
+
+/*
+ * Begin a pass over the request list.  Takes the RCU read lock, which is
+ * dropped again in netfs_requests_seq_stop().  Position 0 yields the list
+ * head itself, which the show routine renders as the header.
+ */
+static void *netfs_requests_seq_start(struct seq_file *m, loff_t *_pos)
+ __acquires(rcu)
+{
+ rcu_read_lock();
+ return seq_list_start_head(&netfs_io_requests, *_pos);
+}
+
+/* Step to the next entry on the request list. */
+static void *netfs_requests_seq_next(struct seq_file *m, void *v, loff_t *_pos)
+{
+ return seq_list_next(v, &netfs_io_requests, _pos);
+}
+
+/* End a pass over the request list, dropping the RCU read lock. */
+static void netfs_requests_seq_stop(struct seq_file *m, void *v)
+ __releases(rcu)
+{
+ rcu_read_unlock();
+}
+
+/* seq_file operations used for the fs/netfs/requests proc entry. */
+static const struct seq_operations netfs_requests_seq_ops = {
+ .start = netfs_requests_seq_start,
+ .next = netfs_requests_seq_next,
+ .stop = netfs_requests_seq_stop,
+ .show = netfs_requests_seq_show,
+};
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Set up the netfs library: create the /proc/fs/netfs directory and its
+ * "requests" (and, with CONFIG_FSCACHE_STATS, "stats") entries, then
+ * initialise fscache.
+ *
+ * Returns 0 on success, -ENOMEM if a proc entry could not be created, or the
+ * error from fscache_init().
+ */
+static int __init netfs_init(void)
+{
+	int ret = -ENOMEM;
+
+	if (!proc_mkdir("fs/netfs", NULL))
+		goto error;
+	if (!proc_create_seq("fs/netfs/requests", S_IFREG | 0444, NULL,
+			     &netfs_requests_seq_ops))
+		goto error_proc;
+#ifdef CONFIG_FSCACHE_STATS
+	if (!proc_create_single("fs/netfs/stats", S_IFREG | 0444, NULL,
+				netfs_stats_show))
+		goto error_proc;
+#endif
+
+	ret = fscache_init();
+	if (ret < 0)
+		goto error_proc;
+	return 0;
+
+error_proc:
+	/* The directory may already hold "requests" and "stats" by this
+	 * point.  remove_proc_entry() on a non-empty directory warns and
+	 * leaks the children, so tear down the whole subtree instead.
+	 */
+	remove_proc_subtree("fs/netfs", NULL);
+error:
+	return ret;
+}
+fs_initcall(netfs_init);
+
+/*
+ * Tear down the netfs library: shut down fscache and remove the
+ * /proc/fs/netfs directory.
+ */
+static void __exit netfs_exit(void)
+{
+	fscache_exit();
+	/* The directory still contains the entries created at init time, so
+	 * remove the whole subtree; remove_proc_entry() would warn about a
+	 * non-empty directory and leak the children.
+	 */
+	remove_proc_subtree("fs/netfs", NULL);
+}
+module_exit(netfs_exit);
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
new file mode 100644
index 000000000000..0e3af37fc924
--- /dev/null
+++ b/fs/netfs/misc.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Miscellaneous routines.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/swap.h>
+#include "internal.h"
+
+/*
+ * Attach a folio to the buffer and maybe set marks on it to say that we need
+ * to put the folio later and twiddle the pagecache flags.
+ *
+ * The folio is stored at @index as an entry of the folio's order.  @flags may
+ * include NETFS_FLAG_PUT_MARK and/or NETFS_FLAG_PAGECACHE_MARK to set the
+ * corresponding xarray marks on the entry.
+ *
+ * Returns 0 on success or the xarray error if the store failed and
+ * xas_nomem() declined a retry.
+ */
+int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index,
+ struct folio *folio, unsigned int flags,
+ gfp_t gfp_mask)
+{
+ XA_STATE_ORDER(xas, xa, index, folio_order(folio));
+
+retry:
+ xas_lock(&xas);
+ /* Each pass leaves this loop on its first iteration: by break on
+  * success, by return on a hard failure, or by jumping back to retry.
+  */
+ for (;;) {
+ xas_store(&xas, folio);
+ if (!xas_error(&xas))
+ break;
+ /* xas_nomem() is called with the lock dropped. */
+ xas_unlock(&xas);
+ if (!xas_nomem(&xas, gfp_mask))
+ return xas_error(&xas);
+ goto retry;
+ }
+
+ if (flags & NETFS_FLAG_PUT_MARK)
+ xas_set_mark(&xas, NETFS_BUF_PUT_MARK);
+ if (flags & NETFS_FLAG_PAGECACHE_MARK)
+ xas_set_mark(&xas, NETFS_BUF_PAGECACHE_MARK);
+ xas_unlock(&xas);
+ return xas_error(&xas);
+}
+
+/*
+ * Create the specified range of folios in the buffer attached to the read
+ * request. The folios are marked with NETFS_BUF_PUT_MARK so that we know that
+ * these need freeing later.
+ *
+ * The range @index..@to is inclusive.  Folios are currently allocated at
+ * order 0 using readahead_gfp_mask(@mapping); @gfp_mask is used only for the
+ * xarray store.
+ *
+ * Returns 0 on success, -ENOMEM if a folio could not be allocated, or the
+ * error from netfs_xa_store_and_mark().  Folios stored before a failure are
+ * left in @buffer.
+ */
+int netfs_add_folios_to_buffer(struct xarray *buffer,
+ struct address_space *mapping,
+ pgoff_t index, pgoff_t to, gfp_t gfp_mask)
+{
+ struct folio *folio;
+ int ret;
+
+ if (to + 1 == index) /* Page range is inclusive */
+ return 0;
+
+ do {
+ /* TODO: Figure out what order folio can be allocated here */
+ folio = filemap_alloc_folio(readahead_gfp_mask(mapping), 0);
+ if (!folio)
+ return -ENOMEM;
+ folio->index = index;
+ ret = netfs_xa_store_and_mark(buffer, index, folio,
+ NETFS_FLAG_PUT_MARK, gfp_mask);
+ if (ret < 0) {
+ /* The folio never made it into the buffer; drop our ref. */
+ folio_put(folio);
+ return ret;
+ }
+
+ index += folio_nr_pages(folio);
+ /* NOTE(review): "index != 0" presumably stops the loop if index wraps. */
+ } while (index <= to && index != 0);
+
+ return 0;
+}
+
+/*
+ * Clear an xarray buffer, dropping a ref on each of the folios that have
+ * NETFS_BUF_PUT_MARK set, and then destroy the xarray.  Entries without the
+ * mark are not put.
+ */
+void netfs_clear_buffer(struct xarray *buffer)
+{
+ struct folio *folio;
+ XA_STATE(xas, buffer, 0);
+
+ /* The walk over the marked entries is done under the RCU read lock. */
+ rcu_read_lock();
+ xas_for_each_marked(&xas, folio, ULONG_MAX, NETFS_BUF_PUT_MARK) {
+ folio_put(folio);
+ }
+ rcu_read_unlock();
+ xa_destroy(buffer);
+}
+
+/**
+ * netfs_dirty_folio - Mark folio dirty and pin a cache object for writeback
+ * @mapping: The mapping the folio belongs to.
+ * @folio: The folio being dirtied.
+ *
+ * Set the dirty flag on a folio and pin an in-use cache object in memory so
+ * that writeback can later write to it. This is intended to be called from
+ * the filesystem's ->dirty_folio() method.
+ *
+ * The cache object is pinned at most once per inode: the I_PINNING_NETFS_WB
+ * bit in inode->i_state records that the pin has been taken.
+ *
+ * Return: true if the dirty flag was set on the folio, false otherwise.
+ */
+bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio)
+{
+ struct inode *inode = mapping->host;
+ struct netfs_inode *ictx = netfs_inode(inode);
+ struct fscache_cookie *cookie = netfs_i_cookie(ictx);
+ bool need_use = false;
+
+ _enter("");
+
+ if (!filemap_dirty_folio(mapping, folio))
+ return false;
+ /* No usable cookie: nothing to pin. */
+ if (!fscache_cookie_valid(cookie))
+ return true;
+
+ /* Peek at the flag without the lock first, then recheck under i_lock
+  * so that only one caller sets it and takes the cookie use.
+  */
+ if (!(inode->i_state & I_PINNING_NETFS_WB)) {
+ spin_lock(&inode->i_lock);
+ if (!(inode->i_state & I_PINNING_NETFS_WB)) {
+ inode->i_state |= I_PINNING_NETFS_WB;
+ need_use = true;
+ }
+ spin_unlock(&inode->i_lock);
+
+ /* Take the cookie use outside of the spinlock. */
+ if (need_use)
+ fscache_use_cookie(cookie, true);
+ }
+ return true;
+}
+EXPORT_SYMBOL(netfs_dirty_folio);
+
+/**
+ * netfs_unpin_writeback - Unpin writeback resources
+ * @inode: The inode on which the cookie resides
+ * @wbc: The writeback control
+ *
+ * Unpin the writeback resources pinned by netfs_dirty_folio(). This is
+ * intended to be called as/by the netfs's ->write_inode() method.
+ *
+ * The cookie is only released if @wbc->unpinned_netfs_wb is set.
+ *
+ * Return: Always 0.
+ */
+int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc)
+{
+ struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode));
+
+ if (wbc->unpinned_netfs_wb)
+ fscache_unuse_cookie(cookie, NULL, NULL);
+ return 0;
+}
+EXPORT_SYMBOL(netfs_unpin_writeback);
+
+/**
+ * netfs_clear_inode_writeback - Clear writeback resources pinned by an inode
+ * @inode: The inode to clean up
+ * @aux: Auxiliary data to apply to the inode
+ *
+ * Release the cache cookie use still held for writeback when an inode is
+ * being evicted, passing along @aux and the inode's current size.  Nothing is
+ * done unless I_PINNING_NETFS_WB is set in the inode state.
+ * This must be called before clear_inode() is called.
+ */
+void netfs_clear_inode_writeback(struct inode *inode, const void *aux)
+{
+	struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode));
+	loff_t i_size;
+
+	if (!(inode->i_state & I_PINNING_NETFS_WB))
+		return;
+
+	i_size = i_size_read(inode);
+	fscache_unuse_cookie(cookie, aux, &i_size);
+}
+EXPORT_SYMBOL(netfs_clear_inode_writeback);
+
+/**
+ * netfs_invalidate_folio - Invalidate or partially invalidate a folio
+ * @folio: Folio proposed for release
+ * @offset: Offset of the invalidated region
+ * @length: Length of the invalidated region
+ *
+ * Invalidate part or all of a folio for a network filesystem.  The folio will
+ * be removed afterwards if the invalidated region covers the entire folio.
+ *
+ * If the folio carries streaming-write state (a dirty region recorded in its
+ * netfs_folio), that region is trimmed so that it no longer includes any part
+ * of the invalidated range, or the private state is discarded altogether if
+ * the whole dirty region is invalidated.
+ */
+void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
+{
+	struct netfs_folio *finfo = NULL;
+	size_t flen = folio_size(folio);
+
+	_enter("{%lx},%zx,%zx", folio_index(folio), offset, length);
+
+	folio_wait_fscache(folio);
+
+	if (!folio_test_private(folio))
+		return;
+
+	finfo = netfs_folio_info(folio);
+
+	if (offset == 0 && length >= flen)
+		goto erase_completely;
+
+	if (finfo) {
+		/* We have a partially uptodate page from a streaming write. */
+		unsigned int fstart = finfo->dirty_offset;
+		unsigned int fend = fstart + finfo->dirty_len;
+		unsigned int end = offset + length;
+
+		/* The invalidated range misses the dirty data entirely. */
+		if (offset >= fend || end <= fstart)
+			return;
+
+		if (offset <= fstart) {
+			if (end >= fend)
+				goto erase_completely;
+			/* The head of the dirty data is invalidated; what
+			 * survives is [end, fend).
+			 */
+			finfo->dirty_offset = end;
+			finfo->dirty_len = fend - end;
+			return;
+		}
+
+		if (end >= fend) {
+			/* The tail of the dirty data is invalidated; what
+			 * survives is [fstart, offset).
+			 */
+			finfo->dirty_len = offset - fstart;
+			return;
+		}
+
+		/* A partial write was split.  The caller has already zeroed
+		 * it, so just absorb the hole.
+		 */
+	}
+	return;
+
+erase_completely:
+	netfs_put_group(netfs_folio_group(folio));
+	folio_detach_private(folio);
+	folio_clear_uptodate(folio);
+	kfree(finfo);
+}
+EXPORT_SYMBOL(netfs_invalidate_folio);
+
+/**
+ * netfs_release_folio - Try to release a folio
+ * @folio: Folio proposed for release
+ * @gfp: Flags qualifying the release
+ *
+ * Ask whether a folio may be released.  The inode's zero_point is first
+ * raised to the end of the folio if it is currently below that.  A folio with
+ * private data attached is refused; a folio marked fscache is waited for
+ * unless we are kswapd or @gfp lacks __GFP_FS, in which case it is refused.
+ * Returns true if the folio can now be released, false if not
+ */
+bool netfs_release_folio(struct folio *folio, gfp_t gfp)
+{
+	struct netfs_inode *ctx = netfs_inode(folio_inode(folio));
+	unsigned long long end = folio_pos(folio) + folio_size(folio);
+
+	if (ctx->zero_point < end)
+		ctx->zero_point = end;
+
+	if (folio_test_private(folio))
+		return false;
+
+	if (folio_test_fscache(folio)) {
+		/* Waiting is only allowed outside kswapd and with __GFP_FS. */
+		if (current_is_kswapd())
+			return false;
+		if (!(gfp & __GFP_FS))
+			return false;
+		folio_wait_fscache(folio);
+	}
+
+	fscache_note_page_release(netfs_i_cookie(ctx));
+	return true;
+}
+EXPORT_SYMBOL(netfs_release_folio);
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index e17cdf53f6a7..610ceb5bd86c 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -20,14 +20,20 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
struct inode *inode = file ? file_inode(file) : mapping->host;
struct netfs_inode *ctx = netfs_inode(inode);
struct netfs_io_request *rreq;
+ bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE ||
+ origin == NETFS_DIO_READ ||
+ origin == NETFS_DIO_WRITE);
+ bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx);
int ret;
- rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL);
+ rreq = kzalloc(ctx->ops->io_request_size ?: sizeof(struct netfs_io_request),
+ GFP_KERNEL);
if (!rreq)
return ERR_PTR(-ENOMEM);
rreq->start = start;
rreq->len = len;
+ rreq->upper_len = len;
rreq->origin = origin;
rreq->netfs_ops = ctx->ops;
rreq->mapping = mapping;
@@ -35,8 +41,14 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
rreq->i_size = i_size_read(inode);
rreq->debug_id = atomic_inc_return(&debug_ids);
INIT_LIST_HEAD(&rreq->subrequests);
+ INIT_WORK(&rreq->work, NULL);
refcount_set(&rreq->ref, 1);
+
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
+ if (cached)
+ __set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
+ if (file && file->f_flags & O_NONBLOCK)
+ __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags);
if (rreq->netfs_ops->init_request) {
ret = rreq->netfs_ops->init_request(rreq, file);
if (ret < 0) {
@@ -45,6 +57,8 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
}
}
+ trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new);
+ netfs_proc_add_rreq(rreq);
netfs_stat(&netfs_n_rh_rreq);
return rreq;
}
@@ -74,33 +88,47 @@ static void netfs_free_request(struct work_struct *work)
{
struct netfs_io_request *rreq =
container_of(work, struct netfs_io_request, work);
+ unsigned int i;
trace_netfs_rreq(rreq, netfs_rreq_trace_free);
+ netfs_proc_del_rreq(rreq);
netfs_clear_subrequests(rreq, false);
if (rreq->netfs_ops->free_request)
rreq->netfs_ops->free_request(rreq);
if (rreq->cache_resources.ops)
rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
- kfree(rreq);
+ if (rreq->direct_bv) {
+ for (i = 0; i < rreq->direct_bv_count; i++) {
+ if (rreq->direct_bv[i].bv_page) {
+ if (rreq->direct_bv_unpin)
+ unpin_user_page(rreq->direct_bv[i].bv_page);
+ }
+ }
+ kvfree(rreq->direct_bv);
+ }
+ kfree_rcu(rreq, rcu);
netfs_stat_d(&netfs_n_rh_rreq);
}
void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
enum netfs_rreq_ref_trace what)
{
- unsigned int debug_id = rreq->debug_id;
+ unsigned int debug_id;
bool dead;
int r;
- dead = __refcount_dec_and_test(&rreq->ref, &r);
- trace_netfs_rreq_ref(debug_id, r - 1, what);
- if (dead) {
- if (was_async) {
- rreq->work.func = netfs_free_request;
- if (!queue_work(system_unbound_wq, &rreq->work))
- BUG();
- } else {
- netfs_free_request(&rreq->work);
+ if (rreq) {
+ debug_id = rreq->debug_id;
+ dead = __refcount_dec_and_test(&rreq->ref, &r);
+ trace_netfs_rreq_ref(debug_id, r - 1, what);
+ if (dead) {
+ if (was_async) {
+ rreq->work.func = netfs_free_request;
+ if (!queue_work(system_unbound_wq, &rreq->work))
+ BUG();
+ } else {
+ netfs_free_request(&rreq->work);
+ }
}
}
}
@@ -112,8 +140,11 @@ struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq
{
struct netfs_io_subrequest *subreq;
- subreq = kzalloc(sizeof(struct netfs_io_subrequest), GFP_KERNEL);
+ subreq = kzalloc(rreq->netfs_ops->io_subrequest_size ?:
+ sizeof(struct netfs_io_subrequest),
+ GFP_KERNEL);
if (subreq) {
+ INIT_WORK(&subreq->work, NULL);
INIT_LIST_HEAD(&subreq->rreq_link);
refcount_set(&subreq->ref, 2);
subreq->rreq = rreq;
@@ -140,6 +171,8 @@ static void netfs_free_subrequest(struct netfs_io_subrequest *subreq,
struct netfs_io_request *rreq = subreq->rreq;
trace_netfs_sreq(subreq, netfs_sreq_trace_free);
+ if (rreq->netfs_ops->free_subrequest)
+ rreq->netfs_ops->free_subrequest(subreq);
kfree(subreq);
netfs_stat_d(&netfs_n_rh_sreq);
netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq);
diff --git a/fs/netfs/output.c b/fs/netfs/output.c
new file mode 100644
index 000000000000..625eb68f3e5a
--- /dev/null
+++ b/fs/netfs/output.c
@@ -0,0 +1,478 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Network filesystem high-level write support.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
+#include "internal.h"
+
+/**
+ * netfs_create_write_request - Create a write operation.
+ * @wreq: The write request this is storing from.
+ * @dest: The destination type (NETFS_UPLOAD_TO_SERVER or NETFS_WRITE_TO_CACHE)
+ * @start: Start of the region this write will modify
+ * @len: Length of the modification
+ * @worker: The worker function to handle the write(s)
+ *
+ * Allocate a write subrequest, initialise it, give it a window onto the
+ * request's I/O iterator covering [@start, @start + @len), count it in
+ * @wreq->nr_outstanding and append it to @wreq->subrequests.
+ *
+ * Returns the new subrequest, or NULL if allocation failed.  Any @dest other
+ * than the two listed above hits BUG().
+ */
+struct netfs_io_subrequest *netfs_create_write_request(struct netfs_io_request *wreq,
+						       enum netfs_io_source dest,
+						       loff_t start, size_t len,
+						       work_func_t worker)
+{
+	struct netfs_io_subrequest *sreq = netfs_alloc_subrequest(wreq);
+
+	if (!sreq)
+		return NULL;
+
+	INIT_WORK(&sreq->work, worker);
+	sreq->source		= dest;
+	sreq->start		= start;
+	sreq->len		= len;
+	sreq->debug_index	= wreq->subreq_counter++;
+
+	switch (sreq->source) {
+	case NETFS_UPLOAD_TO_SERVER:
+		netfs_stat(&netfs_n_wh_upload);
+		break;
+	case NETFS_WRITE_TO_CACHE:
+		netfs_stat(&netfs_n_wh_write);
+		break;
+	default:
+		BUG();
+	}
+
+	/* Copy the request's iterator and narrow it to this subrequest's span. */
+	sreq->io_iter = wreq->io_iter;
+	iov_iter_advance(&sreq->io_iter, sreq->start - wreq->start);
+	iov_iter_truncate(&sreq->io_iter, sreq->len);
+
+	trace_netfs_sreq_ref(wreq->debug_id, sreq->debug_index,
+			     refcount_read(&sreq->ref),
+			     netfs_sreq_trace_new);
+	atomic_inc(&wreq->nr_outstanding);
+	list_add_tail(&sreq->rreq_link, &wreq->subrequests);
+	trace_netfs_sreq(sreq, netfs_sreq_trace_prepare);
+
+	return sreq;
+}
+EXPORT_SYMBOL(netfs_create_write_request);
+
+/*
+ * Process a completed write request once all the component operations have
+ * been completed.
+ *
+ * @wreq: The write request whose nr_outstanding count has reached zero.
+ * @was_async: Passed on to netfs_clear_subrequests() and netfs_put_request().
+ *
+ * This totals up the contiguous data written, folds subrequest errors into
+ * the request, runs the request's cleanup hook, ends DIO accounting, wakes
+ * anyone waiting on NETFS_RREQ_IN_PROGRESS, completes the iocb (if any) and
+ * finally drops the ref that was held for the outstanding count.
+ *
+ * Completion is always carried through to the end: synchronous callers sleep
+ * uninterruptibly on NETFS_RREQ_IN_PROGRESS, so bailing out early would leave
+ * them stuck and leak the request.
+ */
+static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async)
+{
+	struct netfs_io_subrequest *subreq;
+	struct netfs_inode *ctx = netfs_inode(wreq->inode);
+	size_t transferred = 0;
+
+	_enter("R=%x[]", wreq->debug_id);
+
+	trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
+
+	/* Sum the data written from the front of the request, stopping at the
+	 * first subrequest that failed, wrote nothing or came up short.
+	 */
+	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
+		if (subreq->error || subreq->transferred == 0)
+			break;
+		transferred += subreq->transferred;
+		if (subreq->transferred < subreq->len)
+			break;
+	}
+	wreq->transferred = transferred;
+
+	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
+		if (!subreq->error)
+			continue;
+		switch (subreq->source) {
+		case NETFS_UPLOAD_TO_SERVER:
+			/* Depending on the type of failure, this may prevent
+			 * writeback completion unless we're in disconnected
+			 * mode.
+			 */
+			if (!wreq->error)
+				wreq->error = subreq->error;
+			break;
+
+		case NETFS_WRITE_TO_CACHE:
+			/* Failure doesn't prevent writeback completion unless
+			 * we're in disconnected mode.  Not every filesystem
+			 * supplies ->invalidate_cache(), so check before
+			 * calling it.
+			 */
+			if (subreq->error != -ENOBUFS &&
+			    ctx->ops->invalidate_cache)
+				ctx->ops->invalidate_cache(wreq);
+			break;
+
+		default:
+			/* Shouldn't happen.  Record an error, but keep going so
+			 * that the request still gets completed and released.
+			 */
+			WARN_ON_ONCE(1);
+			if (!wreq->error)
+				wreq->error = -EIO;
+			break;
+		}
+	}
+
+	wreq->cleanup(wreq);
+
+	/* Only invalidate the pagecache over data that was actually written;
+	 * with nothing transferred, "start + transferred - 1" would step back
+	 * before the start of the region (and wrap if start is 0).
+	 */
+	if (wreq->origin == NETFS_DIO_WRITE &&
+	    wreq->transferred > 0 &&
+	    wreq->mapping->nrpages) {
+		pgoff_t first = wreq->start >> PAGE_SHIFT;
+		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
+		invalidate_inode_pages2_range(wreq->mapping, first, last);
+	}
+
+	if (wreq->origin == NETFS_DIO_WRITE)
+		inode_dio_end(wreq->inode);
+
+	_debug("finished");
+	trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
+	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
+	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
+
+	if (wreq->iocb) {
+		wreq->iocb->ki_pos += transferred;
+		if (wreq->iocb->ki_complete)
+			wreq->iocb->ki_complete(
+				wreq->iocb, wreq->error ? wreq->error : transferred);
+	}
+
+	netfs_clear_subrequests(wreq, was_async);
+	netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete);
+}
+
+/*
+ * Deal with the completion of a write subrequest, whether it was an upload to
+ * the server or a write to the cache.
+ *
+ * @_op: The subrequest (struct netfs_io_subrequest *) that has finished.
+ * @transferred_or_error: The number of bytes written by this attempt, or a
+ *	negative error code.
+ * @was_async: Passed on to netfs_write_terminated() and
+ *	netfs_put_subrequest().
+ *
+ * The result is recorded on the subrequest, the parent request's flags and
+ * error are updated as appropriate, and the request's nr_outstanding count is
+ * decremented.  If that reaches zero, the whole request is completed from
+ * here.  One ref on the subrequest is dropped before returning.
+ */
+void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
+ bool was_async)
+{
+ struct netfs_io_subrequest *subreq = _op;
+ struct netfs_io_request *wreq = subreq->rreq;
+ unsigned int u;
+
+ _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
+
+ /* Account the completion against the appropriate statistic. */
+ switch (subreq->source) {
+ case NETFS_UPLOAD_TO_SERVER:
+ netfs_stat(&netfs_n_wh_upload_done);
+ break;
+ case NETFS_WRITE_TO_CACHE:
+ netfs_stat(&netfs_n_wh_write_done);
+ break;
+ case NETFS_INVALID_WRITE:
+ break;
+ default:
+ BUG();
+ }
+
+ if (IS_ERR_VALUE(transferred_or_error)) {
+ subreq->error = transferred_or_error;
+ trace_netfs_failure(wreq, subreq, transferred_or_error,
+ netfs_fail_write);
+ goto failed;
+ }
+
+ /* Clamp (with a warning) a result that claims more than was asked for. */
+ if (WARN(transferred_or_error > subreq->len - subreq->transferred,
+ "Subreq excess write: R%x[%x] %zd > %zu - %zu",
+ wreq->debug_id, subreq->debug_index,
+ transferred_or_error, subreq->len, subreq->transferred))
+ transferred_or_error = subreq->len - subreq->transferred;
+
+ subreq->error = 0;
+ subreq->transferred += transferred_or_error;
+
+ /* Sanity check: what remains in the iterator should match what remains
+ * of the subrequest.
+ */
+ if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
+ pr_warn("R=%08x[%u] ITER POST-MISMATCH %zx != %zx-%zx %x\n",
+ wreq->debug_id, subreq->debug_index,
+ iov_iter_count(&subreq->io_iter), subreq->len,
+ subreq->transferred, subreq->io_iter.iter_type);
+
+ if (subreq->transferred < subreq->len)
+ goto incomplete;
+
+ __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
+out:
+ trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
+
+ /* If we decrement nr_outstanding to 0, the ref belongs to us. */
+ u = atomic_dec_return(&wreq->nr_outstanding);
+ if (u == 0)
+ netfs_write_terminated(wreq, was_async);
+ else if (u == 1)
+ wake_up_var(&wreq->nr_outstanding);
+
+ netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
+ return;
+
+incomplete:
+ /* A short write.  A zero-byte result is tolerated once (NO_PROGRESS gets
+ * set); a second one in a row is turned into -ENODATA.
+ */
+ if (transferred_or_error == 0) {
+ if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
+ subreq->error = -ENODATA;
+ goto failed;
+ }
+ } else {
+ __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
+ }
+
+ __set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
+ set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
+ goto out;
+
+failed:
+ /* A failed cache write only marks the request incomplete; a failed
+ * upload marks the request failed and sets its error.
+ */
+ switch (subreq->source) {
+ case NETFS_WRITE_TO_CACHE:
+ netfs_stat(&netfs_n_wh_write_failed);
+ set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
+ break;
+ case NETFS_UPLOAD_TO_SERVER:
+ netfs_stat(&netfs_n_wh_upload_failed);
+ set_bit(NETFS_RREQ_FAILED, &wreq->flags);
+ wreq->error = subreq->error;
+ break;
+ default:
+ break;
+ }
+ goto out;
+}
+EXPORT_SYMBOL(netfs_write_subrequest_terminated);
+
+/*
+ * Hand a cache-write subrequest to the cache backend.  Completion is reported
+ * through netfs_write_subrequest_terminated() with the subrequest as its
+ * private data.
+ */
+static void netfs_write_to_cache_op(struct netfs_io_subrequest *subreq)
+{
+	struct netfs_cache_resources *cache = &subreq->rreq->cache_resources;
+
+	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+
+	cache->ops->write(cache, subreq->start, &subreq->io_iter,
+			  netfs_write_subrequest_terminated, subreq);
+}
+
+/*
+ * Workqueue entry point: recover the subrequest from its embedded work item
+ * and issue the cache write.
+ */
+static void netfs_write_to_cache_op_worker(struct work_struct *work)
+{
+	netfs_write_to_cache_op(container_of(work, struct netfs_io_subrequest, work));
+}
+
+/**
+ * netfs_queue_write_request - Queue a write request for attention
+ * @subreq: The write request to be queued
+ *
+ * Queue the specified write request on system_unbound_wq for processing by a
+ * worker thread.  The caller's ref on the request is passed to the worker; if
+ * queue_work() reports that the item was not queued, that ref is dropped here
+ * instead.
+ */
+void netfs_queue_write_request(struct netfs_io_subrequest *subreq)
+{
+	if (queue_work(system_unbound_wq, &subreq->work))
+		return;
+
+	netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_wip);
+}
+EXPORT_SYMBOL(netfs_queue_write_request);
+
+/*
+ * Set up an op for writing to the cache.
+ *
+ * If the cookie isn't enabled, NETFS_RREQ_WRITE_TO_CACHE is cleared on the
+ * request.  Otherwise a cache write operation is begun, the cache is asked to
+ * prepare for (and possibly adjust) the span to be written and a single
+ * subrequest covering that span is created and issued.  Any failure along the
+ * way silently abandons the cache write.
+ */
+static void netfs_set_up_write_to_cache(struct netfs_io_request *wreq)
+{
+	struct netfs_cache_resources *cres = &wreq->cache_resources;
+	struct netfs_io_subrequest *subreq;
+	struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(wreq->inode));
+	loff_t pos = wreq->start;
+	size_t count = wreq->len;
+
+	if (!fscache_cookie_enabled(cookie)) {
+		clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags);
+		return;
+	}
+
+	_debug("write to cache");
+	if (fscache_begin_write_operation(cres, cookie) < 0)
+		return;
+
+	/* pos and count are passed by pointer: the cache may change them. */
+	if (cres->ops->prepare_write(cres, &pos, &count, wreq->upper_len,
+				     i_size_read(wreq->inode), true) < 0)
+		return;
+
+	subreq = netfs_create_write_request(wreq, NETFS_WRITE_TO_CACHE, pos, count,
+					    netfs_write_to_cache_op_worker);
+	if (subreq)
+		netfs_write_to_cache_op(subreq);
+}
+
+/*
+ * Begin the process of writing out a chunk of data.
+ *
+ * @wreq: The write request to start.
+ * @may_wait: True to wait here for the request to complete.
+ * @what: Trace tag passed to trace_netfs_write().
+ *
+ * We are given a write request that holds a series of dirty regions and
+ * (partially) covers a sequence of folios, all of which are present. The
+ * pages must have been marked as writeback as appropriate.
+ *
+ * We need to perform the following steps:
+ *
+ * (1) If encrypting, create an output buffer and encrypt each block of the
+ * data into it, otherwise the output buffer will point to the original
+ * folios.
+ *
+ * (2) If the data is to be cached, set up a write op for the entire output
+ * buffer to the cache, if the cache wants to accept it.
+ *
+ * (3) If the data is to be uploaded (ie. not merely cached):
+ *
+ * (a) If the data is to be compressed, create a compression buffer and
+ * compress the data into it.
+ *
+ * (b) For each destination we want to upload to, set up write ops to write
+ * to that destination. We may need multiple writes if the data is not
+ * contiguous or the span exceeds wsize for a server.
+ *
+ * Returns -EIO if the request is zero-sized, -EIOCBQUEUED if @may_wait is
+ * false, and otherwise the request's error field once NETFS_RREQ_IN_PROGRESS
+ * has been cleared.
+ */
+int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
+ enum netfs_write_trace what)
+{
+ struct netfs_inode *ctx = netfs_inode(wreq->inode);
+
+ _enter("R=%x %llx-%llx f=%lx",
+ wreq->debug_id, wreq->start, wreq->start + wreq->len - 1,
+ wreq->flags);
+
+ trace_netfs_write(wreq, what);
+ if (wreq->len == 0 || wreq->iter.count == 0) {
+ pr_err("Zero-sized write [R=%x]\n", wreq->debug_id);
+ return -EIO;
+ }
+
+ /* Paired with the inode_dio_end() in netfs_write_terminated(). */
+ if (wreq->origin == NETFS_DIO_WRITE)
+ inode_dio_begin(wreq->inode);
+
+ wreq->io_iter = wreq->iter;
+
+ /* ->outstanding > 0 carries a ref */
+ netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
+ /* Start the count at 1 so that the request can't complete while
+ * subrequests are still being generated below.
+ */
+ atomic_set(&wreq->nr_outstanding, 1);
+
+ /* Start the encryption/compression going. We can do that in the
+ * background whilst we generate a list of write ops that we want to
+ * perform.
+ */
+ // TODO: Encrypt or compress the region as appropriate
+
+ /* We need to write all of the region to the cache */
+ if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
+ netfs_set_up_write_to_cache(wreq);
+
+ /* However, we don't necessarily write all of the region to the server.
+ * Caching of reads is being managed this way also.
+ */
+ if (test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
+ ctx->ops->create_write_requests(wreq, wreq->start, wreq->len);
+
+ /* Drop the initial count; if nothing is left outstanding, complete the
+ * request from here.
+ */
+ if (atomic_dec_and_test(&wreq->nr_outstanding))
+ netfs_write_terminated(wreq, false);
+
+ if (!may_wait)
+ return -EIOCBQUEUED;
+
+ wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
+ TASK_UNINTERRUPTIBLE);
+ return wreq->error;
+}
+
+/*
+ * Begin a write operation for writing through the pagecache.
+ *
+ * Allocates a NETFS_WRITETHROUGH request starting at the iocb's position,
+ * marks it for upload to the server and points its iterators at the inode's
+ * pagecache with an initial length of zero.  The outstanding count is primed
+ * to 1 and carries its own ref on the request.
+ *
+ * Returns the new request or an ERR_PTR from netfs_alloc_request().
+ */
+struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
+{
+	struct file *file = iocb->ki_filp;
+	struct netfs_io_request *wreq =
+		netfs_alloc_request(file->f_mapping, file, iocb->ki_pos, len,
+				    NETFS_WRITETHROUGH);
+
+	if (IS_ERR(wreq))
+		return wreq;
+
+	trace_netfs_write(wreq, netfs_write_trace_writethrough);
+
+	__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
+	iov_iter_xarray(&wreq->iter, ITER_SOURCE, &wreq->mapping->i_pages, wreq->start, 0);
+	wreq->io_iter = wreq->iter;
+
+	/* ->outstanding > 0 carries a ref */
+	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
+	atomic_set(&wreq->nr_outstanding, 1);
+	return wreq;
+}
+
+/*
+ * Ask the filesystem to create write requests for the data that has been
+ * added to a writethrough request but not yet submitted.  Unless this is the
+ * final submission, only a whole multiple of wsize is sent.
+ */
+static void netfs_submit_writethrough(struct netfs_io_request *wreq, bool final)
+{
+	struct netfs_inode *ictx = netfs_inode(wreq->inode);
+	unsigned long long pos;
+	size_t count;
+
+	if (!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
+		return;
+
+	pos = wreq->start + wreq->submitted;
+	count = wreq->iter.count - wreq->submitted;
+	if (!final)
+		count -= count % wreq->wsize; /* Round to number of maximum packets */
+
+	ictx->ops->create_write_requests(wreq, pos, count);
+	wreq->submitted += count;
+}
+
+/*
+ * Advance the state of the write operation used when writing through the
+ * pagecache.  @copied bytes have been copied into the pagecache and are
+ * appended to the request.  If the copy ran to the end of a page and at least
+ * wsize bytes are now waiting to be submitted, a submission is triggered.
+ *
+ * Returns the request's current error value.
+ */
+int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end)
+{
+	_enter("ic=%zu sb=%zu ws=%u cp=%zu tp=%u",
+	       wreq->iter.count, wreq->submitted, wreq->wsize, copied, to_page_end);
+
+	wreq->iter.count += copied;
+	wreq->io_iter.count += copied;
+
+	if (to_page_end) {
+		size_t pending = wreq->io_iter.count - wreq->submitted;
+
+		if (pending >= wreq->wsize)
+			netfs_submit_writethrough(wreq, false);
+	}
+
+	return wreq->error;
+}
+
+/*
+ * End a write operation used when writing through the pagecache.
+ *
+ * Any data not yet submitted is submitted, the initial outstanding count is
+ * dropped (completing the request here if nothing else is outstanding) and,
+ * for a synchronous iocb, completion is waited for.  The caller's ref on the
+ * request is dropped.
+ *
+ * Returns the request's error for a synchronous iocb, -EIOCBQUEUED otherwise.
+ */
+int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb)
+{
+	int ret;
+
+	_enter("ic=%zu sb=%zu ws=%u",
+	       wreq->iter.count, wreq->submitted, wreq->wsize);
+
+	if (wreq->submitted < wreq->io_iter.count)
+		netfs_submit_writethrough(wreq, true);
+
+	if (atomic_dec_and_test(&wreq->nr_outstanding))
+		netfs_write_terminated(wreq, false);
+
+	if (is_sync_kiocb(iocb)) {
+		wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
+			    TASK_UNINTERRUPTIBLE);
+		ret = wreq->error;
+	} else {
+		ret = -EIOCBQUEUED;
+	}
+
+	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+	return ret;
+}
diff --git a/fs/netfs/stats.c b/fs/netfs/stats.c
index 5510a7a14a40..deeba9f9dcf5 100644
--- a/fs/netfs/stats.c
+++ b/fs/netfs/stats.c
@@ -9,6 +9,8 @@
#include <linux/seq_file.h>
#include "internal.h"
+atomic_t netfs_n_rh_dio_read;
+atomic_t netfs_n_rh_dio_write;
atomic_t netfs_n_rh_readahead;
atomic_t netfs_n_rh_readpage;
atomic_t netfs_n_rh_rreq;
@@ -27,32 +29,48 @@ atomic_t netfs_n_rh_write_begin;
atomic_t netfs_n_rh_write_done;
atomic_t netfs_n_rh_write_failed;
atomic_t netfs_n_rh_write_zskip;
+atomic_t netfs_n_wh_wstream_conflict;
+atomic_t netfs_n_wh_upload;
+atomic_t netfs_n_wh_upload_done;
+atomic_t netfs_n_wh_upload_failed;
+atomic_t netfs_n_wh_write;
+atomic_t netfs_n_wh_write_done;
+atomic_t netfs_n_wh_write_failed;
-void netfs_stats_show(struct seq_file *m)
+int netfs_stats_show(struct seq_file *m, void *v)
{
- seq_printf(m, "RdHelp : RA=%u RP=%u WB=%u WBZ=%u rr=%u sr=%u\n",
+ seq_printf(m, "Netfs : DR=%u DW=%u RA=%u RP=%u WB=%u WBZ=%u\n",
+ atomic_read(&netfs_n_rh_dio_read),
+ atomic_read(&netfs_n_rh_dio_write),
atomic_read(&netfs_n_rh_readahead),
atomic_read(&netfs_n_rh_readpage),
atomic_read(&netfs_n_rh_write_begin),
- atomic_read(&netfs_n_rh_write_zskip),
- atomic_read(&netfs_n_rh_rreq),
- atomic_read(&netfs_n_rh_sreq));
- seq_printf(m, "RdHelp : ZR=%u sh=%u sk=%u\n",
+ atomic_read(&netfs_n_rh_write_zskip));
+ seq_printf(m, "Netfs : ZR=%u sh=%u sk=%u\n",
atomic_read(&netfs_n_rh_zero),
atomic_read(&netfs_n_rh_short_read),
atomic_read(&netfs_n_rh_write_zskip));
- seq_printf(m, "RdHelp : DL=%u ds=%u df=%u di=%u\n",
+ seq_printf(m, "Netfs : DL=%u ds=%u df=%u di=%u\n",
atomic_read(&netfs_n_rh_download),
atomic_read(&netfs_n_rh_download_done),
atomic_read(&netfs_n_rh_download_failed),
atomic_read(&netfs_n_rh_download_instead));
- seq_printf(m, "RdHelp : RD=%u rs=%u rf=%u\n",
+ seq_printf(m, "Netfs : RD=%u rs=%u rf=%u\n",
atomic_read(&netfs_n_rh_read),
atomic_read(&netfs_n_rh_read_done),
atomic_read(&netfs_n_rh_read_failed));
- seq_printf(m, "RdHelp : WR=%u ws=%u wf=%u\n",
- atomic_read(&netfs_n_rh_write),
- atomic_read(&netfs_n_rh_write_done),
- atomic_read(&netfs_n_rh_write_failed));
+ seq_printf(m, "Netfs : UL=%u us=%u uf=%u\n",
+ atomic_read(&netfs_n_wh_upload),
+ atomic_read(&netfs_n_wh_upload_done),
+ atomic_read(&netfs_n_wh_upload_failed));
+ seq_printf(m, "Netfs : WR=%u ws=%u wf=%u\n",
+ atomic_read(&netfs_n_wh_write),
+ atomic_read(&netfs_n_wh_write_done),
+ atomic_read(&netfs_n_wh_write_failed));
+ seq_printf(m, "Netfs : rr=%u sr=%u wsc=%u\n",
+ atomic_read(&netfs_n_rh_rreq),
+ atomic_read(&netfs_n_rh_sreq),
+ atomic_read(&netfs_n_wh_wstream_conflict));
+ return fscache_stats_show(m);
}
EXPORT_SYMBOL(netfs_stats_show);
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 01ac733a6320..f7e32d76e34d 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -169,8 +169,8 @@ config ROOT_NFS
config NFS_FSCACHE
bool "Provide NFS client caching support"
- depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y
- select NETFS_SUPPORT
+ depends on NFS_FS=m && NETFS_SUPPORT || NFS_FS=y && NETFS_SUPPORT=y
+ select FSCACHE
help
Say Y here if you want NFS data to be cached locally on disc through
the general filesystem cache manager
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index b05717fe0d4e..2d1bfee225c3 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -274,12 +274,6 @@ static void nfs_netfs_free_request(struct netfs_io_request *rreq)
put_nfs_open_context(rreq->netfs_priv);
}
-static inline int nfs_netfs_begin_cache_operation(struct netfs_io_request *rreq)
-{
- return fscache_begin_read_operation(&rreq->cache_resources,
- netfs_i_cookie(netfs_inode(rreq->inode)));
-}
-
static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sreq)
{
struct nfs_netfs_io_data *netfs;
@@ -387,7 +381,6 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr)
const struct netfs_request_ops nfs_netfs_ops = {
.init_request = nfs_netfs_init_request,
.free_request = nfs_netfs_free_request,
- .begin_cache_operation = nfs_netfs_begin_cache_operation,
.issue_read = nfs_netfs_issue_read,
.clamp_length = nfs_netfs_clamp_length
};
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index 5407ab8c8783..e3cb4923316b 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -80,7 +80,7 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
}
static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
{
- netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops);
+ netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false);
}
extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr);
extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr);
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 99b0ade833aa..aa8450c9b849 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -430,7 +430,7 @@ static void
cifs_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
- if (inode->i_state & I_PINNING_FSCACHE_WB)
+ if (inode->i_state & I_PINNING_NETFS_WB)
cifs_fscache_unuse_inode_cookie(inode, true);
cifs_fscache_release_inode_cookie(inode);
clear_inode(inode);
@@ -793,8 +793,7 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root)
static int cifs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
- fscache_unpin_writeback(wbc, cifs_inode_cookie(inode));
- return 0;
+ return netfs_unpin_writeback(inode, wbc);
}
static int cifs_drop_inode(struct inode *inode)
@@ -1222,7 +1221,7 @@ static int cifs_precopy_set_eof(struct inode *src_inode, struct cifsInodeInfo *s
if (rc < 0)
goto set_failed;
- netfs_resize_file(&src_cifsi->netfs, src_end);
+ netfs_resize_file(&src_cifsi->netfs, src_end, true);
fscache_resize_cookie(cifs_inode_cookie(src_inode), src_end);
return 0;
@@ -1353,7 +1352,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
smb_file_src, smb_file_target, off, len, destoff);
if (rc == 0 && new_size > i_size_read(target_inode)) {
truncate_setsize(target_inode, new_size);
- netfs_resize_file(&target_cifsi->netfs, new_size);
+ netfs_resize_file(&target_cifsi->netfs, new_size, true);
fscache_resize_cookie(cifs_inode_cookie(target_inode),
new_size);
}
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 1b4262aff8fa..3a213432775b 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -5043,27 +5043,13 @@ static void cifs_swap_deactivate(struct file *file)
/* do we need to unpin (or unlock) the file */
}
-/*
- * Mark a page as having been made dirty and thus needing writeback. We also
- * need to pin the cache object to write back to.
- */
-#ifdef CONFIG_CIFS_FSCACHE
-static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
-{
- return fscache_dirty_folio(mapping, folio,
- cifs_inode_cookie(mapping->host));
-}
-#else
-#define cifs_dirty_folio filemap_dirty_folio
-#endif
-
const struct address_space_operations cifs_addr_ops = {
.read_folio = cifs_read_folio,
.readahead = cifs_readahead,
.writepages = cifs_writepages,
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
- .dirty_folio = cifs_dirty_folio,
+ .dirty_folio = netfs_dirty_folio,
.release_folio = cifs_release_folio,
.direct_IO = cifs_direct_io,
.invalidate_folio = cifs_invalidate_folio,
@@ -5087,7 +5073,7 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
.writepages = cifs_writepages,
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
- .dirty_folio = cifs_dirty_folio,
+ .dirty_folio = netfs_dirty_folio,
.release_folio = cifs_release_folio,
.invalidate_folio = cifs_invalidate_folio,
.launder_folio = cifs_launder_folio,
diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c
index e5cad149f5a2..c4a3cb736881 100644
--- a/fs/smb/client/fscache.c
+++ b/fs/smb/client/fscache.c
@@ -180,7 +180,7 @@ static int fscache_fallback_write_pages(struct inode *inode, loff_t start, size_
if (ret < 0)
return ret;
- ret = cres.ops->prepare_write(&cres, &start, &len, i_size_read(inode),
+ ret = cres.ops->prepare_write(&cres, &start, &len, len, i_size_read(inode),
no_space_allocated_yet);
if (ret == 0)
ret = fscache_write(&cres, start, &iter, NULL, NULL);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e6ba0cc6f2ee..ed5966a70495 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2371,7 +2371,7 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
#define I_CREATING (1 << 15)
#define I_DONTCACHE (1 << 16)
#define I_SYNC_QUEUED (1 << 17)
-#define I_PINNING_FSCACHE_WB (1 << 18)
+#define I_PINNING_NETFS_WB (1 << 18)
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index a174cedf4d90..bdf7f3eddf0a 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -189,17 +189,20 @@ extern atomic_t fscache_n_write;
extern atomic_t fscache_n_no_write_space;
extern atomic_t fscache_n_no_create_space;
extern atomic_t fscache_n_culled;
+extern atomic_t fscache_n_dio_misfit;
#define fscache_count_read() atomic_inc(&fscache_n_read)
#define fscache_count_write() atomic_inc(&fscache_n_write)
#define fscache_count_no_write_space() atomic_inc(&fscache_n_no_write_space)
#define fscache_count_no_create_space() atomic_inc(&fscache_n_no_create_space)
#define fscache_count_culled() atomic_inc(&fscache_n_culled)
+#define fscache_count_dio_misfit() atomic_inc(&fscache_n_dio_misfit)
#else
#define fscache_count_read() do {} while(0)
#define fscache_count_write() do {} while(0)
#define fscache_count_no_write_space() do {} while(0)
#define fscache_count_no_create_space() do {} while(0)
#define fscache_count_culled() do {} while(0)
+#define fscache_count_dio_misfit() do {} while(0)
#endif
#endif /* _LINUX_FSCACHE_CACHE_H */
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 8e312c8323a8..6e8562cbcc43 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -437,9 +437,6 @@ const struct netfs_cache_ops *fscache_operation_valid(const struct netfs_cache_r
* indicates the cache resources to which the operation state should be
* attached; @cookie indicates the cache object that will be accessed.
*
- * This is intended to be called from the ->begin_cache_operation() netfs lib
- * operation as implemented by the network filesystem.
- *
* @cres->inval_counter is set from @cookie->inval_counter for comparison at
* the end of the operation. This allows invalidation during the operation to
* be detected by the caller.
@@ -629,48 +626,6 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie,
}
-#if __fscache_available
-bool fscache_dirty_folio(struct address_space *mapping, struct folio *folio,
- struct fscache_cookie *cookie);
-#else
-#define fscache_dirty_folio(MAPPING, FOLIO, COOKIE) \
- filemap_dirty_folio(MAPPING, FOLIO)
-#endif
-
-/**
- * fscache_unpin_writeback - Unpin writeback resources
- * @wbc: The writeback control
- * @cookie: The cookie referring to the cache object
- *
- * Unpin the writeback resources pinned by fscache_dirty_folio(). This is
- * intended to be called by the netfs's ->write_inode() method.
- */
-static inline void fscache_unpin_writeback(struct writeback_control *wbc,
- struct fscache_cookie *cookie)
-{
- if (wbc->unpinned_fscache_wb)
- fscache_unuse_cookie(cookie, NULL, NULL);
-}
-
-/**
- * fscache_clear_inode_writeback - Clear writeback resources pinned by an inode
- * @cookie: The cookie referring to the cache object
- * @inode: The inode to clean up
- * @aux: Auxiliary data to apply to the inode
- *
- * Clear any writeback resources held by an inode when the inode is evicted.
- * This must be called before clear_inode() is called.
- */
-static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie,
- struct inode *inode,
- const void *aux)
-{
- if (inode->i_state & I_PINNING_FSCACHE_WB) {
- loff_t i_size = i_size_read(inode);
- fscache_unuse_cookie(cookie, aux, &i_size);
- }
-}
-
/**
* fscache_note_page_release - Note that a netfs page got released
* @cookie: The cookie corresponding to the file
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index b11a84f6c32b..100cbb261269 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -109,11 +109,18 @@ static inline int wait_on_page_fscache_killable(struct page *page)
return folio_wait_private_2_killable(page_folio(page));
}
+/* Marks used on xarray-based buffers */
+#define NETFS_BUF_PUT_MARK XA_MARK_0 /* - Page needs putting */
+#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1 /* - Page needs wb/dirty flag wrangling */
+
enum netfs_io_source {
NETFS_FILL_WITH_ZEROES,
NETFS_DOWNLOAD_FROM_SERVER,
NETFS_READ_FROM_CACHE,
NETFS_INVALID_READ,
+ NETFS_UPLOAD_TO_SERVER,
+ NETFS_WRITE_TO_CACHE,
+ NETFS_INVALID_WRITE,
} __mode(byte);
typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error,
@@ -129,9 +136,57 @@ struct netfs_inode {
struct fscache_cookie *cache;
#endif
loff_t remote_i_size; /* Size of the remote file */
+ loff_t zero_point; /* Size after which we assume there's no data
+ * on the server */
+ unsigned long flags;
+#define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */
+#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */
+#define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */
+#define NETFS_ICTX_NO_WRITE_STREAMING 3 /* Don't engage in write-streaming */
+};
+
+/*
+ * A netfs group - for instance a ceph snap. This is marked on dirty pages and
+ * pages marked with a group must be flushed before they can be written under
+ * the domain of another group.
+ */
+struct netfs_group {
+ refcount_t ref; /* Reference count on this group */
+ void (*free)(struct netfs_group *netfs_group); /* Destructor hook; presumably run when ref drops to zero - TODO confirm against the put path */
};
/*
+ * Information about a dirty page (attached only if necessary).
+ * folio->private
+ */
+struct netfs_folio {
+ struct netfs_group *netfs_group; /* Filesystem's grouping marker (or NULL). */
+ unsigned int dirty_offset; /* Write-streaming dirty data offset (presumably bytes from the start of the folio - TODO confirm) */
+ unsigned int dirty_len; /* Write-streaming dirty data length (presumably in bytes - TODO confirm) */
+};
+#define NETFS_FOLIO_INFO 0x1UL /* OR'd with folio->private. */
+
+static inline struct netfs_folio *netfs_folio_info(struct folio *folio)
+{
+	unsigned long word = (unsigned long)folio_get_private(folio);
+	bool tagged = word & NETFS_FOLIO_INFO;
+
+	/* ->private holds a netfs_folio only when tagged with NETFS_FOLIO_INFO. */
+	return tagged ? (struct netfs_folio *)(word & ~NETFS_FOLIO_INFO) : NULL;
+}
+
+static inline struct netfs_group *netfs_folio_group(struct folio *folio)
+{
+	void *priv = folio_get_private(folio);
+	struct netfs_folio *finfo = netfs_folio_info(folio);
+
+	/*
+	 * An untagged ->private is itself the group pointer (or NULL).
+	 */
+	return finfo ? finfo->netfs_group : priv;
+}
+
+/*
* Resources required to do operations on a cache.
*/
struct netfs_cache_resources {
@@ -143,17 +198,24 @@ struct netfs_cache_resources {
};
/*
- * Descriptor for a single component subrequest.
+ * Descriptor for a single component subrequest. Each operation represents an
+ * individual read/write from/to a server, a cache, a journal, etc.
+ *
+ * The buffer iterator is persistent for the life of the subrequest struct and
+ * the pages it points to can be relied on to exist for the duration.
*/
struct netfs_io_subrequest {
struct netfs_io_request *rreq; /* Supervising I/O request */
+ struct work_struct work;
struct list_head rreq_link; /* Link in rreq->subrequests */
+ struct iov_iter io_iter; /* Iterator for this subrequest */
loff_t start; /* Where to start the I/O */
size_t len; /* Size of the I/O */
size_t transferred; /* Amount of data transferred */
refcount_t ref;
short error; /* 0 or error that occurred */
unsigned short debug_index; /* Index in list (for debugging output) */
+ unsigned int max_nr_segs; /* 0 or max number of segments in an iterator */
enum netfs_io_source source; /* Where to read from/write to */
unsigned long flags;
#define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */
@@ -168,6 +230,13 @@ enum netfs_io_origin {
NETFS_READAHEAD, /* This read was triggered by readahead */
NETFS_READPAGE, /* This read is a synchronous read */
NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
+ NETFS_WRITEBACK, /* This write was triggered by writepages */
+ NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */
+ NETFS_LAUNDER_WRITE, /* This is triggered by ->launder_folio() */
+ NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */
+ NETFS_DIO_READ, /* This is a direct I/O read */
+ NETFS_DIO_WRITE, /* This is a direct I/O write */
+ nr__netfs_io_origin
} __mode(byte);
/*
@@ -175,19 +244,34 @@ enum netfs_io_origin {
* operations to a variety of data stores and then stitch the result together.
*/
struct netfs_io_request {
- struct work_struct work;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
struct inode *inode; /* The file being accessed */
struct address_space *mapping; /* The mapping being accessed */
+ struct kiocb *iocb; /* AIO completion vector */
struct netfs_cache_resources cache_resources;
+ struct list_head proc_link; /* Link in netfs_iorequests */
struct list_head subrequests; /* Contributory I/O operations */
+ struct iov_iter iter; /* Unencrypted-side iterator */
+ struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */
void *netfs_priv; /* Private data for the netfs */
+ struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */
+ unsigned int direct_bv_count; /* Number of elements in direct_bv[] */
unsigned int debug_id;
+ unsigned int rsize; /* Maximum read size (0 for none) */
+ unsigned int wsize; /* Maximum write size (0 for none) */
+ unsigned int subreq_counter; /* Next subreq->debug_index */
atomic_t nr_outstanding; /* Number of ops in progress */
atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */
size_t submitted; /* Amount submitted for I/O so far */
size_t len; /* Length of the request */
+ size_t upper_len; /* Length can be extended to here */
+ size_t transferred; /* Amount to be indicated as transferred */
short error; /* 0 or error that occurred */
enum netfs_io_origin origin; /* Origin of the request */
+ bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */
loff_t i_size; /* Size of the file */
loff_t start; /* Start position */
pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
@@ -199,17 +283,25 @@ struct netfs_io_request {
#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */
#define NETFS_RREQ_FAILED 4 /* The request failed */
#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */
+#define NETFS_RREQ_WRITE_TO_CACHE 7 /* Need to write to the cache */
+#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */
+#define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */
+#define NETFS_RREQ_BLOCKED 10 /* We blocked */
const struct netfs_request_ops *netfs_ops;
+ void (*cleanup)(struct netfs_io_request *req);
};
/*
* Operations the network filesystem can/must provide to the helpers.
*/
struct netfs_request_ops {
+ unsigned int io_request_size; /* Alloc size for netfs_io_request struct */
+ unsigned int io_subrequest_size; /* Alloc size for netfs_io_subrequest struct */
int (*init_request)(struct netfs_io_request *rreq, struct file *file);
void (*free_request)(struct netfs_io_request *rreq);
- int (*begin_cache_operation)(struct netfs_io_request *rreq);
+ void (*free_subrequest)(struct netfs_io_subrequest *rreq);
+ /* Read request handling */
void (*expand_readahead)(struct netfs_io_request *rreq);
bool (*clamp_length)(struct netfs_io_subrequest *subreq);
void (*issue_read)(struct netfs_io_subrequest *subreq);
@@ -217,6 +309,14 @@ struct netfs_request_ops {
int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
struct folio **foliop, void **_fsdata);
void (*done)(struct netfs_io_request *rreq);
+
+ /* Modification handling */
+ void (*update_i_size)(struct inode *inode, loff_t i_size);
+
+ /* Write request handling */
+ void (*create_write_requests)(struct netfs_io_request *wreq,
+ loff_t start, size_t len);
+ void (*invalidate_cache)(struct netfs_io_request *wreq);
};
/*
@@ -229,8 +329,7 @@ enum netfs_read_from_hole {
};
/*
- * Table of operations for access to a cache. This is obtained by
- * rreq->ops->begin_cache_operation().
+ * Table of operations for access to a cache.
*/
struct netfs_cache_ops {
/* End an operation */
@@ -265,8 +364,8 @@ struct netfs_cache_ops {
* actually do.
*/
int (*prepare_write)(struct netfs_cache_resources *cres,
- loff_t *_start, size_t *_len, loff_t i_size,
- bool no_space_allocated_yet);
+ loff_t *_start, size_t *_len, size_t upper_len,
+ loff_t i_size, bool no_space_allocated_yet);
/* Prepare an on-demand read operation, shortening it to a cached/uncached
* boundary as appropriate.
@@ -284,22 +383,62 @@ struct netfs_cache_ops {
loff_t *_data_start, size_t *_data_len);
};
+/* High-level read API. */
+ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+
+/* High-level write API */
+ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
+ struct netfs_group *netfs_group);
+ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from,
+ struct netfs_group *netfs_group);
+ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from);
+ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from);
+
+/* Address operations API */
struct readahead_control;
void netfs_readahead(struct readahead_control *);
int netfs_read_folio(struct file *, struct folio *);
int netfs_write_begin(struct netfs_inode *, struct file *,
- struct address_space *, loff_t pos, unsigned int len,
- struct folio **, void **fsdata);
-
+ struct address_space *, loff_t pos, unsigned int len,
+ struct folio **, void **fsdata);
+int netfs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc);
+bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio);
+int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc);
+void netfs_clear_inode_writeback(struct inode *inode, const void *aux);
+void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
+bool netfs_release_folio(struct folio *folio, gfp_t gfp);
+int netfs_launder_folio(struct folio *folio);
+
+/* VMA operations API. */
+vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
+
+/* (Sub)request management API. */
void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool);
void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
enum netfs_sreq_ref_trace what);
void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
bool was_async, enum netfs_sreq_ref_trace what);
-void netfs_stats_show(struct seq_file *);
ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
struct iov_iter *new,
iov_iter_extraction_t extraction_flags);
+size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
+ size_t max_size, size_t max_segs);
+struct netfs_io_subrequest *netfs_create_write_request(
+ struct netfs_io_request *wreq, enum netfs_io_source dest,
+ loff_t start, size_t len, work_func_t worker);
+void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
+ bool was_async);
+void netfs_queue_write_request(struct netfs_io_subrequest *subreq);
+
+int netfs_start_io_read(struct inode *inode);
+void netfs_end_io_read(struct inode *inode);
+int netfs_start_io_write(struct inode *inode);
+void netfs_end_io_write(struct inode *inode);
+int netfs_start_io_direct(struct inode *inode);
+void netfs_end_io_direct(struct inode *inode);
/**
* netfs_inode - Get the netfs inode context from the inode
@@ -317,30 +456,44 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode)
* netfs_inode_init - Initialise a netfslib inode context
* @ctx: The netfs inode to initialise
* @ops: The netfs's operations list
+ * @use_zero_point: True to use the zero_point read optimisation
*
* Initialise the netfs library context struct. This is expected to follow on
* directly from the VFS inode struct.
*/
static inline void netfs_inode_init(struct netfs_inode *ctx,
- const struct netfs_request_ops *ops)
+ const struct netfs_request_ops *ops,
+ bool use_zero_point)
{
ctx->ops = ops;
ctx->remote_i_size = i_size_read(&ctx->inode);
+ ctx->zero_point = LLONG_MAX; /* Default: no offset lies beyond this, so nothing is assumed absent from the server */
+ ctx->flags = 0; /* No NETFS_ICTX_* bits set initially */
#if IS_ENABLED(CONFIG_FSCACHE)
ctx->cache = NULL;
#endif
+ /* ->release_folio() (the op formerly named ->releasepage()) drives zero_point */
+ if (use_zero_point) {
+ ctx->zero_point = ctx->remote_i_size; /* Start from the size just sampled from the inode above */
+ mapping_set_release_always(ctx->inode.i_mapping); /* NOTE(review): presumably makes the release op run for every folio - confirm in pagemap.h */
+ }
}
/**
* netfs_resize_file - Note that a file got resized
* @ctx: The netfs inode being resized
* @new_i_size: The new file size
+ * @changed_on_server: The change was applied to the server
*
* Inform the netfs lib that a file got resized so that it can adjust its state.
*/
-static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size)
+static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size,
+ bool changed_on_server)
{
- ctx->remote_i_size = new_i_size;
+ if (changed_on_server)
+ ctx->remote_i_size = new_i_size; /* Only a server-side change updates the recorded remote size */
+ if (new_i_size < ctx->zero_point)
+ ctx->zero_point = new_i_size; /* zero_point is only ever lowered here, never raised */
}
/**
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 6d0a14f7019d..453736fd1d23 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -60,7 +60,7 @@ struct writeback_control {
unsigned for_reclaim:1; /* Invoked from the page allocator */
unsigned range_cyclic:1; /* range_start is cyclic */
unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
- unsigned unpinned_fscache_wb:1; /* Cleared I_PINNING_FSCACHE_WB */
+ unsigned unpinned_netfs_wb:1; /* Cleared I_PINNING_NETFS_WB */
/*
* When writeback IOs are bounced through async layers, only the
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 5194b7e6dc8d..8d73171cb9f0 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -902,37 +902,6 @@ TRACE_EVENT(afs_dir_check_failed,
__entry->vnode, __entry->off, __entry->i_size)
);
-TRACE_EVENT(afs_folio_dirty,
- TP_PROTO(struct afs_vnode *vnode, const char *where, struct folio *folio),
-
- TP_ARGS(vnode, where, folio),
-
- TP_STRUCT__entry(
- __field(struct afs_vnode *, vnode)
- __field(const char *, where)
- __field(pgoff_t, index)
- __field(unsigned long, from)
- __field(unsigned long, to)
- ),
-
- TP_fast_assign(
- unsigned long priv = (unsigned long)folio_get_private(folio);
- __entry->vnode = vnode;
- __entry->where = where;
- __entry->index = folio_index(folio);
- __entry->from = afs_folio_dirty_from(folio, priv);
- __entry->to = afs_folio_dirty_to(folio, priv);
- __entry->to |= (afs_is_folio_dirty_mmapped(priv) ?
- (1UL << (BITS_PER_LONG - 1)) : 0);
- ),
-
- TP_printk("vn=%p %lx %s %lx-%lx%s",
- __entry->vnode, __entry->index, __entry->where,
- __entry->from,
- __entry->to & ~(1UL << (BITS_PER_LONG - 1)),
- __entry->to & (1UL << (BITS_PER_LONG - 1)) ? " M" : "")
- );
-
TRACE_EVENT(afs_call_state,
TP_PROTO(struct afs_call *call,
enum afs_call_state from,
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index beec534cbaab..447a8c21cf57 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -16,34 +16,57 @@
* Define enums for tracing information.
*/
#define netfs_read_traces \
+ EM(netfs_read_trace_dio_read, "DIO-READ ") \
EM(netfs_read_trace_expanded, "EXPANDED ") \
EM(netfs_read_trace_readahead, "READAHEAD") \
EM(netfs_read_trace_readpage, "READPAGE ") \
+ EM(netfs_read_trace_prefetch_for_write, "PREFETCHW") \
E_(netfs_read_trace_write_begin, "WRITEBEGN")
+#define netfs_write_traces \
+ EM(netfs_write_trace_dio_write, "DIO-WRITE") \
+ EM(netfs_write_trace_launder, "LAUNDER ") \
+ EM(netfs_write_trace_unbuffered_write, "UNB-WRITE") \
+ EM(netfs_write_trace_writeback, "WRITEBACK") \
+ E_(netfs_write_trace_writethrough, "WRITETHRU")
+
#define netfs_rreq_origins \
EM(NETFS_READAHEAD, "RA") \
EM(NETFS_READPAGE, "RP") \
- E_(NETFS_READ_FOR_WRITE, "RW")
+ EM(NETFS_READ_FOR_WRITE, "RW") \
+ EM(NETFS_WRITEBACK, "WB") \
+ EM(NETFS_WRITETHROUGH, "WT") \
+ EM(NETFS_LAUNDER_WRITE, "LW") \
+ EM(NETFS_UNBUFFERED_WRITE, "UW") \
+ EM(NETFS_DIO_READ, "DR") \
+ E_(NETFS_DIO_WRITE, "DW")
#define netfs_rreq_traces \
EM(netfs_rreq_trace_assess, "ASSESS ") \
EM(netfs_rreq_trace_copy, "COPY ") \
EM(netfs_rreq_trace_done, "DONE ") \
EM(netfs_rreq_trace_free, "FREE ") \
+ EM(netfs_rreq_trace_redirty, "REDIRTY") \
EM(netfs_rreq_trace_resubmit, "RESUBMT") \
EM(netfs_rreq_trace_unlock, "UNLOCK ") \
- E_(netfs_rreq_trace_unmark, "UNMARK ")
+ EM(netfs_rreq_trace_unmark, "UNMARK ") \
+ EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \
+ EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \
+ E_(netfs_rreq_trace_write_done, "WR-DONE")
#define netfs_sreq_sources \
EM(NETFS_FILL_WITH_ZEROES, "ZERO") \
EM(NETFS_DOWNLOAD_FROM_SERVER, "DOWN") \
EM(NETFS_READ_FROM_CACHE, "READ") \
- E_(NETFS_INVALID_READ, "INVL") \
+ EM(NETFS_INVALID_READ, "INVL") \
+ EM(NETFS_UPLOAD_TO_SERVER, "UPLD") \
+ EM(NETFS_WRITE_TO_CACHE, "WRIT") \
+ E_(NETFS_INVALID_WRITE, "INVL")
#define netfs_sreq_traces \
EM(netfs_sreq_trace_download_instead, "RDOWN") \
EM(netfs_sreq_trace_free, "FREE ") \
+ EM(netfs_sreq_trace_limited, "LIMIT") \
EM(netfs_sreq_trace_prepare, "PREP ") \
EM(netfs_sreq_trace_resubmit_short, "SHORT") \
EM(netfs_sreq_trace_submit, "SUBMT") \
@@ -55,19 +78,24 @@
#define netfs_failures \
EM(netfs_fail_check_write_begin, "check-write-begin") \
EM(netfs_fail_copy_to_cache, "copy-to-cache") \
+ EM(netfs_fail_dio_read_short, "dio-read-short") \
+ EM(netfs_fail_dio_read_zero, "dio-read-zero") \
EM(netfs_fail_read, "read") \
EM(netfs_fail_short_read, "short-read") \
- E_(netfs_fail_prepare_write, "prep-write")
+ EM(netfs_fail_prepare_write, "prep-write") \
+ E_(netfs_fail_write, "write")
#define netfs_rreq_ref_traces \
- EM(netfs_rreq_trace_get_hold, "GET HOLD ") \
+ EM(netfs_rreq_trace_get_for_outstanding,"GET OUTSTND") \
EM(netfs_rreq_trace_get_subreq, "GET SUBREQ ") \
EM(netfs_rreq_trace_put_complete, "PUT COMPLT ") \
EM(netfs_rreq_trace_put_discard, "PUT DISCARD") \
EM(netfs_rreq_trace_put_failed, "PUT FAILED ") \
- EM(netfs_rreq_trace_put_hold, "PUT HOLD ") \
+ EM(netfs_rreq_trace_put_no_submit, "PUT NO-SUBM") \
+ EM(netfs_rreq_trace_put_return, "PUT RETURN ") \
EM(netfs_rreq_trace_put_subreq, "PUT SUBREQ ") \
- EM(netfs_rreq_trace_put_zero_len, "PUT ZEROLEN") \
+ EM(netfs_rreq_trace_put_work, "PUT WORK ") \
+ EM(netfs_rreq_trace_see_work, "SEE WORK ") \
E_(netfs_rreq_trace_new, "NEW ")
#define netfs_sreq_ref_traces \
@@ -76,11 +104,44 @@
EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \
EM(netfs_sreq_trace_new, "NEW ") \
EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \
+ EM(netfs_sreq_trace_put_discard, "PUT DISCARD") \
EM(netfs_sreq_trace_put_failed, "PUT FAILED ") \
EM(netfs_sreq_trace_put_merged, "PUT MERGED ") \
EM(netfs_sreq_trace_put_no_copy, "PUT NO COPY") \
+ EM(netfs_sreq_trace_put_wip, "PUT WIP ") \
+ EM(netfs_sreq_trace_put_work, "PUT WORK ") \
E_(netfs_sreq_trace_put_terminated, "PUT TERM ")
+#define netfs_folio_traces \
+ /* The first few correspond to enum netfs_how_to_modify */ \
+ EM(netfs_folio_is_uptodate, "mod-uptodate") \
+ EM(netfs_just_prefetch, "mod-prefetch") \
+ EM(netfs_whole_folio_modify, "mod-whole-f") \
+ EM(netfs_modify_and_clear, "mod-n-clear") \
+ EM(netfs_streaming_write, "mod-streamw") \
+ EM(netfs_streaming_write_cont, "mod-streamw+") \
+ EM(netfs_flush_content, "flush") \
+ EM(netfs_streaming_filled_page, "mod-streamw-f") \
+ EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \
+ /* The rest are for writeback */ \
+ EM(netfs_folio_trace_clear, "clear") \
+ EM(netfs_folio_trace_clear_s, "clear-s") \
+ EM(netfs_folio_trace_clear_g, "clear-g") \
+ EM(netfs_folio_trace_copy_to_cache, "copy") \
+ EM(netfs_folio_trace_end_copy, "end-copy") \
+ EM(netfs_folio_trace_filled_gaps, "filled-gaps") \
+ EM(netfs_folio_trace_kill, "kill") \
+ EM(netfs_folio_trace_launder, "launder") \
+ EM(netfs_folio_trace_mkwrite, "mkwrite") \
+ EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \
+ EM(netfs_folio_trace_read_gaps, "read-gaps") \
+ EM(netfs_folio_trace_redirty, "redirty") \
+ EM(netfs_folio_trace_redirtied, "redirtied") \
+ EM(netfs_folio_trace_store, "store") \
+ EM(netfs_folio_trace_store_plus, "store+") \
+ EM(netfs_folio_trace_wthru, "wthru") \
+ E_(netfs_folio_trace_wthru_plus, "wthru+")
+
#ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
#define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
@@ -90,11 +151,13 @@
#define E_(a, b) a
enum netfs_read_trace { netfs_read_traces } __mode(byte);
+enum netfs_write_trace { netfs_write_traces } __mode(byte);
enum netfs_rreq_trace { netfs_rreq_traces } __mode(byte);
enum netfs_sreq_trace { netfs_sreq_traces } __mode(byte);
enum netfs_failure { netfs_failures } __mode(byte);
enum netfs_rreq_ref_trace { netfs_rreq_ref_traces } __mode(byte);
enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte);
+enum netfs_folio_trace { netfs_folio_traces } __mode(byte);
#endif
@@ -107,6 +170,7 @@ enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte);
#define E_(a, b) TRACE_DEFINE_ENUM(a);
netfs_read_traces;
+netfs_write_traces;
netfs_rreq_origins;
netfs_rreq_traces;
netfs_sreq_sources;
@@ -114,6 +178,7 @@ netfs_sreq_traces;
netfs_failures;
netfs_rreq_ref_traces;
netfs_sreq_ref_traces;
+netfs_folio_traces;
/*
* Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -314,6 +379,82 @@ TRACE_EVENT(netfs_sreq_ref,
__entry->ref)
);
+TRACE_EVENT(netfs_folio, /* Records a folio's inode number, page-index range and the reason code */
+ TP_PROTO(struct folio *folio, enum netfs_folio_trace why),
+
+ TP_ARGS(folio, why),
+
+ TP_STRUCT__entry(
+ __field(ino_t, ino)
+ __field(pgoff_t, index)
+ __field(unsigned int, nr)
+ __field(enum netfs_folio_trace, why)
+ ),
+
+ TP_fast_assign(
+ __entry->ino = folio->mapping->host->i_ino; /* Dereferences folio->mapping, so the folio must still be attached to a mapping */
+ __entry->why = why;
+ __entry->index = folio_index(folio);
+ __entry->nr = folio_nr_pages(folio);
+ ),
+
+ TP_printk("i=%05lx ix=%05lx-%05lx %s",
+ __entry->ino, __entry->index, __entry->index + __entry->nr - 1, /* Last value is the inclusive final page index */
+ __print_symbolic(__entry->why, netfs_folio_traces))
+ );
+
+TRACE_EVENT(netfs_write_iter, /* Records the position, byte count and kiocb flags of a write_iter call */
+ TP_PROTO(const struct kiocb *iocb, const struct iov_iter *from),
+
+ TP_ARGS(iocb, from),
+
+ TP_STRUCT__entry(
+ __field(unsigned long long, start )
+ __field(size_t, len )
+ __field(unsigned int, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->start = iocb->ki_pos; /* File position at the time of the trace */
+ __entry->len = iov_iter_count(from); /* Bytes remaining in the source iterator */
+ __entry->flags = iocb->ki_flags;
+ ),
+
+ TP_printk("WRITE-ITER s=%llx l=%zx f=%x",
+ __entry->start, __entry->len, __entry->flags)
+ );
+
+TRACE_EVENT(netfs_write, /* Records a write request's debug id, cache cookie id, origin tag and byte range */
+ TP_PROTO(const struct netfs_io_request *wreq,
+ enum netfs_write_trace what),
+
+ TP_ARGS(wreq, what),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, wreq )
+ __field(unsigned int, cookie )
+ __field(enum netfs_write_trace, what )
+ __field(unsigned long long, start )
+ __field(size_t, len )
+ ),
+
+ TP_fast_assign(
+ struct netfs_inode *__ctx = netfs_inode(wreq->inode);
+ struct fscache_cookie *__cookie = netfs_i_cookie(__ctx);
+ __entry->wreq = wreq->debug_id;
+ __entry->cookie = __cookie ? __cookie->debug_id : 0; /* 0 is logged when no cookie is returned for the inode */
+ __entry->what = what;
+ __entry->start = wreq->start;
+ __entry->len = wreq->len;
+ ),
+
+ TP_printk("R=%08x %s c=%08x by=%llx-%llx",
+ __entry->wreq,
+ __print_symbolic(__entry->what, netfs_write_traces),
+ __entry->cookie,
+ __entry->start, __entry->start + __entry->len - 1) /* by= is an inclusive byte range */
+ );
+
#undef EM
#undef E_
#endif /* _TRACE_NETFS_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index ea49677c6338..750e779c23db 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2688,6 +2688,7 @@ int kiocb_write_and_wait(struct kiocb *iocb, size_t count)
return filemap_write_and_wait_range(mapping, pos, end);
}
+EXPORT_SYMBOL_GPL(kiocb_write_and_wait);
int kiocb_invalidate_pages(struct kiocb *iocb, size_t count)
{
@@ -2715,6 +2716,7 @@ int kiocb_invalidate_pages(struct kiocb *iocb, size_t count)
return invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
end >> PAGE_SHIFT);
}
+EXPORT_SYMBOL_GPL(kiocb_invalidate_pages);
/**
* generic_file_read_iter - generic filesystem read routine