summaryrefslogtreecommitdiff
path: root/fs/netfs/direct_read.c
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2024-12-16 20:41:17 +0000
committerChristian Brauner <brauner@kernel.org>2024-12-20 22:34:08 +0100
commite2d46f2ec332533816417b60933954173f602121 (patch)
tree53b798c049ae403a978b3a81739a3aa13345b075 /fs/netfs/direct_read.c
parenteddf51f2bb2c28b082199c6f5fd95611ca511135 (diff)
netfs: Change the read result collector to only use one work item
Change the way netfslib collects read results to do all the collection for a particular read request using a single work item that walks along the subrequest queue as subrequests make progress or complete, unlocking folios progressively rather than doing the unlock in parallel as parallel requests come in. The code is remodelled to be more like the write-side code, though only using a single stream. This makes it more directly comparable and thus easier to duplicate fixes between the two sides. This has a number of advantages: (1) It's simpler. There doesn't need to be a complex donation mechanism to handle mismatches between the size and alignment of subrequests and folios. The collector unlocks folios as the subrequests covering each complete. (2) It should cause less scheduler overhead as there's a single work item in play unlocking pages in parallel when a read gets split up into a lot of subrequests instead of one per subrequest. Whilst the parallellism is nice in theory, in practice, the vast majority of loads are sequential reads of the whole file, so committing a bunch of threads to unlocking folios out of order doesn't help in those cases. (3) It should make it easier to implement content decryption. A folio cannot be decrypted until all the requests that contribute to it have completed - and, again, most loads are sequential and so, most of the time, we want to begin decryption sequentially (though it's great if the decryption can happen in parallel). There is a disadvantage in that we're losing the ability to decrypt and unlock things on an as-things-arrive basis which may affect some applications. Signed-off-by: David Howells <dhowells@redhat.com> Link: https://lore.kernel.org/r/20241216204124.3752367-28-dhowells@redhat.com cc: Jeff Layton <jlayton@kernel.org> cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'fs/netfs/direct_read.c')
-rw-r--r--fs/netfs/direct_read.c60
1 files changed, 34 insertions, 26 deletions
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index 1a20cc3979c7..0bf3c2f5a710 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -47,12 +47,11 @@ static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
*/
static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
{
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
unsigned long long start = rreq->start;
ssize_t size = rreq->len;
int ret = 0;
- atomic_set(&rreq->nr_outstanding, 1);
-
do {
struct netfs_io_subrequest *subreq;
ssize_t slice;
@@ -67,11 +66,18 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
subreq->start = start;
subreq->len = size;
- atomic_inc(&rreq->nr_outstanding);
+ __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+
spin_lock(&rreq->lock);
- list_add_tail(&subreq->rreq_link, &rreq->subrequests);
- subreq->prev_donated = rreq->prev_donated;
- rreq->prev_donated = 0;
+ list_add_tail(&subreq->rreq_link, &stream->subrequests);
+ if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
+ stream->front = subreq;
+ if (!stream->active) {
+ stream->collected_to = stream->front->start;
+ /* Store list pointers before active flag */
+ smp_store_release(&stream->active, true);
+ }
+ }
trace_netfs_sreq(subreq, netfs_sreq_trace_added);
spin_unlock(&rreq->lock);
@@ -79,7 +85,6 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
if (rreq->netfs_ops->prepare_read) {
ret = rreq->netfs_ops->prepare_read(subreq);
if (ret < 0) {
- atomic_dec(&rreq->nr_outstanding);
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
break;
}
@@ -87,20 +92,32 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
netfs_prepare_dio_read_iterator(subreq);
slice = subreq->len;
- rreq->netfs_ops->issue_read(subreq);
-
size -= slice;
start += slice;
rreq->submitted += slice;
+ if (size <= 0) {
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ }
+
+ rreq->netfs_ops->issue_read(subreq);
+ if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
+ netfs_wait_for_pause(rreq);
+ if (test_bit(NETFS_RREQ_FAILED, &rreq->flags))
+ break;
if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
break;
cond_resched();
} while (size > 0);
- if (atomic_dec_and_test(&rreq->nr_outstanding))
- netfs_rreq_terminated(rreq);
+ if (unlikely(size > 0)) {
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ netfs_wake_read_collector(rreq);
+ }
+
return ret;
}
@@ -133,21 +150,10 @@ static int netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
goto out;
}
- if (sync) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
- wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE);
-
- ret = rreq->error;
- if (ret == 0 && rreq->submitted < rreq->len &&
- rreq->origin != NETFS_DIO_READ) {
- trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
- ret = -EIO;
- }
- } else {
+ if (sync)
+ ret = netfs_wait_for_read(rreq);
+ else
ret = -EIOCBQUEUED;
- }
-
out:
_leave(" = %d", ret);
return ret;
@@ -215,8 +221,10 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
// TODO: Set up bounce buffer if needed
- if (!sync)
+ if (!sync) {
rreq->iocb = iocb;
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);
+ }
ret = netfs_unbuffered_read(rreq, sync);
if (ret < 0)