author	Joanne Koong <joannelkoong@gmail.com>	2025-09-25 17:26:02 -0700
committer	Christian Brauner <brauner@kernel.org>	2025-11-05 12:57:23 +0100
commit	d43558ae67299266ea54715773610fdec4291c90 (patch)
tree	77124e7c54af419a971e12067ce838eba560aa3c
parent	87a13819dde34e8af247744e979583a31d353244 (diff)
iomap: track pending read bytes more optimally
Instead of incrementing read_bytes_pending for every folio range read in
(which requires acquiring the spinlock to do so), set read_bytes_pending
to the folio size when the first range is asynchronously read in, keep
track of how many bytes total are asynchronously read in, and adjust
read_bytes_pending accordingly after issuing requests to read in all the
necessary ranges.

iomap_read_folio_ctx->cur_folio_in_bio can be removed since a non-zero
value for pending bytes necessarily indicates the folio is in the bio.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Suggested-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r--	fs/iomap/buffered-io.c	123
1 files changed, 102 insertions, 21 deletions
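
Before the patch itself, a standalone sketch of the accounting scheme described in the commit message may help. This is an illustrative userspace model only, not kernel code: the names (folio_acct, read_init, read_end, complete_range) are made up, a pthread mutex stands in for ifs->state_lock, and only the arithmetic mirrors the patch. The point is that the lock is taken once up front to charge the whole folio plus a +1 bias, submitted bytes are counted in a plain local variable with no lock, and the overcharge is trimmed with a single locked update at the end.

	/*
	 * Illustrative model of the deferred read_bytes_pending accounting.
	 * folio_acct, read_init, read_end and complete_range are hypothetical
	 * names; a pthread mutex stands in for ifs->state_lock.
	 */
	#include <assert.h>
	#include <pthread.h>
	#include <stddef.h>

	struct folio_acct {
		pthread_mutex_t lock;		/* stands in for ifs->state_lock */
		size_t read_bytes_pending;	/* stands in for ifs->read_bytes_pending */
	};

	/* Called once, when the first range that needs I/O is found. */
	static void read_init(struct folio_acct *a, size_t folio_size)
	{
		pthread_mutex_lock(&a->lock);
		a->read_bytes_pending += folio_size + 1;	/* whole folio + bias */
		pthread_mutex_unlock(&a->lock);
	}

	/*
	 * Called once after all ranges have been walked (assumes read_init() ran,
	 * i.e. at least one range was submitted). Returns nonzero if the submitted
	 * I/O has already completed and the read should be ended here.
	 */
	static int read_end(struct folio_acct *a, size_t folio_size, size_t submitted)
	{
		int end_read;

		pthread_mutex_lock(&a->lock);
		a->read_bytes_pending -= folio_size + 1 - submitted;
		end_read = (a->read_bytes_pending == 0);
		pthread_mutex_unlock(&a->lock);
		return end_read;
	}

	/* Completion path: each finished range is subtracted; zero ends the read. */
	static int complete_range(struct folio_acct *a, size_t len)
	{
		int end_read;

		pthread_mutex_lock(&a->lock);
		a->read_bytes_pending -= len;
		end_read = (a->read_bytes_pending == 0);
		pthread_mutex_unlock(&a->lock);
		return end_read;
	}

	int main(void)
	{
		static struct folio_acct a = { .lock = PTHREAD_MUTEX_INITIALIZER };
		size_t folio_size = 16384, submitted = 0;

		read_init(&a, folio_size);
		submitted += 4096;			/* one range submitted, no lock taken */
		assert(!read_end(&a, folio_size, submitted));	/* I/O still outstanding */
		assert(complete_range(&a, 4096));	/* completion ends the read */
		return 0;
	}

Build with -pthread; the model takes the lock twice per folio regardless of how many ranges are submitted, which is the saving the commit message describes.
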
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 09e65771a947..f8b17ce549eb 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -362,7 +362,6 @@ static void iomap_read_end_io(struct bio *bio)
struct iomap_read_folio_ctx {
struct folio *cur_folio;
- bool cur_folio_in_bio;
void *read_ctx;
struct readahead_control *rac;
};
@@ -380,19 +379,11 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter,
{
struct folio *folio = ctx->cur_folio;
const struct iomap *iomap = &iter->iomap;
- struct iomap_folio_state *ifs = folio->private;
size_t poff = offset_in_folio(folio, pos);
loff_t length = iomap_length(iter);
sector_t sector;
struct bio *bio = ctx->read_ctx;
- ctx->cur_folio_in_bio = true;
- if (ifs) {
- spin_lock_irq(&ifs->state_lock);
- ifs->read_bytes_pending += plen;
- spin_unlock_irq(&ifs->state_lock);
- }
-
sector = iomap_sector(iomap, pos);
if (!bio || bio_end_sector(bio) != sector ||
!bio_add_folio(bio, folio, plen, poff)) {
@@ -422,8 +413,93 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter,
}
}
+static void iomap_read_init(struct folio *folio)
+{
+ struct iomap_folio_state *ifs = folio->private;
+
+ if (ifs) {
+ size_t len = folio_size(folio);
+
+ /*
+ * ifs->read_bytes_pending is used to track how many bytes are
+ * read in asynchronously by the IO helper. We need to track
+ * this so that we can know when the IO helper has finished
+ * reading in all the necessary ranges of the folio and can end
+ * the read.
+ *
+ * Increase ->read_bytes_pending by the folio size to start, and
+ * add a +1 bias. We'll subtract the bias and any uptodate /
+ * zeroed ranges that did not require IO in iomap_read_end()
+ * after we're done processing the folio.
+ *
+ * We do this because otherwise, we would have to increment
+ * ifs->read_bytes_pending every time a range in the folio needs
+ * to be read in, which can get expensive since the spinlock
+ * needs to be held whenever modifying ifs->read_bytes_pending.
+ *
+ * We add the bias to ensure the read has not been ended on the
+ * folio when iomap_read_end() is called, even if the IO helper
+ * has already finished reading in the entire folio.
+ */
+ spin_lock_irq(&ifs->state_lock);
+ ifs->read_bytes_pending += len + 1;
+ spin_unlock_irq(&ifs->state_lock);
+ }
+}
+
+/*
+ * This ends IO if no bytes were submitted to an IO helper.
+ *
+ * Otherwise, this calibrates ifs->read_bytes_pending to represent only the
+ * submitted bytes (see comment in iomap_read_init()). If all bytes submitted
+ * have already been completed by the IO helper, then this will end the read.
+ * Else the IO helper will end the read after all submitted ranges have been
+ * read.
+ */
+static void iomap_read_end(struct folio *folio, size_t bytes_pending)
+{
+ struct iomap_folio_state *ifs;
+
+ /*
+ * If there are no bytes pending, this means we are responsible for
+ * unlocking the folio here, since no IO helper has taken ownership of
+ * it.
+ */
+ if (!bytes_pending) {
+ folio_unlock(folio);
+ return;
+ }
+
+ ifs = folio->private;
+ if (ifs) {
+ bool end_read, uptodate;
+ /*
+ * Subtract any bytes that were initially accounted to
+ * read_bytes_pending but skipped for IO.
+ * The +1 accounts for the bias we added in iomap_read_init().
+ */
+ size_t bytes_accounted = folio_size(folio) + 1 -
+ bytes_pending;
+
+ spin_lock_irq(&ifs->state_lock);
+ ifs->read_bytes_pending -= bytes_accounted;
+ /*
+ * If !ifs->read_bytes_pending, this means all pending reads
+ * by the IO helper have already completed, which means we need
+ * to end the folio read here. If ifs->read_bytes_pending != 0,
+ * the IO helper will end the folio read.
+ */
+ end_read = !ifs->read_bytes_pending;
+ if (end_read)
+ uptodate = ifs_is_fully_uptodate(folio, ifs);
+ spin_unlock_irq(&ifs->state_lock);
+ if (end_read)
+ folio_end_read(folio, uptodate);
+ }
+}
+
static int iomap_read_folio_iter(struct iomap_iter *iter,
- struct iomap_read_folio_ctx *ctx)
+ struct iomap_read_folio_ctx *ctx, size_t *bytes_pending)
{
const struct iomap *iomap = &iter->iomap;
loff_t pos = iter->pos;
@@ -460,6 +536,9 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
folio_zero_range(folio, poff, plen);
iomap_set_range_uptodate(folio, poff, plen);
} else {
+ if (!*bytes_pending)
+ iomap_read_init(folio);
+ *bytes_pending += plen;
iomap_bio_read_folio_range(iter, ctx, pos, plen);
}
@@ -482,17 +561,18 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
struct iomap_read_folio_ctx ctx = {
.cur_folio = folio,
};
+ size_t bytes_pending = 0;
int ret;
trace_iomap_readpage(iter.inode, 1);
while ((ret = iomap_iter(&iter, ops)) > 0)
- iter.status = iomap_read_folio_iter(&iter, &ctx);
+ iter.status = iomap_read_folio_iter(&iter, &ctx,
+ &bytes_pending);
iomap_bio_submit_read(&ctx);
- if (!ctx.cur_folio_in_bio)
- folio_unlock(folio);
+ iomap_read_end(folio, bytes_pending);
/*
* Just like mpage_readahead and block_read_full_folio, we always
@@ -504,24 +584,23 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
EXPORT_SYMBOL_GPL(iomap_read_folio);
static int iomap_readahead_iter(struct iomap_iter *iter,
- struct iomap_read_folio_ctx *ctx)
+ struct iomap_read_folio_ctx *ctx, size_t *cur_bytes_pending)
{
int ret;
while (iomap_length(iter)) {
if (ctx->cur_folio &&
offset_in_folio(ctx->cur_folio, iter->pos) == 0) {
- if (!ctx->cur_folio_in_bio)
- folio_unlock(ctx->cur_folio);
+ iomap_read_end(ctx->cur_folio, *cur_bytes_pending);
ctx->cur_folio = NULL;
}
if (!ctx->cur_folio) {
ctx->cur_folio = readahead_folio(ctx->rac);
if (WARN_ON_ONCE(!ctx->cur_folio))
return -EINVAL;
- ctx->cur_folio_in_bio = false;
+ *cur_bytes_pending = 0;
}
- ret = iomap_read_folio_iter(iter, ctx);
+ ret = iomap_read_folio_iter(iter, ctx, cur_bytes_pending);
if (ret)
return ret;
}
@@ -554,16 +633,18 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
struct iomap_read_folio_ctx ctx = {
.rac = rac,
};
+ size_t cur_bytes_pending;
trace_iomap_readahead(rac->mapping->host, readahead_count(rac));
while (iomap_iter(&iter, ops) > 0)
- iter.status = iomap_readahead_iter(&iter, &ctx);
+ iter.status = iomap_readahead_iter(&iter, &ctx,
+ &cur_bytes_pending);
iomap_bio_submit_read(&ctx);
- if (ctx.cur_folio && !ctx.cur_folio_in_bio)
- folio_unlock(ctx.cur_folio);
+ if (ctx.cur_folio)
+ iomap_read_end(ctx.cur_folio, cur_bytes_pending);
}
EXPORT_SYMBOL_GPL(iomap_readahead);
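
To make the calibration in iomap_read_end() concrete, consider (hypothetically) a 16 KiB folio in which a single 4 KiB range was submitted for I/O: iomap_read_init() raises read_bytes_pending to 16384 + 1 = 16385, and iomap_read_end() subtracts bytes_accounted = 16384 + 1 - 4096 = 12289, leaving exactly the 4096 submitted bytes for the bio completion to drop to zero. If the bio instead completes first, read_bytes_pending sits at 16385 - 4096 = 12289 when iomap_read_end() runs, having never reached zero thanks to the +1 bias, and the same subtraction brings it to zero so iomap_read_end() ends the read. Either way the read is ended exactly once.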