summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorDave Chinner <david@fromorbit.com>2022-05-04 12:37:18 +1000
committerDave Chinner <david@fromorbit.com>2022-05-04 12:37:18 +1000
commit956f1b8f80b5d34394950269fcf7286122b9a3a9 (patch)
tree63e0180eeab58262dc1c7e9ec32f1fcf4f6d033c /fs
parent5e116e99dc94c84650a94aaf51d0c5bca3702352 (diff)
parent1edf8056131aca6fe7f98873da8297e6fa279d8c (diff)
Merge tag 'rmap-speedups-5.19_2022-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-5.19-for-next
xfs: fix rmap inefficiencies Reduce the performance impact of the reverse mapping btree when reflink is enabled by using the much faster non-overlapped btree lookup functions when we're searching the rmap index with a fully specified key. If we find the exact record we're looking for, great! We don't have to perform the full overlapped scan. For filesystems with high sharing factors this reduces the xfs_scrub runtime by a good 15%%. This has been shown to reduce the fstests runtime for realtime rmap configurations by 30%%, since the lack of AGs severely limits scalability. Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c161
-rw-r--r--fs/xfs/libxfs/xfs_rmap.h7
-rw-r--r--fs/xfs/scrub/bmap.c24
-rw-r--r--fs/xfs/xfs_trace.h5
4 files changed, 106 insertions, 91 deletions
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index cd322174dbff..2845019d31da 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -34,18 +34,32 @@ int
xfs_rmap_lookup_le(
struct xfs_btree_cur *cur,
xfs_agblock_t bno,
- xfs_extlen_t len,
uint64_t owner,
uint64_t offset,
unsigned int flags,
+ struct xfs_rmap_irec *irec,
int *stat)
{
+ int get_stat = 0;
+ int error;
+
cur->bc_rec.r.rm_startblock = bno;
- cur->bc_rec.r.rm_blockcount = len;
+ cur->bc_rec.r.rm_blockcount = 0;
cur->bc_rec.r.rm_owner = owner;
cur->bc_rec.r.rm_offset = offset;
cur->bc_rec.r.rm_flags = flags;
- return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+
+ error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+ if (error || !(*stat) || !irec)
+ return error;
+
+ error = xfs_rmap_get_rec(cur, irec, &get_stat);
+ if (error)
+ return error;
+ if (!get_stat)
+ return -EFSCORRUPTED;
+
+ return 0;
}
/*
@@ -251,7 +265,6 @@ out_bad_rec:
struct xfs_find_left_neighbor_info {
struct xfs_rmap_irec high;
struct xfs_rmap_irec *irec;
- int *stat;
};
/* For each rmap given, figure out if it matches the key we want. */
@@ -276,7 +289,6 @@ xfs_rmap_find_left_neighbor_helper(
return 0;
*info->irec = *rec;
- *info->stat = 1;
return -ECANCELED;
}
@@ -285,7 +297,7 @@ xfs_rmap_find_left_neighbor_helper(
* return a match with the same owner and adjacent physical and logical
* block ranges.
*/
-int
+STATIC int
xfs_rmap_find_left_neighbor(
struct xfs_btree_cur *cur,
xfs_agblock_t bno,
@@ -296,6 +308,7 @@ xfs_rmap_find_left_neighbor(
int *stat)
{
struct xfs_find_left_neighbor_info info;
+ int found = 0;
int error;
*stat = 0;
@@ -313,21 +326,44 @@ xfs_rmap_find_left_neighbor(
info.high.rm_flags = flags;
info.high.rm_blockcount = 0;
info.irec = irec;
- info.stat = stat;
trace_xfs_rmap_find_left_neighbor_query(cur->bc_mp,
cur->bc_ag.pag->pag_agno, bno, 0, owner, offset, flags);
- error = xfs_rmap_query_range(cur, &info.high, &info.high,
- xfs_rmap_find_left_neighbor_helper, &info);
- if (error == -ECANCELED)
- error = 0;
- if (*stat)
- trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, irec->rm_startblock,
- irec->rm_blockcount, irec->rm_owner,
- irec->rm_offset, irec->rm_flags);
- return error;
+ /*
+ * Historically, we always used the range query to walk every reverse
+ * mapping that could possibly overlap the key that the caller asked
+ * for, and filter out the ones that don't. That is very slow when
+ * there are a lot of records.
+ *
+ * However, there are two scenarios where the classic btree search can
+ * produce correct results -- if the index contains a record that is an
+ * exact match for the lookup key; and if there are no other records
+ * between the record we want and the key we supplied.
+ *
+ * As an optimization, try a non-overlapped lookup first. This makes
+ * extent conversion and remap operations run a bit faster if the
+ * physical extents aren't being shared. If we don't find what we
+ * want, we fall back to the overlapped query.
+ */
+ error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec,
+ &found);
+ if (error)
+ return error;
+ if (found)
+ error = xfs_rmap_find_left_neighbor_helper(cur, irec, &info);
+ if (!error)
+ error = xfs_rmap_query_range(cur, &info.high, &info.high,
+ xfs_rmap_find_left_neighbor_helper, &info);
+ if (error != -ECANCELED)
+ return error;
+
+ *stat = 1;
+ trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
+ cur->bc_ag.pag->pag_agno, irec->rm_startblock,
+ irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
+ irec->rm_flags);
+ return 0;
}
/* For each rmap given, figure out if it matches the key we want. */
@@ -353,7 +389,6 @@ xfs_rmap_lookup_le_range_helper(
return 0;
*info->irec = *rec;
- *info->stat = 1;
return -ECANCELED;
}
@@ -374,6 +409,7 @@ xfs_rmap_lookup_le_range(
int *stat)
{
struct xfs_find_left_neighbor_info info;
+ int found = 0;
int error;
info.high.rm_startblock = bno;
@@ -386,20 +422,44 @@ xfs_rmap_lookup_le_range(
info.high.rm_blockcount = 0;
*stat = 0;
info.irec = irec;
- info.stat = stat;
- trace_xfs_rmap_lookup_le_range(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, bno, 0, owner, offset, flags);
- error = xfs_rmap_query_range(cur, &info.high, &info.high,
- xfs_rmap_lookup_le_range_helper, &info);
- if (error == -ECANCELED)
- error = 0;
- if (*stat)
- trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, irec->rm_startblock,
- irec->rm_blockcount, irec->rm_owner,
- irec->rm_offset, irec->rm_flags);
- return error;
+ trace_xfs_rmap_lookup_le_range(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ bno, 0, owner, offset, flags);
+
+ /*
+ * Historically, we always used the range query to walk every reverse
+ * mapping that could possibly overlap the key that the caller asked
+ * for, and filter out the ones that don't. That is very slow when
+ * there are a lot of records.
+ *
+ * However, there are two scenarios where the classic btree search can
+ * produce correct results -- if the index contains a record that is an
+ * exact match for the lookup key; and if there are no other records
+ * between the record we want and the key we supplied.
+ *
+ * As an optimization, try a non-overlapped lookup first. This makes
+ * scrub run much faster on most filesystems because bmbt records are
+ * usually an exact match for rmap records. If we don't find what we
+ * want, we fall back to the overlapped query.
+ */
+ error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec,
+ &found);
+ if (error)
+ return error;
+ if (found)
+ error = xfs_rmap_lookup_le_range_helper(cur, irec, &info);
+ if (!error)
+ error = xfs_rmap_query_range(cur, &info.high, &info.high,
+ xfs_rmap_lookup_le_range_helper, &info);
+ if (error != -ECANCELED)
+ return error;
+
+ *stat = 1;
+ trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
+ cur->bc_ag.pag->pag_agno, irec->rm_startblock,
+ irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
+ irec->rm_flags);
+ return 0;
}
/*
@@ -510,7 +570,7 @@ xfs_rmap_unmap(
* for the AG headers at rm_startblock == 0 created by mkfs/growfs that
* will not ever be removed from the tree.
*/
- error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, &i);
+ error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &ltrec, &i);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(mp, i != 1)) {
@@ -518,13 +578,6 @@ xfs_rmap_unmap(
goto out_error;
}
- error = xfs_rmap_get_rec(cur, &ltrec, &i);
- if (error)
- goto out_error;
- if (XFS_IS_CORRUPT(mp, i != 1)) {
- error = -EFSCORRUPTED;
- goto out_error;
- }
trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
ltrec.rm_blockcount, ltrec.rm_owner,
@@ -786,18 +839,11 @@ xfs_rmap_map(
* record for our insertion point. This will also give us the record for
* start block contiguity tests.
*/
- error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags,
+ error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &ltrec,
&have_lt);
if (error)
goto out_error;
if (have_lt) {
- error = xfs_rmap_get_rec(cur, &ltrec, &have_lt);
- if (error)
- goto out_error;
- if (XFS_IS_CORRUPT(mp, have_lt != 1)) {
- error = -EFSCORRUPTED;
- goto out_error;
- }
trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
ltrec.rm_blockcount, ltrec.rm_owner,
@@ -1022,7 +1068,7 @@ xfs_rmap_convert(
* record for our insertion point. This will also give us the record for
* start block contiguity tests.
*/
- error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
+ error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, &PREV, &i);
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
@@ -1030,13 +1076,6 @@ xfs_rmap_convert(
goto done;
}
- error = xfs_rmap_get_rec(cur, &PREV, &i);
- if (error)
- goto done;
- if (XFS_IS_CORRUPT(mp, i != 1)) {
- error = -EFSCORRUPTED;
- goto done;
- }
trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
cur->bc_ag.pag->pag_agno, PREV.rm_startblock,
PREV.rm_blockcount, PREV.rm_owner,
@@ -1140,7 +1179,7 @@ xfs_rmap_convert(
_RET_IP_);
/* reset the cursor back to PREV */
- error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
+ error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, NULL, &i);
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
@@ -2677,7 +2716,7 @@ xfs_rmap_record_exists(
ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) ||
(flags & XFS_RMAP_BMBT_BLOCK));
- error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags,
+ error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &irec,
&has_record);
if (error)
return error;
@@ -2686,14 +2725,6 @@ xfs_rmap_record_exists(
return 0;
}
- error = xfs_rmap_get_rec(cur, &irec, &has_record);
- if (error)
- return error;
- if (!has_record) {
- *has_rmap = false;
- return 0;
- }
-
*has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno &&
irec.rm_startblock + irec.rm_blockcount >= bno + len);
return 0;
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index b718ebeda372..54741a591a17 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -122,8 +122,8 @@ int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp,
const struct xfs_owner_info *oinfo);
int xfs_rmap_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len, uint64_t owner, uint64_t offset,
- unsigned int flags, int *stat);
+ uint64_t owner, uint64_t offset, unsigned int flags,
+ struct xfs_rmap_irec *irec, int *stat);
int xfs_rmap_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
xfs_extlen_t len, uint64_t owner, uint64_t offset,
unsigned int flags, int *stat);
@@ -184,9 +184,6 @@ int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type,
xfs_fsblock_t startblock, xfs_filblks_t blockcount,
xfs_exntst_t state, struct xfs_btree_cur **pcur);
-int xfs_rmap_find_left_neighbor(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- uint64_t owner, uint64_t offset, unsigned int flags,
- struct xfs_rmap_irec *irec, int *stat);
int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
uint64_t owner, uint64_t offset, unsigned int flags,
struct xfs_rmap_irec *irec, int *stat);
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index c357593e0a02..285995ba3947 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -133,29 +133,13 @@ xchk_bmap_get_rmap(
if (info->is_shared) {
error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
owner, offset, rflags, rmap, &has_rmap);
- if (!xchk_should_check_xref(info->sc, &error,
- &info->sc->sa.rmap_cur))
- return false;
- goto out;
+ } else {
+ error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
+ owner, offset, rflags, rmap, &has_rmap);
}
-
- /*
- * Otherwise, use the (faster) regular lookup.
- */
- error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner,
- offset, rflags, &has_rmap);
- if (!xchk_should_check_xref(info->sc, &error,
- &info->sc->sa.rmap_cur))
+ if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
return false;
- if (!has_rmap)
- goto out;
- error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap);
- if (!xchk_should_check_xref(info->sc, &error,
- &info->sc->sa.rmap_cur))
- return false;
-
-out:
if (!has_rmap)
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 75934e3c3f55..fe8f4ec98ec6 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -418,6 +418,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
__field(unsigned, lockval)
__field(unsigned, flags)
__field(unsigned long, caller_ip)
+ __field(const void *, buf_ops)
),
TP_fast_assign(
__entry->dev = bp->b_target->bt_dev;
@@ -428,9 +429,10 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
__entry->lockval = bp->b_sema.count;
__entry->flags = bp->b_flags;
__entry->caller_ip = caller_ip;
+ __entry->buf_ops = bp->b_ops;
),
TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
- "lock %d flags %s caller %pS",
+ "lock %d flags %s bufops %pS caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->bno,
__entry->nblks,
@@ -438,6 +440,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
__entry->pincount,
__entry->lockval,
__print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+ __entry->buf_ops,
(void *)__entry->caller_ip)
)