summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-07-18 10:09:48 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-07-18 10:09:48 -0700
commitd551d7bbf264ed11d897368e3670bda5b37b360e (patch)
treead453622401fdc1393d194ee434a2a8607a8e309
parentd3d16f31d7b305df46080a95f2d254f78e04d588 (diff)
parent5948705adbf1a7afcecfe9a13ff39221ef61e16b (diff)
Merge tag 'xfs-fixes-6.16-rc7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs fixes from Carlos Maiolino: "This contains mostly code clean up, refactoring and comments modification. The most important patch in this series is the last one that removes an unnecessary data structure allocation of xfs busy extents which might lead to a memory leak on the zoned allocator code" * tag 'xfs-fixes-6.16-rc7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: xfs: don't allocate the xfs_extent_busy structure for zoned RTGs xfs: remove the bt_bdev_file buftarg field xfs: rename the bt_bdev_* buftarg fields xfs: refactor xfs_calc_atomic_write_unit_max xfs: add a xfs_group_type_buftarg helper xfs: remove the call to sync_blockdev in xfs_configure_buftarg xfs: clean up the initial read logic in xfs_readsb xfs: replace strncpy with memcpy in xattr listing
-rw-r--r--fs/xfs/libxfs/xfs_group.c14
-rw-r--r--fs/xfs/xfs_buf.c15
-rw-r--r--fs/xfs/xfs_buf.h8
-rw-r--r--fs/xfs/xfs_discard.c29
-rw-r--r--fs/xfs/xfs_extent_busy.h8
-rw-r--r--fs/xfs/xfs_file.c2
-rw-r--r--fs/xfs/xfs_inode.h2
-rw-r--r--fs/xfs/xfs_iomap.c2
-rw-r--r--fs/xfs/xfs_iops.c2
-rw-r--r--fs/xfs/xfs_mount.c97
-rw-r--r--fs/xfs/xfs_mount.h17
-rw-r--r--fs/xfs/xfs_notify_failure.c3
-rw-r--r--fs/xfs/xfs_trace.h31
-rw-r--r--fs/xfs/xfs_xattr.c2
14 files changed, 108 insertions, 124 deletions
diff --git a/fs/xfs/libxfs/xfs_group.c b/fs/xfs/libxfs/xfs_group.c
index e9d76bcdc820..20ad7c309489 100644
--- a/fs/xfs/libxfs/xfs_group.c
+++ b/fs/xfs/libxfs/xfs_group.c
@@ -163,7 +163,8 @@ xfs_group_free(
xfs_defer_drain_free(&xg->xg_intents_drain);
#ifdef __KERNEL__
- kfree(xg->xg_busy_extents);
+ if (xfs_group_has_extent_busy(xg->xg_mount, xg->xg_type))
+ kfree(xg->xg_busy_extents);
#endif
if (uninit)
@@ -189,9 +190,11 @@ xfs_group_insert(
xg->xg_type = type;
#ifdef __KERNEL__
- xg->xg_busy_extents = xfs_extent_busy_alloc();
- if (!xg->xg_busy_extents)
- return -ENOMEM;
+ if (xfs_group_has_extent_busy(mp, type)) {
+ xg->xg_busy_extents = xfs_extent_busy_alloc();
+ if (!xg->xg_busy_extents)
+ return -ENOMEM;
+ }
spin_lock_init(&xg->xg_state_lock);
xfs_hooks_init(&xg->xg_rmap_update_hooks);
#endif
@@ -210,7 +213,8 @@ xfs_group_insert(
out_drain:
xfs_defer_drain_free(&xg->xg_intents_drain);
#ifdef __KERNEL__
- kfree(xg->xg_busy_extents);
+ if (xfs_group_has_extent_busy(xg->xg_mount, xg->xg_type))
+ kfree(xg->xg_busy_extents);
#endif
return error;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index ba5bd6031ece..f9ef3b2a332a 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1683,7 +1683,7 @@ xfs_free_buftarg(
fs_put_dax(btp->bt_daxdev, btp->bt_mount);
/* the main block device is closed by kill_block_super */
if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev)
- bdev_fput(btp->bt_bdev_file);
+ bdev_fput(btp->bt_file);
kfree(btp);
}
@@ -1712,8 +1712,8 @@ xfs_configure_buftarg_atomic_writes(
max_bytes = 0;
}
- btp->bt_bdev_awu_min = min_bytes;
- btp->bt_bdev_awu_max = max_bytes;
+ btp->bt_awu_min = min_bytes;
+ btp->bt_awu_max = max_bytes;
}
/* Configure a buffer target that abstracts a block device. */
@@ -1738,14 +1738,9 @@ xfs_configure_buftarg(
return -EINVAL;
}
- /*
- * Flush the block device pagecache so our bios see anything dirtied
- * before mount.
- */
if (bdev_can_atomic_write(btp->bt_bdev))
xfs_configure_buftarg_atomic_writes(btp);
-
- return sync_blockdev(btp->bt_bdev);
+ return 0;
}
int
@@ -1803,7 +1798,7 @@ xfs_alloc_buftarg(
btp = kzalloc(sizeof(*btp), GFP_KERNEL | __GFP_NOFAIL);
btp->bt_mount = mp;
- btp->bt_bdev_file = bdev_file;
+ btp->bt_file = bdev_file;
btp->bt_bdev = file_bdev(bdev_file);
btp->bt_dev = btp->bt_bdev->bd_dev;
btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off,
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 15fc56948346..b269e115d9ac 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -94,7 +94,6 @@ void xfs_buf_cache_destroy(struct xfs_buf_cache *bch);
*/
struct xfs_buftarg {
dev_t bt_dev;
- struct file *bt_bdev_file;
struct block_device *bt_bdev;
struct dax_device *bt_daxdev;
struct file *bt_file;
@@ -112,9 +111,9 @@ struct xfs_buftarg {
struct percpu_counter bt_readahead_count;
struct ratelimit_state bt_ioerror_rl;
- /* Atomic write unit values, bytes */
- unsigned int bt_bdev_awu_min;
- unsigned int bt_bdev_awu_max;
+ /* Hardware atomic write unit values, bytes */
+ unsigned int bt_awu_min;
+ unsigned int bt_awu_max;
/* built-in cache, if we're not using the perag one */
struct xfs_buf_cache bt_cache[];
@@ -375,7 +374,6 @@ extern void xfs_buftarg_wait(struct xfs_buftarg *);
extern void xfs_buftarg_drain(struct xfs_buftarg *);
int xfs_configure_buftarg(struct xfs_buftarg *btp, unsigned int sectorsize);
-#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 94d0873bcd62..603d51365645 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -103,24 +103,6 @@ xfs_discard_endio(
bio_put(bio);
}
-static inline struct block_device *
-xfs_group_bdev(
- const struct xfs_group *xg)
-{
- struct xfs_mount *mp = xg->xg_mount;
-
- switch (xg->xg_type) {
- case XG_TYPE_AG:
- return mp->m_ddev_targp->bt_bdev;
- case XG_TYPE_RTG:
- return mp->m_rtdev_targp->bt_bdev;
- default:
- ASSERT(0);
- break;
- }
- return NULL;
-}
-
/*
* Walk the discard list and issue discards on all the busy extents in the
* list. We plug and chain the bios so that we only need a single completion
@@ -138,11 +120,14 @@ xfs_discard_extents(
blk_start_plug(&plug);
list_for_each_entry(busyp, &extents->extent_list, list) {
- trace_xfs_discard_extent(busyp->group, busyp->bno,
- busyp->length);
+ struct xfs_group *xg = busyp->group;
+ struct xfs_buftarg *btp =
+ xfs_group_type_buftarg(xg->xg_mount, xg->xg_type);
+
+ trace_xfs_discard_extent(xg, busyp->bno, busyp->length);
- error = __blkdev_issue_discard(xfs_group_bdev(busyp->group),
- xfs_gbno_to_daddr(busyp->group, busyp->bno),
+ error = __blkdev_issue_discard(btp->bt_bdev,
+ xfs_gbno_to_daddr(xg, busyp->bno),
XFS_FSB_TO_BB(mp, busyp->length),
GFP_KERNEL, &bio);
if (error && error != -EOPNOTSUPP) {
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h
index f069b04e8ea1..3e6e019b6146 100644
--- a/fs/xfs/xfs_extent_busy.h
+++ b/fs/xfs/xfs_extent_busy.h
@@ -68,4 +68,12 @@ static inline void xfs_extent_busy_sort(struct list_head *list)
list_sort(NULL, list, xfs_extent_busy_ag_cmp);
}
+/*
+ * Zoned RTGs don't need to track busy extents, as the actual block freeing only
+ * happens by a zone reset, which forces out all transactions that touched the
+ * to be reset zone first.
+ */
+#define xfs_group_has_extent_busy(mp, type) \
+ ((type) == XG_TYPE_AG || !xfs_has_zoned((mp)))
+
#endif /* __XFS_EXTENT_BUSY_H__ */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 0b41b18debf3..38e365b16348 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -752,7 +752,7 @@ xfs_file_dio_write_atomic(
* HW offload should be faster, so try that first if it is already
* known that the write length is not too large.
*/
- if (ocount > xfs_inode_buftarg(ip)->bt_bdev_awu_max)
+ if (ocount > xfs_inode_buftarg(ip)->bt_awu_max)
dops = &xfs_atomic_write_cow_iomap_ops;
else
dops = &xfs_direct_write_iomap_ops;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index d7e2b902ef5c..07fbdcc4cbf5 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -358,7 +358,7 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip)
static inline bool xfs_inode_can_hw_atomic_write(const struct xfs_inode *ip)
{
- return xfs_inode_buftarg(ip)->bt_bdev_awu_max > 0;
+ return xfs_inode_buftarg(ip)->bt_awu_max > 0;
}
/*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ff05e6b1b0bb..ec30b78bf5c4 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -827,7 +827,7 @@ xfs_bmap_hw_atomic_write_possible(
/*
* The ->iomap_begin caller should ensure this, but check anyway.
*/
- return len <= xfs_inode_buftarg(ip)->bt_bdev_awu_max;
+ return len <= xfs_inode_buftarg(ip)->bt_awu_max;
}
static int
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 8cddbb7c149b..01e597290eb5 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -665,7 +665,7 @@ xfs_get_atomic_write_max_opt(
* less than our out of place write limit, but we don't want to exceed
* the awu_max.
*/
- return min(awu_max, xfs_inode_buftarg(ip)->bt_bdev_awu_max);
+ return min(awu_max, xfs_inode_buftarg(ip)->bt_awu_max);
}
static void
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 29276fe60df9..0b690bc119d7 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -171,19 +171,16 @@ xfs_readsb(
ASSERT(mp->m_ddev_targp != NULL);
/*
- * For the initial read, we must guess at the sector
- * size based on the block device. It's enough to
- * get the sb_sectsize out of the superblock and
- * then reread with the proper length.
- * We don't verify it yet, because it may not be complete.
+ * In the first pass, use the device sector size to just read enough
+ * of the superblock to extract the XFS sector size.
+ *
+ * The device sector size must be smaller than or equal to the XFS
+ * sector size and thus we can always read the superblock. Once we know
+ * the XFS sector size, re-read it and run the buffer verifier.
*/
- sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
+ sector_size = mp->m_ddev_targp->bt_logical_sectorsize;
buf_ops = NULL;
- /*
- * Allocate a (locked) buffer to hold the superblock. This will be kept
- * around at all times to optimize access to the superblock.
- */
reread:
error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
BTOBB(sector_size), &bp, buf_ops);
@@ -247,6 +244,10 @@ reread:
/* no need to be quiet anymore, so reset the buf ops */
bp->b_ops = &xfs_sb_buf_ops;
+ /*
+ * Keep a pointer of the sb buffer around instead of caching it in the
+ * buffer cache because we access it frequently.
+ */
mp->m_sb_bp = bp;
xfs_buf_unlock(bp);
return 0;
@@ -678,68 +679,46 @@ static inline unsigned int max_pow_of_two_factor(const unsigned int nr)
}
/*
- * If the data device advertises atomic write support, limit the size of data
- * device atomic writes to the greatest power-of-two factor of the AG size so
- * that every atomic write unit aligns with the start of every AG. This is
- * required so that the per-AG allocations for an atomic write will always be
+ * If the underlying device advertises atomic write support, limit the size of
+ * atomic writes to the greatest power-of-two factor of the group size so
+ * that every atomic write unit aligns with the start of every group. This is
+ * required so that the allocations for an atomic write will always be
* aligned compatibly with the alignment requirements of the storage.
*
- * If the data device doesn't advertise atomic writes, then there are no
- * alignment restrictions and the largest out-of-place write we can do
- * ourselves is the number of blocks that user files can allocate from any AG.
- */
-static inline xfs_extlen_t xfs_calc_perag_awu_max(struct xfs_mount *mp)
-{
- if (mp->m_ddev_targp->bt_bdev_awu_min > 0)
- return max_pow_of_two_factor(mp->m_sb.sb_agblocks);
- return rounddown_pow_of_two(mp->m_ag_max_usable);
-}
-
-/*
- * Reflink on the realtime device requires rtgroups, and atomic writes require
- * reflink.
- *
- * If the realtime device advertises atomic write support, limit the size of
- * data device atomic writes to the greatest power-of-two factor of the rtgroup
- * size so that every atomic write unit aligns with the start of every rtgroup.
- * This is required so that the per-rtgroup allocations for an atomic write
- * will always be aligned compatibly with the alignment requirements of the
- * storage.
- *
- * If the rt device doesn't advertise atomic writes, then there are no
- * alignment restrictions and the largest out-of-place write we can do
- * ourselves is the number of blocks that user files can allocate from any
- * rtgroup.
+ * If the device doesn't advertise atomic writes, then there are no alignment
+ * restrictions and the largest out-of-place write we can do ourselves is the
+ * number of blocks that user files can allocate from any group.
*/
-static inline xfs_extlen_t xfs_calc_rtgroup_awu_max(struct xfs_mount *mp)
+static xfs_extlen_t
+xfs_calc_group_awu_max(
+ struct xfs_mount *mp,
+ enum xfs_group_type type)
{
- struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG];
+ struct xfs_groups *g = &mp->m_groups[type];
+ struct xfs_buftarg *btp = xfs_group_type_buftarg(mp, type);
- if (rgs->blocks == 0)
+ if (g->blocks == 0)
return 0;
- if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_bdev_awu_min > 0)
- return max_pow_of_two_factor(rgs->blocks);
- return rounddown_pow_of_two(rgs->blocks);
+ if (btp && btp->bt_awu_min > 0)
+ return max_pow_of_two_factor(g->blocks);
+ return rounddown_pow_of_two(g->blocks);
}
/* Compute the maximum atomic write unit size for each section. */
static inline void
xfs_calc_atomic_write_unit_max(
- struct xfs_mount *mp)
+ struct xfs_mount *mp,
+ enum xfs_group_type type)
{
- struct xfs_groups *ags = &mp->m_groups[XG_TYPE_AG];
- struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG];
+ struct xfs_groups *g = &mp->m_groups[type];
const xfs_extlen_t max_write = xfs_calc_atomic_write_max(mp);
const xfs_extlen_t max_ioend = xfs_reflink_max_atomic_cow(mp);
- const xfs_extlen_t max_agsize = xfs_calc_perag_awu_max(mp);
- const xfs_extlen_t max_rgsize = xfs_calc_rtgroup_awu_max(mp);
-
- ags->awu_max = min3(max_write, max_ioend, max_agsize);
- rgs->awu_max = min3(max_write, max_ioend, max_rgsize);
+ const xfs_extlen_t max_gsize = xfs_calc_group_awu_max(mp, type);
- trace_xfs_calc_atomic_write_unit_max(mp, max_write, max_ioend,
- max_agsize, max_rgsize);
+ g->awu_max = min3(max_write, max_ioend, max_gsize);
+ trace_xfs_calc_atomic_write_unit_max(mp, type, max_write, max_ioend,
+ max_gsize, g->awu_max);
}
/*
@@ -757,7 +736,8 @@ xfs_set_max_atomic_write_opt(
max(mp->m_groups[XG_TYPE_AG].blocks,
mp->m_groups[XG_TYPE_RTG].blocks);
const xfs_extlen_t max_group_write =
- max(xfs_calc_perag_awu_max(mp), xfs_calc_rtgroup_awu_max(mp));
+ max(xfs_calc_group_awu_max(mp, XG_TYPE_AG),
+ xfs_calc_group_awu_max(mp, XG_TYPE_RTG));
int error;
if (new_max_bytes == 0)
@@ -813,7 +793,8 @@ set_limit:
return error;
}
- xfs_calc_atomic_write_unit_max(mp);
+ xfs_calc_atomic_write_unit_max(mp, XG_TYPE_AG);
+ xfs_calc_atomic_write_unit_max(mp, XG_TYPE_RTG);
mp->m_awu_max_bytes = new_max_bytes;
return 0;
}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index d85084f9f317..97de44c32272 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -802,4 +802,21 @@ static inline void xfs_mod_sb_delalloc(struct xfs_mount *mp, int64_t delta)
int xfs_set_max_atomic_write_opt(struct xfs_mount *mp,
unsigned long long new_max_bytes);
+static inline struct xfs_buftarg *
+xfs_group_type_buftarg(
+ struct xfs_mount *mp,
+ enum xfs_group_type type)
+{
+ switch (type) {
+ case XG_TYPE_AG:
+ return mp->m_ddev_targp;
+ case XG_TYPE_RTG:
+ return mp->m_rtdev_targp;
+ default:
+ ASSERT(0);
+ break;
+ }
+ return NULL;
+}
+
#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index 3545dc1d953c..42e9c72b85c0 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -253,8 +253,7 @@ xfs_dax_notify_dev_failure(
return -EOPNOTSUPP;
}
- error = xfs_dax_translate_range(type == XG_TYPE_RTG ?
- mp->m_rtdev_targp : mp->m_ddev_targp,
+ error = xfs_dax_translate_range(xfs_group_type_buftarg(mp, type),
offset, len, &daddr, &bblen);
if (error)
return error;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ba45d801df1c..78be223b13b2 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -171,36 +171,33 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_leaf_list);
DEFINE_ATTR_LIST_EVENT(xfs_attr_node_list);
TRACE_EVENT(xfs_calc_atomic_write_unit_max,
- TP_PROTO(struct xfs_mount *mp, unsigned int max_write,
- unsigned int max_ioend, unsigned int max_agsize,
- unsigned int max_rgsize),
- TP_ARGS(mp, max_write, max_ioend, max_agsize, max_rgsize),
+ TP_PROTO(struct xfs_mount *mp, enum xfs_group_type type,
+ unsigned int max_write, unsigned int max_ioend,
+ unsigned int max_gsize, unsigned int awu_max),
+ TP_ARGS(mp, type, max_write, max_ioend, max_gsize, awu_max),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(enum xfs_group_type, type)
__field(unsigned int, max_write)
__field(unsigned int, max_ioend)
- __field(unsigned int, max_agsize)
- __field(unsigned int, max_rgsize)
- __field(unsigned int, data_awu_max)
- __field(unsigned int, rt_awu_max)
+ __field(unsigned int, max_gsize)
+ __field(unsigned int, awu_max)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
+ __entry->type = type;
__entry->max_write = max_write;
__entry->max_ioend = max_ioend;
- __entry->max_agsize = max_agsize;
- __entry->max_rgsize = max_rgsize;
- __entry->data_awu_max = mp->m_groups[XG_TYPE_AG].awu_max;
- __entry->rt_awu_max = mp->m_groups[XG_TYPE_RTG].awu_max;
+ __entry->max_gsize = max_gsize;
+ __entry->awu_max = awu_max;
),
- TP_printk("dev %d:%d max_write %u max_ioend %u max_agsize %u max_rgsize %u data_awu_max %u rt_awu_max %u",
+ TP_printk("dev %d:%d %s max_write %u max_ioend %u max_gsize %u awu_max %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->max_write,
__entry->max_ioend,
- __entry->max_agsize,
- __entry->max_rgsize,
- __entry->data_awu_max,
- __entry->rt_awu_max)
+ __entry->max_gsize,
+ __entry->awu_max)
);
TRACE_EVENT(xfs_calc_max_atomic_write_fsblocks,
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 0f641a9091ec..ac5cecec9aa1 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -243,7 +243,7 @@ __xfs_xattr_put_listent(
offset = context->buffer + context->count;
memcpy(offset, prefix, prefix_len);
offset += prefix_len;
- strncpy(offset, (char *)name, namelen); /* real name */
+ memcpy(offset, (char *)name, namelen); /* real name */
offset += namelen;
*offset = '\0';