summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_super.c')
-rw-r--r--fs/xfs/xfs_super.c601
1 files changed, 460 insertions, 141 deletions
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c21f10ab0f5d..bb0a82635a77 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -43,6 +43,10 @@
#include "xfs_iunlink_item.h"
#include "xfs_dahash_test.h"
#include "xfs_rtbitmap.h"
+#include "xfs_exchmaps_item.h"
+#include "xfs_parent.h"
+#include "xfs_rtalloc.h"
+#include "xfs_zone_alloc.h"
#include "scrub/stats.h"
#include "scrub/rcbag_btree.h"
@@ -64,6 +68,9 @@ enum xfs_dax_mode {
XFS_DAX_NEVER = 2,
};
+/* Were quota mount options provided? Must use the upper 16 bits of qflags. */
+#define XFS_QFLAGS_MNTOPTS (1U << 31)
+
static void
xfs_mount_set_dax_mode(
struct xfs_mount *mp,
@@ -103,7 +110,8 @@ enum {
Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
- Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum,
+ Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones,
+ Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write,
};
static const struct fs_parameter_spec xfs_fs_parameters[] = {
@@ -148,6 +156,10 @@ static const struct fs_parameter_spec xfs_fs_parameters[] = {
fsparam_flag("nodiscard", Opt_nodiscard),
fsparam_flag("dax", Opt_dax),
fsparam_enum("dax", Opt_dax_enum, dax_param_enums),
+ fsparam_u32("max_open_zones", Opt_max_open_zones),
+ fsparam_flag("lifetime", Opt_lifetime),
+ fsparam_flag("nolifetime", Opt_nolifetime),
+ fsparam_string("max_atomic_write", Opt_max_atomic_write),
{}
};
@@ -176,6 +188,7 @@ xfs_fs_show_options(
{ XFS_FEAT_LARGE_IOSIZE, ",largeio" },
{ XFS_FEAT_DAX_ALWAYS, ",dax=always" },
{ XFS_FEAT_DAX_NEVER, ",dax=never" },
+ { XFS_FEAT_NOLIFETIME, ",nolifetime" },
{ 0, NULL }
};
struct xfs_mount *mp = XFS_M(root->d_sb);
@@ -227,6 +240,12 @@ xfs_fs_show_options(
if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
seq_puts(m, ",noquota");
+ if (mp->m_max_open_zones)
+ seq_printf(m, ",max_open_zones=%u", mp->m_max_open_zones);
+ if (mp->m_awu_max_bytes)
+ seq_printf(m, ",max_atomic_write=%lluk",
+ mp->m_awu_max_bytes >> 10);
+
return 0;
}
@@ -236,7 +255,7 @@ xfs_set_inode_alloc_perag(
xfs_ino_t ino,
xfs_agnumber_t max_metadata)
{
- if (!xfs_is_inode32(pag->pag_mount)) {
+ if (!xfs_is_inode32(pag_mount(pag))) {
set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
return false;
@@ -249,7 +268,7 @@ xfs_set_inode_alloc_perag(
}
set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
- if (pag->pag_agno < max_metadata)
+ if (pag_agno(pag) < max_metadata)
set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
else
clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
@@ -309,9 +328,9 @@ xfs_set_inode_alloc(
* the allocator to accommodate the request.
*/
if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
- set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
+ xfs_set_inode32(mp);
else
- clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
+ xfs_clear_inode32(mp);
for (index = 0; index < agcount; index++) {
struct xfs_perag *pag;
@@ -365,10 +384,11 @@ xfs_blkdev_get(
struct file **bdev_filep)
{
int error = 0;
+ blk_mode_t mode;
- *bdev_filep = bdev_file_open_by_path(name,
- BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
- mp->m_super, &fs_holder_ops);
+ mode = sb_open_mode(mp->m_super->s_flags);
+ *bdev_filep = bdev_file_open_by_path(name, mode,
+ mp->m_super, &fs_holder_ops);
if (IS_ERR(*bdev_filep)) {
error = PTR_ERR(*bdev_filep);
*bdev_filep = NULL;
@@ -466,26 +486,34 @@ xfs_open_devices(
/*
* Setup xfs_mount buffer target pointers
*/
- error = -ENOMEM;
mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_file);
- if (!mp->m_ddev_targp)
+ if (IS_ERR(mp->m_ddev_targp)) {
+ error = PTR_ERR(mp->m_ddev_targp);
+ mp->m_ddev_targp = NULL;
goto out_close_rtdev;
+ }
if (rtdev_file) {
mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_file);
- if (!mp->m_rtdev_targp)
+ if (IS_ERR(mp->m_rtdev_targp)) {
+ error = PTR_ERR(mp->m_rtdev_targp);
+ mp->m_rtdev_targp = NULL;
goto out_free_ddev_targ;
+ }
}
if (logdev_file && file_bdev(logdev_file) != ddev) {
mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_file);
- if (!mp->m_logdev_targp)
+ if (IS_ERR(mp->m_logdev_targp)) {
+ error = PTR_ERR(mp->m_logdev_targp);
+ mp->m_logdev_targp = NULL;
goto out_free_rtdev_targ;
+ }
} else {
mp->m_logdev_targp = mp->m_ddev_targp;
/* Handle won't be used, drop it */
if (logdev_file)
- fput(logdev_file);
+ bdev_fput(logdev_file);
}
return 0;
@@ -497,10 +525,10 @@ xfs_open_devices(
xfs_free_buftarg(mp->m_ddev_targp);
out_close_rtdev:
if (rtdev_file)
- fput(rtdev_file);
+ bdev_fput(rtdev_file);
out_close_logdev:
if (logdev_file)
- fput(logdev_file);
+ bdev_fput(logdev_file);
return error;
}
@@ -513,7 +541,7 @@ xfs_setup_devices(
{
int error;
- error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
+ error = xfs_configure_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
if (error)
return error;
@@ -522,13 +550,21 @@ xfs_setup_devices(
if (xfs_has_sector(mp))
log_sector_size = mp->m_sb.sb_logsectsize;
- error = xfs_setsize_buftarg(mp->m_logdev_targp,
+ error = xfs_configure_buftarg(mp->m_logdev_targp,
log_sector_size);
if (error)
return error;
}
- if (mp->m_rtdev_targp) {
- error = xfs_setsize_buftarg(mp->m_rtdev_targp,
+
+ if (mp->m_sb.sb_rtstart) {
+ if (mp->m_rtdev_targp) {
+ xfs_warn(mp,
+ "can't use internal and external rtdev at the same time");
+ return -EINVAL;
+ }
+ mp->m_rtdev_targp = mp->m_ddev_targp;
+ } else if (mp->m_rtname) {
+ error = xfs_configure_buftarg(mp->m_rtdev_targp,
mp->m_sb.sb_sectsize);
if (error)
return error;
@@ -745,13 +781,24 @@ xfs_fs_drop_inode(
return generic_drop_inode(inode);
}
+STATIC void
+xfs_fs_evict_inode(
+ struct inode *inode)
+{
+ if (IS_DAX(inode))
+ dax_break_layout_final(inode);
+
+ truncate_inode_pages_final(&inode->i_data);
+ clear_inode(inode);
+}
+
static void
xfs_mount_free(
struct xfs_mount *mp)
{
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
xfs_free_buftarg(mp->m_logdev_targp);
- if (mp->m_rtdev_targp)
+ if (mp->m_rtdev_targp && mp->m_rtdev_targp != mp->m_ddev_targp)
xfs_free_buftarg(mp->m_rtdev_targp);
if (mp->m_ddev_targp)
xfs_free_buftarg(mp->m_ddev_targp);
@@ -808,25 +855,85 @@ xfs_fs_sync_fs(
if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
xfs_inodegc_stop(mp);
xfs_blockgc_stop(mp);
+ xfs_zone_gc_stop(mp);
}
return 0;
}
+static xfs_extlen_t
+xfs_internal_log_size(
+ struct xfs_mount *mp)
+{
+ if (!mp->m_sb.sb_logstart)
+ return 0;
+ return mp->m_sb.sb_logblocks;
+}
+
+static void
+xfs_statfs_data(
+ struct xfs_mount *mp,
+ struct kstatfs *st)
+{
+ int64_t fdblocks =
+ xfs_sum_freecounter(mp, XC_FREE_BLOCKS);
+
+ /* make sure st->f_bfree does not underflow */
+ st->f_bfree = max(0LL,
+ fdblocks - xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS));
+
+ /*
+ * sb_dblocks can change during growfs, but nothing cares about reporting
+ * the old or new value during growfs.
+ */
+ st->f_blocks = mp->m_sb.sb_dblocks - xfs_internal_log_size(mp);
+}
+
+/*
+ * When stat(v)fs is called on a file with the realtime bit set or a directory
+ * with the rtinherit bit, report freespace information for the RT device
+ * instead of the main data device.
+ */
+static void
+xfs_statfs_rt(
+ struct xfs_mount *mp,
+ struct kstatfs *st)
+{
+ st->f_bfree = xfs_rtbxlen_to_blen(mp,
+ xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
+ st->f_blocks = mp->m_sb.sb_rblocks - xfs_rtbxlen_to_blen(mp,
+ mp->m_free[XC_FREE_RTEXTENTS].res_total);
+}
+
+static void
+xfs_statfs_inodes(
+ struct xfs_mount *mp,
+ struct kstatfs *st)
+{
+ uint64_t icount = percpu_counter_sum(&mp->m_icount);
+ uint64_t ifree = percpu_counter_sum(&mp->m_ifree);
+ uint64_t fakeinos = XFS_FSB_TO_INO(mp, st->f_bfree);
+
+ st->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
+ if (M_IGEO(mp)->maxicount)
+ st->f_files = min_t(typeof(st->f_files), st->f_files,
+ M_IGEO(mp)->maxicount);
+
+ /* If sb_icount overshot maxicount, report actual allocation */
+ st->f_files = max_t(typeof(st->f_files), st->f_files,
+ mp->m_sb.sb_icount);
+
+ /* Make sure st->f_ffree does not underflow */
+ st->f_ffree = max_t(int64_t, 0, st->f_files - (icount - ifree));
+}
+
STATIC int
xfs_fs_statfs(
struct dentry *dentry,
- struct kstatfs *statp)
+ struct kstatfs *st)
{
struct xfs_mount *mp = XFS_M(dentry->d_sb);
- xfs_sb_t *sbp = &mp->m_sb;
struct xfs_inode *ip = XFS_I(d_inode(dentry));
- uint64_t fakeinos, id;
- uint64_t icount;
- uint64_t ifree;
- uint64_t fdblocks;
- xfs_extlen_t lsize;
- int64_t ffree;
/*
* Expedite background inodegc but don't wait. We do not want to block
@@ -834,80 +941,58 @@ xfs_fs_statfs(
*/
xfs_inodegc_push(mp);
- statp->f_type = XFS_SUPER_MAGIC;
- statp->f_namelen = MAXNAMELEN - 1;
-
- id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
- statp->f_fsid = u64_to_fsid(id);
-
- icount = percpu_counter_sum(&mp->m_icount);
- ifree = percpu_counter_sum(&mp->m_ifree);
- fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+ st->f_type = XFS_SUPER_MAGIC;
+ st->f_namelen = MAXNAMELEN - 1;
+ st->f_bsize = mp->m_sb.sb_blocksize;
+ st->f_fsid = u64_to_fsid(huge_encode_dev(mp->m_ddev_targp->bt_dev));
- spin_lock(&mp->m_sb_lock);
- statp->f_bsize = sbp->sb_blocksize;
- lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
- statp->f_blocks = sbp->sb_dblocks - lsize;
- spin_unlock(&mp->m_sb_lock);
-
- /* make sure statp->f_bfree does not underflow */
- statp->f_bfree = max_t(int64_t, 0,
- fdblocks - xfs_fdblocks_unavailable(mp));
- statp->f_bavail = statp->f_bfree;
-
- fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
- statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
- if (M_IGEO(mp)->maxicount)
- statp->f_files = min_t(typeof(statp->f_files),
- statp->f_files,
- M_IGEO(mp)->maxicount);
-
- /* If sb_icount overshot maxicount, report actual allocation */
- statp->f_files = max_t(typeof(statp->f_files),
- statp->f_files,
- sbp->sb_icount);
-
- /* make sure statp->f_ffree does not underflow */
- ffree = statp->f_files - (icount - ifree);
- statp->f_ffree = max_t(int64_t, ffree, 0);
+ xfs_statfs_data(mp, st);
+ xfs_statfs_inodes(mp, st);
+ if (XFS_IS_REALTIME_MOUNT(mp) &&
+ (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME)))
+ xfs_statfs_rt(mp, st);
if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
(XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
- xfs_qm_statvfs(ip, statp);
-
- if (XFS_IS_REALTIME_MOUNT(mp) &&
- (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
- s64 freertx;
-
- statp->f_blocks = sbp->sb_rblocks;
- freertx = percpu_counter_sum_positive(&mp->m_frextents);
- statp->f_bavail = statp->f_bfree = xfs_rtx_to_rtb(mp, freertx);
- }
+ xfs_qm_statvfs(ip, st);
+ /*
+ * XFS does not distinguish between blocks available to privileged and
+ * unprivileged users.
+ */
+ st->f_bavail = st->f_bfree;
return 0;
}
STATIC void
-xfs_save_resvblks(struct xfs_mount *mp)
+xfs_save_resvblks(
+ struct xfs_mount *mp)
{
- mp->m_resblks_save = mp->m_resblks;
- xfs_reserve_blocks(mp, 0);
+ enum xfs_free_counter i;
+
+ for (i = 0; i < XC_FREE_NR; i++) {
+ mp->m_free[i].res_saved = mp->m_free[i].res_total;
+ xfs_reserve_blocks(mp, i, 0);
+ }
}
STATIC void
-xfs_restore_resvblks(struct xfs_mount *mp)
+xfs_restore_resvblks(
+ struct xfs_mount *mp)
{
- uint64_t resblks;
-
- if (mp->m_resblks_save) {
- resblks = mp->m_resblks_save;
- mp->m_resblks_save = 0;
- } else
- resblks = xfs_default_resblks(mp);
+ uint64_t resblks;
+ enum xfs_free_counter i;
- xfs_reserve_blocks(mp, resblks);
+ for (i = 0; i < XC_FREE_NR; i++) {
+ if (mp->m_free[i].res_saved) {
+ resblks = mp->m_free[i].res_saved;
+ mp->m_free[i].res_saved = 0;
+ } else
+ resblks = xfs_default_resblks(mp, i);
+ xfs_reserve_blocks(mp, i, resblks);
+ }
}
/*
@@ -944,6 +1029,7 @@ xfs_fs_freeze(
if (ret && !xfs_is_readonly(mp)) {
xfs_blockgc_start(mp);
xfs_inodegc_start(mp);
+ xfs_zone_gc_start(mp);
}
return ret;
@@ -965,6 +1051,7 @@ xfs_fs_unfreeze(
* filesystem.
*/
if (!xfs_is_readonly(mp)) {
+ xfs_zone_gc_start(mp);
xfs_blockgc_start(mp);
xfs_inodegc_start(mp);
}
@@ -1026,6 +1113,19 @@ xfs_finish_flags(
return -EINVAL;
}
+ if (!xfs_has_zoned(mp)) {
+ if (mp->m_max_open_zones) {
+ xfs_warn(mp,
+"max_open_zones mount option only supported on zoned file systems.");
+ return -EINVAL;
+ }
+ if (mp->m_features & XFS_FEAT_NOLIFETIME) {
+ xfs_warn(mp,
+"nolifetime mount option only supported on zoned file systems.");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -1033,7 +1133,8 @@ static int
xfs_init_percpu_counters(
struct xfs_mount *mp)
{
- int error;
+ int error;
+ int i;
error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
if (error)
@@ -1043,24 +1144,29 @@ xfs_init_percpu_counters(
if (error)
goto free_icount;
- error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
- if (error)
- goto free_ifree;
-
error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
if (error)
- goto free_fdblocks;
+ goto free_ifree;
- error = percpu_counter_init(&mp->m_frextents, 0, GFP_KERNEL);
+ error = percpu_counter_init(&mp->m_delalloc_rtextents, 0, GFP_KERNEL);
if (error)
goto free_delalloc;
+ for (i = 0; i < XC_FREE_NR; i++) {
+ error = percpu_counter_init(&mp->m_free[i].count, 0,
+ GFP_KERNEL);
+ if (error)
+ goto free_freecounters;
+ }
+
return 0;
+free_freecounters:
+ while (--i >= 0)
+ percpu_counter_destroy(&mp->m_free[i].count);
+ percpu_counter_destroy(&mp->m_delalloc_rtextents);
free_delalloc:
percpu_counter_destroy(&mp->m_delalloc_blks);
-free_fdblocks:
- percpu_counter_destroy(&mp->m_fdblocks);
free_ifree:
percpu_counter_destroy(&mp->m_ifree);
free_icount:
@@ -1074,21 +1180,28 @@ xfs_reinit_percpu_counters(
{
percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
- percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
- percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
+ xfs_set_freecounter(mp, XC_FREE_BLOCKS, mp->m_sb.sb_fdblocks);
+ if (!xfs_has_zoned(mp))
+ xfs_set_freecounter(mp, XC_FREE_RTEXTENTS,
+ mp->m_sb.sb_frextents);
}
static void
xfs_destroy_percpu_counters(
struct xfs_mount *mp)
{
+ enum xfs_free_counter i;
+
+ for (i = 0; i < XC_FREE_NR; i++)
+ percpu_counter_destroy(&mp->m_free[i].count);
percpu_counter_destroy(&mp->m_icount);
percpu_counter_destroy(&mp->m_ifree);
- percpu_counter_destroy(&mp->m_fdblocks);
+ ASSERT(xfs_is_shutdown(mp) ||
+ percpu_counter_sum(&mp->m_delalloc_rtextents) == 0);
+ percpu_counter_destroy(&mp->m_delalloc_rtextents);
ASSERT(xfs_is_shutdown(mp) ||
percpu_counter_sum(&mp->m_delalloc_blks) == 0);
percpu_counter_destroy(&mp->m_delalloc_blks);
- percpu_counter_destroy(&mp->m_frextents);
}
static int
@@ -1133,6 +1246,7 @@ xfs_fs_put_super(
xfs_filestream_unmount(mp);
xfs_unmountfs(mp);
+ xfs_rtmount_freesb(mp);
xfs_freesb(mp);
xchk_mount_stats_free(mp);
free_percpu(mp->m_stats.xs_stats);
@@ -1168,11 +1282,24 @@ xfs_fs_shutdown(
xfs_force_shutdown(XFS_M(sb), SHUTDOWN_DEVICE_REMOVED);
}
+static int
+xfs_fs_show_stats(
+ struct seq_file *m,
+ struct dentry *root)
+{
+ struct xfs_mount *mp = XFS_M(root->d_sb);
+
+ if (xfs_has_zoned(mp) && IS_ENABLED(CONFIG_XFS_RT))
+ xfs_zoned_show_stats(m, mp);
+ return 0;
+}
+
static const struct super_operations xfs_super_operations = {
.alloc_inode = xfs_fs_alloc_inode,
.destroy_inode = xfs_fs_destroy_inode,
.dirty_inode = xfs_fs_dirty_inode,
.drop_inode = xfs_fs_drop_inode,
+ .evict_inode = xfs_fs_evict_inode,
.put_super = xfs_fs_put_super,
.sync_fs = xfs_fs_sync_fs,
.freeze_fs = xfs_fs_freeze,
@@ -1182,6 +1309,7 @@ static const struct super_operations xfs_super_operations = {
.nr_cached_objects = xfs_fs_nr_cached_objects,
.free_cached_objects = xfs_fs_free_cached_objects,
.shutdown = xfs_fs_shutdown,
+ .show_stats = xfs_fs_show_stats,
};
static int
@@ -1219,6 +1347,42 @@ suffix_kstrtoint(
return ret;
}
+static int
+suffix_kstrtoull(
+ const char *s,
+ unsigned int base,
+ unsigned long long *res)
+{
+ int last, shift_left_factor = 0;
+ unsigned long long _res;
+ char *value;
+ int ret = 0;
+
+ value = kstrdup(s, GFP_KERNEL);
+ if (!value)
+ return -ENOMEM;
+
+ last = strlen(value) - 1;
+ if (value[last] == 'K' || value[last] == 'k') {
+ shift_left_factor = 10;
+ value[last] = '\0';
+ }
+ if (value[last] == 'M' || value[last] == 'm') {
+ shift_left_factor = 20;
+ value[last] = '\0';
+ }
+ if (value[last] == 'G' || value[last] == 'g') {
+ shift_left_factor = 30;
+ value[last] = '\0';
+ }
+
+ if (kstrtoull(value, base, &_res))
+ ret = -EINVAL;
+ kfree(value);
+ *res = _res << shift_left_factor;
+ return ret;
+}
+
static inline void
xfs_fs_warn_deprecated(
struct fs_context *fc,
@@ -1250,6 +1414,8 @@ xfs_fs_parse_param(
int size = 0;
int opt;
+ BUILD_BUG_ON(XFS_QFLAGS_MNTOPTS & XFS_MOUNT_QUOTA_ALL);
+
opt = fs_parse(fc, xfs_fs_parameters, param, &result);
if (opt < 0)
return opt;
@@ -1327,32 +1493,39 @@ xfs_fs_parse_param(
case Opt_noquota:
parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
return 0;
case Opt_quota:
case Opt_uquota:
case Opt_usrquota:
parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
return 0;
case Opt_qnoenforce:
case Opt_uqnoenforce:
parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
return 0;
case Opt_pquota:
case Opt_prjquota:
parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
return 0;
case Opt_pqnoenforce:
parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
return 0;
case Opt_gquota:
case Opt_grpquota:
parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
return 0;
case Opt_gqnoenforce:
parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
return 0;
case Opt_discard:
parsing_mp->m_features |= XFS_FEAT_DISCARD;
@@ -1385,6 +1558,23 @@ xfs_fs_parse_param(
xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true);
parsing_mp->m_features |= XFS_FEAT_NOATTR2;
return 0;
+ case Opt_max_open_zones:
+ parsing_mp->m_max_open_zones = result.uint_32;
+ return 0;
+ case Opt_lifetime:
+ parsing_mp->m_features &= ~XFS_FEAT_NOLIFETIME;
+ return 0;
+ case Opt_nolifetime:
+ parsing_mp->m_features |= XFS_FEAT_NOLIFETIME;
+ return 0;
+ case Opt_max_atomic_write:
+ if (suffix_kstrtoull(param->string, 10,
+ &parsing_mp->m_awu_max_bytes)) {
+ xfs_warn(parsing_mp,
+ "max atomic write size must be positive integer");
+ return -EINVAL;
+ }
+ return 0;
default:
xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
return -EINVAL;
@@ -1419,7 +1609,8 @@ xfs_fs_validate_params(
return -EINVAL;
}
- if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) {
+ if (!IS_ENABLED(CONFIG_XFS_QUOTA) &&
+ (mp->m_qflags & ~XFS_QFLAGS_MNTOPTS)) {
xfs_warn(mp, "quota support not available in this kernel.");
return -EINVAL;
}
@@ -1500,7 +1691,7 @@ xfs_fs_fill_super(
* the newer fsopen/fsconfig API.
*/
if (fc->sb_flags & SB_RDONLY)
- set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
+ xfs_set_readonly(mp);
if (fc->sb_flags & SB_DIRSYNC)
mp->m_features |= XFS_FEAT_DIRSYNC;
if (fc->sb_flags & SB_SYNCHRONOUS)
@@ -1579,17 +1770,21 @@ xfs_fs_fill_super(
if (error)
goto out_free_sb;
- /* V4 support is undergoing deprecation. */
- if (!xfs_has_crc(mp)) {
-#ifdef CONFIG_XFS_SUPPORT_V4
+ /*
+ * V4 support is undergoing deprecation.
+ *
+ * Note: this has to use an open coded m_features check as xfs_has_crc
+ * always returns false for !CONFIG_XFS_SUPPORT_V4.
+ */
+ if (!(mp->m_features & XFS_FEAT_CRC)) {
+ if (!IS_ENABLED(CONFIG_XFS_SUPPORT_V4)) {
+ xfs_warn(mp,
+ "Deprecated V4 format (crc=0) not supported by kernel.");
+ error = -EINVAL;
+ goto out_free_sb;
+ }
xfs_warn_once(mp,
"Deprecated V4 format (crc=0) will not be supported after September 2030.");
-#else
- xfs_warn(mp,
- "Deprecated V4 format (crc=0) not supported by kernel.");
- error = -EINVAL;
- goto out_free_sb;
-#endif
}
/* ASCII case insensitivity is undergoing deprecation. */
@@ -1605,8 +1800,12 @@ xfs_fs_fill_super(
#endif
}
- /* Filesystem claims it needs repair, so refuse the mount. */
- if (xfs_has_needsrepair(mp)) {
+ /*
+ * Filesystem claims it needs repair, so refuse the mount unless
+ * norecovery is also specified, in which case the filesystem can
+ * be mounted with no risk of further damage.
+ */
+ if (xfs_has_needsrepair(mp) && !xfs_has_norecovery(mp)) {
xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair.");
error = -EFSCORRUPTED;
goto out_free_sb;
@@ -1623,16 +1822,26 @@ xfs_fs_fill_super(
goto out_free_sb;
}
- /*
- * Until this is fixed only page-sized or smaller data blocks work.
- */
if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
- xfs_warn(mp,
- "File system with blocksize %d bytes. "
- "Only pagesize (%ld) or less will currently work.",
+ size_t max_folio_size = mapping_max_folio_size_supported();
+
+ if (!xfs_has_crc(mp)) {
+ xfs_warn(mp,
+"V4 Filesystem with blocksize %d bytes. Only pagesize (%ld) or less is supported.",
mp->m_sb.sb_blocksize, PAGE_SIZE);
- error = -ENOSYS;
- goto out_free_sb;
+ error = -ENOSYS;
+ goto out_free_sb;
+ }
+
+ if (mp->m_sb.sb_blocksize > max_folio_size) {
+ xfs_warn(mp,
+"block size (%u bytes) not supported; Only block size (%zu) or less is supported",
+ mp->m_sb.sb_blocksize, max_folio_size);
+ error = -ENOSYS;
+ goto out_free_sb;
+ }
+
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LBS);
}
/* Ensure this filesystem fits in the page cache limits */
@@ -1664,10 +1873,14 @@ xfs_fs_fill_super(
goto out_free_sb;
}
- error = xfs_filestream_mount(mp);
+ error = xfs_rtmount_readsb(mp);
if (error)
goto out_free_sb;
+ error = xfs_filestream_mount(mp);
+ if (error)
+ goto out_free_rtsb;
+
/*
* we must configure the block size in the superblock before we run the
* full mount process as the mount process can lookup and cache inodes.
@@ -1686,7 +1899,7 @@ xfs_fs_fill_super(
sb->s_time_max = XFS_LEGACY_TIME_MAX;
}
trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max);
- sb->s_iflags |= SB_I_CGROUPWB;
+ sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM;
set_posix_acl_flag(sb);
@@ -1706,10 +1919,31 @@ xfs_fs_fill_super(
mp->m_features &= ~XFS_FEAT_DISCARD;
}
+ if (xfs_has_zoned(mp)) {
+ if (!xfs_has_metadir(mp)) {
+ xfs_alert(mp,
+ "metadir feature required for zoned realtime devices.");
+ error = -EINVAL;
+ goto out_filestream_unmount;
+ }
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_ZONED);
+ } else if (xfs_has_metadir(mp)) {
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_METADIR);
+ }
+
if (xfs_has_reflink(mp)) {
- if (mp->m_sb.sb_rblocks) {
+ if (xfs_has_realtime(mp) &&
+ !xfs_reflink_supports_rextsize(mp, mp->m_sb.sb_rextsize)) {
xfs_alert(mp,
- "reflink not compatible with realtime device!");
+ "reflink not compatible with realtime extent size %u!",
+ mp->m_sb.sb_rextsize);
+ error = -EINVAL;
+ goto out_filestream_unmount;
+ }
+
+ if (xfs_has_zoned(mp)) {
+ xfs_alert(mp,
+ "reflink not compatible with zoned RT device!");
error = -EINVAL;
goto out_filestream_unmount;
}
@@ -1720,12 +1954,13 @@ xfs_fs_fill_super(
}
}
- if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) {
- xfs_alert(mp,
- "reverse mapping btree not compatible with realtime device!");
- error = -EINVAL;
- goto out_filestream_unmount;
- }
+ /*
+ * If no quota mount options were provided, maybe we'll try to pick
+ * up the quota accounting and enforcement flags from the ondisk sb.
+ */
+ if (!(mp->m_qflags & XFS_QFLAGS_MNTOPTS))
+ xfs_set_resuming_quotaon(mp);
+ mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS;
error = xfs_mountfs(mp);
if (error)
@@ -1746,6 +1981,8 @@ xfs_fs_fill_super(
out_filestream_unmount:
xfs_filestream_unmount(mp);
+ out_free_rtsb:
+ xfs_rtmount_freesb(mp);
out_free_sb:
xfs_freesb(mp);
out_free_scrub_stats:
@@ -1765,7 +2002,7 @@ xfs_fs_fill_super(
out_unmount:
xfs_filestream_unmount(mp);
xfs_unmountfs(mp);
- goto out_free_sb;
+ goto out_free_rtsb;
}
static int
@@ -1782,6 +2019,19 @@ xfs_remount_rw(
struct xfs_sb *sbp = &mp->m_sb;
int error;
+ if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp &&
+ xfs_readonly_buftarg(mp->m_logdev_targp)) {
+ xfs_warn(mp,
+ "ro->rw transition prohibited by read-only logdev");
+ return -EACCES;
+ }
+
+ if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) {
+ xfs_warn(mp,
+ "ro->rw transition prohibited by read-only rtdev");
+ return -EACCES;
+ }
+
if (xfs_has_norecovery(mp)) {
xfs_warn(mp,
"ro->rw transition prohibited on norecovery mount");
@@ -1797,7 +2047,7 @@ xfs_remount_rw(
return -EINVAL;
}
- clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
+ xfs_clear_readonly(mp);
/*
* If this is the first remount to writeable state we might have some
@@ -1828,6 +2078,9 @@ xfs_remount_rw(
/* Re-enable the background inode inactivation worker. */
xfs_inodegc_start(mp);
+ /* Restart zone reclaim */
+ xfs_zone_gc_start(mp);
+
return 0;
}
@@ -1872,12 +2125,11 @@ xfs_remount_ro(
*/
xfs_inodegc_stop(mp);
+ /* Stop zone reclaim */
+ xfs_zone_gc_stop(mp);
+
/* Free the per-AG metadata reservation pool. */
- error = xfs_fs_unreserve_ag_blocks(mp);
- if (error) {
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- return error;
- }
+ xfs_fs_unreserve_ag_blocks(mp);
/*
* Before we sync the metadata, we need to free up the reserve block
@@ -1889,7 +2141,7 @@ xfs_remount_ro(
xfs_save_resvblks(mp);
xfs_log_clean(mp);
- set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
+ xfs_set_readonly(mp);
return 0;
}
@@ -1915,6 +2167,8 @@ xfs_fs_reconfigure(
int flags = fc->sb_flags;
int error;
+ new_mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS;
+
/* version 5 superblocks always support version counters. */
if (xfs_has_crc(mp))
fc->sb_flags |= SB_I_VERSION;
@@ -1923,6 +2177,29 @@ xfs_fs_reconfigure(
if (error)
return error;
+ /* attr2 -> noattr2 */
+ if (xfs_has_noattr2(new_mp)) {
+ if (xfs_has_crc(mp)) {
+ xfs_warn(mp,
+ "attr2 is always enabled for a V5 filesystem - can't be changed.");
+ return -EINVAL;
+ }
+ mp->m_features &= ~XFS_FEAT_ATTR2;
+ mp->m_features |= XFS_FEAT_NOATTR2;
+ } else if (xfs_has_attr2(new_mp)) {
+ /* noattr2 -> attr2 */
+ mp->m_features &= ~XFS_FEAT_NOATTR2;
+ mp->m_features |= XFS_FEAT_ATTR2;
+ }
+
+ /* Validate new max_atomic_write option before making other changes */
+ if (mp->m_awu_max_bytes != new_mp->m_awu_max_bytes) {
+ error = xfs_set_max_atomic_write_opt(mp,
+ new_mp->m_awu_max_bytes);
+ if (error)
+ return error;
+ }
+
/* inode32 -> inode64 */
if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
@@ -1935,6 +2212,17 @@ xfs_fs_reconfigure(
mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
}
+ /*
+ * Now that mp has been modified according to the remount options, we
+ * do a final option validation with xfs_finish_flags() just like it is
+ * just like it is done during mount. We cannot use
+ * done during mount. We cannot use xfs_finish_flags() on new_mp as it
+ * contains only the user given options.
+ */
+ error = xfs_finish_flags(mp);
+ if (error)
+ return error;
+
/* ro -> rw */
if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
error = xfs_remount_rw(mp);
@@ -1980,19 +2268,22 @@ static const struct fs_context_operations xfs_context_ops = {
* mount option parsing having already been performed as this can be called from
* fsopen() before any parameters have been set.
*/
-static int xfs_init_fs_context(
+static int
+xfs_init_fs_context(
struct fs_context *fc)
{
struct xfs_mount *mp;
+ int i;
mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL | __GFP_NOFAIL);
if (!mp)
return -ENOMEM;
spin_lock_init(&mp->m_sb_lock);
- INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
- spin_lock_init(&mp->m_perag_lock);
+ for (i = 0; i < XG_TYPE_MAX; i++)
+ xa_init(&mp->m_groups[i].xa);
mutex_init(&mp->m_growlock);
+ mutex_init(&mp->m_metafile_resv_lock);
INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
mp->m_kobj.kobject.kset = xfs_kset;
@@ -2033,7 +2324,8 @@ static struct file_system_type xfs_fs_type = {
.init_fs_context = xfs_init_fs_context,
.parameters = xfs_fs_parameters,
.kill_sb = xfs_kill_sb,
- .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME |
+ FS_LBS,
};
MODULE_ALIAS_FS("xfs");
@@ -2185,8 +2477,32 @@ xfs_init_caches(void)
if (!xfs_iunlink_cache)
goto out_destroy_attri_cache;
+ xfs_xmd_cache = kmem_cache_create("xfs_xmd_item",
+ sizeof(struct xfs_xmd_log_item),
+ 0, 0, NULL);
+ if (!xfs_xmd_cache)
+ goto out_destroy_iul_cache;
+
+ xfs_xmi_cache = kmem_cache_create("xfs_xmi_item",
+ sizeof(struct xfs_xmi_log_item),
+ 0, 0, NULL);
+ if (!xfs_xmi_cache)
+ goto out_destroy_xmd_cache;
+
+ xfs_parent_args_cache = kmem_cache_create("xfs_parent_args",
+ sizeof(struct xfs_parent_args),
+ 0, 0, NULL);
+ if (!xfs_parent_args_cache)
+ goto out_destroy_xmi_cache;
+
return 0;
+ out_destroy_xmi_cache:
+ kmem_cache_destroy(xfs_xmi_cache);
+ out_destroy_xmd_cache:
+ kmem_cache_destroy(xfs_xmd_cache);
+ out_destroy_iul_cache:
+ kmem_cache_destroy(xfs_iunlink_cache);
out_destroy_attri_cache:
kmem_cache_destroy(xfs_attri_cache);
out_destroy_attrd_cache:
@@ -2243,6 +2559,9 @@ xfs_destroy_caches(void)
* destroy caches.
*/
rcu_barrier();
+ kmem_cache_destroy(xfs_parent_args_cache);
+ kmem_cache_destroy(xfs_xmd_cache);
+ kmem_cache_destroy(xfs_xmi_cache);
kmem_cache_destroy(xfs_iunlink_cache);
kmem_cache_destroy(xfs_attri_cache);
kmem_cache_destroy(xfs_attrd_cache);