summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_super.c')
-rw-r--r--fs/xfs/xfs_super.c3087
1 files changed, 1853 insertions, 1234 deletions
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 38aaacdbb8b3..bc71aa9dcee8 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
@@ -23,18 +11,15 @@
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
-#include "xfs_error.h"
#include "xfs_fsops.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
@@ -50,471 +35,183 @@
#include "xfs_refcount_item.h"
#include "xfs_bmap_item.h"
#include "xfs_reflink.h"
-
-#include <linux/namei.h>
-#include <linux/dax.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/mount.h>
-#include <linux/mempool.h>
-#include <linux/writeback.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-#include <linux/parser.h>
+#include "xfs_pwork.h"
+#include "xfs_ag.h"
+#include "xfs_defer.h"
+#include "xfs_attr_item.h"
+#include "xfs_xattr.h"
+#include "xfs_iunlink_item.h"
+#include "xfs_dahash_test.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_exchmaps_item.h"
+#include "xfs_parent.h"
+#include "xfs_rtalloc.h"
+#include "xfs_zone_alloc.h"
+#include "scrub/stats.h"
+#include "scrub/rcbag_btree.h"
+
+#include <linux/magic.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
static const struct super_operations xfs_super_operations;
-struct bio_set *xfs_ioend_bioset;
+static struct dentry *xfs_debugfs; /* top-level xfs debugfs dir */
static struct kset *xfs_kset; /* top-level xfs sysfs dir */
#ifdef DEBUG
static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */
#endif
-/*
- * Table driven mount option parser.
- */
-enum {
- Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_biosize,
- Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
- Opt_mtpt, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
- Opt_allocsize, Opt_norecovery, Opt_barrier, Opt_nobarrier,
- Opt_inode64, Opt_inode32, Opt_ikeep, Opt_noikeep,
- Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2, Opt_filestreams,
- Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota,
- Opt_uquota, Opt_gquota, Opt_pquota,
- Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
- Opt_discard, Opt_nodiscard, Opt_dax, Opt_err,
-};
-
-static const match_table_t tokens = {
- {Opt_logbufs, "logbufs=%u"}, /* number of XFS log buffers */
- {Opt_logbsize, "logbsize=%s"}, /* size of XFS log buffers */
- {Opt_logdev, "logdev=%s"}, /* log device */
- {Opt_rtdev, "rtdev=%s"}, /* realtime I/O device */
- {Opt_biosize, "biosize=%u"}, /* log2 of preferred buffered io size */
- {Opt_wsync, "wsync"}, /* safe-mode nfs compatible mount */
- {Opt_noalign, "noalign"}, /* turn off stripe alignment */
- {Opt_swalloc, "swalloc"}, /* turn on stripe width allocation */
- {Opt_sunit, "sunit=%u"}, /* data volume stripe unit */
- {Opt_swidth, "swidth=%u"}, /* data volume stripe width */
- {Opt_nouuid, "nouuid"}, /* ignore filesystem UUID */
- {Opt_mtpt, "mtpt"}, /* filesystem mount point */
- {Opt_grpid, "grpid"}, /* group-ID from parent directory */
- {Opt_nogrpid, "nogrpid"}, /* group-ID from current process */
- {Opt_bsdgroups, "bsdgroups"}, /* group-ID from parent directory */
- {Opt_sysvgroups,"sysvgroups"}, /* group-ID from current process */
- {Opt_allocsize, "allocsize=%s"},/* preferred allocation size */
- {Opt_norecovery,"norecovery"}, /* don't run XFS recovery */
- {Opt_inode64, "inode64"}, /* inodes can be allocated anywhere */
- {Opt_inode32, "inode32"}, /* inode allocation limited to
- * XFS_MAXINUMBER_32 */
- {Opt_ikeep, "ikeep"}, /* do not free empty inode clusters */
- {Opt_noikeep, "noikeep"}, /* free empty inode clusters */
- {Opt_largeio, "largeio"}, /* report large I/O sizes in stat() */
- {Opt_nolargeio, "nolargeio"}, /* do not report large I/O sizes
- * in stat(). */
- {Opt_attr2, "attr2"}, /* do use attr2 attribute format */
- {Opt_noattr2, "noattr2"}, /* do not use attr2 attribute format */
- {Opt_filestreams,"filestreams"},/* use filestreams allocator */
- {Opt_quota, "quota"}, /* disk quotas (user) */
- {Opt_noquota, "noquota"}, /* no quotas */
- {Opt_usrquota, "usrquota"}, /* user quota enabled */
- {Opt_grpquota, "grpquota"}, /* group quota enabled */
- {Opt_prjquota, "prjquota"}, /* project quota enabled */
- {Opt_uquota, "uquota"}, /* user quota (IRIX variant) */
- {Opt_gquota, "gquota"}, /* group quota (IRIX variant) */
- {Opt_pquota, "pquota"}, /* project quota (IRIX variant) */
- {Opt_uqnoenforce,"uqnoenforce"},/* user quota limit enforcement */
- {Opt_gqnoenforce,"gqnoenforce"},/* group quota limit enforcement */
- {Opt_pqnoenforce,"pqnoenforce"},/* project quota limit enforcement */
- {Opt_qnoenforce, "qnoenforce"}, /* same as uqnoenforce */
- {Opt_discard, "discard"}, /* Discard unused blocks */
- {Opt_nodiscard, "nodiscard"}, /* Do not discard unused blocks */
-
- {Opt_dax, "dax"}, /* Enable direct access to bdev pages */
-
- /* Deprecated mount options scheduled for removal */
- {Opt_barrier, "barrier"}, /* use writer barriers for log write and
- * unwritten extent conversion */
- {Opt_nobarrier, "nobarrier"}, /* .. disable */
-
- {Opt_err, NULL},
+enum xfs_dax_mode {
+ XFS_DAX_INODE = 0,
+ XFS_DAX_ALWAYS = 1,
+ XFS_DAX_NEVER = 2,
};
+/* Were quota mount options provided? Must use the upper 16 bits of qflags. */
+#define XFS_QFLAGS_MNTOPTS (1U << 31)
-STATIC int
-suffix_kstrtoint(const substring_t *s, unsigned int base, int *res)
+static void
+xfs_mount_set_dax_mode(
+ struct xfs_mount *mp,
+ enum xfs_dax_mode mode)
{
- int last, shift_left_factor = 0, _res;
- char *value;
- int ret = 0;
-
- value = match_strdup(s);
- if (!value)
- return -ENOMEM;
-
- last = strlen(value) - 1;
- if (value[last] == 'K' || value[last] == 'k') {
- shift_left_factor = 10;
- value[last] = '\0';
+ switch (mode) {
+ case XFS_DAX_INODE:
+ mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER);
+ break;
+ case XFS_DAX_ALWAYS:
+ mp->m_features |= XFS_FEAT_DAX_ALWAYS;
+ mp->m_features &= ~XFS_FEAT_DAX_NEVER;
+ break;
+ case XFS_DAX_NEVER:
+ mp->m_features |= XFS_FEAT_DAX_NEVER;
+ mp->m_features &= ~XFS_FEAT_DAX_ALWAYS;
+ break;
}
- if (value[last] == 'M' || value[last] == 'm') {
- shift_left_factor = 20;
- value[last] = '\0';
- }
- if (value[last] == 'G' || value[last] == 'g') {
- shift_left_factor = 30;
- value[last] = '\0';
- }
-
- if (kstrtoint(value, base, &_res))
- ret = -EINVAL;
- kfree(value);
- *res = _res << shift_left_factor;
- return ret;
}
+static const struct constant_table dax_param_enums[] = {
+ {"inode", XFS_DAX_INODE },
+ {"always", XFS_DAX_ALWAYS },
+ {"never", XFS_DAX_NEVER },
+ {}
+};
+
/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- *
- * Note that this function leaks the various device name allocations on
- * failure. The caller takes care of them.
- *
- * *sb is const because this is also used to test options on the remount
- * path, and we don't want this to have any side effects at remount time.
- * Today this function does not change *sb, but just to future-proof...
+ * Table driven mount option parser.
*/
-STATIC int
-xfs_parseargs(
- struct xfs_mount *mp,
- char *options)
-{
- const struct super_block *sb = mp->m_super;
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int dsunit = 0;
- int dswidth = 0;
- int iosize = 0;
- uint8_t iosizelog = 0;
-
- /*
- * set up the mount name first so all the errors will refer to the
- * correct device.
- */
- mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
- if (!mp->m_fsname)
- return -ENOMEM;
- mp->m_fsname_len = strlen(mp->m_fsname) + 1;
-
- /*
- * Copy binary VFS mount flags we are interested in.
- */
- if (sb->s_flags & MS_RDONLY)
- mp->m_flags |= XFS_MOUNT_RDONLY;
- if (sb->s_flags & MS_DIRSYNC)
- mp->m_flags |= XFS_MOUNT_DIRSYNC;
- if (sb->s_flags & MS_SYNCHRONOUS)
- mp->m_flags |= XFS_MOUNT_WSYNC;
-
- /*
- * Set some default flags that could be cleared by the mount option
- * parsing.
- */
- mp->m_flags |= XFS_MOUNT_BARRIER;
- mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-
- /*
- * These can be overridden by the mount option parsing.
- */
- mp->m_logbufs = -1;
- mp->m_logbsize = -1;
+enum {
+ Op_deprecated, Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
+ Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
+ Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
+ Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32,
+ Opt_largeio, Opt_nolargeio,
+ Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
+ Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
+ Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
+ Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones,
+ Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write,
+};
- if (!options)
- goto done;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
-
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_logbufs:
- if (match_int(args, &mp->m_logbufs))
- return -EINVAL;
- break;
- case Opt_logbsize:
- if (suffix_kstrtoint(args, 10, &mp->m_logbsize))
- return -EINVAL;
- break;
- case Opt_logdev:
- mp->m_logname = match_strdup(args);
- if (!mp->m_logname)
- return -ENOMEM;
- break;
- case Opt_mtpt:
- xfs_warn(mp, "%s option not allowed on this system", p);
- return -EINVAL;
- case Opt_rtdev:
- mp->m_rtname = match_strdup(args);
- if (!mp->m_rtname)
- return -ENOMEM;
- break;
- case Opt_allocsize:
- case Opt_biosize:
- if (suffix_kstrtoint(args, 10, &iosize))
- return -EINVAL;
- iosizelog = ffs(iosize) - 1;
- break;
- case Opt_grpid:
- case Opt_bsdgroups:
- mp->m_flags |= XFS_MOUNT_GRPID;
- break;
- case Opt_nogrpid:
- case Opt_sysvgroups:
- mp->m_flags &= ~XFS_MOUNT_GRPID;
- break;
- case Opt_wsync:
- mp->m_flags |= XFS_MOUNT_WSYNC;
- break;
- case Opt_norecovery:
- mp->m_flags |= XFS_MOUNT_NORECOVERY;
- break;
- case Opt_noalign:
- mp->m_flags |= XFS_MOUNT_NOALIGN;
- break;
- case Opt_swalloc:
- mp->m_flags |= XFS_MOUNT_SWALLOC;
- break;
- case Opt_sunit:
- if (match_int(args, &dsunit))
- return -EINVAL;
- break;
- case Opt_swidth:
- if (match_int(args, &dswidth))
- return -EINVAL;
- break;
- case Opt_inode32:
- mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
- break;
- case Opt_inode64:
- mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
- break;
- case Opt_nouuid:
- mp->m_flags |= XFS_MOUNT_NOUUID;
- break;
- case Opt_ikeep:
- mp->m_flags |= XFS_MOUNT_IKEEP;
- break;
- case Opt_noikeep:
- mp->m_flags &= ~XFS_MOUNT_IKEEP;
- break;
- case Opt_largeio:
- mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
- break;
- case Opt_nolargeio:
- mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
- break;
- case Opt_attr2:
- mp->m_flags |= XFS_MOUNT_ATTR2;
- break;
- case Opt_noattr2:
- mp->m_flags &= ~XFS_MOUNT_ATTR2;
- mp->m_flags |= XFS_MOUNT_NOATTR2;
- break;
- case Opt_filestreams:
- mp->m_flags |= XFS_MOUNT_FILESTREAMS;
- break;
- case Opt_noquota:
- mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
- mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
- mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
- break;
- case Opt_quota:
- case Opt_uquota:
- case Opt_usrquota:
- mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
- XFS_UQUOTA_ENFD);
- break;
- case Opt_qnoenforce:
- case Opt_uqnoenforce:
- mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
- mp->m_qflags &= ~XFS_UQUOTA_ENFD;
- break;
- case Opt_pquota:
- case Opt_prjquota:
- mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
- XFS_PQUOTA_ENFD);
- break;
- case Opt_pqnoenforce:
- mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
- mp->m_qflags &= ~XFS_PQUOTA_ENFD;
- break;
- case Opt_gquota:
- case Opt_grpquota:
- mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
- XFS_GQUOTA_ENFD);
- break;
- case Opt_gqnoenforce:
- mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
- mp->m_qflags &= ~XFS_GQUOTA_ENFD;
- break;
- case Opt_discard:
- mp->m_flags |= XFS_MOUNT_DISCARD;
- break;
- case Opt_nodiscard:
- mp->m_flags &= ~XFS_MOUNT_DISCARD;
- break;
-#ifdef CONFIG_FS_DAX
- case Opt_dax:
- mp->m_flags |= XFS_MOUNT_DAX;
- break;
-#endif
- case Opt_barrier:
- xfs_warn(mp, "%s option is deprecated, ignoring.", p);
- mp->m_flags |= XFS_MOUNT_BARRIER;
- break;
- case Opt_nobarrier:
- xfs_warn(mp, "%s option is deprecated, ignoring.", p);
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- break;
- default:
- xfs_warn(mp, "unknown mount option [%s].", p);
- return -EINVAL;
- }
- }
+#define fsparam_dead(NAME) \
+ __fsparam(NULL, (NAME), Op_deprecated, fs_param_deprecated, NULL)
+static const struct fs_parameter_spec xfs_fs_parameters[] = {
/*
- * no recovery flag requires a read-only mount
+ * These mount options were supposed to be deprecated in September 2025
+ * but the deprecation warning was buggy, so not all users were
+ * notified. The deprecation is now obnoxiously loud and postponed to
+ * September 2030.
*/
- if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
- !(mp->m_flags & XFS_MOUNT_RDONLY)) {
- xfs_warn(mp, "no-recovery mounts must be read-only.");
- return -EINVAL;
- }
-
- if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
- xfs_warn(mp,
- "sunit and swidth options incompatible with the noalign option");
- return -EINVAL;
- }
-
-#ifndef CONFIG_XFS_QUOTA
- if (XFS_IS_QUOTA_RUNNING(mp)) {
- xfs_warn(mp, "quota support not available in this kernel.");
- return -EINVAL;
- }
-#endif
-
- if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
- xfs_warn(mp, "sunit and swidth must be specified together");
- return -EINVAL;
- }
-
- if (dsunit && (dswidth % dsunit != 0)) {
- xfs_warn(mp,
- "stripe width (%d) must be a multiple of the stripe unit (%d)",
- dswidth, dsunit);
- return -EINVAL;
- }
-
-done:
- if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) {
- /*
- * At this point the superblock has not been read
- * in, therefore we do not know the block size.
- * Before the mount call ends we will convert
- * these to FSBs.
- */
- mp->m_dalign = dsunit;
- mp->m_swidth = dswidth;
- }
-
- if (mp->m_logbufs != -1 &&
- mp->m_logbufs != 0 &&
- (mp->m_logbufs < XLOG_MIN_ICLOGS ||
- mp->m_logbufs > XLOG_MAX_ICLOGS)) {
- xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
- mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
- return -EINVAL;
- }
- if (mp->m_logbsize != -1 &&
- mp->m_logbsize != 0 &&
- (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
- mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
- !is_power_of_2(mp->m_logbsize))) {
- xfs_warn(mp,
- "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
- mp->m_logbsize);
- return -EINVAL;
- }
-
- if (iosizelog) {
- if (iosizelog > XFS_MAX_IO_LOG ||
- iosizelog < XFS_MIN_IO_LOG) {
- xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
- iosizelog, XFS_MIN_IO_LOG,
- XFS_MAX_IO_LOG);
- return -EINVAL;
- }
-
- mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
- mp->m_readio_log = iosizelog;
- mp->m_writeio_log = iosizelog;
- }
-
- return 0;
-}
+ fsparam_dead("attr2"),
+ fsparam_dead("noattr2"),
+ fsparam_dead("ikeep"),
+ fsparam_dead("noikeep"),
+
+ fsparam_u32("logbufs", Opt_logbufs),
+ fsparam_string("logbsize", Opt_logbsize),
+ fsparam_string("logdev", Opt_logdev),
+ fsparam_string("rtdev", Opt_rtdev),
+ fsparam_flag("wsync", Opt_wsync),
+ fsparam_flag("noalign", Opt_noalign),
+ fsparam_flag("swalloc", Opt_swalloc),
+ fsparam_u32("sunit", Opt_sunit),
+ fsparam_u32("swidth", Opt_swidth),
+ fsparam_flag("nouuid", Opt_nouuid),
+ fsparam_flag("grpid", Opt_grpid),
+ fsparam_flag("nogrpid", Opt_nogrpid),
+ fsparam_flag("bsdgroups", Opt_bsdgroups),
+ fsparam_flag("sysvgroups", Opt_sysvgroups),
+ fsparam_string("allocsize", Opt_allocsize),
+ fsparam_flag("norecovery", Opt_norecovery),
+ fsparam_flag("inode64", Opt_inode64),
+ fsparam_flag("inode32", Opt_inode32),
+ fsparam_flag("largeio", Opt_largeio),
+ fsparam_flag("nolargeio", Opt_nolargeio),
+ fsparam_flag("filestreams", Opt_filestreams),
+ fsparam_flag("quota", Opt_quota),
+ fsparam_flag("noquota", Opt_noquota),
+ fsparam_flag("usrquota", Opt_usrquota),
+ fsparam_flag("grpquota", Opt_grpquota),
+ fsparam_flag("prjquota", Opt_prjquota),
+ fsparam_flag("uquota", Opt_uquota),
+ fsparam_flag("gquota", Opt_gquota),
+ fsparam_flag("pquota", Opt_pquota),
+ fsparam_flag("uqnoenforce", Opt_uqnoenforce),
+ fsparam_flag("gqnoenforce", Opt_gqnoenforce),
+ fsparam_flag("pqnoenforce", Opt_pqnoenforce),
+ fsparam_flag("qnoenforce", Opt_qnoenforce),
+ fsparam_flag("discard", Opt_discard),
+ fsparam_flag("nodiscard", Opt_nodiscard),
+ fsparam_flag("dax", Opt_dax),
+ fsparam_enum("dax", Opt_dax_enum, dax_param_enums),
+ fsparam_u32("max_open_zones", Opt_max_open_zones),
+ fsparam_flag("lifetime", Opt_lifetime),
+ fsparam_flag("nolifetime", Opt_nolifetime),
+ fsparam_string("max_atomic_write", Opt_max_atomic_write),
+ {}
+};
struct proc_xfs_info {
uint64_t flag;
char *str;
};
-STATIC int
-xfs_showargs(
- struct xfs_mount *mp,
- struct seq_file *m)
+static int
+xfs_fs_show_options(
+ struct seq_file *m,
+ struct dentry *root)
{
static struct proc_xfs_info xfs_info_set[] = {
/* the few simple ones we can get from the mount struct */
- { XFS_MOUNT_IKEEP, ",ikeep" },
- { XFS_MOUNT_WSYNC, ",wsync" },
- { XFS_MOUNT_NOALIGN, ",noalign" },
- { XFS_MOUNT_SWALLOC, ",swalloc" },
- { XFS_MOUNT_NOUUID, ",nouuid" },
- { XFS_MOUNT_NORECOVERY, ",norecovery" },
- { XFS_MOUNT_ATTR2, ",attr2" },
- { XFS_MOUNT_FILESTREAMS, ",filestreams" },
- { XFS_MOUNT_GRPID, ",grpid" },
- { XFS_MOUNT_DISCARD, ",discard" },
- { XFS_MOUNT_SMALL_INUMS, ",inode32" },
- { XFS_MOUNT_DAX, ",dax" },
- { 0, NULL }
- };
- static struct proc_xfs_info xfs_info_unset[] = {
- /* the few simple ones we can get from the mount struct */
- { XFS_MOUNT_COMPAT_IOSIZE, ",largeio" },
- { XFS_MOUNT_BARRIER, ",nobarrier" },
- { XFS_MOUNT_SMALL_INUMS, ",inode64" },
+ { XFS_FEAT_WSYNC, ",wsync" },
+ { XFS_FEAT_NOALIGN, ",noalign" },
+ { XFS_FEAT_SWALLOC, ",swalloc" },
+ { XFS_FEAT_NOUUID, ",nouuid" },
+ { XFS_FEAT_NORECOVERY, ",norecovery" },
+ { XFS_FEAT_FILESTREAMS, ",filestreams" },
+ { XFS_FEAT_GRPID, ",grpid" },
+ { XFS_FEAT_DISCARD, ",discard" },
+ { XFS_FEAT_LARGE_IOSIZE, ",largeio" },
+ { XFS_FEAT_DAX_ALWAYS, ",dax=always" },
+ { XFS_FEAT_DAX_NEVER, ",dax=never" },
+ { XFS_FEAT_NOLIFETIME, ",nolifetime" },
{ 0, NULL }
};
+ struct xfs_mount *mp = XFS_M(root->d_sb);
struct proc_xfs_info *xfs_infop;
for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
- if (mp->m_flags & xfs_infop->flag)
- seq_puts(m, xfs_infop->str);
- }
- for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
- if (!(mp->m_flags & xfs_infop->flag))
+ if (mp->m_features & xfs_infop->flag)
seq_puts(m, xfs_infop->str);
}
- if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
+ seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64);
+
+ if (xfs_has_allocsize(mp))
seq_printf(m, ",allocsize=%dk",
- (int)(1 << mp->m_writeio_log) >> 10);
+ (1 << mp->m_allocsize_log) >> 10);
if (mp->m_logbufs > 0)
seq_printf(m, ",logbufs=%d", mp->m_logbufs);
@@ -533,71 +230,67 @@ xfs_showargs(
seq_printf(m, ",swidth=%d",
(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
- if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
+ if (mp->m_qflags & XFS_UQUOTA_ENFD)
seq_puts(m, ",usrquota");
else if (mp->m_qflags & XFS_UQUOTA_ACCT)
seq_puts(m, ",uqnoenforce");
- if (mp->m_qflags & XFS_PQUOTA_ACCT) {
- if (mp->m_qflags & XFS_PQUOTA_ENFD)
- seq_puts(m, ",prjquota");
- else
- seq_puts(m, ",pqnoenforce");
- }
- if (mp->m_qflags & XFS_GQUOTA_ACCT) {
- if (mp->m_qflags & XFS_GQUOTA_ENFD)
- seq_puts(m, ",grpquota");
- else
- seq_puts(m, ",gqnoenforce");
- }
+ if (mp->m_qflags & XFS_PQUOTA_ENFD)
+ seq_puts(m, ",prjquota");
+ else if (mp->m_qflags & XFS_PQUOTA_ACCT)
+ seq_puts(m, ",pqnoenforce");
+
+ if (mp->m_qflags & XFS_GQUOTA_ENFD)
+ seq_puts(m, ",grpquota");
+ else if (mp->m_qflags & XFS_GQUOTA_ACCT)
+ seq_puts(m, ",gqnoenforce");
if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
seq_puts(m, ",noquota");
+ if (mp->m_max_open_zones)
+ seq_printf(m, ",max_open_zones=%u", mp->m_max_open_zones);
+ if (mp->m_awu_max_bytes)
+ seq_printf(m, ",max_atomic_write=%lluk",
+ mp->m_awu_max_bytes >> 10);
+
return 0;
}
-static uint64_t
-xfs_max_file_offset(
- unsigned int blockshift)
+
+static bool
+xfs_set_inode_alloc_perag(
+ struct xfs_perag *pag,
+ xfs_ino_t ino,
+ xfs_agnumber_t max_metadata)
{
- unsigned int pagefactor = 1;
- unsigned int bitshift = BITS_PER_LONG - 1;
-
- /* Figure out maximum filesize, on Linux this can depend on
- * the filesystem blocksize (on 32 bit platforms).
- * __block_write_begin does this in an [unsigned] long...
- * page->index << (PAGE_SHIFT - bbits)
- * So, for page sized blocks (4K on 32 bit platforms),
- * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
- * (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1)
- * but for smaller blocksizes it is less (bbits = log2 bsize).
- * Note1: get_block_t takes a long (implicit cast from above)
- * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
- * can optionally convert the [unsigned] long from above into
- * an [unsigned] long long.
- */
+ if (!xfs_is_inode32(pag_mount(pag))) {
+ set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
+ clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
+ return false;
+ }
-#if BITS_PER_LONG == 32
-# if defined(CONFIG_LBDAF)
- ASSERT(sizeof(sector_t) == 8);
- pagefactor = PAGE_SIZE;
- bitshift = BITS_PER_LONG;
-# else
- pagefactor = PAGE_SIZE >> (PAGE_SHIFT - blockshift);
-# endif
-#endif
+ if (ino > XFS_MAXINUMBER_32) {
+ clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
+ clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
+ return false;
+ }
- return (((uint64_t)pagefactor) << bitshift) - 1;
+ set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
+ if (pag_agno(pag) < max_metadata)
+ set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
+ else
+ clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
+ return true;
}
/*
* Set parameters for inode allocation heuristics, taking into account
* filesystem size and inode32/inode64 mount options; i.e. specifically
- * whether or not XFS_MOUNT_SMALL_INUMS is set.
+ * whether or not XFS_FEAT_SMALL_INUMS is set.
*
* Inode allocation patterns are altered only if inode32 is requested
- * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
- * If altered, XFS_MOUNT_32BITINODES is set as well.
+ * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
+ * If altered, XFS_OPSTATE_INODE32 is set as well.
*
* An agcount independent of that in the mount structure is provided
* because in the growfs case, mp->m_sb.sb_agcount is not yet updated
@@ -621,7 +314,7 @@ xfs_set_inode_alloc(
* Calculate how much should be reserved for inodes to meet
* the max inode percentage. Used only for inode32.
*/
- if (mp->m_maxicount) {
+ if (M_IGEO(mp)->maxicount) {
uint64_t icount;
icount = sbp->sb_dblocks * sbp->sb_imax_pct;
@@ -634,18 +327,18 @@ xfs_set_inode_alloc(
}
/* Get the last possible inode in the filesystem */
- agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
+ agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
/*
* If user asked for no more than 32-bit inodes, and the fs is
- * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
+ * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
* the allocator to accommodate the request.
*/
- if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
- mp->m_flags |= XFS_MOUNT_32BITINODES;
+ if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
+ xfs_set_inode32(mp);
else
- mp->m_flags &= ~XFS_MOUNT_32BITINODES;
+ xfs_clear_inode32(mp);
for (index = 0; index < agcount; index++) {
struct xfs_perag *pag;
@@ -653,42 +346,60 @@ xfs_set_inode_alloc(
ino = XFS_AGINO_TO_INO(mp, index, agino);
pag = xfs_perag_get(mp, index);
+ if (xfs_set_inode_alloc_perag(pag, ino, max_metadata))
+ maxagi++;
+ xfs_perag_put(pag);
+ }
- if (mp->m_flags & XFS_MOUNT_32BITINODES) {
- if (ino > XFS_MAXINUMBER_32) {
- pag->pagi_inodeok = 0;
- pag->pagf_metadata = 0;
- } else {
- pag->pagi_inodeok = 1;
- maxagi++;
- if (index < max_metadata)
- pag->pagf_metadata = 1;
- else
- pag->pagf_metadata = 0;
- }
- } else {
- pag->pagi_inodeok = 1;
- pag->pagf_metadata = 0;
- }
+ return xfs_is_inode32(mp) ? maxagi : agcount;
+}
- xfs_perag_put(pag);
+static int
+xfs_setup_dax_always(
+ struct xfs_mount *mp)
+{
+ if (!mp->m_ddev_targp->bt_daxdev &&
+ (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
+ xfs_alert(mp,
+ "DAX unsupported by block device. Turning off DAX.");
+ goto disable_dax;
+ }
+
+ if (mp->m_super->s_blocksize != PAGE_SIZE) {
+ xfs_alert(mp,
+ "DAX not supported for blocksize. Turning off DAX.");
+ goto disable_dax;
}
- return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
+ if (xfs_has_reflink(mp) &&
+ bdev_is_partition(mp->m_ddev_targp->bt_bdev)) {
+ xfs_alert(mp,
+ "DAX and reflink cannot work with multi-partitions!");
+ return -EINVAL;
+ }
+
+ return 0;
+
+disable_dax:
+ xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
+ return 0;
}
STATIC int
xfs_blkdev_get(
xfs_mount_t *mp,
const char *name,
- struct block_device **bdevp)
+ struct file **bdev_filep)
{
int error = 0;
-
- *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
- mp);
- if (IS_ERR(*bdevp)) {
- error = PTR_ERR(*bdevp);
+ blk_mode_t mode;
+
+ mode = sb_open_mode(mp->m_super->s_flags);
+ *bdev_filep = bdev_file_open_by_path(name, mode,
+ mp->m_super, &fs_holder_ops);
+ if (IS_ERR(*bdev_filep)) {
+ error = PTR_ERR(*bdev_filep);
+ *bdev_filep = NULL;
xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
}
@@ -696,35 +407,45 @@ xfs_blkdev_get(
}
STATIC void
-xfs_blkdev_put(
- struct block_device *bdev)
-{
- if (bdev)
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-}
-
-void
-xfs_blkdev_issue_flush(
- xfs_buftarg_t *buftarg)
-{
- blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
-}
-
-STATIC void
-xfs_close_devices(
+xfs_shutdown_devices(
struct xfs_mount *mp)
{
+ /*
+ * Udev is triggered whenever anyone closes a block device or unmounts
+ * a file systemm on a block device.
+ * The default udev rules invoke blkid to read the fs super and create
+ * symlinks to the bdev under /dev/disk. For this, it uses buffered
+ * reads through the page cache.
+ *
+ * xfs_db also uses buffered reads to examine metadata. There is no
+ * coordination between xfs_db and udev, which means that they can run
+ * concurrently. Note there is no coordination between the kernel and
+ * blkid either.
+ *
+ * On a system with 64k pages, the page cache can cache the superblock
+ * and the root inode (and hence the root directory) with the same 64k
+ * page. If udev spawns blkid after the mkfs and the system is busy
+ * enough that it is still running when xfs_db starts up, they'll both
+ * read from the same page in the pagecache.
+ *
+ * The unmount writes updated inode metadata to disk directly. The XFS
+ * buffer cache does not use the bdev pagecache, so it needs to
+ * invalidate that pagecache on unmount. If the above scenario occurs,
+ * the pagecache no longer reflects what's on disk, xfs_db reads the
+ * stale metadata, and fails to find /a. Most of the time this succeeds
+ * because closing a bdev invalidates the page cache, but when processes
+ * race, everyone loses.
+ */
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
- struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
- xfs_free_buftarg(mp, mp->m_logdev_targp);
- xfs_blkdev_put(logdev);
+ blkdev_issue_flush(mp->m_logdev_targp->bt_bdev);
+ invalidate_bdev(mp->m_logdev_targp->bt_bdev);
}
if (mp->m_rtdev_targp) {
- struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
- xfs_free_buftarg(mp, mp->m_rtdev_targp);
- xfs_blkdev_put(rtdev);
+ blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
+ invalidate_bdev(mp->m_rtdev_targp->bt_bdev);
}
- xfs_free_buftarg(mp, mp->m_ddev_targp);
+ blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
+ invalidate_bdev(mp->m_ddev_targp->bt_bdev);
}
/*
@@ -741,25 +462,28 @@ STATIC int
xfs_open_devices(
struct xfs_mount *mp)
{
- struct block_device *ddev = mp->m_super->s_bdev;
- struct block_device *logdev = NULL, *rtdev = NULL;
+ struct super_block *sb = mp->m_super;
+ struct block_device *ddev = sb->s_bdev;
+ struct file *logdev_file = NULL, *rtdev_file = NULL;
int error;
/*
* Open real time and log devices - order is important.
*/
if (mp->m_logname) {
- error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
+ error = xfs_blkdev_get(mp, mp->m_logname, &logdev_file);
if (error)
- goto out;
+ return error;
}
if (mp->m_rtname) {
- error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
+ error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_file);
if (error)
goto out_close_logdev;
- if (rtdev == ddev || rtdev == logdev) {
+ if (file_bdev(rtdev_file) == ddev ||
+ (logdev_file &&
+ file_bdev(rtdev_file) == file_bdev(logdev_file))) {
xfs_warn(mp,
"Cannot mount filesystem with identical rtdev and ddev/logdev.");
error = -EINVAL;
@@ -770,38 +494,49 @@ xfs_open_devices(
/*
* Setup xfs_mount buffer target pointers
*/
- error = -ENOMEM;
- mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
- if (!mp->m_ddev_targp)
+ mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_file);
+ if (IS_ERR(mp->m_ddev_targp)) {
+ error = PTR_ERR(mp->m_ddev_targp);
+ mp->m_ddev_targp = NULL;
goto out_close_rtdev;
+ }
- if (rtdev) {
- mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
- if (!mp->m_rtdev_targp)
+ if (rtdev_file) {
+ mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_file);
+ if (IS_ERR(mp->m_rtdev_targp)) {
+ error = PTR_ERR(mp->m_rtdev_targp);
+ mp->m_rtdev_targp = NULL;
goto out_free_ddev_targ;
+ }
}
- if (logdev && logdev != ddev) {
- mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
- if (!mp->m_logdev_targp)
+ if (logdev_file && file_bdev(logdev_file) != ddev) {
+ mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_file);
+ if (IS_ERR(mp->m_logdev_targp)) {
+ error = PTR_ERR(mp->m_logdev_targp);
+ mp->m_logdev_targp = NULL;
goto out_free_rtdev_targ;
+ }
} else {
mp->m_logdev_targp = mp->m_ddev_targp;
+ /* Handle won't be used, drop it */
+ if (logdev_file)
+ bdev_fput(logdev_file);
}
return 0;
out_free_rtdev_targ:
if (mp->m_rtdev_targp)
- xfs_free_buftarg(mp, mp->m_rtdev_targp);
+ xfs_free_buftarg(mp->m_rtdev_targp);
out_free_ddev_targ:
- xfs_free_buftarg(mp, mp->m_ddev_targp);
+ xfs_free_buftarg(mp->m_ddev_targp);
out_close_rtdev:
- xfs_blkdev_put(rtdev);
+ if (rtdev_file)
+ bdev_fput(rtdev_file);
out_close_logdev:
- if (logdev && logdev != ddev)
- xfs_blkdev_put(logdev);
- out:
+ if (logdev_file)
+ bdev_fput(logdev_file);
return error;
}
@@ -814,23 +549,32 @@ xfs_setup_devices(
{
int error;
- error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
+ error = xfs_configure_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize,
+ mp->m_sb.sb_dblocks);
if (error)
return error;
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
unsigned int log_sector_size = BBSIZE;
- if (xfs_sb_version_hassector(&mp->m_sb))
+ if (xfs_has_sector(mp))
log_sector_size = mp->m_sb.sb_logsectsize;
- error = xfs_setsize_buftarg(mp->m_logdev_targp,
- log_sector_size);
+ error = xfs_configure_buftarg(mp->m_logdev_targp,
+ log_sector_size, mp->m_sb.sb_logblocks);
if (error)
return error;
}
- if (mp->m_rtdev_targp) {
- error = xfs_setsize_buftarg(mp->m_rtdev_targp,
- mp->m_sb.sb_sectsize);
+
+ if (mp->m_sb.sb_rtstart) {
+ if (mp->m_rtdev_targp) {
+ xfs_warn(mp,
+ "can't use internal and external rtdev at the same time");
+ return -EINVAL;
+ }
+ mp->m_rtdev_targp = mp->m_ddev_targp;
+ } else if (mp->m_rtname) {
+ error = xfs_configure_buftarg(mp->m_rtdev_targp,
+ mp->m_sb.sb_sectsize, mp->m_sb.sb_rblocks);
if (error)
return error;
}
@@ -843,60 +587,51 @@ xfs_init_mount_workqueues(
struct xfs_mount *mp)
{
mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_fsname);
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
+ 1, mp->m_super->s_id);
if (!mp->m_buf_workqueue)
goto out;
- mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
- if (!mp->m_data_workqueue)
- goto out_destroy_buf;
-
mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
+ 0, mp->m_super->s_id);
if (!mp->m_unwritten_workqueue)
- goto out_destroy_data_iodone_queue;
-
- mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
- if (!mp->m_cil_workqueue)
- goto out_destroy_unwritten;
+ goto out_destroy_buf;
mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
+ 0, mp->m_super->s_id);
if (!mp->m_reclaim_workqueue)
- goto out_destroy_cil;
+ goto out_destroy_unwritten;
- mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
- mp->m_fsname);
- if (!mp->m_log_workqueue)
+ mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s",
+ XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
+ 0, mp->m_super->s_id);
+ if (!mp->m_blockgc_wq)
goto out_destroy_reclaim;
- mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
- if (!mp->m_eofblocks_workqueue)
- goto out_destroy_log;
+ mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
+ 1, mp->m_super->s_id);
+ if (!mp->m_inodegc_wq)
+ goto out_destroy_blockgc;
- mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
- mp->m_fsname);
+ mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
+ XFS_WQFLAGS(WQ_FREEZABLE | WQ_PERCPU), 0,
+ mp->m_super->s_id);
if (!mp->m_sync_workqueue)
- goto out_destroy_eofb;
+ goto out_destroy_inodegc;
return 0;
-out_destroy_eofb:
- destroy_workqueue(mp->m_eofblocks_workqueue);
-out_destroy_log:
- destroy_workqueue(mp->m_log_workqueue);
+out_destroy_inodegc:
+ destroy_workqueue(mp->m_inodegc_wq);
+out_destroy_blockgc:
+ destroy_workqueue(mp->m_blockgc_wq);
out_destroy_reclaim:
destroy_workqueue(mp->m_reclaim_workqueue);
-out_destroy_cil:
- destroy_workqueue(mp->m_cil_workqueue);
out_destroy_unwritten:
destroy_workqueue(mp->m_unwritten_workqueue);
-out_destroy_data_iodone_queue:
- destroy_workqueue(mp->m_data_workqueue);
out_destroy_buf:
destroy_workqueue(mp->m_buf_workqueue);
out:
@@ -908,15 +643,27 @@ xfs_destroy_mount_workqueues(
struct xfs_mount *mp)
{
destroy_workqueue(mp->m_sync_workqueue);
- destroy_workqueue(mp->m_eofblocks_workqueue);
- destroy_workqueue(mp->m_log_workqueue);
+ destroy_workqueue(mp->m_blockgc_wq);
+ destroy_workqueue(mp->m_inodegc_wq);
destroy_workqueue(mp->m_reclaim_workqueue);
- destroy_workqueue(mp->m_cil_workqueue);
- destroy_workqueue(mp->m_data_workqueue);
destroy_workqueue(mp->m_unwritten_workqueue);
destroy_workqueue(mp->m_buf_workqueue);
}
+static void
+xfs_flush_inodes_worker(
+ struct work_struct *work)
+{
+ struct xfs_mount *mp = container_of(work, struct xfs_mount,
+ m_flush_inodes_work);
+ struct super_block *sb = mp->m_super;
+
+ if (down_read_trylock(&sb->s_umount)) {
+ sync_inodes_sb(sb);
+ up_read(&sb->s_umount);
+ }
+}
+
/*
* Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
* or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
@@ -927,12 +674,15 @@ void
xfs_flush_inodes(
struct xfs_mount *mp)
{
- struct super_block *sb = mp->m_super;
+ /*
+ * If flush_work() returns true then that means we waited for a flush
+ * which was already in progress. Don't bother running another scan.
+ */
+ if (flush_work(&mp->m_flush_inodes_work))
+ return;
- if (down_read_trylock(&sb->s_umount)) {
- sync_inodes_sb(sb);
- up_read(&sb->s_umount);
- }
+ queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work);
+ flush_work(&mp->m_flush_inodes_work);
}
/* Catch misguided souls that try to use this interface on XFS */
@@ -953,41 +703,41 @@ xfs_fs_destroy_inode(
struct inode *inode)
{
struct xfs_inode *ip = XFS_I(inode);
- int error;
trace_xfs_destroy_inode(ip);
ASSERT(!rwsem_is_locked(&inode->i_rwsem));
XFS_STATS_INC(ip->i_mount, vn_rele);
XFS_STATS_INC(ip->i_mount, vn_remove);
+ xfs_inode_mark_reclaimable(ip);
+}
- if (xfs_is_reflink_inode(ip)) {
- error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
- if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))
- xfs_warn(ip->i_mount,
-"Error %d while evicting CoW blocks for inode %llu.",
- error, ip->i_ino);
- }
-
- xfs_inactive(ip);
-
- ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
- XFS_STATS_INC(ip->i_mount, vn_reclaim);
+static void
+xfs_fs_dirty_inode(
+ struct inode *inode,
+ int flags)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
- /*
- * We should never get here with one of the reclaim flags already set.
- */
- ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
- ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
+ if (!(inode->i_sb->s_flags & SB_LAZYTIME))
+ return;
/*
- * We always use background reclaim here because even if the
- * inode is clean, it still may be under IO and hence we have
- * to take the flush lock. The background reclaim path handles
- * this more efficiently than we can here, so simply let background
- * reclaim tear down all inodes.
+ * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC)
+ * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed
+ * in flags possibly together with I_DIRTY_SYNC.
*/
- xfs_inode_set_reclaim_tag(ip);
+ if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME))
+ return;
+
+ if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
+ return;
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
+ xfs_trans_commit(tp);
}
/*
@@ -1012,11 +762,7 @@ xfs_fs_inode_init_once(
/* xfs inode */
atomic_set(&ip->i_pincount, 0);
spin_lock_init(&ip->i_flags_lock);
-
- mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
- "xfsino", ip->i_ino);
- mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
- "xfsino", ip->i_ino);
+ init_rwsem(&ip->i_lock);
}
/*
@@ -1038,20 +784,45 @@ xfs_fs_drop_inode(
* that. See the comment for this inode flag.
*/
if (ip->i_flags & XFS_IRECOVERY) {
- ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED);
+ ASSERT(xlog_recovery_needed(ip->i_mount->m_log));
return 0;
}
- return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
+ return inode_generic_drop(inode);
}
STATIC void
-xfs_free_fsname(
+xfs_fs_evict_inode(
+ struct inode *inode)
+{
+ if (IS_DAX(inode))
+ dax_break_layout_final(inode);
+
+ truncate_inode_pages_final(&inode->i_data);
+ clear_inode(inode);
+
+ if (IS_ENABLED(CONFIG_XFS_RT) &&
+ S_ISREG(inode->i_mode) && inode->i_private) {
+ xfs_open_zone_put(inode->i_private);
+ inode->i_private = NULL;
+ }
+}
+
+static void
+xfs_mount_free(
struct xfs_mount *mp)
{
- kfree(mp->m_fsname);
+ if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
+ xfs_free_buftarg(mp->m_logdev_targp);
+ if (mp->m_rtdev_targp && mp->m_rtdev_targp != mp->m_ddev_targp)
+ xfs_free_buftarg(mp->m_rtdev_targp);
+ if (mp->m_ddev_targp)
+ xfs_free_buftarg(mp->m_ddev_targp);
+
+ debugfs_remove(mp->m_debugfs);
kfree(mp->m_rtname);
kfree(mp->m_logname);
+ kfree(mp);
}
STATIC int
@@ -1060,6 +831,9 @@ xfs_fs_sync_fs(
int wait)
{
struct xfs_mount *mp = XFS_M(sb);
+ int error;
+
+ trace_xfs_fs_sync_fs(mp, __return_address);
/*
* Doing anything during the async pass would be counterproductive.
@@ -1067,7 +841,10 @@ xfs_fs_sync_fs(
if (!wait)
return 0;
- xfs_log_force(mp, XFS_LOG_SYNC);
+ error = xfs_log_force(mp, XFS_LOG_SYNC);
+ if (error)
+ return error;
+
if (laptop_mode) {
/*
* The disk must be active because we're syncing.
@@ -1077,308 +854,161 @@ xfs_fs_sync_fs(
flush_delayed_work(&mp->m_log->l_work);
}
- return 0;
-}
-
-STATIC int
-xfs_fs_statfs(
- struct dentry *dentry,
- struct kstatfs *statp)
-{
- struct xfs_mount *mp = XFS_M(dentry->d_sb);
- xfs_sb_t *sbp = &mp->m_sb;
- struct xfs_inode *ip = XFS_I(d_inode(dentry));
- uint64_t fakeinos, id;
- uint64_t icount;
- uint64_t ifree;
- uint64_t fdblocks;
- xfs_extlen_t lsize;
- int64_t ffree;
-
- statp->f_type = XFS_SB_MAGIC;
- statp->f_namelen = MAXNAMELEN - 1;
-
- id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
- statp->f_fsid.val[0] = (u32)id;
- statp->f_fsid.val[1] = (u32)(id >> 32);
-
- icount = percpu_counter_sum(&mp->m_icount);
- ifree = percpu_counter_sum(&mp->m_ifree);
- fdblocks = percpu_counter_sum(&mp->m_fdblocks);
-
- spin_lock(&mp->m_sb_lock);
- statp->f_bsize = sbp->sb_blocksize;
- lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
- statp->f_blocks = sbp->sb_dblocks - lsize;
- spin_unlock(&mp->m_sb_lock);
-
- statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
- statp->f_bavail = statp->f_bfree;
-
- fakeinos = statp->f_bfree << sbp->sb_inopblog;
- statp->f_files = MIN(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
- if (mp->m_maxicount)
- statp->f_files = min_t(typeof(statp->f_files),
- statp->f_files,
- mp->m_maxicount);
-
- /* If sb_icount overshot maxicount, report actual allocation */
- statp->f_files = max_t(typeof(statp->f_files),
- statp->f_files,
- sbp->sb_icount);
-
- /* make sure statp->f_ffree does not underflow */
- ffree = statp->f_files - (icount - ifree);
- statp->f_ffree = max_t(int64_t, ffree, 0);
-
+ /*
+ * If we are called with page faults frozen out, it means we are about
+ * to freeze the transaction subsystem. Take the opportunity to shut
+ * down inodegc because once SB_FREEZE_FS is set it's too late to
+ * prevent inactivation races with freeze. The fs doesn't get called
+ * again by the freezing process until after SB_FREEZE_FS has been set,
+ * so it's now or never. Same logic applies to speculative allocation
+ * garbage collection.
+ *
+ * We don't care if this is a normal syncfs call that does this or
+ * freeze that does this - we can run this multiple times without issue
+ * and we won't race with a restart because a restart can only occur
+ * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
+ */
+ if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
+ xfs_inodegc_stop(mp);
+ xfs_blockgc_stop(mp);
+ xfs_zone_gc_stop(mp);
+ }
- if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
- ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
- (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
- xfs_qm_statvfs(ip, statp);
return 0;
}
-STATIC void
-xfs_save_resvblks(struct xfs_mount *mp)
+static xfs_extlen_t
+xfs_internal_log_size(
+ struct xfs_mount *mp)
{
- uint64_t resblks = 0;
-
- mp->m_resblks_save = mp->m_resblks;
- xfs_reserve_blocks(mp, &resblks, NULL);
+ if (!mp->m_sb.sb_logstart)
+ return 0;
+ return mp->m_sb.sb_logblocks;
}
-STATIC void
-xfs_restore_resvblks(struct xfs_mount *mp)
+static void
+xfs_statfs_data(
+ struct xfs_mount *mp,
+ struct kstatfs *st)
{
- uint64_t resblks;
+ int64_t fdblocks =
+ xfs_sum_freecounter(mp, XC_FREE_BLOCKS);
- if (mp->m_resblks_save) {
- resblks = mp->m_resblks_save;
- mp->m_resblks_save = 0;
- } else
- resblks = xfs_default_resblks(mp);
+ /* make sure st->f_bfree does not underflow */
+ st->f_bfree = max(0LL,
+ fdblocks - xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS));
- xfs_reserve_blocks(mp, &resblks, NULL);
+ /*
+ * sb_dblocks can change during growfs, but nothing cares about reporting
+ * the old or new value during growfs.
+ */
+ st->f_blocks = mp->m_sb.sb_dblocks - xfs_internal_log_size(mp);
}
/*
- * Trigger writeback of all the dirty metadata in the file system.
- *
- * This ensures that the metadata is written to their location on disk rather
- * than just existing in transactions in the log. This means after a quiesce
- * there is no log replay required to write the inodes to disk - this is the
- * primary difference between a sync and a quiesce.
- *
- * Note: xfs_log_quiesce() stops background log work - the callers must ensure
- * it is started again when appropriate.
+ * When stat(v)fs is called on a file with the realtime bit set or a directory
+ * with the rtinherit bit, report freespace information for the RT device
+ * instead of the main data device.
*/
-void
-xfs_quiesce_attr(
- struct xfs_mount *mp)
+static void
+xfs_statfs_rt(
+ struct xfs_mount *mp,
+ struct kstatfs *st)
{
- int error = 0;
-
- /* wait for all modifications to complete */
- while (atomic_read(&mp->m_active_trans) > 0)
- delay(100);
-
- /* force the log to unpin objects from the now complete transactions */
- xfs_log_force(mp, XFS_LOG_SYNC);
-
- /* reclaim inodes to do any IO before the freeze completes */
- xfs_reclaim_inodes(mp, 0);
- xfs_reclaim_inodes(mp, SYNC_WAIT);
-
- /* Push the superblock and write an unmount record */
- error = xfs_log_sbcount(mp);
- if (error)
- xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
- "Frozen image may not be consistent.");
- /*
- * Just warn here till VFS can correctly support
- * read-only remount without racing.
- */
- WARN_ON(atomic_read(&mp->m_active_trans) != 0);
-
- xfs_log_quiesce(mp);
+ st->f_bfree = xfs_rtbxlen_to_blen(mp,
+ xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
+ st->f_blocks = mp->m_sb.sb_rblocks - xfs_rtbxlen_to_blen(mp,
+ mp->m_free[XC_FREE_RTEXTENTS].res_total);
}
-STATIC int
-xfs_test_remount_options(
- struct super_block *sb,
+static void
+xfs_statfs_inodes(
struct xfs_mount *mp,
- char *options)
+ struct kstatfs *st)
{
- int error = 0;
- struct xfs_mount *tmp_mp;
+ uint64_t icount = percpu_counter_sum(&mp->m_icount);
+ uint64_t ifree = percpu_counter_sum(&mp->m_ifree);
+ uint64_t fakeinos = XFS_FSB_TO_INO(mp, st->f_bfree);
- tmp_mp = kmem_zalloc(sizeof(*tmp_mp), KM_MAYFAIL);
- if (!tmp_mp)
- return -ENOMEM;
+ st->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
+ if (M_IGEO(mp)->maxicount)
+ st->f_files = min_t(typeof(st->f_files), st->f_files,
+ M_IGEO(mp)->maxicount);
- tmp_mp->m_super = sb;
- error = xfs_parseargs(tmp_mp, options);
- xfs_free_fsname(tmp_mp);
- kfree(tmp_mp);
+ /* If sb_icount overshot maxicount, report actual allocation */
+ st->f_files = max_t(typeof(st->f_files), st->f_files,
+ mp->m_sb.sb_icount);
- return error;
+ /* Make sure st->f_ffree does not underflow */
+ st->f_ffree = max_t(int64_t, 0, st->f_files - (icount - ifree));
}
STATIC int
-xfs_fs_remount(
- struct super_block *sb,
- int *flags,
- char *options)
+xfs_fs_statfs(
+ struct dentry *dentry,
+ struct kstatfs *st)
{
- struct xfs_mount *mp = XFS_M(sb);
- xfs_sb_t *sbp = &mp->m_sb;
- substring_t args[MAX_OPT_ARGS];
- char *p;
- int error;
-
- /* First, check for complete junk; i.e. invalid options */
- error = xfs_test_remount_options(sb, mp, options);
- if (error)
- return error;
+ struct xfs_mount *mp = XFS_M(dentry->d_sb);
+ struct xfs_inode *ip = XFS_I(d_inode(dentry));
- sync_filesystem(sb);
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
-
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_barrier:
- xfs_warn(mp, "%s option is deprecated, ignoring.", p);
- mp->m_flags |= XFS_MOUNT_BARRIER;
- break;
- case Opt_nobarrier:
- xfs_warn(mp, "%s option is deprecated, ignoring.", p);
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- break;
- case Opt_inode64:
- mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
- mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
- break;
- case Opt_inode32:
- mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
- mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
- break;
- default:
- /*
- * Logically we would return an error here to prevent
- * users from believing they might have changed
- * mount options using remount which can't be changed.
- *
- * But unfortunately mount(8) adds all options from
- * mtab and fstab to the mount arguments in some cases
- * so we can't blindly reject options, but have to
- * check for each specified option if it actually
- * differs from the currently set option and only
- * reject it if that's the case.
- *
- * Until that is implemented we return success for
- * every remount request, and silently ignore all
- * options that we can't actually change.
- */
-#if 0
- xfs_info(mp,
- "mount option \"%s\" not supported for remount", p);
- return -EINVAL;
-#else
- break;
-#endif
- }
- }
+ /*
+ * Expedite background inodegc but don't wait. We do not want to block
+ * here waiting hours for a billion extent file to be truncated.
+ */
+ xfs_inodegc_push(mp);
- /* ro -> rw */
- if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
- if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
- xfs_warn(mp,
- "ro->rw transition prohibited on norecovery mount");
- return -EINVAL;
- }
+ st->f_type = XFS_SUPER_MAGIC;
+ st->f_namelen = MAXNAMELEN - 1;
+ st->f_bsize = mp->m_sb.sb_blocksize;
+ st->f_fsid = u64_to_fsid(huge_encode_dev(mp->m_ddev_targp->bt_dev));
- if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
- xfs_sb_has_ro_compat_feature(sbp,
- XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
- xfs_warn(mp,
-"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
- (sbp->sb_features_ro_compat &
- XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
- return -EINVAL;
- }
+ xfs_statfs_data(mp, st);
+ xfs_statfs_inodes(mp, st);
- mp->m_flags &= ~XFS_MOUNT_RDONLY;
+ if (XFS_IS_REALTIME_MOUNT(mp) &&
+ (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME)))
+ xfs_statfs_rt(mp, st);
- /*
- * If this is the first remount to writeable state we
- * might have some superblock changes to update.
- */
- if (mp->m_update_sb) {
- error = xfs_sync_sb(mp, false);
- if (error) {
- xfs_warn(mp, "failed to write sb changes");
- return error;
- }
- mp->m_update_sb = false;
- }
+ if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
+ ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
+ (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
+ xfs_qm_statvfs(ip, st);
- /*
- * Fill out the reserve pool if it is empty. Use the stashed
- * value if it is non-zero, otherwise go with the default.
- */
- xfs_restore_resvblks(mp);
- xfs_log_work_queue(mp);
- xfs_queue_eofblocks(mp);
+ /*
+ * XFS does not distinguish between blocks available to privileged and
+ * unprivileged users.
+ */
+ st->f_bavail = st->f_bfree;
+ return 0;
+}
- /* Recover any CoW blocks that never got remapped. */
- error = xfs_reflink_recover_cow(mp);
- if (error) {
- xfs_err(mp,
- "Error %d recovering leftover CoW allocations.", error);
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- return error;
- }
+STATIC void
+xfs_save_resvblks(
+ struct xfs_mount *mp)
+{
+ enum xfs_free_counter i;
- /* Create the per-AG metadata reservation pool .*/
- error = xfs_fs_reserve_ag_blocks(mp);
- if (error && error != -ENOSPC)
- return error;
+ for (i = 0; i < XC_FREE_NR; i++) {
+ mp->m_free[i].res_saved = mp->m_free[i].res_total;
+ xfs_reserve_blocks(mp, i, 0);
}
+}
- /* rw -> ro */
- if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
- /* Free the per-AG metadata reservation pool. */
- error = xfs_fs_unreserve_ag_blocks(mp);
- if (error) {
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- return error;
- }
-
- /*
- * Before we sync the metadata, we need to free up the reserve
- * block pool so that the used block count in the superblock on
- * disk is correct at the end of the remount. Stash the current
- * reserve pool size so that if we get remounted rw, we can
- * return it to the same size.
- */
- xfs_save_resvblks(mp);
-
- /*
- * Cancel background eofb scanning so it cannot race with the
- * final log force+buftarg wait and deadlock the remount.
- */
- cancel_delayed_work_sync(&mp->m_eofblocks_work);
-
- xfs_quiesce_attr(mp);
- mp->m_flags |= XFS_MOUNT_RDONLY;
+STATIC void
+xfs_restore_resvblks(
+ struct xfs_mount *mp)
+{
+ uint64_t resblks;
+ enum xfs_free_counter i;
+
+ for (i = 0; i < XC_FREE_NR; i++) {
+ if (mp->m_free[i].res_saved) {
+ resblks = mp->m_free[i].res_saved;
+ mp->m_free[i].res_saved = 0;
+ } else
+ resblks = xfs_default_resblks(mp, i);
+ xfs_reserve_blocks(mp, i, resblks);
}
-
- return 0;
}
/*
@@ -1392,10 +1022,33 @@ xfs_fs_freeze(
struct super_block *sb)
{
struct xfs_mount *mp = XFS_M(sb);
+ unsigned int flags;
+ int ret;
+ /*
+ * The filesystem is now frozen far enough that memory reclaim
+ * cannot safely operate on the filesystem. Hence we need to
+ * set a GFP_NOFS context here to avoid recursion deadlocks.
+ */
+ flags = memalloc_nofs_save();
xfs_save_resvblks(mp);
- xfs_quiesce_attr(mp);
- return xfs_sync_sb(mp, true);
+ ret = xfs_log_quiesce(mp);
+ memalloc_nofs_restore(flags);
+
+ /*
+ * For read-write filesystems, we need to restart the inodegc on error
+ * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
+ * going to be run to restart it now. We are at SB_FREEZE_FS level
+ * here, so we can restart safely without racing with a stop in
+ * xfs_fs_sync_fs().
+ */
+ if (ret && !xfs_is_readonly(mp)) {
+ xfs_blockgc_start(mp);
+ xfs_inodegc_start(mp);
+ xfs_zone_gc_start(mp);
+ }
+
+ return ret;
}
STATIC int
@@ -1406,15 +1059,20 @@ xfs_fs_unfreeze(
xfs_restore_resvblks(mp);
xfs_log_work_queue(mp);
- return 0;
-}
-STATIC int
-xfs_fs_show_options(
- struct seq_file *m,
- struct dentry *root)
-{
- return xfs_showargs(XFS_M(root->d_sb), m);
+ /*
+ * Don't reactivate the inodegc worker on a readonly filesystem because
+ * inodes are sent directly to reclaim. Don't reactivate the blockgc
+ * worker because there are no speculative preallocations on a readonly
+ * filesystem.
+ */
+ if (!xfs_is_readonly(mp)) {
+ xfs_zone_gc_start(mp);
+ xfs_blockgc_start(mp);
+ xfs_inodegc_start(mp);
+ }
+
+ return 0;
}
/*
@@ -1425,10 +1083,8 @@ STATIC int
xfs_finish_flags(
struct xfs_mount *mp)
{
- int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
-
/* Fail a mount where the logbuf is smaller than the log stripe */
- if (xfs_sb_version_haslogv2(&mp->m_sb)) {
+ if (xfs_has_logv2(mp)) {
if (mp->m_logbsize <= 0 &&
mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
mp->m_logbsize = mp->m_sb.sb_logsunit;
@@ -1448,40 +1104,35 @@ xfs_finish_flags(
}
/*
- * V5 filesystems always use attr2 format for attributes.
- */
- if (xfs_sb_version_hascrc(&mp->m_sb) &&
- (mp->m_flags & XFS_MOUNT_NOATTR2)) {
- xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
- "attr2 is always enabled for V5 filesystems.");
- return -EINVAL;
- }
-
- /*
- * mkfs'ed attr2 will turn on attr2 mount unless explicitly
- * told by noattr2 to turn it off
- */
- if (xfs_sb_version_hasattr2(&mp->m_sb) &&
- !(mp->m_flags & XFS_MOUNT_NOATTR2))
- mp->m_flags |= XFS_MOUNT_ATTR2;
-
- /*
* prohibit r/w mounts of read-only filesystems
*/
- if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
+ if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
xfs_warn(mp,
"cannot mount a read-only filesystem as read-write");
return -EROFS;
}
- if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
- (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
- !xfs_sb_version_has_pquotino(&mp->m_sb)) {
+ if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
+ (mp->m_qflags & XFS_PQUOTA_ACCT) &&
+ !xfs_has_pquotino(mp)) {
xfs_warn(mp,
"Super block does not support project and group quota together");
return -EINVAL;
}
+ if (!xfs_has_zoned(mp)) {
+ if (mp->m_max_open_zones) {
+ xfs_warn(mp,
+"max_open_zones mount option only supported on zoned file systems.");
+ return -EINVAL;
+ }
+ if (mp->m_features & XFS_FEAT_NOLIFETIME) {
+ xfs_warn(mp,
+"nolifetime mount option only supported on zoned file systems.");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -1489,7 +1140,8 @@ static int
xfs_init_percpu_counters(
struct xfs_mount *mp)
{
- int error;
+ int error;
+ int i;
error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
if (error)
@@ -1499,12 +1151,29 @@ xfs_init_percpu_counters(
if (error)
goto free_icount;
- error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
+ error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
if (error)
goto free_ifree;
+ error = percpu_counter_init(&mp->m_delalloc_rtextents, 0, GFP_KERNEL);
+ if (error)
+ goto free_delalloc;
+
+ for (i = 0; i < XC_FREE_NR; i++) {
+ error = percpu_counter_init(&mp->m_free[i].count, 0,
+ GFP_KERNEL);
+ if (error)
+ goto free_freecounters;
+ }
+
return 0;
+free_freecounters:
+ while (--i >= 0)
+ percpu_counter_destroy(&mp->m_free[i].count);
+ percpu_counter_destroy(&mp->m_delalloc_rtextents);
+free_delalloc:
+ percpu_counter_destroy(&mp->m_delalloc_blks);
free_ifree:
percpu_counter_destroy(&mp->m_ifree);
free_icount:
@@ -1518,48 +1187,516 @@ xfs_reinit_percpu_counters(
{
percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
- percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
+ xfs_set_freecounter(mp, XC_FREE_BLOCKS, mp->m_sb.sb_fdblocks);
+ if (!xfs_has_zoned(mp))
+ xfs_set_freecounter(mp, XC_FREE_RTEXTENTS,
+ mp->m_sb.sb_frextents);
}
static void
xfs_destroy_percpu_counters(
struct xfs_mount *mp)
{
+ enum xfs_free_counter i;
+
+ for (i = 0; i < XC_FREE_NR; i++)
+ percpu_counter_destroy(&mp->m_free[i].count);
percpu_counter_destroy(&mp->m_icount);
percpu_counter_destroy(&mp->m_ifree);
- percpu_counter_destroy(&mp->m_fdblocks);
+ ASSERT(xfs_is_shutdown(mp) ||
+ percpu_counter_sum(&mp->m_delalloc_rtextents) == 0);
+ percpu_counter_destroy(&mp->m_delalloc_rtextents);
+ ASSERT(xfs_is_shutdown(mp) ||
+ percpu_counter_sum(&mp->m_delalloc_blks) == 0);
+ percpu_counter_destroy(&mp->m_delalloc_blks);
}
-STATIC int
-xfs_fs_fill_super(
+static int
+xfs_inodegc_init_percpu(
+ struct xfs_mount *mp)
+{
+ struct xfs_inodegc *gc;
+ int cpu;
+
+ mp->m_inodegc = alloc_percpu(struct xfs_inodegc);
+ if (!mp->m_inodegc)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ gc = per_cpu_ptr(mp->m_inodegc, cpu);
+ gc->cpu = cpu;
+ gc->mp = mp;
+ init_llist_head(&gc->list);
+ gc->items = 0;
+ gc->error = 0;
+ INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
+ }
+ return 0;
+}
+
+static void
+xfs_inodegc_free_percpu(
+ struct xfs_mount *mp)
+{
+ if (!mp->m_inodegc)
+ return;
+ free_percpu(mp->m_inodegc);
+}
+
+static void
+xfs_fs_put_super(
+ struct super_block *sb)
+{
+ struct xfs_mount *mp = XFS_M(sb);
+
+ xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid);
+ xfs_filestream_unmount(mp);
+ xfs_unmountfs(mp);
+
+ xfs_rtmount_freesb(mp);
+ xfs_freesb(mp);
+ xchk_mount_stats_free(mp);
+ free_percpu(mp->m_stats.xs_stats);
+ xfs_inodegc_free_percpu(mp);
+ xfs_destroy_percpu_counters(mp);
+ xfs_destroy_mount_workqueues(mp);
+ xfs_shutdown_devices(mp);
+}
+
+static long
+xfs_fs_nr_cached_objects(
struct super_block *sb,
- void *data,
- int silent)
+ struct shrink_control *sc)
{
- struct inode *root;
- struct xfs_mount *mp = NULL;
- int flags = 0, error = -ENOMEM;
+ /* Paranoia: catch incorrect calls during mount setup or teardown */
+ if (WARN_ON_ONCE(!sb->s_fs_info))
+ return 0;
+ return xfs_reclaim_inodes_count(XFS_M(sb));
+}
- mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
- if (!mp)
- goto out;
+static long
+xfs_fs_free_cached_objects(
+ struct super_block *sb,
+ struct shrink_control *sc)
+{
+ return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
+}
- spin_lock_init(&mp->m_sb_lock);
- mutex_init(&mp->m_growlock);
- atomic_set(&mp->m_active_trans, 0);
- INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
- INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
- INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
- mp->m_kobj.kobject.kset = xfs_kset;
+static void
+xfs_fs_shutdown(
+ struct super_block *sb)
+{
+ xfs_force_shutdown(XFS_M(sb), SHUTDOWN_DEVICE_REMOVED);
+}
+
+static int
+xfs_fs_show_stats(
+ struct seq_file *m,
+ struct dentry *root)
+{
+ struct xfs_mount *mp = XFS_M(root->d_sb);
+
+ if (xfs_has_zoned(mp) && IS_ENABLED(CONFIG_XFS_RT))
+ xfs_zoned_show_stats(m, mp);
+ return 0;
+}
+
+static const struct super_operations xfs_super_operations = {
+ .alloc_inode = xfs_fs_alloc_inode,
+ .destroy_inode = xfs_fs_destroy_inode,
+ .dirty_inode = xfs_fs_dirty_inode,
+ .drop_inode = xfs_fs_drop_inode,
+ .evict_inode = xfs_fs_evict_inode,
+ .put_super = xfs_fs_put_super,
+ .sync_fs = xfs_fs_sync_fs,
+ .freeze_fs = xfs_fs_freeze,
+ .unfreeze_fs = xfs_fs_unfreeze,
+ .statfs = xfs_fs_statfs,
+ .show_options = xfs_fs_show_options,
+ .nr_cached_objects = xfs_fs_nr_cached_objects,
+ .free_cached_objects = xfs_fs_free_cached_objects,
+ .shutdown = xfs_fs_shutdown,
+ .show_stats = xfs_fs_show_stats,
+};
+
+static int
+suffix_kstrtoint(
+ const char *s,
+ unsigned int base,
+ int *res)
+{
+ int last, shift_left_factor = 0, _res;
+ char *value;
+ int ret = 0;
+
+ value = kstrdup(s, GFP_KERNEL);
+ if (!value)
+ return -ENOMEM;
+
+ last = strlen(value) - 1;
+ if (value[last] == 'K' || value[last] == 'k') {
+ shift_left_factor = 10;
+ value[last] = '\0';
+ }
+ if (value[last] == 'M' || value[last] == 'm') {
+ shift_left_factor = 20;
+ value[last] = '\0';
+ }
+ if (value[last] == 'G' || value[last] == 'g') {
+ shift_left_factor = 30;
+ value[last] = '\0';
+ }
+
+ if (kstrtoint(value, base, &_res))
+ ret = -EINVAL;
+ kfree(value);
+ *res = _res << shift_left_factor;
+ return ret;
+}
+
+static int
+suffix_kstrtoull(
+ const char *s,
+ unsigned int base,
+ unsigned long long *res)
+{
+ int last, shift_left_factor = 0;
+ unsigned long long _res;
+ char *value;
+ int ret = 0;
+
+ value = kstrdup(s, GFP_KERNEL);
+ if (!value)
+ return -ENOMEM;
+
+ last = strlen(value) - 1;
+ if (value[last] == 'K' || value[last] == 'k') {
+ shift_left_factor = 10;
+ value[last] = '\0';
+ }
+ if (value[last] == 'M' || value[last] == 'm') {
+ shift_left_factor = 20;
+ value[last] = '\0';
+ }
+ if (value[last] == 'G' || value[last] == 'g') {
+ shift_left_factor = 30;
+ value[last] = '\0';
+ }
+
+ if (kstrtoull(value, base, &_res))
+ ret = -EINVAL;
+ kfree(value);
+ *res = _res << shift_left_factor;
+ return ret;
+}
+
+static inline void
+xfs_fs_warn_deprecated(
+ struct fs_context *fc,
+ struct fs_parameter *param)
+{
+ /*
+ * Always warn about someone passing in a deprecated mount option.
+ * Previously we wouldn't print the warning if we were reconfiguring
+ * and current mount point already had the flag set, but that was not
+ * the right thing to do.
+ *
+ * Many distributions mount the root filesystem with no options in the
+ * initramfs and rely on mount -a to remount the root fs with the
+ * options in fstab. However, the old behavior meant that there would
+ * never be a warning about deprecated mount options for the root fs in
+ * /etc/fstab. On a single-fs system, that means no warning at all.
+ *
+ * Compounding this problem are distribution scripts that copy
+ * /proc/mounts to fstab, which means that we can't remove mount
+ * options unless we're 100% sure they have only ever been advertised
+ * in /proc/mounts in response to explicitly provided mount options.
+ */
+ xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
+}
+
+/*
+ * Set mount state from a mount option.
+ *
+ * NOTE: mp->m_super is NULL here!
+ */
+static int
+xfs_fs_parse_param(
+ struct fs_context *fc,
+ struct fs_parameter *param)
+{
+ struct xfs_mount *parsing_mp = fc->s_fs_info;
+ struct fs_parse_result result;
+ int size = 0;
+ int opt;
+
+ BUILD_BUG_ON(XFS_QFLAGS_MNTOPTS & XFS_MOUNT_QUOTA_ALL);
+
+ opt = fs_parse(fc, xfs_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Op_deprecated:
+ xfs_fs_warn_deprecated(fc, param);
+ return 0;
+ case Opt_logbufs:
+ parsing_mp->m_logbufs = result.uint_32;
+ return 0;
+ case Opt_logbsize:
+ if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize))
+ return -EINVAL;
+ return 0;
+ case Opt_logdev:
+ kfree(parsing_mp->m_logname);
+ parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL);
+ if (!parsing_mp->m_logname)
+ return -ENOMEM;
+ return 0;
+ case Opt_rtdev:
+ kfree(parsing_mp->m_rtname);
+ parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL);
+ if (!parsing_mp->m_rtname)
+ return -ENOMEM;
+ return 0;
+ case Opt_allocsize:
+ if (suffix_kstrtoint(param->string, 10, &size))
+ return -EINVAL;
+ parsing_mp->m_allocsize_log = ffs(size) - 1;
+ parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE;
+ return 0;
+ case Opt_grpid:
+ case Opt_bsdgroups:
+ parsing_mp->m_features |= XFS_FEAT_GRPID;
+ return 0;
+ case Opt_nogrpid:
+ case Opt_sysvgroups:
+ parsing_mp->m_features &= ~XFS_FEAT_GRPID;
+ return 0;
+ case Opt_wsync:
+ parsing_mp->m_features |= XFS_FEAT_WSYNC;
+ return 0;
+ case Opt_norecovery:
+ parsing_mp->m_features |= XFS_FEAT_NORECOVERY;
+ return 0;
+ case Opt_noalign:
+ parsing_mp->m_features |= XFS_FEAT_NOALIGN;
+ return 0;
+ case Opt_swalloc:
+ parsing_mp->m_features |= XFS_FEAT_SWALLOC;
+ return 0;
+ case Opt_sunit:
+ parsing_mp->m_dalign = result.uint_32;
+ return 0;
+ case Opt_swidth:
+ parsing_mp->m_swidth = result.uint_32;
+ return 0;
+ case Opt_inode32:
+ parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS;
+ return 0;
+ case Opt_inode64:
+ parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
+ return 0;
+ case Opt_nouuid:
+ parsing_mp->m_features |= XFS_FEAT_NOUUID;
+ return 0;
+ case Opt_largeio:
+ parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE;
+ return 0;
+ case Opt_nolargeio:
+ parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE;
+ return 0;
+ case Opt_filestreams:
+ parsing_mp->m_features |= XFS_FEAT_FILESTREAMS;
+ return 0;
+ case Opt_noquota:
+ parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
+ parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
+ return 0;
+ case Opt_quota:
+ case Opt_uquota:
+ case Opt_usrquota:
+ parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
+ return 0;
+ case Opt_qnoenforce:
+ case Opt_uqnoenforce:
+ parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
+ parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
+ return 0;
+ case Opt_pquota:
+ case Opt_prjquota:
+ parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
+ return 0;
+ case Opt_pqnoenforce:
+ parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
+ parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
+ return 0;
+ case Opt_gquota:
+ case Opt_grpquota:
+ parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
+ return 0;
+ case Opt_gqnoenforce:
+ parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
+ parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
+ return 0;
+ case Opt_discard:
+ parsing_mp->m_features |= XFS_FEAT_DISCARD;
+ return 0;
+ case Opt_nodiscard:
+ parsing_mp->m_features &= ~XFS_FEAT_DISCARD;
+ return 0;
+#ifdef CONFIG_FS_DAX
+ case Opt_dax:
+ xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS);
+ return 0;
+ case Opt_dax_enum:
+ xfs_mount_set_dax_mode(parsing_mp, result.uint_32);
+ return 0;
+#endif
+ case Opt_max_open_zones:
+ parsing_mp->m_max_open_zones = result.uint_32;
+ return 0;
+ case Opt_lifetime:
+ parsing_mp->m_features &= ~XFS_FEAT_NOLIFETIME;
+ return 0;
+ case Opt_nolifetime:
+ parsing_mp->m_features |= XFS_FEAT_NOLIFETIME;
+ return 0;
+ case Opt_max_atomic_write:
+ if (suffix_kstrtoull(param->string, 10,
+ &parsing_mp->m_awu_max_bytes)) {
+ xfs_warn(parsing_mp,
+ "max atomic write size must be positive integer");
+ return -EINVAL;
+ }
+ return 0;
+ default:
+ xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+xfs_fs_validate_params(
+ struct xfs_mount *mp)
+{
+ /* No recovery flag requires a read-only mount */
+ if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
+ xfs_warn(mp, "no-recovery mounts must be read-only.");
+ return -EINVAL;
+ }
+
+ if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
+ xfs_warn(mp,
+ "sunit and swidth options incompatible with the noalign option");
+ return -EINVAL;
+ }
+
+ if (!IS_ENABLED(CONFIG_XFS_QUOTA) &&
+ (mp->m_qflags & ~XFS_QFLAGS_MNTOPTS)) {
+ xfs_warn(mp, "quota support not available in this kernel.");
+ return -EINVAL;
+ }
+
+ if ((mp->m_dalign && !mp->m_swidth) ||
+ (!mp->m_dalign && mp->m_swidth)) {
+ xfs_warn(mp, "sunit and swidth must be specified together");
+ return -EINVAL;
+ }
+
+ if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) {
+ xfs_warn(mp,
+ "stripe width (%d) must be a multiple of the stripe unit (%d)",
+ mp->m_swidth, mp->m_dalign);
+ return -EINVAL;
+ }
+
+ if (mp->m_logbufs != -1 &&
+ mp->m_logbufs != 0 &&
+ (mp->m_logbufs < XLOG_MIN_ICLOGS ||
+ mp->m_logbufs > XLOG_MAX_ICLOGS)) {
+ xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
+ mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
+ return -EINVAL;
+ }
+
+ if (mp->m_logbsize != -1 &&
+ mp->m_logbsize != 0 &&
+ (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
+ mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
+ !is_power_of_2(mp->m_logbsize))) {
+ xfs_warn(mp,
+ "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
+ mp->m_logbsize);
+ return -EINVAL;
+ }
+
+ if (xfs_has_allocsize(mp) &&
+ (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
+ mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
+ xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
+ mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+struct dentry *
+xfs_debugfs_mkdir(
+ const char *name,
+ struct dentry *parent)
+{
+ struct dentry *child;
+
+ /* Apparently we're expected to ignore error returns?? */
+ child = debugfs_create_dir(name, parent);
+ if (IS_ERR(child))
+ return NULL;
+
+ return child;
+}
+
+static int
+xfs_fs_fill_super(
+ struct super_block *sb,
+ struct fs_context *fc)
+{
+ struct xfs_mount *mp = sb->s_fs_info;
+ struct inode *root;
+ int flags = 0, error;
mp->m_super = sb;
- sb->s_fs_info = mp;
- error = xfs_parseargs(mp, (char *)data);
+ /*
+ * Copy VFS mount flags from the context now that all parameter parsing
+ * is guaranteed to have been completed by either the old mount API or
+ * the newer fsopen/fsconfig API.
+ */
+ if (fc->sb_flags & SB_RDONLY)
+ xfs_set_readonly(mp);
+ if (fc->sb_flags & SB_DIRSYNC)
+ mp->m_features |= XFS_FEAT_DIRSYNC;
+ if (fc->sb_flags & SB_SYNCHRONOUS)
+ mp->m_features |= XFS_FEAT_WSYNC;
+
+ error = xfs_fs_validate_params(mp);
if (error)
- goto out_free_fsname;
+ return error;
- sb_min_blocksize(sb, BBSIZE);
+ if (!sb_min_blocksize(sb, BBSIZE)) {
+ xfs_err(mp, "unable to set blocksize");
+ return -EINVAL;
+ }
sb->s_xattr = xfs_xattr_handlers;
sb->s_export_op = &xfs_export_operations;
#ifdef CONFIG_XFS_QUOTA
@@ -1568,32 +1705,58 @@ xfs_fs_fill_super(
#endif
sb->s_op = &xfs_super_operations;
- if (silent)
+ /*
+ * Delay mount work if the debug hook is set. This is debug
+ * instrumention to coordinate simulation of xfs mount failures with
+ * VFS superblock operations
+ */
+ if (xfs_globals.mount_delay) {
+ xfs_notice(mp, "Delaying mount for %d seconds.",
+ xfs_globals.mount_delay);
+ msleep(xfs_globals.mount_delay * 1000);
+ }
+
+ if (fc->sb_flags & SB_SILENT)
flags |= XFS_MFSI_QUIET;
error = xfs_open_devices(mp);
if (error)
- goto out_free_fsname;
+ return error;
+
+ if (xfs_debugfs) {
+ mp->m_debugfs = xfs_debugfs_mkdir(mp->m_super->s_id,
+ xfs_debugfs);
+ } else {
+ mp->m_debugfs = NULL;
+ }
error = xfs_init_mount_workqueues(mp);
if (error)
- goto out_close_devices;
+ goto out_shutdown_devices;
error = xfs_init_percpu_counters(mp);
if (error)
goto out_destroy_workqueues;
+ error = xfs_inodegc_init_percpu(mp);
+ if (error)
+ goto out_destroy_counters;
+
/* Allocate stats memory before we do operations that might use it */
mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
if (!mp->m_stats.xs_stats) {
error = -ENOMEM;
- goto out_destroy_counters;
+ goto out_destroy_inodegc;
}
- error = xfs_readsb(mp, flags);
+ error = xchk_mount_stats_alloc(mp);
if (error)
goto out_free_stats;
+ error = xfs_readsb(mp, flags);
+ if (error)
+ goto out_free_scrub_stats;
+
error = xfs_finish_flags(mp);
if (error)
goto out_free_sb;
@@ -1602,55 +1765,197 @@ xfs_fs_fill_super(
if (error)
goto out_free_sb;
- error = xfs_filestream_mount(mp);
+ /*
+ * V4 support is undergoing deprecation.
+ *
+ * Note: this has to use an open coded m_features check as xfs_has_crc
+ * always returns false for !CONFIG_XFS_SUPPORT_V4.
+ */
+ if (!(mp->m_features & XFS_FEAT_CRC)) {
+ if (!IS_ENABLED(CONFIG_XFS_SUPPORT_V4)) {
+ xfs_warn(mp,
+ "Deprecated V4 format (crc=0) not supported by kernel.");
+ error = -EINVAL;
+ goto out_free_sb;
+ }
+ xfs_warn_once(mp,
+ "Deprecated V4 format (crc=0) will not be supported after September 2030.");
+ }
+
+ /* ASCII case insensitivity is undergoing deprecation. */
+ if (xfs_has_asciici(mp)) {
+#ifdef CONFIG_XFS_SUPPORT_ASCII_CI
+ xfs_warn_once(mp,
+ "Deprecated ASCII case-insensitivity feature (ascii-ci=1) will not be supported after September 2030.");
+#else
+ xfs_warn(mp,
+ "Deprecated ASCII case-insensitivity feature (ascii-ci=1) not supported by kernel.");
+ error = -EINVAL;
+ goto out_free_sb;
+#endif
+ }
+
+ /*
+ * Filesystem claims it needs repair, so refuse the mount unless
+ * norecovery is also specified, in which case the filesystem can
+ * be mounted with no risk of further damage.
+ */
+ if (xfs_has_needsrepair(mp) && !xfs_has_norecovery(mp)) {
+ xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair.");
+ error = -EFSCORRUPTED;
+ goto out_free_sb;
+ }
+
+ /*
+ * Don't touch the filesystem if a user tool thinks it owns the primary
+ * superblock. mkfs doesn't clear the flag from secondary supers, so
+ * we don't check them at all.
+ */
+ if (mp->m_sb.sb_inprogress) {
+ xfs_warn(mp, "Offline file system operation in progress!");
+ error = -EFSCORRUPTED;
+ goto out_free_sb;
+ }
+
+ if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
+ size_t max_folio_size = mapping_max_folio_size_supported();
+
+ if (!xfs_has_crc(mp)) {
+ xfs_warn(mp,
+"V4 Filesystem with blocksize %d bytes. Only pagesize (%ld) or less is supported.",
+ mp->m_sb.sb_blocksize, PAGE_SIZE);
+ error = -ENOSYS;
+ goto out_free_sb;
+ }
+
+ if (mp->m_sb.sb_blocksize > max_folio_size) {
+ xfs_warn(mp,
+"block size (%u bytes) not supported; Only block size (%zu) or less is supported",
+ mp->m_sb.sb_blocksize, max_folio_size);
+ error = -ENOSYS;
+ goto out_free_sb;
+ }
+
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LBS);
+ }
+
+ /* Ensure this filesystem fits in the page cache limits */
+ if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) ||
+ xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) {
+ xfs_warn(mp,
+ "file system too large to be mounted on this system.");
+ error = -EFBIG;
+ goto out_free_sb;
+ }
+
+ /*
+ * XFS block mappings use 54 bits to store the logical block offset.
+ * This should suffice to handle the maximum file size that the VFS
+ * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT
+ * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes
+ * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON
+ * to check this assertion.
+ *
+ * Avoid integer overflow by comparing the maximum bmbt offset to the
+ * maximum pagecache offset in units of fs blocks.
+ */
+ if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) {
+ xfs_warn(mp,
+"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
+ XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
+ XFS_MAX_FILEOFF);
+ error = -EINVAL;
+ goto out_free_sb;
+ }
+
+ error = xfs_rtmount_readsb(mp);
if (error)
goto out_free_sb;
+ error = xfs_filestream_mount(mp);
+ if (error)
+ goto out_free_rtsb;
+
/*
* we must configure the block size in the superblock before we run the
* full mount process as the mount process can lookup and cache inodes.
*/
- sb->s_magic = XFS_SB_MAGIC;
+ sb->s_magic = XFS_SUPER_MAGIC;
sb->s_blocksize = mp->m_sb.sb_blocksize;
sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
- sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_max_links = XFS_MAXLINK;
sb->s_time_gran = 1;
+ if (xfs_has_bigtime(mp)) {
+ sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
+ sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
+ } else {
+ sb->s_time_min = XFS_LEGACY_TIME_MIN;
+ sb->s_time_max = XFS_LEGACY_TIME_MAX;
+ }
+ trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max);
+ sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM;
+
set_posix_acl_flag(sb);
/* version 5 superblocks support inode version counters. */
- if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
- sb->s_flags |= MS_I_VERSION;
+ if (xfs_has_crc(mp))
+ sb->s_flags |= SB_I_VERSION;
+
+ if (xfs_has_dax_always(mp)) {
+ error = xfs_setup_dax_always(mp);
+ if (error)
+ goto out_filestream_unmount;
+ }
- if (mp->m_flags & XFS_MOUNT_DAX) {
+ if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) {
xfs_warn(mp,
- "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+ "mounting with \"discard\" option, but the device does not support discard");
+ mp->m_features &= ~XFS_FEAT_DISCARD;
+ }
- error = bdev_dax_supported(sb, sb->s_blocksize);
- if (error) {
+ if (xfs_has_zoned(mp)) {
+ if (!xfs_has_metadir(mp)) {
xfs_alert(mp,
- "DAX unsupported by block device. Turning off DAX.");
- mp->m_flags &= ~XFS_MOUNT_DAX;
+ "metadir feature required for zoned realtime devices.");
+ error = -EINVAL;
+ goto out_filestream_unmount;
}
- if (xfs_sb_version_hasreflink(&mp->m_sb))
- xfs_alert(mp,
- "DAX and reflink have not been tested together!");
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_ZONED);
+ } else if (xfs_has_metadir(mp)) {
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_METADIR);
}
- if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
- if (mp->m_sb.sb_rblocks) {
+ if (xfs_has_reflink(mp)) {
+ if (xfs_has_realtime(mp) &&
+ !xfs_reflink_supports_rextsize(mp, mp->m_sb.sb_rextsize)) {
xfs_alert(mp,
- "EXPERIMENTAL reverse mapping btree not compatible with realtime device!");
+ "reflink not compatible with realtime extent size %u!",
+ mp->m_sb.sb_rextsize);
error = -EINVAL;
goto out_filestream_unmount;
}
- xfs_alert(mp,
- "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!");
+
+ if (xfs_has_zoned(mp)) {
+ xfs_alert(mp,
+ "reflink not compatible with zoned RT device!");
+ error = -EINVAL;
+ goto out_filestream_unmount;
+ }
+
+ if (xfs_globals.always_cow) {
+ xfs_info(mp, "using DEBUG-only always_cow mode.");
+ mp->m_always_cow = true;
+ }
}
- if (xfs_sb_version_hasreflink(&mp->m_sb))
- xfs_alert(mp,
- "EXPERIMENTAL reflink feature enabled. Use at your own risk!");
+ /*
+ * If no quota mount options were provided, maybe we'll try to pick
+ * up the quota accounting and enforcement flags from the ondisk sb.
+ */
+ if (!(mp->m_qflags & XFS_QFLAGS_MNTOPTS))
+ xfs_set_resuming_quotaon(mp);
+ mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS;
error = xfs_mountfs(mp);
if (error)
@@ -1671,285 +1976,595 @@ xfs_fs_fill_super(
out_filestream_unmount:
xfs_filestream_unmount(mp);
+ out_free_rtsb:
+ xfs_rtmount_freesb(mp);
out_free_sb:
xfs_freesb(mp);
+ out_free_scrub_stats:
+ xchk_mount_stats_free(mp);
out_free_stats:
free_percpu(mp->m_stats.xs_stats);
+ out_destroy_inodegc:
+ xfs_inodegc_free_percpu(mp);
out_destroy_counters:
xfs_destroy_percpu_counters(mp);
out_destroy_workqueues:
xfs_destroy_mount_workqueues(mp);
- out_close_devices:
- xfs_close_devices(mp);
- out_free_fsname:
- xfs_free_fsname(mp);
- kfree(mp);
- out:
+ out_shutdown_devices:
+ xfs_shutdown_devices(mp);
return error;
out_unmount:
xfs_filestream_unmount(mp);
xfs_unmountfs(mp);
- goto out_free_sb;
+ goto out_free_rtsb;
}
-STATIC void
-xfs_fs_put_super(
- struct super_block *sb)
+static int
+xfs_fs_get_tree(
+ struct fs_context *fc)
{
- struct xfs_mount *mp = XFS_M(sb);
+ return get_tree_bdev(fc, xfs_fs_fill_super);
+}
- xfs_notice(mp, "Unmounting Filesystem");
- xfs_filestream_unmount(mp);
- xfs_unmountfs(mp);
+static int
+xfs_remount_rw(
+ struct xfs_mount *mp)
+{
+ struct xfs_sb *sbp = &mp->m_sb;
+ int error;
- xfs_freesb(mp);
- free_percpu(mp->m_stats.xs_stats);
- xfs_destroy_percpu_counters(mp);
- xfs_destroy_mount_workqueues(mp);
- xfs_close_devices(mp);
- xfs_free_fsname(mp);
- kfree(mp);
+ if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp &&
+ xfs_readonly_buftarg(mp->m_logdev_targp)) {
+ xfs_warn(mp,
+ "ro->rw transition prohibited by read-only logdev");
+ return -EACCES;
+ }
+
+ if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) {
+ xfs_warn(mp,
+ "ro->rw transition prohibited by read-only rtdev");
+ return -EACCES;
+ }
+
+ if (xfs_has_norecovery(mp)) {
+ xfs_warn(mp,
+ "ro->rw transition prohibited on norecovery mount");
+ return -EINVAL;
+ }
+
+ if (xfs_sb_is_v5(sbp) &&
+ xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
+ xfs_warn(mp,
+ "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
+ (sbp->sb_features_ro_compat &
+ XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
+ return -EINVAL;
+ }
+
+ xfs_clear_readonly(mp);
+
+ /*
+ * If this is the first remount to writeable state we might have some
+ * superblock changes to update.
+ */
+ if (mp->m_update_sb) {
+ error = xfs_sync_sb(mp, false);
+ if (error) {
+ xfs_warn(mp, "failed to write sb changes");
+ return error;
+ }
+ mp->m_update_sb = false;
+ }
+
+ /*
+ * Fill out the reserve pool if it is empty. Use the stashed value if
+ * it is non-zero, otherwise go with the default.
+ */
+ xfs_restore_resvblks(mp);
+ xfs_log_work_queue(mp);
+ xfs_blockgc_start(mp);
+
+ /* Create the per-AG metadata reservation pool .*/
+ error = xfs_fs_reserve_ag_blocks(mp);
+ if (error && error != -ENOSPC)
+ return error;
+
+ /* Re-enable the background inode inactivation worker. */
+ xfs_inodegc_start(mp);
+
+ /* Restart zone reclaim */
+ xfs_zone_gc_start(mp);
+
+ return 0;
}
-STATIC struct dentry *
-xfs_fs_mount(
- struct file_system_type *fs_type,
- int flags,
- const char *dev_name,
- void *data)
+static int
+xfs_remount_ro(
+ struct xfs_mount *mp)
{
- return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
+ struct xfs_icwalk icw = {
+ .icw_flags = XFS_ICWALK_FLAG_SYNC,
+ };
+ int error;
+
+ /* Flush all the dirty data to disk. */
+ error = sync_filesystem(mp->m_super);
+ if (error)
+ return error;
+
+ /*
+ * Cancel background eofb scanning so it cannot race with the final
+ * log force+buftarg wait and deadlock the remount.
+ */
+ xfs_blockgc_stop(mp);
+
+ /*
+ * Clear out all remaining COW staging extents and speculative post-EOF
+ * preallocations so that we don't leave inodes requiring inactivation
+ * cleanups during reclaim on a read-only mount. We must process every
+ * cached inode, so this requires a synchronous cache scan.
+ */
+ error = xfs_blockgc_free_space(mp, &icw);
+ if (error) {
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ return error;
+ }
+
+ /*
+ * Stop the inodegc background worker. xfs_fs_reconfigure already
+ * flushed all pending inodegc work when it sync'd the filesystem.
+ * The VFS holds s_umount, so we know that inodes cannot enter
+ * xfs_fs_destroy_inode during a remount operation. In readonly mode
+ * we send inodes straight to reclaim, so no inodes will be queued.
+ */
+ xfs_inodegc_stop(mp);
+
+ /* Stop zone reclaim */
+ xfs_zone_gc_stop(mp);
+
+ /* Free the per-AG metadata reservation pool. */
+ xfs_fs_unreserve_ag_blocks(mp);
+
+ /*
+ * Before we sync the metadata, we need to free up the reserve block
+ * pool so that the used block count in the superblock on disk is
+ * correct at the end of the remount. Stash the current* reserve pool
+ * size so that if we get remounted rw, we can return it to the same
+ * size.
+ */
+ xfs_save_resvblks(mp);
+
+ xfs_log_clean(mp);
+ xfs_set_readonly(mp);
+
+ return 0;
}
-static long
-xfs_fs_nr_cached_objects(
- struct super_block *sb,
- struct shrink_control *sc)
+/*
+ * Logically we would return an error here to prevent users from believing
+ * they might have changed mount options using remount which can't be changed.
+ *
+ * But unfortunately mount(8) adds all options from mtab and fstab to the mount
+ * arguments in some cases so we can't blindly reject options, but have to
+ * check for each specified option if it actually differs from the currently
+ * set option and only reject it if that's the case.
+ *
+ * Until that is implemented we return success for every remount request, and
+ * silently ignore all options that we can't actually change.
+ */
+static int
+xfs_fs_reconfigure(
+ struct fs_context *fc)
{
- return xfs_reclaim_inodes_count(XFS_M(sb));
+ struct xfs_mount *mp = XFS_M(fc->root->d_sb);
+ struct xfs_mount *new_mp = fc->s_fs_info;
+ int flags = fc->sb_flags;
+ int error;
+
+ new_mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS;
+
+ /* version 5 superblocks always support version counters. */
+ if (xfs_has_crc(mp))
+ fc->sb_flags |= SB_I_VERSION;
+
+ error = xfs_fs_validate_params(new_mp);
+ if (error)
+ return error;
+
+ /* Validate new max_atomic_write option before making other changes */
+ if (mp->m_awu_max_bytes != new_mp->m_awu_max_bytes) {
+ error = xfs_set_max_atomic_write_opt(mp,
+ new_mp->m_awu_max_bytes);
+ if (error)
+ return error;
+ }
+
+ /* inode32 -> inode64 */
+ if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
+ mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
+ mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
+ }
+
+ /* inode64 -> inode32 */
+ if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) {
+ mp->m_features |= XFS_FEAT_SMALL_INUMS;
+ mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
+ }
+
+ /*
+ * Now that mp has been modified according to the remount options, we
+ * do a final option validation with xfs_finish_flags() just like it is
+ * just like it is done during mount. We cannot use
+ * done during mount. We cannot use xfs_finish_flags() on new_mp as it
+ * contains only the user given options.
+ */
+ error = xfs_finish_flags(mp);
+ if (error)
+ return error;
+
+ /* ro -> rw */
+ if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
+ error = xfs_remount_rw(mp);
+ if (error)
+ return error;
+ }
+
+ /* rw -> ro */
+ if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) {
+ error = xfs_remount_ro(mp);
+ if (error)
+ return error;
+ }
+
+ return 0;
}
-static long
-xfs_fs_free_cached_objects(
- struct super_block *sb,
- struct shrink_control *sc)
+static void
+xfs_fs_free(
+ struct fs_context *fc)
{
- return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
+ struct xfs_mount *mp = fc->s_fs_info;
+
+ /*
+ * mp is stored in the fs_context when it is initialized.
+ * mp is transferred to the superblock on a successful mount,
+ * but if an error occurs before the transfer we have to free
+ * it here.
+ */
+ if (mp)
+ xfs_mount_free(mp);
}
-static const struct super_operations xfs_super_operations = {
- .alloc_inode = xfs_fs_alloc_inode,
- .destroy_inode = xfs_fs_destroy_inode,
- .drop_inode = xfs_fs_drop_inode,
- .put_super = xfs_fs_put_super,
- .sync_fs = xfs_fs_sync_fs,
- .freeze_fs = xfs_fs_freeze,
- .unfreeze_fs = xfs_fs_unfreeze,
- .statfs = xfs_fs_statfs,
- .remount_fs = xfs_fs_remount,
- .show_options = xfs_fs_show_options,
- .nr_cached_objects = xfs_fs_nr_cached_objects,
- .free_cached_objects = xfs_fs_free_cached_objects,
+static const struct fs_context_operations xfs_context_ops = {
+ .parse_param = xfs_fs_parse_param,
+ .get_tree = xfs_fs_get_tree,
+ .reconfigure = xfs_fs_reconfigure,
+ .free = xfs_fs_free,
};
+/*
+ * WARNING: do not initialise any parameters in this function that depend on
+ * mount option parsing having already been performed as this can be called from
+ * fsopen() before any parameters have been set.
+ */
+static int
+xfs_init_fs_context(
+ struct fs_context *fc)
+{
+ struct xfs_mount *mp;
+ int i;
+
+ mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
+ if (!mp)
+ return -ENOMEM;
+
+ spin_lock_init(&mp->m_sb_lock);
+ for (i = 0; i < XG_TYPE_MAX; i++)
+ xa_init(&mp->m_groups[i].xa);
+ mutex_init(&mp->m_growlock);
+ mutex_init(&mp->m_metafile_resv_lock);
+ INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
+ INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+ mp->m_kobj.kobject.kset = xfs_kset;
+ /*
+ * We don't create the finobt per-ag space reservation until after log
+ * recovery, so we must set this to true so that an ifree transaction
+ * started during log recovery will not depend on space reservations
+ * for finobt expansion.
+ */
+ mp->m_finobt_nores = true;
+
+ /*
+ * These can be overridden by the mount option parsing.
+ */
+ mp->m_logbufs = -1;
+ mp->m_logbsize = -1;
+ mp->m_allocsize_log = 16; /* 64k */
+
+ xfs_hooks_init(&mp->m_dir_update_hooks);
+
+ fc->s_fs_info = mp;
+ fc->ops = &xfs_context_ops;
+
+ return 0;
+}
+
+static void
+xfs_kill_sb(
+ struct super_block *sb)
+{
+ kill_block_super(sb);
+ xfs_mount_free(XFS_M(sb));
+}
+
static struct file_system_type xfs_fs_type = {
.owner = THIS_MODULE,
.name = "xfs",
- .mount = xfs_fs_mount,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .init_fs_context = xfs_init_fs_context,
+ .parameters = xfs_fs_parameters,
+ .kill_sb = xfs_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME |
+ FS_LBS,
};
MODULE_ALIAS_FS("xfs");
STATIC int __init
-xfs_init_zones(void)
+xfs_init_caches(void)
{
- xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
- offsetof(struct xfs_ioend, io_inline_bio),
- BIOSET_NEED_BVECS);
- if (!xfs_ioend_bioset)
+ int error;
+
+ xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
+ SLAB_HWCACHE_ALIGN |
+ SLAB_RECLAIM_ACCOUNT,
+ NULL);
+ if (!xfs_buf_cache)
goto out;
- xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
- "xfs_log_ticket");
- if (!xfs_log_ticket_zone)
- goto out_free_ioend_bioset;
-
- xfs_bmap_free_item_zone = kmem_zone_init(
- sizeof(struct xfs_extent_free_item),
- "xfs_bmap_free_item");
- if (!xfs_bmap_free_item_zone)
- goto out_destroy_log_ticket_zone;
-
- xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
- "xfs_btree_cur");
- if (!xfs_btree_cur_zone)
- goto out_destroy_bmap_free_item_zone;
-
- xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
- "xfs_da_state");
- if (!xfs_da_state_zone)
- goto out_destroy_btree_cur_zone;
-
- xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
- if (!xfs_ifork_zone)
- goto out_destroy_da_state_zone;
-
- xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
- if (!xfs_trans_zone)
- goto out_destroy_ifork_zone;
-
- xfs_log_item_desc_zone =
- kmem_zone_init(sizeof(struct xfs_log_item_desc),
- "xfs_log_item_desc");
- if (!xfs_log_item_desc_zone)
- goto out_destroy_trans_zone;
+ xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket",
+ sizeof(struct xlog_ticket),
+ 0, 0, NULL);
+ if (!xfs_log_ticket_cache)
+ goto out_destroy_buf_cache;
+
+ error = xfs_btree_init_cur_caches();
+ if (error)
+ goto out_destroy_log_ticket_cache;
+
+ error = rcbagbt_init_cur_cache();
+ if (error)
+ goto out_destroy_btree_cur_cache;
+
+ error = xfs_defer_init_item_caches();
+ if (error)
+ goto out_destroy_rcbagbt_cur_cache;
+
+ xfs_da_state_cache = kmem_cache_create("xfs_da_state",
+ sizeof(struct xfs_da_state),
+ 0, 0, NULL);
+ if (!xfs_da_state_cache)
+ goto out_destroy_defer_item_cache;
+
+ xfs_ifork_cache = kmem_cache_create("xfs_ifork",
+ sizeof(struct xfs_ifork),
+ 0, 0, NULL);
+ if (!xfs_ifork_cache)
+ goto out_destroy_da_state_cache;
+
+ xfs_trans_cache = kmem_cache_create("xfs_trans",
+ sizeof(struct xfs_trans),
+ 0, 0, NULL);
+ if (!xfs_trans_cache)
+ goto out_destroy_ifork_cache;
+
/*
- * The size of the zone allocated buf log item is the maximum
+ * The size of the cache-allocated buf log item is the maximum
* size possible under XFS. This wastes a little bit of memory,
* but it is much faster.
*/
- xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item),
- "xfs_buf_item");
- if (!xfs_buf_item_zone)
- goto out_destroy_log_item_desc_zone;
-
- xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
- ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
- sizeof(xfs_extent_t))), "xfs_efd_item");
- if (!xfs_efd_zone)
- goto out_destroy_buf_item_zone;
-
- xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
- ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
- sizeof(xfs_extent_t))), "xfs_efi_item");
- if (!xfs_efi_zone)
- goto out_destroy_efd_zone;
-
- xfs_inode_zone =
- kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
- KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD |
- KM_ZONE_ACCOUNT, xfs_fs_inode_init_once);
- if (!xfs_inode_zone)
- goto out_destroy_efi_zone;
-
- xfs_ili_zone =
- kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
- KM_ZONE_SPREAD, NULL);
- if (!xfs_ili_zone)
- goto out_destroy_inode_zone;
- xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item),
- "xfs_icr");
- if (!xfs_icreate_zone)
- goto out_destroy_ili_zone;
-
- xfs_rud_zone = kmem_zone_init(sizeof(struct xfs_rud_log_item),
- "xfs_rud_item");
- if (!xfs_rud_zone)
- goto out_destroy_icreate_zone;
-
- xfs_rui_zone = kmem_zone_init(
+ xfs_buf_item_cache = kmem_cache_create("xfs_buf_item",
+ sizeof(struct xfs_buf_log_item),
+ 0, 0, NULL);
+ if (!xfs_buf_item_cache)
+ goto out_destroy_trans_cache;
+
+ xfs_efd_cache = kmem_cache_create("xfs_efd_item",
+ xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS),
+ 0, 0, NULL);
+ if (!xfs_efd_cache)
+ goto out_destroy_buf_item_cache;
+
+ xfs_efi_cache = kmem_cache_create("xfs_efi_item",
+ xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS),
+ 0, 0, NULL);
+ if (!xfs_efi_cache)
+ goto out_destroy_efd_cache;
+
+ xfs_inode_cache = kmem_cache_create("xfs_inode",
+ sizeof(struct xfs_inode), 0,
+ (SLAB_HWCACHE_ALIGN |
+ SLAB_RECLAIM_ACCOUNT |
+ SLAB_ACCOUNT),
+ xfs_fs_inode_init_once);
+ if (!xfs_inode_cache)
+ goto out_destroy_efi_cache;
+
+ xfs_ili_cache = kmem_cache_create("xfs_ili",
+ sizeof(struct xfs_inode_log_item), 0,
+ SLAB_RECLAIM_ACCOUNT,
+ NULL);
+ if (!xfs_ili_cache)
+ goto out_destroy_inode_cache;
+
+ xfs_icreate_cache = kmem_cache_create("xfs_icr",
+ sizeof(struct xfs_icreate_item),
+ 0, 0, NULL);
+ if (!xfs_icreate_cache)
+ goto out_destroy_ili_cache;
+
+ xfs_rud_cache = kmem_cache_create("xfs_rud_item",
+ sizeof(struct xfs_rud_log_item),
+ 0, 0, NULL);
+ if (!xfs_rud_cache)
+ goto out_destroy_icreate_cache;
+
+ xfs_rui_cache = kmem_cache_create("xfs_rui_item",
xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
- "xfs_rui_item");
- if (!xfs_rui_zone)
- goto out_destroy_rud_zone;
+ 0, 0, NULL);
+ if (!xfs_rui_cache)
+ goto out_destroy_rud_cache;
- xfs_cud_zone = kmem_zone_init(sizeof(struct xfs_cud_log_item),
- "xfs_cud_item");
- if (!xfs_cud_zone)
- goto out_destroy_rui_zone;
+ xfs_cud_cache = kmem_cache_create("xfs_cud_item",
+ sizeof(struct xfs_cud_log_item),
+ 0, 0, NULL);
+ if (!xfs_cud_cache)
+ goto out_destroy_rui_cache;
- xfs_cui_zone = kmem_zone_init(
+ xfs_cui_cache = kmem_cache_create("xfs_cui_item",
xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
- "xfs_cui_item");
- if (!xfs_cui_zone)
- goto out_destroy_cud_zone;
+ 0, 0, NULL);
+ if (!xfs_cui_cache)
+ goto out_destroy_cud_cache;
- xfs_bud_zone = kmem_zone_init(sizeof(struct xfs_bud_log_item),
- "xfs_bud_item");
- if (!xfs_bud_zone)
- goto out_destroy_cui_zone;
+ xfs_bud_cache = kmem_cache_create("xfs_bud_item",
+ sizeof(struct xfs_bud_log_item),
+ 0, 0, NULL);
+ if (!xfs_bud_cache)
+ goto out_destroy_cui_cache;
- xfs_bui_zone = kmem_zone_init(
+ xfs_bui_cache = kmem_cache_create("xfs_bui_item",
xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
- "xfs_bui_item");
- if (!xfs_bui_zone)
- goto out_destroy_bud_zone;
+ 0, 0, NULL);
+ if (!xfs_bui_cache)
+ goto out_destroy_bud_cache;
+
+ xfs_attrd_cache = kmem_cache_create("xfs_attrd_item",
+ sizeof(struct xfs_attrd_log_item),
+ 0, 0, NULL);
+ if (!xfs_attrd_cache)
+ goto out_destroy_bui_cache;
+
+ xfs_attri_cache = kmem_cache_create("xfs_attri_item",
+ sizeof(struct xfs_attri_log_item),
+ 0, 0, NULL);
+ if (!xfs_attri_cache)
+ goto out_destroy_attrd_cache;
+
+ xfs_iunlink_cache = kmem_cache_create("xfs_iul_item",
+ sizeof(struct xfs_iunlink_item),
+ 0, 0, NULL);
+ if (!xfs_iunlink_cache)
+ goto out_destroy_attri_cache;
+
+ xfs_xmd_cache = kmem_cache_create("xfs_xmd_item",
+ sizeof(struct xfs_xmd_log_item),
+ 0, 0, NULL);
+ if (!xfs_xmd_cache)
+ goto out_destroy_iul_cache;
+
+ xfs_xmi_cache = kmem_cache_create("xfs_xmi_item",
+ sizeof(struct xfs_xmi_log_item),
+ 0, 0, NULL);
+ if (!xfs_xmi_cache)
+ goto out_destroy_xmd_cache;
+
+ xfs_parent_args_cache = kmem_cache_create("xfs_parent_args",
+ sizeof(struct xfs_parent_args),
+ 0, 0, NULL);
+ if (!xfs_parent_args_cache)
+ goto out_destroy_xmi_cache;
return 0;
- out_destroy_bud_zone:
- kmem_zone_destroy(xfs_bud_zone);
- out_destroy_cui_zone:
- kmem_zone_destroy(xfs_cui_zone);
- out_destroy_cud_zone:
- kmem_zone_destroy(xfs_cud_zone);
- out_destroy_rui_zone:
- kmem_zone_destroy(xfs_rui_zone);
- out_destroy_rud_zone:
- kmem_zone_destroy(xfs_rud_zone);
- out_destroy_icreate_zone:
- kmem_zone_destroy(xfs_icreate_zone);
- out_destroy_ili_zone:
- kmem_zone_destroy(xfs_ili_zone);
- out_destroy_inode_zone:
- kmem_zone_destroy(xfs_inode_zone);
- out_destroy_efi_zone:
- kmem_zone_destroy(xfs_efi_zone);
- out_destroy_efd_zone:
- kmem_zone_destroy(xfs_efd_zone);
- out_destroy_buf_item_zone:
- kmem_zone_destroy(xfs_buf_item_zone);
- out_destroy_log_item_desc_zone:
- kmem_zone_destroy(xfs_log_item_desc_zone);
- out_destroy_trans_zone:
- kmem_zone_destroy(xfs_trans_zone);
- out_destroy_ifork_zone:
- kmem_zone_destroy(xfs_ifork_zone);
- out_destroy_da_state_zone:
- kmem_zone_destroy(xfs_da_state_zone);
- out_destroy_btree_cur_zone:
- kmem_zone_destroy(xfs_btree_cur_zone);
- out_destroy_bmap_free_item_zone:
- kmem_zone_destroy(xfs_bmap_free_item_zone);
- out_destroy_log_ticket_zone:
- kmem_zone_destroy(xfs_log_ticket_zone);
- out_free_ioend_bioset:
- bioset_free(xfs_ioend_bioset);
+ out_destroy_xmi_cache:
+ kmem_cache_destroy(xfs_xmi_cache);
+ out_destroy_xmd_cache:
+ kmem_cache_destroy(xfs_xmd_cache);
+ out_destroy_iul_cache:
+ kmem_cache_destroy(xfs_iunlink_cache);
+ out_destroy_attri_cache:
+ kmem_cache_destroy(xfs_attri_cache);
+ out_destroy_attrd_cache:
+ kmem_cache_destroy(xfs_attrd_cache);
+ out_destroy_bui_cache:
+ kmem_cache_destroy(xfs_bui_cache);
+ out_destroy_bud_cache:
+ kmem_cache_destroy(xfs_bud_cache);
+ out_destroy_cui_cache:
+ kmem_cache_destroy(xfs_cui_cache);
+ out_destroy_cud_cache:
+ kmem_cache_destroy(xfs_cud_cache);
+ out_destroy_rui_cache:
+ kmem_cache_destroy(xfs_rui_cache);
+ out_destroy_rud_cache:
+ kmem_cache_destroy(xfs_rud_cache);
+ out_destroy_icreate_cache:
+ kmem_cache_destroy(xfs_icreate_cache);
+ out_destroy_ili_cache:
+ kmem_cache_destroy(xfs_ili_cache);
+ out_destroy_inode_cache:
+ kmem_cache_destroy(xfs_inode_cache);
+ out_destroy_efi_cache:
+ kmem_cache_destroy(xfs_efi_cache);
+ out_destroy_efd_cache:
+ kmem_cache_destroy(xfs_efd_cache);
+ out_destroy_buf_item_cache:
+ kmem_cache_destroy(xfs_buf_item_cache);
+ out_destroy_trans_cache:
+ kmem_cache_destroy(xfs_trans_cache);
+ out_destroy_ifork_cache:
+ kmem_cache_destroy(xfs_ifork_cache);
+ out_destroy_da_state_cache:
+ kmem_cache_destroy(xfs_da_state_cache);
+ out_destroy_defer_item_cache:
+ xfs_defer_destroy_item_caches();
+ out_destroy_rcbagbt_cur_cache:
+ rcbagbt_destroy_cur_cache();
+ out_destroy_btree_cur_cache:
+ xfs_btree_destroy_cur_caches();
+ out_destroy_log_ticket_cache:
+ kmem_cache_destroy(xfs_log_ticket_cache);
+ out_destroy_buf_cache:
+ kmem_cache_destroy(xfs_buf_cache);
out:
return -ENOMEM;
}
STATIC void
-xfs_destroy_zones(void)
+xfs_destroy_caches(void)
{
/*
* Make sure all delayed rcu free are flushed before we
* destroy caches.
*/
rcu_barrier();
- kmem_zone_destroy(xfs_bui_zone);
- kmem_zone_destroy(xfs_bud_zone);
- kmem_zone_destroy(xfs_cui_zone);
- kmem_zone_destroy(xfs_cud_zone);
- kmem_zone_destroy(xfs_rui_zone);
- kmem_zone_destroy(xfs_rud_zone);
- kmem_zone_destroy(xfs_icreate_zone);
- kmem_zone_destroy(xfs_ili_zone);
- kmem_zone_destroy(xfs_inode_zone);
- kmem_zone_destroy(xfs_efi_zone);
- kmem_zone_destroy(xfs_efd_zone);
- kmem_zone_destroy(xfs_buf_item_zone);
- kmem_zone_destroy(xfs_log_item_desc_zone);
- kmem_zone_destroy(xfs_trans_zone);
- kmem_zone_destroy(xfs_ifork_zone);
- kmem_zone_destroy(xfs_da_state_zone);
- kmem_zone_destroy(xfs_btree_cur_zone);
- kmem_zone_destroy(xfs_bmap_free_item_zone);
- kmem_zone_destroy(xfs_log_ticket_zone);
- bioset_free(xfs_ioend_bioset);
+ kmem_cache_destroy(xfs_parent_args_cache);
+ kmem_cache_destroy(xfs_xmd_cache);
+ kmem_cache_destroy(xfs_xmi_cache);
+ kmem_cache_destroy(xfs_iunlink_cache);
+ kmem_cache_destroy(xfs_attri_cache);
+ kmem_cache_destroy(xfs_attrd_cache);
+ kmem_cache_destroy(xfs_bui_cache);
+ kmem_cache_destroy(xfs_bud_cache);
+ kmem_cache_destroy(xfs_cui_cache);
+ kmem_cache_destroy(xfs_cud_cache);
+ kmem_cache_destroy(xfs_rui_cache);
+ kmem_cache_destroy(xfs_rud_cache);
+ kmem_cache_destroy(xfs_icreate_cache);
+ kmem_cache_destroy(xfs_ili_cache);
+ kmem_cache_destroy(xfs_inode_cache);
+ kmem_cache_destroy(xfs_efi_cache);
+ kmem_cache_destroy(xfs_efd_cache);
+ kmem_cache_destroy(xfs_buf_item_cache);
+ kmem_cache_destroy(xfs_trans_cache);
+ kmem_cache_destroy(xfs_ifork_cache);
+ kmem_cache_destroy(xfs_da_state_cache);
+ xfs_defer_destroy_item_caches();
+ rcbagbt_destroy_cur_cache();
+ xfs_btree_destroy_cur_caches();
+ kmem_cache_destroy(xfs_log_ticket_cache);
+ kmem_cache_destroy(xfs_buf_cache);
}
STATIC int __init
@@ -1961,12 +2576,13 @@ xfs_init_workqueues(void)
* AGs in all the filesystems mounted. Hence use the default large
* max_active value for this workqueue.
*/
- xfs_alloc_wq = alloc_workqueue("xfsalloc",
- WQ_MEM_RECLAIM|WQ_FREEZABLE, 0);
+ xfs_alloc_wq = alloc_workqueue("xfsalloc", XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU),
+ 0);
if (!xfs_alloc_wq)
return -ENOMEM;
- xfs_discard_wq = alloc_workqueue("xfsdiscard", WQ_UNBOUND, 0);
+ xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND),
+ 0);
if (!xfs_discard_wq)
goto out_free_alloc_wq;
@@ -1990,44 +2606,41 @@ init_xfs_fs(void)
xfs_check_ondisk_structs();
+ error = xfs_dahash_test();
+ if (error)
+ return error;
+
printk(KERN_INFO XFS_VERSION_STRING " with "
XFS_BUILD_OPTIONS " enabled\n");
- xfs_extent_free_init_defer_op();
- xfs_rmap_update_init_defer_op();
- xfs_refcount_update_init_defer_op();
- xfs_bmap_update_init_defer_op();
-
xfs_dir_startup();
- error = xfs_init_zones();
+ error = xfs_init_caches();
if (error)
goto out;
error = xfs_init_workqueues();
if (error)
- goto out_destroy_zones;
+ goto out_destroy_caches;
error = xfs_mru_cache_init();
if (error)
goto out_destroy_wq;
- error = xfs_buf_init();
- if (error)
- goto out_mru_cache_uninit;
-
error = xfs_init_procfs();
if (error)
- goto out_buf_terminate;
+ goto out_mru_cache_uninit;
error = xfs_sysctl_register();
if (error)
goto out_cleanup_procfs;
+ xfs_debugfs = xfs_debugfs_mkdir("xfs", NULL);
+
xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
if (!xfs_kset) {
error = -ENOMEM;
- goto out_sysctl_unregister;
+ goto out_debugfs_unregister;
}
xfsstats.xs_kobj.kobject.kset = xfs_kset;
@@ -2043,11 +2656,15 @@ init_xfs_fs(void)
if (error)
goto out_free_stats;
+ error = xchk_global_stats_setup(xfs_debugfs);
+ if (error)
+ goto out_remove_stats_kobj;
+
#ifdef DEBUG
xfs_dbg_kobj.kobject.kset = xfs_kset;
error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
if (error)
- goto out_remove_stats_kobj;
+ goto out_remove_scrub_stats;
#endif
error = xfs_qm_init();
@@ -2064,25 +2681,26 @@ init_xfs_fs(void)
out_remove_dbg_kobj:
#ifdef DEBUG
xfs_sysfs_del(&xfs_dbg_kobj);
- out_remove_stats_kobj:
+ out_remove_scrub_stats:
#endif
+ xchk_global_stats_teardown();
+ out_remove_stats_kobj:
xfs_sysfs_del(&xfsstats.xs_kobj);
out_free_stats:
free_percpu(xfsstats.xs_stats);
out_kset_unregister:
kset_unregister(xfs_kset);
- out_sysctl_unregister:
+ out_debugfs_unregister:
+ debugfs_remove(xfs_debugfs);
xfs_sysctl_unregister();
out_cleanup_procfs:
xfs_cleanup_procfs();
- out_buf_terminate:
- xfs_buf_terminate();
out_mru_cache_uninit:
xfs_mru_cache_uninit();
out_destroy_wq:
xfs_destroy_workqueues();
- out_destroy_zones:
- xfs_destroy_zones();
+ out_destroy_caches:
+ xfs_destroy_caches();
out:
return error;
}
@@ -2095,15 +2713,16 @@ exit_xfs_fs(void)
#ifdef DEBUG
xfs_sysfs_del(&xfs_dbg_kobj);
#endif
+ xchk_global_stats_teardown();
xfs_sysfs_del(&xfsstats.xs_kobj);
free_percpu(xfsstats.xs_stats);
kset_unregister(xfs_kset);
+ debugfs_remove(xfs_debugfs);
xfs_sysctl_unregister();
xfs_cleanup_procfs();
- xfs_buf_terminate();
xfs_mru_cache_uninit();
xfs_destroy_workqueues();
- xfs_destroy_zones();
+ xfs_destroy_caches();
xfs_uuid_table_free();
}