diff options
Diffstat (limited to 'fs/xfs/libxfs/xfs_sb.c')
| -rw-r--r-- | fs/xfs/libxfs/xfs_sb.c | 1151 |
1 files changed, 839 insertions, 312 deletions
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index b5a82acd7dfe..cdd16dd805d7 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -12,88 +12,181 @@ #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_inode.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" -#include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" #include "xfs_log.h" #include "xfs_rmap_btree.h" -#include "xfs_bmap.h" #include "xfs_refcount_btree.h" #include "xfs_da_format.h" -#include "xfs_da_btree.h" +#include "xfs_health.h" +#include "xfs_ag.h" +#include "xfs_rtbitmap.h" +#include "xfs_exchrange.h" +#include "xfs_rtgroup.h" +#include "xfs_rtrmap_btree.h" +#include "xfs_rtrefcount_btree.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. */ /* - * Reference counting access wrappers to the perag structures. - * Because we never free per-ag structures, the only thing we - * have to protect against changes is the tree structure itself. + * Check that all the V4 feature bits that the V5 filesystem format requires are + * correctly set. */ -struct xfs_perag * -xfs_perag_get( - struct xfs_mount *mp, - xfs_agnumber_t agno) +static bool +xfs_sb_validate_v5_features( + struct xfs_sb *sbp) { - struct xfs_perag *pag; - int ref = 0; - - rcu_read_lock(); - pag = radix_tree_lookup(&mp->m_perag_tree, agno); - if (pag) { - ASSERT(atomic_read(&pag->pag_ref) >= 0); - ref = atomic_inc_return(&pag->pag_ref); - } - rcu_read_unlock(); - trace_xfs_perag_get(mp, agno, ref, _RET_IP_); - return pag; + /* We must not have any unknown V4 feature bits set */ + if (sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) + return false; + + /* + * The CRC bit is considered an invalid V4 flag, so we have to add it + * manually to the OKBITS mask. + */ + if (sbp->sb_features2 & ~(XFS_SB_VERSION2_OKBITS | + XFS_SB_VERSION2_CRCBIT)) + return false; + + /* Now check all the required V4 feature flags are set. */ + +#define V5_VERS_FLAGS (XFS_SB_VERSION_NLINKBIT | \ + XFS_SB_VERSION_ALIGNBIT | \ + XFS_SB_VERSION_LOGV2BIT | \ + XFS_SB_VERSION_EXTFLGBIT | \ + XFS_SB_VERSION_DIRV2BIT | \ + XFS_SB_VERSION_MOREBITSBIT) + +#define V5_FEAT_FLAGS (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ + XFS_SB_VERSION2_ATTR2BIT | \ + XFS_SB_VERSION2_PROJID32BIT | \ + XFS_SB_VERSION2_CRCBIT) + + if ((sbp->sb_versionnum & V5_VERS_FLAGS) != V5_VERS_FLAGS) + return false; + if ((sbp->sb_features2 & V5_FEAT_FLAGS) != V5_FEAT_FLAGS) + return false; + return true; } /* - * search from @first to find the next perag with the given tag set. + * We current support XFS v5 formats with known features and v4 superblocks with + * at least V2 directories. */ -struct xfs_perag * -xfs_perag_get_tag( - struct xfs_mount *mp, - xfs_agnumber_t first, - int tag) +bool +xfs_sb_good_version( + struct xfs_sb *sbp) { - struct xfs_perag *pag; - int found; - int ref; - - rcu_read_lock(); - found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, - (void **)&pag, first, 1, tag); - if (found <= 0) { - rcu_read_unlock(); - return NULL; - } - ref = atomic_inc_return(&pag->pag_ref); - rcu_read_unlock(); - trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_); - return pag; + /* + * All v5 filesystems are supported, but we must check that all the + * required v4 feature flags are enabled correctly as the code checks + * those flags and not for v5 support. + */ + if (xfs_sb_is_v5(sbp)) + return xfs_sb_validate_v5_features(sbp); + + /* versions prior to v4 are not supported */ + if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_4) + return false; + + /* We must not have any unknown v4 feature bits set */ + if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || + ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && + (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS))) + return false; + + /* V4 filesystems need v2 directories and unwritten extents */ + if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) + return false; + if (!(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT)) + return false; + + /* It's a supported v4 filesystem */ + return true; } -void -xfs_perag_put( - struct xfs_perag *pag) +uint64_t +xfs_sb_version_to_features( + struct xfs_sb *sbp) { - int ref; + uint64_t features = 0; + + /* optional V4 features */ + if (sbp->sb_rblocks > 0) + features |= XFS_FEAT_REALTIME; + if (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT) + features |= XFS_FEAT_NLINK; + if (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT) + features |= XFS_FEAT_ATTR; + if (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT) + features |= XFS_FEAT_QUOTA; + if (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT) + features |= XFS_FEAT_ALIGN; + if (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT) + features |= XFS_FEAT_LOGV2; + if (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT) + features |= XFS_FEAT_DALIGN; + if (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT) + features |= XFS_FEAT_EXTFLG; + if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) + features |= XFS_FEAT_SECTOR; + if (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT) + features |= XFS_FEAT_ASCIICI; + if (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) { + if (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT) + features |= XFS_FEAT_LAZYSBCOUNT; + if (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT) + features |= XFS_FEAT_PROJID32; + if (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE) + features |= XFS_FEAT_FTYPE; + } - ASSERT(atomic_read(&pag->pag_ref) > 0); - ref = atomic_dec_return(&pag->pag_ref); - trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); + if (!xfs_sb_is_v5(sbp)) + return features; + + /* Always on V5 features */ + features |= XFS_FEAT_ALIGN | XFS_FEAT_LOGV2 | XFS_FEAT_EXTFLG | + XFS_FEAT_LAZYSBCOUNT | XFS_FEAT_PROJID32 | + XFS_FEAT_V3INODES | XFS_FEAT_CRC | XFS_FEAT_PQUOTINO; + + /* Optional V5 features */ + if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT) + features |= XFS_FEAT_FINOBT; + if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT) + features |= XFS_FEAT_RMAPBT; + if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK) + features |= XFS_FEAT_REFLINK; + if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT) + features |= XFS_FEAT_INOBTCNT; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_FTYPE) + features |= XFS_FEAT_FTYPE; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES) + features |= XFS_FEAT_SPINODES; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID) + features |= XFS_FEAT_META_UUID; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_BIGTIME) + features |= XFS_FEAT_BIGTIME; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR) + features |= XFS_FEAT_NEEDSREPAIR; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NREXT64) + features |= XFS_FEAT_NREXT64; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_EXCHRANGE) + features |= XFS_FEAT_EXCHANGE_RANGE; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_PARENT) + features |= XFS_FEAT_PARENT; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) + features |= XFS_FEAT_METADIR; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED) + features |= XFS_FEAT_ZONED; + + return features; } /* Check all the superblock fields we care about when reading one in. */ @@ -102,7 +195,7 @@ xfs_validate_sb_read( struct xfs_mount *mp, struct xfs_sb *sbp) { - if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_5) + if (!xfs_sb_is_v5(sbp)) return 0; /* @@ -122,7 +215,7 @@ xfs_validate_sb_read( "Superblock has unknown read-only compatible features (0x%x) enabled.", (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { + if (!xfs_is_readonly(mp)) { xfs_warn(mp, "Attempted to mount read-only compatible filesystem read-write."); xfs_warn(mp, @@ -144,6 +237,73 @@ xfs_validate_sb_read( return 0; } +/* Return the number of extents covered by a single rt bitmap file */ +static xfs_rtbxlen_t +xfs_extents_per_rbm( + struct xfs_sb *sbp) +{ + if (xfs_sb_is_v5(sbp) && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) + return sbp->sb_rgextents; + return sbp->sb_rextents; +} + +/* + * Return the payload size of a single rt bitmap block (without the metadata + * header if any). + */ +static inline unsigned int +xfs_rtbmblock_size( + struct xfs_sb *sbp) +{ + if (xfs_sb_is_v5(sbp) && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) + return sbp->sb_blocksize - sizeof(struct xfs_rtbuf_blkinfo); + return sbp->sb_blocksize; +} + +static uint64_t +xfs_expected_rbmblocks( + struct xfs_sb *sbp) +{ + if (xfs_sb_is_v5(sbp) && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED)) + return 0; + return howmany_64(xfs_extents_per_rbm(sbp), + NBBY * xfs_rtbmblock_size(sbp)); +} + +/* Validate the realtime geometry */ +bool +xfs_validate_rt_geometry( + struct xfs_sb *sbp) +{ + if (xfs_sb_is_v5(sbp) && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED)) { + if (sbp->sb_rextsize != 1) + return false; + } else { + if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE || + sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) + return false; + } + + if (sbp->sb_rblocks == 0) { + if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 || + sbp->sb_rextslog != 0 || sbp->sb_frextents != 0) + return false; + return true; + } + + if (sbp->sb_rextents == 0 || + sbp->sb_rextents != div_u64(sbp->sb_rblocks, sbp->sb_rextsize) || + sbp->sb_rextslog != xfs_compute_rextslog(sbp->sb_rextents) || + sbp->sb_rbmblocks != xfs_expected_rbmblocks(sbp)) + return false; + + return true; +} + /* Check all the superblock fields we care about when writing one out. */ STATIC int xfs_validate_sb_write( @@ -161,7 +321,7 @@ xfs_validate_sb_write( * secondary superblocks, so allow this usage to continue because * we never read counters from such superblocks. */ - if (XFS_BUF_ADDR(bp) == XFS_SB_DADDR && !sbp->sb_inprogress && + if (xfs_buf_daddr(bp) == XFS_SB_DADDR && !sbp->sb_inprogress && (sbp->sb_fdblocks > sbp->sb_dblocks || !xfs_verify_icount(mp, sbp->sb_icount) || sbp->sb_ifree > sbp->sb_icount)) { @@ -169,7 +329,7 @@ xfs_validate_sb_write( return -EFSCORRUPTED; } - if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_5) + if (!xfs_sb_is_v5(sbp)) return 0; /* @@ -177,14 +337,8 @@ xfs_validate_sb_write( * the kernel cannot support since we checked for unsupported bits in * the read verifier, which means that memory is corrupt. */ - if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) { - xfs_warn(mp, -"Corruption detected in superblock compatible features (0x%x)!", - (sbp->sb_features_compat & XFS_SB_FEAT_COMPAT_UNKNOWN)); - return -EFSCORRUPTED; - } - - if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { + if (!xfs_is_readonly(mp) && + xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { xfs_alert(mp, "Corruption detected in superblock read-only compatible features (0x%x)!", (sbp->sb_features_ro_compat & @@ -218,6 +372,106 @@ xfs_validate_sb_write( return 0; } +int +xfs_compute_rgblklog( + xfs_rtxlen_t rgextents, + xfs_rgblock_t rextsize) +{ + uint64_t rgblocks = (uint64_t)rgextents * rextsize; + + return xfs_highbit64(rgblocks - 1) + 1; +} + +static int +xfs_validate_sb_rtgroups( + struct xfs_mount *mp, + struct xfs_sb *sbp) +{ + uint64_t groups; + int rgblklog; + + if (sbp->sb_rextsize == 0) { + xfs_warn(mp, +"Realtime extent size must not be zero."); + return -EINVAL; + } + + if (sbp->sb_rgextents > XFS_MAX_RGBLOCKS / sbp->sb_rextsize) { + xfs_warn(mp, +"Realtime group size (%u) must be less than %u rt extents.", + sbp->sb_rgextents, + XFS_MAX_RGBLOCKS / sbp->sb_rextsize); + return -EINVAL; + } + + if (sbp->sb_rgextents < XFS_MIN_RGEXTENTS) { + xfs_warn(mp, +"Realtime group size (%u) must be at least %u rt extents.", + sbp->sb_rgextents, XFS_MIN_RGEXTENTS); + return -EINVAL; + } + + if (sbp->sb_rgcount > XFS_MAX_RGNUMBER) { + xfs_warn(mp, +"Realtime groups (%u) must be less than %u.", + sbp->sb_rgcount, XFS_MAX_RGNUMBER); + return -EINVAL; + } + + groups = howmany_64(sbp->sb_rextents, sbp->sb_rgextents); + if (groups != sbp->sb_rgcount) { + xfs_warn(mp, +"Realtime groups (%u) do not cover the entire rt section; need (%llu) groups.", + sbp->sb_rgcount, groups); + return -EINVAL; + } + + /* Exchange-range is required for fsr to work on realtime files */ + if (!(sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_EXCHRANGE)) { + xfs_warn(mp, +"Realtime groups feature requires exchange-range support."); + return -EINVAL; + } + + rgblklog = xfs_compute_rgblklog(sbp->sb_rgextents, sbp->sb_rextsize); + if (sbp->sb_rgblklog != rgblklog) { + xfs_warn(mp, +"Realtime group log (%d) does not match expected value (%d).", + sbp->sb_rgblklog, rgblklog); + return -EINVAL; + } + + return 0; +} + +static int +xfs_validate_sb_zoned( + struct xfs_mount *mp, + struct xfs_sb *sbp) +{ + if (sbp->sb_frextents != 0) { + xfs_warn(mp, +"sb_frextents must be zero for zoned file systems."); + return -EINVAL; + } + + if (sbp->sb_rtstart && sbp->sb_rtstart < sbp->sb_dblocks) { + xfs_warn(mp, +"sb_rtstart (%lld) overlaps sb_dblocks (%lld).", + sbp->sb_rtstart, sbp->sb_dblocks); + return -EINVAL; + } + + if (sbp->sb_rtreserved && sbp->sb_rtreserved >= sbp->sb_rblocks) { + xfs_warn(mp, +"sb_rtreserved (%lld) larger than sb_rblocks (%lld).", + sbp->sb_rtreserved, sbp->sb_rblocks); + return -EINVAL; + } + + return 0; +} + /* Check the validity of the SB. */ STATIC int xfs_validate_sb_common( @@ -225,48 +479,97 @@ xfs_validate_sb_common( struct xfs_buf *bp, struct xfs_sb *sbp) { + struct xfs_dsb *dsb = bp->b_addr; uint32_t agcount = 0; uint32_t rem; + bool has_dalign; + int error; - if (sbp->sb_magicnum != XFS_SB_MAGIC) { - xfs_warn(mp, "bad magic number"); + if (!xfs_verify_magic(bp, dsb->sb_magicnum)) { + xfs_warn(mp, +"Superblock has bad magic number 0x%x. Not an XFS filesystem?", + be32_to_cpu(dsb->sb_magicnum)); return -EWRONGFS; } if (!xfs_sb_good_version(sbp)) { - xfs_warn(mp, "bad version"); + xfs_warn(mp, +"Superblock has unknown features enabled or corrupted feature masks."); return -EWRONGFS; } - if (xfs_sb_version_has_pquotino(sbp)) { - if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) { + /* + * Validate feature flags and state + */ + if (xfs_sb_is_v5(sbp)) { + if (sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) { xfs_notice(mp, - "Version 5 of Super block has XFS_OQUOTA bits."); +"Block size (%u bytes) too small for Version 5 superblock (minimum %d bytes)", + sbp->sb_blocksize, XFS_MIN_CRC_BLOCKSIZE); return -EFSCORRUPTED; } - } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | - XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { + + /* V5 has a separate project quota inode */ + if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) { xfs_notice(mp, -"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits."); + "Version 5 of Super block has XFS_OQUOTA bits."); return -EFSCORRUPTED; - } + } - /* - * Full inode chunks must be aligned to inode chunk size when - * sparse inodes are enabled to support the sparse chunk - * allocation algorithm and prevent overlapping inode records. - */ - if (xfs_sb_version_hassparseinodes(sbp)) { - uint32_t align; + /* + * Full inode chunks must be aligned to inode chunk size when + * sparse inodes are enabled to support the sparse chunk + * allocation algorithm and prevent overlapping inode records. + */ + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES) { + uint32_t align; - align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize - >> sbp->sb_blocklog; - if (sbp->sb_inoalignmt != align) { - xfs_warn(mp, + align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize + >> sbp->sb_blocklog; + if (sbp->sb_inoalignmt != align) { + xfs_warn(mp, "Inode block alignment (%u) must match chunk size (%u) for sparse inodes.", - sbp->sb_inoalignmt, align); + sbp->sb_inoalignmt, align); + return -EINVAL; + } + + if (sbp->sb_spino_align && + (sbp->sb_spino_align > sbp->sb_inoalignmt || + (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0)) { + xfs_warn(mp, +"Sparse inode alignment (%u) is invalid, must be integer factor of (%u).", + sbp->sb_spino_align, + sbp->sb_inoalignmt); + return -EINVAL; + } + } else if (sbp->sb_spino_align) { + xfs_warn(mp, + "Sparse inode alignment (%u) should be zero.", + sbp->sb_spino_align); return -EINVAL; } + + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) { + if (memchr_inv(sbp->sb_pad, 0, sizeof(sbp->sb_pad))) { + xfs_warn(mp, +"Metadir superblock padding fields must be zero."); + return -EINVAL; + } + + error = xfs_validate_sb_rtgroups(mp, sbp); + if (error) + return error; + } + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED) { + error = xfs_validate_sb_zoned(mp, sbp); + if (error) + return error; + } + } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | + XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { + xfs_notice(mp, +"Superblock earlier than Version 5 has XFS_{P|G}QUOTA_{ENFD|CHKD} bits."); + return -EFSCORRUPTED; } if (unlikely( @@ -314,7 +617,6 @@ xfs_validate_sb_common( sbp->sb_inodelog < XFS_DINODE_MIN_LOG || sbp->sb_inodelog > XFS_DINODE_MAX_LOG || sbp->sb_inodesize != (1 << sbp->sb_inodelog) || - sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE || sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES || XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES || @@ -332,39 +634,83 @@ xfs_validate_sb_common( return -EFSCORRUPTED; } - if (sbp->sb_unit) { - if (!xfs_sb_version_hasdalign(sbp) || - sbp->sb_unit > sbp->sb_width || - (sbp->sb_width % sbp->sb_unit) != 0) { - xfs_notice(mp, "SB stripe unit sanity check failed"); + /* + * Logs that are too large are not supported at all. Reject them + * outright. Logs that are too small are tolerated on v4 filesystems, + * but we can only check that when mounting the log. Hence we skip + * those checks here. + */ + if (sbp->sb_logblocks > XFS_MAX_LOG_BLOCKS) { + xfs_notice(mp, + "Log size 0x%x blocks too large, maximum size is 0x%llx blocks", + sbp->sb_logblocks, XFS_MAX_LOG_BLOCKS); + return -EFSCORRUPTED; + } + + if (XFS_FSB_TO_B(mp, sbp->sb_logblocks) > XFS_MAX_LOG_BYTES) { + xfs_warn(mp, + "log size 0x%llx bytes too large, maximum size is 0x%llx bytes", + XFS_FSB_TO_B(mp, sbp->sb_logblocks), + XFS_MAX_LOG_BYTES); + return -EFSCORRUPTED; + } + + /* + * Do not allow filesystems with corrupted log sector or stripe units to + * be mounted. We cannot safely size the iclogs or write to the log if + * the log stripe unit is not valid. + */ + if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) { + if (sbp->sb_logsectsize != (1U << sbp->sb_logsectlog)) { + xfs_notice(mp, + "log sector size in bytes/log2 (0x%x/0x%x) must match", + sbp->sb_logsectsize, 1U << sbp->sb_logsectlog); return -EFSCORRUPTED; } - } else if (xfs_sb_version_hasdalign(sbp)) { - xfs_notice(mp, "SB stripe alignment sanity check failed"); - return -EFSCORRUPTED; - } else if (sbp->sb_width) { - xfs_notice(mp, "SB stripe width sanity check failed"); + } else if (sbp->sb_logsectsize || sbp->sb_logsectlog) { + xfs_notice(mp, + "log sector size in bytes/log2 (0x%x/0x%x) are not zero", + sbp->sb_logsectsize, sbp->sb_logsectlog); return -EFSCORRUPTED; } + if (sbp->sb_logsunit > 1) { + if (sbp->sb_logsunit % sbp->sb_blocksize) { + xfs_notice(mp, + "log stripe unit 0x%x bytes must be a multiple of block size", + sbp->sb_logsunit); + return -EFSCORRUPTED; + } + if (sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE) { + xfs_notice(mp, + "log stripe unit 0x%x bytes over maximum size (0x%x bytes)", + sbp->sb_logsunit, XLOG_MAX_RECORD_BSIZE); + return -EFSCORRUPTED; + } + } - if (xfs_sb_version_hascrc(&mp->m_sb) && - sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) { - xfs_notice(mp, "v5 SB sanity check failed"); + if (!xfs_validate_rt_geometry(sbp)) { + xfs_notice(mp, + "realtime %sgeometry check failed", + sbp->sb_rblocks ? "" : "zeroed "); return -EFSCORRUPTED; } /* - * Until this is fixed only page-sized or smaller data blocks work. + * Either (sb_unit and !hasdalign) or (!sb_unit and hasdalign) + * would imply the image is corrupted. */ - if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { - xfs_warn(mp, - "File system with blocksize %d bytes. " - "Only pagesize (%ld) or less will currently work.", - sbp->sb_blocksize, PAGE_SIZE); - return -ENOSYS; + has_dalign = sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT; + if (!!sbp->sb_unit ^ has_dalign) { + xfs_notice(mp, "SB stripe alignment sanity check failed"); + return -EFSCORRUPTED; } + if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit), + XFS_FSB_TO_B(mp, sbp->sb_width), 0, + xfs_buf_daddr(bp) == XFS_SB_DADDR, false)) + return -EFSCORRUPTED; + /* * Currently only very few inode sizes are supported. */ @@ -380,28 +726,20 @@ xfs_validate_sb_common( return -ENOSYS; } - if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || - xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { - xfs_warn(mp, - "file system too large to be mounted on this system."); - return -EFBIG; - } - - /* - * Don't touch the filesystem if a user tool thinks it owns the primary - * superblock. mkfs doesn't clear the flag from secondary supers, so - * we don't check them at all. - */ - if (XFS_BUF_ADDR(bp) == XFS_SB_DADDR && sbp->sb_inprogress) { - xfs_warn(mp, "Offline file system operation in progress!"); - return -EFSCORRUPTED; - } return 0; } void xfs_sb_quota_from_disk(struct xfs_sb *sbp) { + if (xfs_sb_is_v5(sbp) && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) { + sbp->sb_uquotino = NULLFSINO; + sbp->sb_gquotino = NULLFSINO; + sbp->sb_pquotino = NULLFSINO; + return; + } + /* * older mkfs doesn't initialize quota inodes to NULLFSINO. This * leads to in-core values having two different values for a quota @@ -424,7 +762,7 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp) * We need to do these manipilations only if we are working * with an older version of on-disk superblock. */ - if (xfs_sb_version_has_pquotino(sbp)) + if (xfs_sb_is_v5(sbp)) return; if (sbp->sb_qflags & XFS_OQUOTA_ENFD) @@ -454,7 +792,7 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp) static void __xfs_sb_from_disk( struct xfs_sb *to, - xfs_dsb_t *from, + struct xfs_dsb *from, bool convert_xquota) { to->sb_magicnum = be32_to_cpu(from->sb_magicnum); @@ -517,19 +855,42 @@ __xfs_sb_from_disk( * sb_meta_uuid is only on disk if it differs from sb_uuid and the * feature flag is set; if not set we keep it only in memory. */ - if (xfs_sb_version_hasmetauuid(to)) + if (xfs_sb_is_v5(to) && + (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID)) uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); else uuid_copy(&to->sb_meta_uuid, &from->sb_uuid); /* Convert on-disk flags to in-memory flags? */ if (convert_xquota) xfs_sb_quota_from_disk(to); + + if (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) { + to->sb_metadirino = be64_to_cpu(from->sb_metadirino); + to->sb_rgblklog = from->sb_rgblklog; + memcpy(to->sb_pad, from->sb_pad, sizeof(to->sb_pad)); + to->sb_rgcount = be32_to_cpu(from->sb_rgcount); + to->sb_rgextents = be32_to_cpu(from->sb_rgextents); + to->sb_rbmino = NULLFSINO; + to->sb_rsumino = NULLFSINO; + } else { + to->sb_metadirino = NULLFSINO; + to->sb_rgcount = 1; + to->sb_rgextents = 0; + } + + if (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED) { + to->sb_rtstart = be64_to_cpu(from->sb_rtstart); + to->sb_rtreserved = be64_to_cpu(from->sb_rtreserved); + } else { + to->sb_rtstart = 0; + to->sb_rtreserved = 0; + } } void xfs_sb_from_disk( struct xfs_sb *to, - xfs_dsb_t *from) + struct xfs_dsb *from) { __xfs_sb_from_disk(to, from, true); } @@ -541,8 +902,22 @@ xfs_sb_quota_to_disk( { uint16_t qflags = from->sb_qflags; + if (xfs_sb_is_v5(from) && + (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) { + to->sb_qflags = cpu_to_be16(from->sb_qflags); + to->sb_uquotino = cpu_to_be64(0); + to->sb_gquotino = cpu_to_be64(0); + to->sb_pquotino = cpu_to_be64(0); + return; + } + to->sb_uquotino = cpu_to_be64(from->sb_uquotino); - if (xfs_sb_version_has_pquotino(from)) { + + /* + * The in-memory superblock quota state matches the v5 on-disk format so + * just write them out and return + */ + if (xfs_sb_is_v5(from)) { to->sb_qflags = cpu_to_be16(from->sb_qflags); to->sb_gquotino = cpu_to_be64(from->sb_gquotino); to->sb_pquotino = cpu_to_be64(from->sb_pquotino); @@ -550,9 +925,9 @@ xfs_sb_quota_to_disk( } /* - * The in-core version of sb_qflags do not have XFS_OQUOTA_* - * flags, whereas the on-disk version does. So, convert incore - * XFS_{PG}QUOTA_* flags to on-disk XFS_OQUOTA_* flags. + * For older superblocks (v4), the in-core version of sb_qflags do not + * have XFS_OQUOTA_* flags, whereas the on-disk version does. So, + * convert incore XFS_{PG}QUOTA_* flags to on-disk XFS_OQUOTA_* flags. */ qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); @@ -572,7 +947,7 @@ xfs_sb_quota_to_disk( * disk. If neither are active, we should NULL the inode. * * In all cases, the separate pquotino must remain 0 because it - * it beyond the "end" of the valid non-pquotino superblock. + * is beyond the "end" of the valid non-pquotino superblock. */ if (from->sb_qflags & XFS_GQUOTA_ACCT) to->sb_gquotino = cpu_to_be64(from->sb_gquotino); @@ -652,18 +1027,34 @@ xfs_sb_to_disk( to->sb_features2 = cpu_to_be32(from->sb_features2); to->sb_bad_features2 = cpu_to_be32(from->sb_bad_features2); - if (xfs_sb_version_hascrc(from)) { - to->sb_features_compat = cpu_to_be32(from->sb_features_compat); - to->sb_features_ro_compat = - cpu_to_be32(from->sb_features_ro_compat); - to->sb_features_incompat = - cpu_to_be32(from->sb_features_incompat); - to->sb_features_log_incompat = - cpu_to_be32(from->sb_features_log_incompat); - to->sb_spino_align = cpu_to_be32(from->sb_spino_align); - to->sb_lsn = cpu_to_be64(from->sb_lsn); - if (xfs_sb_version_hasmetauuid(from)) - uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); + if (!xfs_sb_is_v5(from)) + return; + + to->sb_features_compat = cpu_to_be32(from->sb_features_compat); + to->sb_features_ro_compat = + cpu_to_be32(from->sb_features_ro_compat); + to->sb_features_incompat = + cpu_to_be32(from->sb_features_incompat); + to->sb_features_log_incompat = + cpu_to_be32(from->sb_features_log_incompat); + to->sb_spino_align = cpu_to_be32(from->sb_spino_align); + to->sb_lsn = cpu_to_be64(from->sb_lsn); + if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID) + uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); + + if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) { + to->sb_metadirino = cpu_to_be64(from->sb_metadirino); + to->sb_rgblklog = from->sb_rgblklog; + memset(to->sb_pad, 0, sizeof(to->sb_pad)); + to->sb_rgcount = cpu_to_be32(from->sb_rgcount); + to->sb_rgextents = cpu_to_be32(from->sb_rgextents); + to->sb_rbmino = cpu_to_be64(0); + to->sb_rsumino = cpu_to_be64(0); + } + + if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED) { + to->sb_rtstart = cpu_to_be64(from->sb_rtstart); + to->sb_rtreserved = cpu_to_be64(from->sb_rtreserved); } } @@ -684,8 +1075,8 @@ xfs_sb_read_verify( struct xfs_buf *bp) { struct xfs_sb sb; - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); + struct xfs_mount *mp = bp->b_mount; + struct xfs_dsb *dsb = bp->b_addr; int error; /* @@ -699,8 +1090,8 @@ xfs_sb_read_verify( if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) { /* Only fail bad secondaries on a known V5 filesystem */ - if (bp->b_bn == XFS_SB_DADDR || - xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_buf_daddr(bp) == XFS_SB_DADDR || + xfs_has_crc(mp)) { error = -EFSBADCRC; goto out_error; } @@ -711,7 +1102,7 @@ xfs_sb_read_verify( * Check all the superblock fields. Don't byteswap the xquota flags * because _verify_common checks the on-disk values. */ - __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false); + __xfs_sb_from_disk(&sb, dsb, false); error = xfs_validate_sb_common(mp, bp, &sb); if (error) goto out_error; @@ -734,7 +1125,7 @@ static void xfs_sb_quiet_read_verify( struct xfs_buf *bp) { - struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); + struct xfs_dsb *dsb = bp->b_addr; if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) { /* XFS filesystem, verify noisily! */ @@ -750,15 +1141,16 @@ xfs_sb_write_verify( struct xfs_buf *bp) { struct xfs_sb sb; - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; + struct xfs_dsb *dsb = bp->b_addr; int error; /* * Check all the superblock fields. Don't byteswap the xquota flags * because _verify_common checks the on-disk values. */ - __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false); + __xfs_sb_from_disk(&sb, dsb, false); error = xfs_validate_sb_common(mp, bp, &sb); if (error) goto out_error; @@ -766,11 +1158,11 @@ xfs_sb_write_verify( if (error) goto out_error; - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_sb_is_v5(&sb)) return; if (bip) - XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); + dsb->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF); return; @@ -781,150 +1173,132 @@ out_error: const struct xfs_buf_ops xfs_sb_buf_ops = { .name = "xfs_sb", + .magic = { cpu_to_be32(XFS_SB_MAGIC), cpu_to_be32(XFS_SB_MAGIC) }, .verify_read = xfs_sb_read_verify, .verify_write = xfs_sb_write_verify, }; const struct xfs_buf_ops xfs_sb_quiet_buf_ops = { .name = "xfs_sb_quiet", + .magic = { cpu_to_be32(XFS_SB_MAGIC), cpu_to_be32(XFS_SB_MAGIC) }, .verify_read = xfs_sb_quiet_read_verify, .verify_write = xfs_sb_write_verify, }; +/* Compute cached rt geometry from the incore sb. */ +void +xfs_sb_mount_rextsize( + struct xfs_mount *mp, + struct xfs_sb *sbp) +{ + struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG]; + + mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize); + mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize); + + if (xfs_sb_is_v5(sbp) && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) { + rgs->blocks = sbp->sb_rgextents * sbp->sb_rextsize; + rgs->blklog = mp->m_sb.sb_rgblklog; + rgs->blkmask = xfs_mask32lo(mp->m_sb.sb_rgblklog); + rgs->start_fsb = mp->m_sb.sb_rtstart; + if (xfs_sb_has_incompat_feature(sbp, + XFS_SB_FEAT_INCOMPAT_ZONE_GAPS)) + rgs->has_daddr_gaps = true; + } else { + rgs->blocks = 0; + rgs->blklog = 0; + rgs->blkmask = (uint64_t)-1; + } +} + +/* Update incore sb rt extent size, then recompute the cached rt geometry. */ +void +xfs_mount_sb_set_rextsize( + struct xfs_mount *mp, + struct xfs_sb *sbp, + xfs_agblock_t rextsize) +{ + sbp->sb_rextsize = rextsize; + if (xfs_sb_is_v5(sbp) && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) + sbp->sb_rgblklog = xfs_compute_rgblklog(sbp->sb_rgextents, + rextsize); + + xfs_sb_mount_rextsize(mp, sbp); +} + /* * xfs_mount_common * * Mount initialization code establishing various mount * fields from the superblock associated with the given - * mount structure + * mount structure. + * + * Inode geometry are calculated in xfs_ialloc_setup_geometry. */ void xfs_sb_mount_common( - struct xfs_mount *mp, - struct xfs_sb *sbp) + struct xfs_mount *mp, + struct xfs_sb *sbp) { - mp->m_agfrotor = mp->m_agirotor = 0; + struct xfs_groups *ags = &mp->m_groups[XG_TYPE_AG]; + + mp->m_agfrotor = 0; + atomic_set(&mp->m_agirotor, 0); mp->m_maxagi = mp->m_sb.sb_agcount; mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; - mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; mp->m_blockmask = sbp->sb_blocksize - 1; - mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; - mp->m_blockwmask = mp->m_blockwsize - 1; + mp->m_blockwsize = xfs_rtbmblock_size(sbp) >> XFS_WORDLOG; + mp->m_rtx_per_rbmblock = mp->m_blockwsize << XFS_NBWORDLOG; + + ags->blocks = mp->m_sb.sb_agblocks; + ags->blklog = mp->m_sb.sb_agblklog; + ags->blkmask = xfs_mask32lo(mp->m_sb.sb_agblklog); - mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1); - mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); + xfs_sb_mount_rextsize(mp, sbp); + + mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, true); + mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2; - mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); - mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); - mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2; - mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2; - - mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1); - mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0); + mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, true); + mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2; - mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 1); - mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 0); + mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, true); + mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2; mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2; - mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, true); - mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, false); + mp->m_rtrmap_mxr[0] = xfs_rtrmapbt_maxrecs(mp, sbp->sb_blocksize, true); + mp->m_rtrmap_mxr[1] = xfs_rtrmapbt_maxrecs(mp, sbp->sb_blocksize, false); + mp->m_rtrmap_mnr[0] = mp->m_rtrmap_mxr[0] / 2; + mp->m_rtrmap_mnr[1] = mp->m_rtrmap_mxr[1] / 2; + + mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, true); + mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_refc_mnr[0] = mp->m_refc_mxr[0] / 2; mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2; - mp->m_bsize = XFS_FSB_TO_BB(mp, 1); - mp->m_ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK, - sbp->sb_inopblock); - mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; + mp->m_rtrefc_mxr[0] = xfs_rtrefcountbt_maxrecs(mp, sbp->sb_blocksize, + true); + mp->m_rtrefc_mxr[1] = xfs_rtrefcountbt_maxrecs(mp, sbp->sb_blocksize, + false); + mp->m_rtrefc_mnr[0] = mp->m_rtrefc_mxr[0] / 2; + mp->m_rtrefc_mnr[1] = mp->m_rtrefc_mxr[1] / 2; - if (sbp->sb_spino_align) - mp->m_ialloc_min_blks = sbp->sb_spino_align; - else - mp->m_ialloc_min_blks = mp->m_ialloc_blks; + mp->m_bsize = XFS_FSB_TO_BB(mp, 1); mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp); } /* - * xfs_initialize_perag_data - * - * Read in each per-ag structure so we can count up the number of - * allocated inodes, free inodes and used filesystem blocks as this - * information is no longer persistent in the superblock. Once we have - * this information, write it into the in-core superblock structure. - */ -int -xfs_initialize_perag_data( - struct xfs_mount *mp, - xfs_agnumber_t agcount) -{ - xfs_agnumber_t index; - xfs_perag_t *pag; - xfs_sb_t *sbp = &mp->m_sb; - uint64_t ifree = 0; - uint64_t ialloc = 0; - uint64_t bfree = 0; - uint64_t bfreelst = 0; - uint64_t btree = 0; - uint64_t fdblocks; - int error; - - for (index = 0; index < agcount; index++) { - /* - * read the agf, then the agi. This gets us - * all the information we need and populates the - * per-ag structures for us. - */ - error = xfs_alloc_pagf_init(mp, NULL, index, 0); - if (error) - return error; - - error = xfs_ialloc_pagi_init(mp, NULL, index); - if (error) - return error; - pag = xfs_perag_get(mp, index); - ifree += pag->pagi_freecount; - ialloc += pag->pagi_count; - bfree += pag->pagf_freeblks; - bfreelst += pag->pagf_flcount; - btree += pag->pagf_btreeblks; - xfs_perag_put(pag); - } - fdblocks = bfree + bfreelst + btree; - - /* - * If the new summary counts are obviously incorrect, fail the - * mount operation because that implies the AGFs are also corrupt. - * Clear BAD_SUMMARY so that we don't unmount with a dirty log, which - * will prevent xfs_repair from fixing anything. - */ - if (fdblocks > sbp->sb_dblocks || ifree > ialloc) { - xfs_alert(mp, "AGF corruption. Please run xfs_repair."); - error = -EFSCORRUPTED; - goto out; - } - - /* Overwrite incore superblock counters with just-read data */ - spin_lock(&mp->m_sb_lock); - sbp->sb_ifree = ifree; - sbp->sb_icount = ialloc; - sbp->sb_fdblocks = fdblocks; - spin_unlock(&mp->m_sb_lock); - - xfs_reinit_percpu_counters(mp); -out: - mp->m_flags &= ~XFS_MOUNT_BAD_SUMMARY; - return error; -} - -/* * xfs_log_sb() can be used to copy arbitrary changes to the in-core superblock * into the superblock buffer to be logged. It does not provide the higher * level of locking that is needed to protect the in-core superblock from @@ -935,15 +1309,38 @@ xfs_log_sb( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); + struct xfs_buf *bp = xfs_trans_getsb(tp); + + /* + * Lazy sb counters don't update the in-core superblock so do that now. + * If this is at unmount, the counters will be exactly correct, but at + * any other time they will only be ballpark correct because of + * reservations that have been taken out percpu counters. If we have an + * unclean shutdown, this will be corrected by log recovery rebuilding + * the counters from the AGF block counts. + */ + if (xfs_has_lazysbcount(mp)) { + mp->m_sb.sb_icount = percpu_counter_sum_positive(&mp->m_icount); + mp->m_sb.sb_ifree = min_t(uint64_t, + percpu_counter_sum_positive(&mp->m_ifree), + mp->m_sb.sb_icount); + mp->m_sb.sb_fdblocks = xfs_sum_freecounter(mp, XC_FREE_BLOCKS); + } - mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); - mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); - mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); + /* + * sb_frextents was added to the lazy sb counters when the rt groups + * feature was introduced. This counter can go negative due to the way + * we handle nearly-lockless reservations, so we must use the _positive + * variant here to avoid writing out nonsense frextents. + */ + if (xfs_has_rtgroups(mp) && !xfs_has_zoned(mp)) { + mp->m_sb.sb_frextents = + xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS); + } - xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); + xfs_sb_to_disk(bp->b_addr, &mp->m_sb); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); - xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb)); + xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1); } /* @@ -990,18 +1387,18 @@ int xfs_update_secondary_sbs( struct xfs_mount *mp) { - xfs_agnumber_t agno; + struct xfs_perag *pag = NULL; int saved_error = 0; int error = 0; LIST_HEAD (buffer_list); /* update secondary superblocks. */ - for (agno = 1; agno < mp->m_sb.sb_agcount; agno++) { + while ((pag = xfs_perag_next_from(mp, pag, 1))) { struct xfs_buf *bp; - bp = xfs_buf_get(mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), - XFS_FSS_TO_BB(mp, 1), 0); + error = xfs_buf_get(mp->m_ddev_targp, + XFS_AG_DADDR(mp, pag_agno(pag), XFS_SB_DADDR), + XFS_FSS_TO_BB(mp, 1), &bp); /* * If we get an error reading or writing alternate superblocks, * continue. xfs_repair chooses the "best" superblock based @@ -1009,43 +1406,39 @@ xfs_update_secondary_sbs( * superblocks un-updated than updated, and xfs_repair may * pick them over the properly-updated primary. */ - if (!bp) { + if (error) { xfs_warn(mp, "error allocating secondary superblock for ag %d", - agno); + pag_agno(pag)); if (!saved_error) - saved_error = -ENOMEM; + saved_error = error; continue; } bp->b_ops = &xfs_sb_buf_ops; xfs_buf_oneshot(bp); xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); - xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); + xfs_sb_to_disk(bp->b_addr, &mp->m_sb); xfs_buf_delwri_queue(bp, &buffer_list); xfs_buf_relse(bp); /* don't hold too many buffers at once */ - if (agno % 16) + if (pag_agno(pag) % 16) continue; error = xfs_buf_delwri_submit(&buffer_list); if (error) { xfs_warn(mp, "write error %d updating a secondary superblock near ag %d", - error, agno); + error, pag_agno(pag)); if (!saved_error) saved_error = error; continue; } } error = xfs_buf_delwri_submit(&buffer_list); - if (error) { - xfs_warn(mp, - "write error %d updating a secondary superblock near ag %d", - error, agno); - } - + if (error) + xfs_warn(mp, "error %d writing secondary superblocks", error); return saved_error ? saved_error : error; } @@ -1055,19 +1448,26 @@ xfs_update_secondary_sbs( */ int xfs_sync_sb_buf( - struct xfs_mount *mp) + struct xfs_mount *mp, + bool update_rtsb) { struct xfs_trans *tp; struct xfs_buf *bp; + struct xfs_buf *rtsb_bp = NULL; int error; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0, 0, &tp); if (error) return error; - bp = xfs_trans_getsb(tp, mp, 0); + bp = xfs_trans_getsb(tp); xfs_log_sb(tp); xfs_trans_bhold(tp, bp); + if (update_rtsb) { + rtsb_bp = xfs_log_rtsb(tp, bp); + if (rtsb_bp) + xfs_trans_bhold(tp, rtsb_bp); + } xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); if (error) @@ -1076,17 +1476,23 @@ xfs_sync_sb_buf( * write out the sb buffer to get the changes to disk */ error = xfs_bwrite(bp); + if (!error && rtsb_bp) + error = xfs_bwrite(rtsb_bp); out: + if (rtsb_bp) + xfs_buf_relse(rtsb_bp); xfs_buf_relse(bp); return error; } -int +void xfs_fs_geometry( - struct xfs_sb *sbp, + struct xfs_mount *mp, struct xfs_fsop_geom *geo, int struct_version) { + struct xfs_sb *sbp = &mp->m_sb; + memset(geo, 0, sizeof(struct xfs_fsop_geom)); geo->blocksize = sbp->sb_blocksize; @@ -1105,64 +1511,89 @@ xfs_fs_geometry( memcpy(geo->uuid, &sbp->sb_uuid, sizeof(sbp->sb_uuid)); if (struct_version < 2) - return 0; + return; geo->sunit = sbp->sb_unit; geo->swidth = sbp->sb_width; if (struct_version < 3) - return 0; + return; geo->version = XFS_FSOP_GEOM_VERSION; geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | XFS_FSOP_GEOM_FLAGS_DIRV2 | - XFS_FSOP_GEOM_FLAGS_EXTFLG; - if (xfs_sb_version_hasattr(sbp)) + XFS_FSOP_GEOM_FLAGS_EXTFLG | + XFS_FSOP_GEOM_FLAGS_ATTR2; + if (xfs_has_attr(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR; - if (xfs_sb_version_hasquota(sbp)) + if (xfs_has_quota(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_QUOTA; - if (xfs_sb_version_hasalign(sbp)) + if (xfs_has_align(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN; - if (xfs_sb_version_hasdalign(sbp)) + if (xfs_has_dalign(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN; - if (xfs_sb_version_hassector(sbp)) - geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR; - if (xfs_sb_version_hasasciici(sbp)) + if (xfs_has_asciici(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI; - if (xfs_sb_version_haslazysbcount(sbp)) + if (xfs_has_lazysbcount(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB; - if (xfs_sb_version_hasattr2(sbp)) - geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2; - if (xfs_sb_version_hasprojid32bit(sbp)) + if (xfs_has_projid32(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32; - if (xfs_sb_version_hascrc(sbp)) + if (xfs_has_crc(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_V5SB; - if (xfs_sb_version_hasftype(sbp)) + if (xfs_has_ftype(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_FTYPE; - if (xfs_sb_version_hasfinobt(sbp)) + if (xfs_has_finobt(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_FINOBT; - if (xfs_sb_version_hassparseinodes(sbp)) + if (xfs_has_sparseinodes(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_SPINODES; - if (xfs_sb_version_hasrmapbt(sbp)) + if (xfs_has_rmapbt(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT; - if (xfs_sb_version_hasreflink(sbp)) + if (xfs_has_reflink(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK; - if (xfs_sb_version_hassector(sbp)) + if (xfs_has_bigtime(mp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_BIGTIME; + if (xfs_has_inobtcounts(mp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_INOBTCNT; + if (xfs_has_parent(mp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_PARENT; + if (xfs_has_sector(mp)) { + geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR; geo->logsectsize = sbp->sb_logsectsize; - else + } else { geo->logsectsize = BBSIZE; + } + if (xfs_has_large_extent_counts(mp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_NREXT64; + if (xfs_has_exchange_range(mp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE; + if (xfs_has_metadir(mp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_METADIR; + if (xfs_has_zoned(mp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_ZONED; geo->rtsectsize = sbp->sb_blocksize; geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); if (struct_version < 4) - return 0; + return; - if (xfs_sb_version_haslogv2(sbp)) + if (xfs_has_logv2(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_LOGV2; geo->logsunit = sbp->sb_logsunit; - return 0; + if (struct_version < 5) + return; + + geo->version = XFS_FSOP_GEOM_VERSION_V5; + + if (xfs_has_rtgroups(mp)) { + geo->rgcount = sbp->sb_rgcount; + geo->rgextents = sbp->sb_rgextents; + } + if (xfs_has_zoned(mp)) { + geo->rtstart = sbp->sb_rtstart; + geo->rtreserved = sbp->sb_rtreserved; + } } /* Read a secondary superblock. */ @@ -1180,6 +1611,8 @@ xfs_sb_read_secondary( error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)), XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops); + if (xfs_metadata_is_sick(error)) + xfs_agno_mark_sick(mp, agno, XFS_SICK_AG_SB); if (error) return error; xfs_buf_set_ref(bp, XFS_SSB_REF); @@ -1196,15 +1629,109 @@ xfs_sb_get_secondary( struct xfs_buf **bpp) { struct xfs_buf *bp; + int error; ASSERT(agno != 0 && agno != NULLAGNUMBER); - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)), - XFS_FSS_TO_BB(mp, 1), 0); - if (!bp) - return -ENOMEM; + XFS_FSS_TO_BB(mp, 1), 0, &bp); + if (error) + return error; bp->b_ops = &xfs_sb_buf_ops; xfs_buf_oneshot(bp); *bpp = bp; return 0; } + +/* + * sunit, swidth, sectorsize(optional with 0) should be all in bytes, so users + * won't be confused by values in error messages. This function returns false + * if the stripe geometry is invalid and the caller is unable to repair the + * stripe configuration later in the mount process. + */ +bool +xfs_validate_stripe_geometry( + struct xfs_mount *mp, + __s64 sunit, + __s64 swidth, + int sectorsize, + bool may_repair, + bool silent) +{ + if (swidth > INT_MAX) { + if (!silent) + xfs_notice(mp, +"stripe width (%lld) is too large", swidth); + goto check_override; + } + + if (sunit > swidth) { + if (!silent) + xfs_notice(mp, +"stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth); + goto check_override; + } + + if (sectorsize && (int)sunit % sectorsize) { + if (!silent) + xfs_notice(mp, +"stripe unit (%lld) must be a multiple of the sector size (%d)", + sunit, sectorsize); + goto check_override; + } + + if (sunit && !swidth) { + if (!silent) + xfs_notice(mp, +"invalid stripe unit (%lld) and stripe width of 0", sunit); + goto check_override; + } + + if (!sunit && swidth) { + if (!silent) + xfs_notice(mp, +"invalid stripe width (%lld) and stripe unit of 0", swidth); + goto check_override; + } + + if (sunit && (int)swidth % (int)sunit) { + if (!silent) + xfs_notice(mp, +"stripe width (%lld) must be a multiple of the stripe unit (%lld)", + swidth, sunit); + goto check_override; + } + return true; + +check_override: + if (!may_repair) + return false; + /* + * During mount, mp->m_dalign will not be set unless the sunit mount + * option was set. If it was set, ignore the bad stripe alignment values + * and allow the validation and overwrite later in the mount process to + * attempt to overwrite the bad stripe alignment values with the values + * supplied by mount options. + */ + if (!mp->m_dalign) + return false; + if (!silent) + xfs_notice(mp, +"Will try to correct with specified mount options sunit (%d) and swidth (%d)", + BBTOB(mp->m_dalign), BBTOB(mp->m_swidth)); + return true; +} + +/* + * Compute the maximum level number of the realtime summary file, as defined by + * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct + * use of rt volumes with more than 2^32 extents. + */ +uint8_t +xfs_compute_rextslog( + xfs_rtbxlen_t rtextents) +{ + if (!rtextents) + return 0; + return xfs_highbit64(rtextents); +} |
