diff options
Diffstat (limited to 'fs/xfs/xfs_mount.c')
-rw-r--r-- | fs/xfs/xfs_mount.c | 557 |
1 files changed, 428 insertions, 129 deletions
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index df370eb5dc15..29276fe60df9 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -34,7 +34,13 @@ #include "xfs_health.h" #include "xfs_trace.h" #include "xfs_ag.h" +#include "xfs_rtbitmap.h" +#include "xfs_metafile.h" +#include "xfs_rtgroup.h" +#include "xfs_rtrmap_btree.h" +#include "xfs_rtrefcount_btree.h" #include "scrub/stats.h" +#include "xfs_zone_alloc.h" static DEFINE_MUTEX(xfs_uuid_table_mutex); static int xfs_uuid_table_size; @@ -131,11 +137,15 @@ xfs_sb_validate_fsb_count( xfs_sb_t *sbp, uint64_t nblocks) { - ASSERT(PAGE_SHIFT >= sbp->sb_blocklog); + uint64_t max_bytes; + ASSERT(sbp->sb_blocklog >= BBSHIFT); + if (check_shl_overflow(nblocks, sbp->sb_blocklog, &max_bytes)) + return -EFBIG; + /* Limited by ULONG_MAX of page cache index */ - if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX) + if (max_bytes >> PAGE_SHIFT > ULONG_MAX) return -EFBIG; return 0; } @@ -172,14 +182,11 @@ xfs_readsb( /* * Allocate a (locked) buffer to hold the superblock. This will be kept - * around at all times to optimize access to the superblock. Therefore, - * set XBF_NO_IOACCT to make sure it doesn't hold the buftarg count - * elevated. + * around at all times to optimize access to the superblock. */ reread: error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, - BTOBB(sector_size), XBF_NO_IOACCT, &bp, - buf_ops); + BTOBB(sector_size), &bp, buf_ops); if (error) { if (loud) xfs_warn(mp, "SB validate failed with error %d.", error); @@ -230,6 +237,13 @@ reread: mp->m_features |= xfs_sb_version_to_features(sbp); xfs_reinit_percpu_counters(mp); + /* + * If logged xattrs are enabled after log recovery finishes, then set + * the opstate so that log recovery will work properly. + */ + if (xfs_sb_version_haslogxattrs(&mp->m_sb)) + xfs_set_using_logged_xattrs(mp); + /* no need to be quiet anymore, so reset the buf ops */ bp->b_ops = &xfs_sb_buf_ops; @@ -400,7 +414,7 @@ xfs_check_sizes( } error = xfs_buf_read_uncached(mp->m_ddev_targp, d - XFS_FSS_TO_BB(mp, 1), - XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); + XFS_FSS_TO_BB(mp, 1), &bp, NULL); if (error) { xfs_warn(mp, "last sector read failed"); return error; @@ -417,7 +431,7 @@ xfs_check_sizes( } error = xfs_buf_read_uncached(mp->m_logdev_targp, d - XFS_FSB_TO_BB(mp, 1), - XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); + XFS_FSB_TO_BB(mp, 1), &bp, NULL); if (error) { xfs_warn(mp, "log device read failed"); return error; @@ -448,22 +462,38 @@ xfs_mount_reset_sbqflags( return xfs_sync_sb(mp, false); } +static const char *const xfs_free_pool_name[] = { + [XC_FREE_BLOCKS] = "free blocks", + [XC_FREE_RTEXTENTS] = "free rt extents", + [XC_FREE_RTAVAILABLE] = "available rt extents", +}; + uint64_t -xfs_default_resblks(xfs_mount_t *mp) +xfs_default_resblks( + struct xfs_mount *mp, + enum xfs_free_counter ctr) { - uint64_t resblks; - - /* - * We default to 5% or 8192 fsbs of space reserved, whichever is - * smaller. This is intended to cover concurrent allocation - * transactions when we initially hit enospc. These each require a 4 - * block reservation. Hence by default we cover roughly 2000 concurrent - * allocation reservations. - */ - resblks = mp->m_sb.sb_dblocks; - do_div(resblks, 20); - resblks = min_t(uint64_t, resblks, 8192); - return resblks; + switch (ctr) { + case XC_FREE_BLOCKS: + /* + * Default to 5% or 8192 FSBs of space reserved, whichever is + * smaller. + * + * This is intended to cover concurrent allocation transactions + * when we initially hit ENOSPC. These each require a 4 block + * reservation. Hence by default we cover roughly 2000 + * concurrent allocation reservations. + */ + return min(div_u64(mp->m_sb.sb_dblocks, 20), 8192ULL); + case XC_FREE_RTEXTENTS: + case XC_FREE_RTAVAILABLE: + if (IS_ENABLED(CONFIG_XFS_RT) && xfs_has_zoned(mp)) + return xfs_zoned_default_resblks(mp, ctr); + return 0; + default: + ASSERT(0); + return 0; + } } /* Ensure the summary counts are correct. */ @@ -530,7 +560,7 @@ xfs_check_summary_counts( * If we're mounting the rt volume after recovering the log, recompute * frextents from the rtbitmap file to fix the inconsistency. */ - if (xfs_has_realtime(mp) && !xfs_is_clean(mp)) { + if (xfs_has_realtime(mp) && !xfs_has_zoned(mp) && !xfs_is_clean(mp)) { error = xfs_rtalloc_reinit_frextents(mp); if (error) return error; @@ -587,7 +617,7 @@ xfs_unmount_flush_inodes( xfs_extent_busy_wait_all(mp); flush_workqueue(xfs_discard_wq); - set_bit(XFS_OPSTATE_UNMOUNTING, &mp->m_opstate); + xfs_set_unmounting(mp); xfs_ail_push_all_sync(mp->m_ail); xfs_inodegc_stop(mp); @@ -608,6 +638,22 @@ xfs_mount_setup_inode_geom( xfs_ialloc_setup_geometry(mp); } +/* Mount the metadata directory tree root. */ +STATIC int +xfs_mount_setup_metadir( + struct xfs_mount *mp) +{ + int error; + + /* Load the metadata directory root inode into memory. */ + error = xfs_metafile_iget(mp, mp->m_sb.sb_metadirino, XFS_METAFILE_DIR, + &mp->m_metadirip); + if (error) + xfs_warn(mp, "Failed to load metadir root directory, error %d", + error); + return error; +} + /* Compute maximum possible height for per-AG btree types for this fs. */ static inline void xfs_agbtree_compute_maxlevels( @@ -620,6 +666,167 @@ xfs_agbtree_compute_maxlevels( mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels); } +/* Maximum atomic write IO size that the kernel allows. */ +static inline xfs_extlen_t xfs_calc_atomic_write_max(struct xfs_mount *mp) +{ + return rounddown_pow_of_two(XFS_B_TO_FSB(mp, MAX_RW_COUNT)); +} + +static inline unsigned int max_pow_of_two_factor(const unsigned int nr) +{ + return 1 << (ffs(nr) - 1); +} + +/* + * If the data device advertises atomic write support, limit the size of data + * device atomic writes to the greatest power-of-two factor of the AG size so + * that every atomic write unit aligns with the start of every AG. This is + * required so that the per-AG allocations for an atomic write will always be + * aligned compatibly with the alignment requirements of the storage. + * + * If the data device doesn't advertise atomic writes, then there are no + * alignment restrictions and the largest out-of-place write we can do + * ourselves is the number of blocks that user files can allocate from any AG. + */ +static inline xfs_extlen_t xfs_calc_perag_awu_max(struct xfs_mount *mp) +{ + if (mp->m_ddev_targp->bt_bdev_awu_min > 0) + return max_pow_of_two_factor(mp->m_sb.sb_agblocks); + return rounddown_pow_of_two(mp->m_ag_max_usable); +} + +/* + * Reflink on the realtime device requires rtgroups, and atomic writes require + * reflink. + * + * If the realtime device advertises atomic write support, limit the size of + * data device atomic writes to the greatest power-of-two factor of the rtgroup + * size so that every atomic write unit aligns with the start of every rtgroup. + * This is required so that the per-rtgroup allocations for an atomic write + * will always be aligned compatibly with the alignment requirements of the + * storage. + * + * If the rt device doesn't advertise atomic writes, then there are no + * alignment restrictions and the largest out-of-place write we can do + * ourselves is the number of blocks that user files can allocate from any + * rtgroup. + */ +static inline xfs_extlen_t xfs_calc_rtgroup_awu_max(struct xfs_mount *mp) +{ + struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG]; + + if (rgs->blocks == 0) + return 0; + if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_bdev_awu_min > 0) + return max_pow_of_two_factor(rgs->blocks); + return rounddown_pow_of_two(rgs->blocks); +} + +/* Compute the maximum atomic write unit size for each section. */ +static inline void +xfs_calc_atomic_write_unit_max( + struct xfs_mount *mp) +{ + struct xfs_groups *ags = &mp->m_groups[XG_TYPE_AG]; + struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG]; + + const xfs_extlen_t max_write = xfs_calc_atomic_write_max(mp); + const xfs_extlen_t max_ioend = xfs_reflink_max_atomic_cow(mp); + const xfs_extlen_t max_agsize = xfs_calc_perag_awu_max(mp); + const xfs_extlen_t max_rgsize = xfs_calc_rtgroup_awu_max(mp); + + ags->awu_max = min3(max_write, max_ioend, max_agsize); + rgs->awu_max = min3(max_write, max_ioend, max_rgsize); + + trace_xfs_calc_atomic_write_unit_max(mp, max_write, max_ioend, + max_agsize, max_rgsize); +} + +/* + * Try to set the atomic write maximum to a new value that we got from + * userspace via mount option. + */ +int +xfs_set_max_atomic_write_opt( + struct xfs_mount *mp, + unsigned long long new_max_bytes) +{ + const xfs_filblks_t new_max_fsbs = XFS_B_TO_FSBT(mp, new_max_bytes); + const xfs_extlen_t max_write = xfs_calc_atomic_write_max(mp); + const xfs_extlen_t max_group = + max(mp->m_groups[XG_TYPE_AG].blocks, + mp->m_groups[XG_TYPE_RTG].blocks); + const xfs_extlen_t max_group_write = + max(xfs_calc_perag_awu_max(mp), xfs_calc_rtgroup_awu_max(mp)); + int error; + + if (new_max_bytes == 0) + goto set_limit; + + ASSERT(max_write <= U32_MAX); + + /* generic_atomic_write_valid enforces power of two length */ + if (!is_power_of_2(new_max_bytes)) { + xfs_warn(mp, + "max atomic write size of %llu bytes is not a power of 2", + new_max_bytes); + return -EINVAL; + } + + if (new_max_bytes & mp->m_blockmask) { + xfs_warn(mp, + "max atomic write size of %llu bytes not aligned with fsblock", + new_max_bytes); + return -EINVAL; + } + + if (new_max_fsbs > max_write) { + xfs_warn(mp, + "max atomic write size of %lluk cannot be larger than max write size %lluk", + new_max_bytes >> 10, + XFS_FSB_TO_B(mp, max_write) >> 10); + return -EINVAL; + } + + if (new_max_fsbs > max_group) { + xfs_warn(mp, + "max atomic write size of %lluk cannot be larger than allocation group size %lluk", + new_max_bytes >> 10, + XFS_FSB_TO_B(mp, max_group) >> 10); + return -EINVAL; + } + + if (new_max_fsbs > max_group_write) { + xfs_warn(mp, + "max atomic write size of %lluk cannot be larger than max allocation group write size %lluk", + new_max_bytes >> 10, + XFS_FSB_TO_B(mp, max_group_write) >> 10); + return -EINVAL; + } + +set_limit: + error = xfs_calc_atomic_write_reservation(mp, new_max_fsbs); + if (error) { + xfs_warn(mp, + "cannot support completing atomic writes of %lluk", + new_max_bytes >> 10); + return error; + } + + xfs_calc_atomic_write_unit_max(mp); + mp->m_awu_max_bytes = new_max_bytes; + return 0; +} + +/* Compute maximum possible height for realtime btree types for this fs. */ +static inline void +xfs_rtbtree_compute_maxlevels( + struct xfs_mount *mp) +{ + mp->m_rtbtree_maxlevels = max(mp->m_rtrmap_maxlevels, + mp->m_rtrefc_maxlevels); +} + /* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct @@ -640,6 +847,7 @@ xfs_mountfs( uint quotamount = 0; uint quotaflags = 0; int error = 0; + int i; xfs_sb_mount_common(mp, sbp); @@ -688,9 +896,12 @@ xfs_mountfs( xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); xfs_mount_setup_inode_geom(mp); xfs_rmapbt_compute_maxlevels(mp); + xfs_rtrmapbt_compute_maxlevels(mp); xfs_refcountbt_compute_maxlevels(mp); + xfs_rtrefcountbt_compute_maxlevels(mp); xfs_agbtree_compute_maxlevels(mp); + xfs_rtbtree_compute_maxlevels(mp); /* * Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks @@ -706,27 +917,15 @@ xfs_mountfs( /* enable fail_at_unmount as default */ mp->m_fail_unmount = true; - super_set_sysfs_name_id(mp->m_super); - - error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, - NULL, mp->m_super->s_id); + error = xfs_mount_sysfs_init(mp); if (error) - goto out; - - error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype, - &mp->m_kobj, "stats"); - if (error) - goto out_remove_sysfs; + goto out_remove_scrub_stats; xchk_stats_register(mp->m_scrub_stats, mp->m_debugfs); - error = xfs_error_sysfs_init(mp); - if (error) - goto out_remove_scrub_stats; - error = xfs_errortag_init(mp); if (error) - goto out_remove_error_sysfs; + goto out_remove_sysfs; error = xfs_uuid_mount(mp); if (error) @@ -798,17 +997,24 @@ xfs_mountfs( /* * Allocate and initialize the per-ag data. */ - error = xfs_initialize_perag(mp, sbp->sb_agcount, mp->m_sb.sb_dblocks, - &mp->m_maxagi); + error = xfs_initialize_perag(mp, 0, sbp->sb_agcount, + mp->m_sb.sb_dblocks, &mp->m_maxagi); if (error) { xfs_warn(mp, "Failed per-ag init: %d", error); goto out_free_dir; } + error = xfs_initialize_rtgroups(mp, 0, sbp->sb_rgcount, + mp->m_sb.sb_rextents); + if (error) { + xfs_warn(mp, "Failed rtgroup init: %d", error); + goto out_free_perag; + } + if (XFS_IS_CORRUPT(mp, !sbp->sb_logblocks)) { xfs_warn(mp, "no log defined"); error = -EFSCORRUPTED; - goto out_free_perag; + goto out_free_rtgroup; } error = xfs_inodegc_register_shrinker(mp); @@ -816,6 +1022,13 @@ xfs_mountfs( goto out_fail_wait; /* + * If we're resuming quota status, pick up the preliminary qflags from + * the ondisk superblock so that we know if we should recover dquots. + */ + if (xfs_is_resuming_quotaon(mp)) + xfs_qm_resume_quotaon(mp); + + /* * Log's mount-time initialization. The first part of recovery can place * some items on the AIL, to be handled when recovery is finished or * cancelled. @@ -828,6 +1041,23 @@ xfs_mountfs( goto out_inodegc_shrinker; } + /* + * If we're resuming quota status and recovered the log, re-sample the + * qflags from the ondisk superblock now that we've recovered it, just + * in case someone shut down enforcement just before a crash. + */ + if (xfs_clear_resuming_quotaon(mp) && xlog_recovery_needed(mp->m_log)) + xfs_qm_resume_quotaon(mp); + + /* + * If logged xattrs are still enabled after log recovery finishes, then + * they'll be available until unmount. Otherwise, turn them off. + */ + if (xfs_sb_version_haslogxattrs(&mp->m_sb)) + xfs_set_using_logged_xattrs(mp); + else + xfs_clear_using_logged_xattrs(mp); + /* Enable background inode inactivation workers. */ xfs_inodegc_start(mp); xfs_blockgc_start(mp); @@ -845,6 +1075,12 @@ xfs_mountfs( mp->m_features |= XFS_FEAT_ATTR2; } + if (xfs_has_metadir(mp)) { + error = xfs_mount_setup_metadir(mp); + if (error) + goto out_free_metadir; + } + /* * Get and sanity-check the root inode. * Save the pointer to it in the mount structure. @@ -855,7 +1091,7 @@ xfs_mountfs( xfs_warn(mp, "Failed to read root inode 0x%llx, error %d", sbp->sb_rootino, -error); - goto out_log_dealloc; + goto out_free_metadir; } ASSERT(rip != NULL); @@ -953,6 +1189,12 @@ xfs_mountfs( if (xfs_is_readonly(mp) && !xfs_has_norecovery(mp)) xfs_log_clean(mp); + if (xfs_has_zoned(mp)) { + error = xfs_mount_zones(mp); + if (error) + goto out_rtunmount; + } + /* * Complete the quota initialisation, post-log-replay component. */ @@ -968,35 +1210,55 @@ xfs_mountfs( * privileged transactions. This is needed so that transaction * space required for critical operations can dip into this pool * when at ENOSPC. This is needed for operations like create with - * attr, unwritten extent conversion at ENOSPC, etc. Data allocations - * are not allowed to use this reserved space. + * attr, unwritten extent conversion at ENOSPC, garbage collection + * etc. Data allocations are not allowed to use this reserved space. * * This may drive us straight to ENOSPC on mount, but that implies * we were already there on the last unmount. Warn if this occurs. */ if (!xfs_is_readonly(mp)) { - error = xfs_reserve_blocks(mp, xfs_default_resblks(mp)); - if (error) - xfs_warn(mp, - "Unable to allocate reserve blocks. Continuing without reserve pool."); + for (i = 0; i < XC_FREE_NR; i++) { + error = xfs_reserve_blocks(mp, i, + xfs_default_resblks(mp, i)); + if (error) + xfs_warn(mp, +"Unable to allocate reserve blocks. Continuing without reserve pool for %s.", + xfs_free_pool_name[i]); + } /* Reserve AG blocks for future btree expansion. */ error = xfs_fs_reserve_ag_blocks(mp); if (error && error != -ENOSPC) goto out_agresv; + + xfs_zone_gc_start(mp); } + /* + * Pre-calculate atomic write unit max. This involves computations + * derived from transaction reservations, so we must do this after the + * log is fully initialized. + */ + error = xfs_set_max_atomic_write_opt(mp, mp->m_awu_max_bytes); + if (error) + goto out_agresv; + return 0; out_agresv: xfs_fs_unreserve_ag_blocks(mp); xfs_qm_unmount_quotas(mp); + if (xfs_has_zoned(mp)) + xfs_unmount_zones(mp); out_rtunmount: xfs_rtunmount_inodes(mp); out_rele_rip: xfs_irele(rip); /* Clean out dquots that might be in memory after quotacheck. */ xfs_qm_unmount(mp); + out_free_metadir: + if (mp->m_metadirip) + xfs_irele(mp->m_metadirip); /* * Inactivate all inodes that might still be in memory after a log @@ -1018,7 +1280,6 @@ xfs_mountfs( * quota inodes. */ xfs_unmount_flush_inodes(mp); - out_log_dealloc: xfs_log_mount_cancel(mp); out_inodegc_shrinker: shrinker_free(mp->m_inodegc_shrinker); @@ -1026,21 +1287,20 @@ xfs_mountfs( if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) xfs_buftarg_drain(mp->m_logdev_targp); xfs_buftarg_drain(mp->m_ddev_targp); + out_free_rtgroup: + xfs_free_rtgroups(mp, 0, mp->m_sb.sb_rgcount); out_free_perag: - xfs_free_perag(mp); + xfs_free_perag_range(mp, 0, mp->m_sb.sb_agcount); out_free_dir: xfs_da_unmount(mp); out_remove_uuid: xfs_uuid_unmount(mp); out_remove_errortag: xfs_errortag_del(mp); - out_remove_error_sysfs: - xfs_error_sysfs_del(mp); + out_remove_sysfs: + xfs_mount_sysfs_del(mp); out_remove_scrub_stats: xchk_stats_unregister(mp->m_scrub_stats); - xfs_sysfs_del(&mp->m_stats.xs_kobj); - out_remove_sysfs: - xfs_sysfs_del(&mp->m_kobj); out: return error; } @@ -1066,10 +1326,16 @@ xfs_unmountfs( xfs_inodegc_flush(mp); xfs_blockgc_stop(mp); + if (!test_bit(XFS_OPSTATE_READONLY, &mp->m_opstate)) + xfs_zone_gc_stop(mp); xfs_fs_unreserve_ag_blocks(mp); xfs_qm_unmount_quotas(mp); + if (xfs_has_zoned(mp)) + xfs_unmount_zones(mp); xfs_rtunmount_inodes(mp); xfs_irele(mp->m_rootip); + if (mp->m_metadirip) + xfs_irele(mp->m_metadirip); xfs_unmount_flush_inodes(mp); @@ -1089,12 +1355,17 @@ xfs_unmountfs( * we only every apply deltas to the superblock and hence the incore * value does not matter.... */ - error = xfs_reserve_blocks(mp, 0); + error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, 0); if (error) xfs_warn(mp, "Unable to free reserved block pool. " "Freespace may not be correct on next mount."); xfs_unmount_check(mp); + /* + * Indicate that it's ok to clear log incompat bits before cleaning + * the log and writing the unmount record. + */ + xfs_set_done_with_log_incompat(mp); xfs_log_unmount(mp); xfs_da_unmount(mp); xfs_uuid_unmount(mp); @@ -1103,13 +1374,11 @@ xfs_unmountfs( xfs_errortag_clearall(mp); #endif shrinker_free(mp->m_inodegc_shrinker); - xfs_free_perag(mp); - + xfs_free_rtgroups(mp, 0, mp->m_sb.sb_rgcount); + xfs_free_perag_range(mp, 0, mp->m_sb.sb_agcount); xfs_errortag_del(mp); - xfs_error_sysfs_del(mp); xchk_stats_unregister(mp->m_scrub_stats); - xfs_sysfs_del(&mp->m_stats.xs_kobj); - xfs_sysfs_del(&mp->m_kobj); + xfs_mount_sysfs_del(mp); } /* @@ -1131,49 +1400,67 @@ xfs_fs_writable( return true; } -/* Adjust m_fdblocks or m_frextents. */ -int -xfs_mod_freecounter( +/* + * Estimate the amount of free space that is not available to userspace and is + * not explicitly reserved from the incore fdblocks. This includes: + * + * - The minimum number of blocks needed to support splitting a bmap btree + * - The blocks currently in use by the freespace btrees because they record + * the actual blocks that will fill per-AG metadata space reservations + */ +uint64_t +xfs_freecounter_unavailable( struct xfs_mount *mp, - struct percpu_counter *counter, - int64_t delta, - bool rsvd) + enum xfs_free_counter ctr) { - int64_t lcounter; - long long res_used; - uint64_t set_aside = 0; - s32 batch; - bool has_resv_pool; + if (ctr != XC_FREE_BLOCKS) + return 0; + return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks); +} - ASSERT(counter == &mp->m_fdblocks || counter == &mp->m_frextents); - has_resv_pool = (counter == &mp->m_fdblocks); - if (rsvd) - ASSERT(has_resv_pool); +void +xfs_add_freecounter( + struct xfs_mount *mp, + enum xfs_free_counter ctr, + uint64_t delta) +{ + struct xfs_freecounter *counter = &mp->m_free[ctr]; + uint64_t res_used; - if (delta > 0) { - /* - * If the reserve pool is depleted, put blocks back into it - * first. Most of the time the pool is full. - */ - if (likely(!has_resv_pool || - mp->m_resblks == mp->m_resblks_avail)) { - percpu_counter_add(counter, delta); - return 0; - } + /* + * If the reserve pool is depleted, put blocks back into it first. + * Most of the time the pool is full. + */ + if (likely(counter->res_avail == counter->res_total)) { + percpu_counter_add(&counter->count, delta); + return; + } - spin_lock(&mp->m_sb_lock); - res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); - - if (res_used > delta) { - mp->m_resblks_avail += delta; - } else { - delta -= res_used; - mp->m_resblks_avail = mp->m_resblks; - percpu_counter_add(counter, delta); - } - spin_unlock(&mp->m_sb_lock); - return 0; + spin_lock(&mp->m_sb_lock); + res_used = counter->res_total - counter->res_avail; + if (res_used > delta) { + counter->res_avail += delta; + } else { + delta -= res_used; + counter->res_avail = counter->res_total; + percpu_counter_add(&counter->count, delta); } + spin_unlock(&mp->m_sb_lock); +} + + +/* Adjust in-core free blocks or RT extents. */ +int +xfs_dec_freecounter( + struct xfs_mount *mp, + enum xfs_free_counter ctr, + uint64_t delta, + bool rsvd) +{ + struct xfs_freecounter *counter = &mp->m_free[ctr]; + s32 batch; + + ASSERT(ctr < XC_FREE_NR); /* * Taking blocks away, need to be more accurate the closer we @@ -1183,7 +1470,7 @@ xfs_mod_freecounter( * then make everything serialise as we are real close to * ENOSPC. */ - if (__percpu_counter_compare(counter, 2 * XFS_FDBLOCKS_BATCH, + if (__percpu_counter_compare(&counter->count, 2 * XFS_FDBLOCKS_BATCH, XFS_FDBLOCKS_BATCH) < 0) batch = 1; else @@ -1200,34 +1487,34 @@ xfs_mod_freecounter( * problems (i.e. transaction abort, pagecache discards, etc.) than * slightly premature -ENOSPC. */ - if (has_resv_pool) - set_aside = xfs_fdblocks_unavailable(mp); - percpu_counter_add_batch(counter, delta, batch); - if (__percpu_counter_compare(counter, set_aside, - XFS_FDBLOCKS_BATCH) >= 0) { - /* we had space! */ - return 0; - } - - /* - * lock up the sb for dipping into reserves before releasing the space - * that took us to ENOSPC. - */ - spin_lock(&mp->m_sb_lock); - percpu_counter_add(counter, -delta); - if (!has_resv_pool || !rsvd) - goto fdblocks_enospc; - - lcounter = (long long)mp->m_resblks_avail + delta; - if (lcounter >= 0) { - mp->m_resblks_avail = lcounter; + percpu_counter_add_batch(&counter->count, -((int64_t)delta), batch); + if (__percpu_counter_compare(&counter->count, + xfs_freecounter_unavailable(mp, ctr), + XFS_FDBLOCKS_BATCH) < 0) { + /* + * Lock up the sb for dipping into reserves before releasing the + * space that took us to ENOSPC. + */ + spin_lock(&mp->m_sb_lock); + percpu_counter_add(&counter->count, delta); + if (!rsvd) + goto fdblocks_enospc; + if (delta > counter->res_avail) { + if (ctr == XC_FREE_BLOCKS) + xfs_warn_once(mp, +"Reserve blocks depleted! Consider increasing reserve pool size."); + goto fdblocks_enospc; + } + counter->res_avail -= delta; + trace_xfs_freecounter_reserved(mp, ctr, delta, _RET_IP_); spin_unlock(&mp->m_sb_lock); - return 0; } - xfs_warn_once(mp, -"Reserve blocks depleted! Consider increasing reserve pool size."); + + /* we had space! */ + return 0; fdblocks_enospc: + trace_xfs_freecounter_enospc(mp, ctr, delta, _RET_IP_); spin_unlock(&mp->m_sb_lock); return -ENOSPC; } @@ -1364,7 +1651,8 @@ xfs_clear_incompat_log_features( if (!xfs_has_crc(mp) || !xfs_sb_has_incompat_log_feature(&mp->m_sb, XFS_SB_FEAT_INCOMPAT_LOG_ALL) || - xfs_is_shutdown(mp)) + xfs_is_shutdown(mp) || + !xfs_is_done_with_log_incompat(mp)) return false; /* @@ -1399,9 +1687,20 @@ xfs_clear_incompat_log_features( #define XFS_DELALLOC_BATCH (4096) void xfs_mod_delalloc( - struct xfs_mount *mp, - int64_t delta) + struct xfs_inode *ip, + int64_t data_delta, + int64_t ind_delta) { - percpu_counter_add_batch(&mp->m_delalloc_blks, delta, + struct xfs_mount *mp = ip->i_mount; + + if (XFS_IS_REALTIME_INODE(ip)) { + percpu_counter_add_batch(&mp->m_delalloc_rtextents, + xfs_blen_to_rtbxlen(mp, data_delta), + XFS_DELALLOC_BATCH); + if (!ind_delta) + return; + data_delta = 0; + } + percpu_counter_add_batch(&mp->m_delalloc_blks, data_delta + ind_delta, XFS_DELALLOC_BATCH); } |