summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_mount.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-04-24 07:08:41 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-24 07:08:41 -0700
commit1aef882f023eb7c24d6d77f001bd0ba956fdd861 (patch)
tree735c1043f817a8bc9f31fadd224131b3207eebd2 /fs/xfs/xfs_mount.c
parentd869844bd081081bf537e806a44811884230643e (diff)
parent542c311813d5cb2e6f0dfa9557f41c829b8fb6a0 (diff)
Merge tag 'xfs-for-linus-4.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs update from Dave Chinner: "This update contains: - RENAME_WHITEOUT support - conversion of per-cpu superblock accounting to use generic counters - new inode mmap lock so that we can lock page faults out of truncate, hole punch and other direct extent manipulation functions to avoid racing mmap writes from causing data corruption - rework of direct IO submission and completion to solve data corruption issue when running concurrent extending DIO writes. Also solves problem of running IO completion transactions in interrupt context during size extending AIO writes. - FALLOC_FL_INSERT_RANGE support for inserting holes into a file via direct extent manipulation to avoid needing to copy data within the file - attribute block header field overflow fix for 64k block size filesystems - Lots of changes to log messaging to be more informative and concise when errors occur. Also prevent a lot of unnecessary log spamming due to cascading failures in error conditions. - lots of cleanups and bug fixes One thing of note is the direct IO fixes that we merged last week after the window opened. Even though a little late, they fix a user reported data corruption and have been pretty well tested. I figured there was not much point waiting another 2 weeks for -rc1 to be released just so I could send them to you..." * tag 'xfs-for-linus-4.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (49 commits) xfs: using generic_file_direct_write() is unnecessary xfs: direct IO EOF zeroing needs to drain AIO xfs: DIO write completion size updates race xfs: DIO writes within EOF don't need an ioend xfs: handle DIO overwrite EOF update completion correctly xfs: DIO needs an ioend for writes xfs: move DIO mapping size calculation xfs: factor DIO write mapping from get_blocks xfs: unlock i_mutex in xfs_break_layouts xfs: kill unnecessary firstused overflow check on attr3 leaf removal xfs: use larger in-core attr firstused field and detect overflow xfs: pass attr geometry to attr leaf header conversion functions xfs: disallow ro->rw remount on norecovery mount xfs: xfs_shift_file_space can be static xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate fs: Add support FALLOC_FL_INSERT_RANGE for fallocate xfs: Fix incorrect positive ENOMEM return xfs: xfs_mru_cache_insert() should use GFP_NOFS xfs: %pF is only for function pointers xfs: fix shadow warning in xfs_da3_root_split() ...
Diffstat (limited to 'fs/xfs/xfs_mount.c')
-rw-r--r--fs/xfs/xfs_mount.c918
1 files changed, 108 insertions, 810 deletions
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 4fa80e63eea2..2ce7ee3b4ec1 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -43,18 +43,6 @@
#include "xfs_sysfs.h"
-#ifdef HAVE_PERCPU_SB
-STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
- int);
-STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
- int);
-STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
-#else
-
-#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
-#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
-#endif
-
static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
static uuid_t *xfs_uuid_table;
@@ -347,8 +335,7 @@ reread:
goto reread;
}
- /* Initialize per-cpu counters */
- xfs_icsb_reinit_counters(mp);
+ xfs_reinit_percpu_counters(mp);
/* no need to be quiet anymore, so reset the buf ops */
bp->b_ops = &xfs_sb_buf_ops;
@@ -1087,8 +1074,6 @@ xfs_log_sbcount(xfs_mount_t *mp)
if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
return 0;
- xfs_icsb_sync_counters(mp, 0);
-
/*
* we don't need to do this if we are updating the superblock
* counters on every modification.
@@ -1099,253 +1084,136 @@ xfs_log_sbcount(xfs_mount_t *mp)
return xfs_sync_sb(mp, true);
}
-/*
- * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply
- * a delta to a specified field in the in-core superblock. Simply
- * switch on the field indicated and apply the delta to that field.
- * Fields are not allowed to dip below zero, so if the delta would
- * do this do not apply it and return EINVAL.
- *
- * The m_sb_lock must be held when this routine is called.
- */
-STATIC int
-xfs_mod_incore_sb_unlocked(
- xfs_mount_t *mp,
- xfs_sb_field_t field,
- int64_t delta,
- int rsvd)
+int
+xfs_mod_icount(
+ struct xfs_mount *mp,
+ int64_t delta)
{
- int scounter; /* short counter for 32 bit fields */
- long long lcounter; /* long counter for 64 bit fields */
- long long res_used, rem;
-
- /*
- * With the in-core superblock spin lock held, switch
- * on the indicated field. Apply the delta to the
- * proper field. If the fields value would dip below
- * 0, then do not apply the delta and return EINVAL.
- */
- switch (field) {
- case XFS_SBS_ICOUNT:
- lcounter = (long long)mp->m_sb.sb_icount;
- lcounter += delta;
- if (lcounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_icount = lcounter;
- return 0;
- case XFS_SBS_IFREE:
- lcounter = (long long)mp->m_sb.sb_ifree;
- lcounter += delta;
- if (lcounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_ifree = lcounter;
- return 0;
- case XFS_SBS_FDBLOCKS:
- lcounter = (long long)
- mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
- res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
-
- if (delta > 0) { /* Putting blocks back */
- if (res_used > delta) {
- mp->m_resblks_avail += delta;
- } else {
- rem = delta - res_used;
- mp->m_resblks_avail = mp->m_resblks;
- lcounter += rem;
- }
- } else { /* Taking blocks away */
- lcounter += delta;
- if (lcounter >= 0) {
- mp->m_sb.sb_fdblocks = lcounter +
- XFS_ALLOC_SET_ASIDE(mp);
- return 0;
- }
-
- /*
- * We are out of blocks, use any available reserved
- * blocks if were allowed to.
- */
- if (!rsvd)
- return -ENOSPC;
-
- lcounter = (long long)mp->m_resblks_avail + delta;
- if (lcounter >= 0) {
- mp->m_resblks_avail = lcounter;
- return 0;
- }
- printk_once(KERN_WARNING
- "Filesystem \"%s\": reserve blocks depleted! "
- "Consider increasing reserve pool size.",
- mp->m_fsname);
- return -ENOSPC;
- }
-
- mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
- return 0;
- case XFS_SBS_FREXTENTS:
- lcounter = (long long)mp->m_sb.sb_frextents;
- lcounter += delta;
- if (lcounter < 0) {
- return -ENOSPC;
- }
- mp->m_sb.sb_frextents = lcounter;
- return 0;
- case XFS_SBS_DBLOCKS:
- lcounter = (long long)mp->m_sb.sb_dblocks;
- lcounter += delta;
- if (lcounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_dblocks = lcounter;
- return 0;
- case XFS_SBS_AGCOUNT:
- scounter = mp->m_sb.sb_agcount;
- scounter += delta;
- if (scounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_agcount = scounter;
- return 0;
- case XFS_SBS_IMAX_PCT:
- scounter = mp->m_sb.sb_imax_pct;
- scounter += delta;
- if (scounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_imax_pct = scounter;
- return 0;
- case XFS_SBS_REXTSIZE:
- scounter = mp->m_sb.sb_rextsize;
- scounter += delta;
- if (scounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_rextsize = scounter;
- return 0;
- case XFS_SBS_RBMBLOCKS:
- scounter = mp->m_sb.sb_rbmblocks;
- scounter += delta;
- if (scounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_rbmblocks = scounter;
- return 0;
- case XFS_SBS_RBLOCKS:
- lcounter = (long long)mp->m_sb.sb_rblocks;
- lcounter += delta;
- if (lcounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_rblocks = lcounter;
- return 0;
- case XFS_SBS_REXTENTS:
- lcounter = (long long)mp->m_sb.sb_rextents;
- lcounter += delta;
- if (lcounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_rextents = lcounter;
- return 0;
- case XFS_SBS_REXTSLOG:
- scounter = mp->m_sb.sb_rextslog;
- scounter += delta;
- if (scounter < 0) {
- ASSERT(0);
- return -EINVAL;
- }
- mp->m_sb.sb_rextslog = scounter;
- return 0;
- default:
+ /* deltas are +/-64, hence the large batch size of 128. */
+ __percpu_counter_add(&mp->m_icount, delta, 128);
+ if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
ASSERT(0);
+ percpu_counter_add(&mp->m_icount, -delta);
return -EINVAL;
}
+ return 0;
}
-/*
- * xfs_mod_incore_sb() is used to change a field in the in-core
- * superblock structure by the specified delta. This modification
- * is protected by the m_sb_lock. Just use the xfs_mod_incore_sb_unlocked()
- * routine to do the work.
- */
int
-xfs_mod_incore_sb(
+xfs_mod_ifree(
struct xfs_mount *mp,
- xfs_sb_field_t field,
- int64_t delta,
- int rsvd)
+ int64_t delta)
{
- int status;
-
-#ifdef HAVE_PERCPU_SB
- ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS);
-#endif
- spin_lock(&mp->m_sb_lock);
- status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
- spin_unlock(&mp->m_sb_lock);
-
- return status;
+ percpu_counter_add(&mp->m_ifree, delta);
+ if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
+ ASSERT(0);
+ percpu_counter_add(&mp->m_ifree, -delta);
+ return -EINVAL;
+ }
+ return 0;
}
-/*
- * Change more than one field in the in-core superblock structure at a time.
- *
- * The fields and changes to those fields are specified in the array of
- * xfs_mod_sb structures passed in. Either all of the specified deltas
- * will be applied or none of them will. If any modified field dips below 0,
- * then all modifications will be backed out and EINVAL will be returned.
- *
- * Note that this function may not be used for the superblock values that
- * are tracked with the in-memory per-cpu counters - a direct call to
- * xfs_icsb_modify_counters is required for these.
- */
int
-xfs_mod_incore_sb_batch(
+xfs_mod_fdblocks(
struct xfs_mount *mp,
- xfs_mod_sb_t *msb,
- uint nmsb,
- int rsvd)
+ int64_t delta,
+ bool rsvd)
{
- xfs_mod_sb_t *msbp;
- int error = 0;
+ int64_t lcounter;
+ long long res_used;
+ s32 batch;
+
+ if (delta > 0) {
+ /*
+ * If the reserve pool is depleted, put blocks back into it
+ * first. Most of the time the pool is full.
+ */
+ if (likely(mp->m_resblks == mp->m_resblks_avail)) {
+ percpu_counter_add(&mp->m_fdblocks, delta);
+ return 0;
+ }
+
+ spin_lock(&mp->m_sb_lock);
+ res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
+
+ if (res_used > delta) {
+ mp->m_resblks_avail += delta;
+ } else {
+ delta -= res_used;
+ mp->m_resblks_avail = mp->m_resblks;
+ percpu_counter_add(&mp->m_fdblocks, delta);
+ }
+ spin_unlock(&mp->m_sb_lock);
+ return 0;
+ }
/*
- * Loop through the array of mod structures and apply each individually.
- * If any fail, then back out all those which have already been applied.
- * Do all of this within the scope of the m_sb_lock so that all of the
- * changes will be atomic.
+ * Taking blocks away, need to be more accurate the closer we
+ * are to zero.
+ *
+ * batch size is set to a maximum of 1024 blocks - if we are
+ * allocating of freeing extents larger than this then we aren't
+ * going to be hammering the counter lock so a lock per update
+ * is not a problem.
+ *
+ * If the counter has a value of less than 2 * max batch size,
+ * then make everything serialise as we are real close to
+ * ENOSPC.
+ */
+#define __BATCH 1024
+ if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
+ batch = 1;
+ else
+ batch = __BATCH;
+#undef __BATCH
+
+ __percpu_counter_add(&mp->m_fdblocks, delta, batch);
+ if (percpu_counter_compare(&mp->m_fdblocks,
+ XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
+ /* we had space! */
+ return 0;
+ }
+
+ /*
+ * lock up the sb for dipping into reserves before releasing the space
+ * that took us to ENOSPC.
*/
spin_lock(&mp->m_sb_lock);
- for (msbp = msb; msbp < (msb + nmsb); msbp++) {
- ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
- msbp->msb_field > XFS_SBS_FDBLOCKS);
+ percpu_counter_add(&mp->m_fdblocks, -delta);
+ if (!rsvd)
+ goto fdblocks_enospc;
- error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
- msbp->msb_delta, rsvd);
- if (error)
- goto unwind;
+ lcounter = (long long)mp->m_resblks_avail + delta;
+ if (lcounter >= 0) {
+ mp->m_resblks_avail = lcounter;
+ spin_unlock(&mp->m_sb_lock);
+ return 0;
}
+ printk_once(KERN_WARNING
+ "Filesystem \"%s\": reserve blocks depleted! "
+ "Consider increasing reserve pool size.",
+ mp->m_fsname);
+fdblocks_enospc:
spin_unlock(&mp->m_sb_lock);
- return 0;
+ return -ENOSPC;
+}
-unwind:
- while (--msbp >= msb) {
- error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
- -msbp->msb_delta, rsvd);
- ASSERT(error == 0);
- }
+int
+xfs_mod_frextents(
+ struct xfs_mount *mp,
+ int64_t delta)
+{
+ int64_t lcounter;
+ int ret = 0;
+
+ spin_lock(&mp->m_sb_lock);
+ lcounter = mp->m_sb.sb_frextents + delta;
+ if (lcounter < 0)
+ ret = -ENOSPC;
+ else
+ mp->m_sb.sb_frextents = lcounter;
spin_unlock(&mp->m_sb_lock);
- return error;
+ return ret;
}
/*
@@ -1407,573 +1275,3 @@ xfs_dev_is_read_only(
}
return 0;
}
-
-#ifdef HAVE_PERCPU_SB
-/*
- * Per-cpu incore superblock counters
- *
- * Simple concept, difficult implementation
- *
- * Basically, replace the incore superblock counters with a distributed per cpu
- * counter for contended fields (e.g. free block count).
- *
- * Difficulties arise in that the incore sb is used for ENOSPC checking, and
- * hence needs to be accurately read when we are running low on space. Hence
- * there is a method to enable and disable the per-cpu counters based on how
- * much "stuff" is available in them.
- *
- * Basically, a counter is enabled if there is enough free resource to justify
- * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
- * ENOSPC), then we disable the counters to synchronise all callers and
- * re-distribute the available resources.
- *
- * If, once we redistributed the available resources, we still get a failure,
- * we disable the per-cpu counter and go through the slow path.
- *
- * The slow path is the current xfs_mod_incore_sb() function. This means that
- * when we disable a per-cpu counter, we need to drain its resources back to
- * the global superblock. We do this after disabling the counter to prevent
- * more threads from queueing up on the counter.
- *
- * Essentially, this means that we still need a lock in the fast path to enable
- * synchronisation between the global counters and the per-cpu counters. This
- * is not a problem because the lock will be local to a CPU almost all the time
- * and have little contention except when we get to ENOSPC conditions.
- *
- * Basically, this lock becomes a barrier that enables us to lock out the fast
- * path while we do things like enabling and disabling counters and
- * synchronising the counters.
- *
- * Locking rules:
- *
- * 1. m_sb_lock before picking up per-cpu locks
- * 2. per-cpu locks always picked up via for_each_online_cpu() order
- * 3. accurate counter sync requires m_sb_lock + per cpu locks
- * 4. modifying per-cpu counters requires holding per-cpu lock
- * 5. modifying global counters requires holding m_sb_lock
- * 6. enabling or disabling a counter requires holding the m_sb_lock
- * and _none_ of the per-cpu locks.
- *
- * Disabled counters are only ever re-enabled by a balance operation
- * that results in more free resources per CPU than a given threshold.
- * To ensure counters don't remain disabled, they are rebalanced when
- * the global resource goes above a higher threshold (i.e. some hysteresis
- * is present to prevent thrashing).
- */
-
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * hot-plug CPU notifier support.
- *
- * We need a notifier per filesystem as we need to be able to identify
- * the filesystem to balance the counters out. This is achieved by
- * having a notifier block embedded in the xfs_mount_t and doing pointer
- * magic to get the mount pointer from the notifier block address.
- */
-STATIC int
-xfs_icsb_cpu_notify(
- struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
-{
- xfs_icsb_cnts_t *cntp;
- xfs_mount_t *mp;
-
- mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
- cntp = (xfs_icsb_cnts_t *)
- per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- /* Easy Case - initialize the area and locks, and
- * then rebalance when online does everything else for us. */
- memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
- break;
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- xfs_icsb_lock(mp);
- xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
- xfs_icsb_unlock(mp);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- /* Disable all the counters, then fold the dead cpu's
- * count into the total on the global superblock and
- * re-enable the counters. */
- xfs_icsb_lock(mp);
- spin_lock(&mp->m_sb_lock);
- xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
- xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
- xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
-
- mp->m_sb.sb_icount += cntp->icsb_icount;
- mp->m_sb.sb_ifree += cntp->icsb_ifree;
- mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
-
- memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-
- xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
- xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
- xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
- spin_unlock(&mp->m_sb_lock);
- xfs_icsb_unlock(mp);
- break;
- }
-
- return NOTIFY_OK;
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
-int
-xfs_icsb_init_counters(
- xfs_mount_t *mp)
-{
- xfs_icsb_cnts_t *cntp;
- int i;
-
- mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
- if (mp->m_sb_cnts == NULL)
- return -ENOMEM;
-
- for_each_online_cpu(i) {
- cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
- memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
- }
-
- mutex_init(&mp->m_icsb_mutex);
-
- /*
- * start with all counters disabled so that the
- * initial balance kicks us off correctly
- */
- mp->m_icsb_counters = -1;
-
-#ifdef CONFIG_HOTPLUG_CPU
- mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
- mp->m_icsb_notifier.priority = 0;
- register_hotcpu_notifier(&mp->m_icsb_notifier);
-#endif /* CONFIG_HOTPLUG_CPU */
-
- return 0;
-}
-
-void
-xfs_icsb_reinit_counters(
- xfs_mount_t *mp)
-{
- xfs_icsb_lock(mp);
- /*
- * start with all counters disabled so that the
- * initial balance kicks us off correctly
- */
- mp->m_icsb_counters = -1;
- xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
- xfs_icsb_unlock(mp);
-}
-
-void
-xfs_icsb_destroy_counters(
- xfs_mount_t *mp)
-{
- if (mp->m_sb_cnts) {
- unregister_hotcpu_notifier(&mp->m_icsb_notifier);
- free_percpu(mp->m_sb_cnts);
- }
- mutex_destroy(&mp->m_icsb_mutex);
-}
-
-STATIC void
-xfs_icsb_lock_cntr(
- xfs_icsb_cnts_t *icsbp)
-{
- while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
- ndelay(1000);
- }
-}
-
-STATIC void
-xfs_icsb_unlock_cntr(
- xfs_icsb_cnts_t *icsbp)
-{
- clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
-}
-
-
-STATIC void
-xfs_icsb_lock_all_counters(
- xfs_mount_t *mp)
-{
- xfs_icsb_cnts_t *cntp;
- int i;
-
- for_each_online_cpu(i) {
- cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
- xfs_icsb_lock_cntr(cntp);
- }
-}
-
-STATIC void
-xfs_icsb_unlock_all_counters(
- xfs_mount_t *mp)
-{
- xfs_icsb_cnts_t *cntp;
- int i;
-
- for_each_online_cpu(i) {
- cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
- xfs_icsb_unlock_cntr(cntp);
- }
-}
-
-STATIC void
-xfs_icsb_count(
- xfs_mount_t *mp,
- xfs_icsb_cnts_t *cnt,
- int flags)
-{
- xfs_icsb_cnts_t *cntp;
- int i;
-
- memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
-
- if (!(flags & XFS_ICSB_LAZY_COUNT))
- xfs_icsb_lock_all_counters(mp);
-
- for_each_online_cpu(i) {
- cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
- cnt->icsb_icount += cntp->icsb_icount;
- cnt->icsb_ifree += cntp->icsb_ifree;
- cnt->icsb_fdblocks += cntp->icsb_fdblocks;
- }
-
- if (!(flags & XFS_ICSB_LAZY_COUNT))
- xfs_icsb_unlock_all_counters(mp);
-}
-
-STATIC int
-xfs_icsb_counter_disabled(
- xfs_mount_t *mp,
- xfs_sb_field_t field)
-{
- ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
- return test_bit(field, &mp->m_icsb_counters);
-}
-
-STATIC void
-xfs_icsb_disable_counter(
- xfs_mount_t *mp,
- xfs_sb_field_t field)
-{
- xfs_icsb_cnts_t cnt;
-
- ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-
- /*
- * If we are already disabled, then there is nothing to do
- * here. We check before locking all the counters to avoid
- * the expensive lock operation when being called in the
- * slow path and the counter is already disabled. This is
- * safe because the only time we set or clear this state is under
- * the m_icsb_mutex.
- */
- if (xfs_icsb_counter_disabled(mp, field))
- return;
-
- xfs_icsb_lock_all_counters(mp);
- if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
- /* drain back to superblock */
-
- xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
- switch(field) {
- case XFS_SBS_ICOUNT:
- mp->m_sb.sb_icount = cnt.icsb_icount;
- break;
- case XFS_SBS_IFREE:
- mp->m_sb.sb_ifree = cnt.icsb_ifree;
- break;
- case XFS_SBS_FDBLOCKS:
- mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
- break;
- default:
- BUG();
- }
- }
-
- xfs_icsb_unlock_all_counters(mp);
-}
-
-STATIC void
-xfs_icsb_enable_counter(
- xfs_mount_t *mp,
- xfs_sb_field_t field,
- uint64_t count,
- uint64_t resid)
-{
- xfs_icsb_cnts_t *cntp;
- int i;
-
- ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-
- xfs_icsb_lock_all_counters(mp);
- for_each_online_cpu(i) {
- cntp = per_cpu_ptr(mp->m_sb_cnts, i);
- switch (field) {
- case XFS_SBS_ICOUNT:
- cntp->icsb_icount = count + resid;
- break;
- case XFS_SBS_IFREE:
- cntp->icsb_ifree = count + resid;
- break;
- case XFS_SBS_FDBLOCKS:
- cntp->icsb_fdblocks = count + resid;
- break;
- default:
- BUG();
- break;
- }
- resid = 0;
- }
- clear_bit(field, &mp->m_icsb_counters);
- xfs_icsb_unlock_all_counters(mp);
-}
-
-void
-xfs_icsb_sync_counters_locked(
- xfs_mount_t *mp,
- int flags)
-{
- xfs_icsb_cnts_t cnt;
-
- xfs_icsb_count(mp, &cnt, flags);
-
- if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
- mp->m_sb.sb_icount = cnt.icsb_icount;
- if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
- mp->m_sb.sb_ifree = cnt.icsb_ifree;
- if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
- mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
-}
-
-/*
- * Accurate update of per-cpu counters to incore superblock
- */
-void
-xfs_icsb_sync_counters(
- xfs_mount_t *mp,
- int flags)
-{
- spin_lock(&mp->m_sb_lock);
- xfs_icsb_sync_counters_locked(mp, flags);
- spin_unlock(&mp->m_sb_lock);
-}
-
-/*
- * Balance and enable/disable counters as necessary.
- *
- * Thresholds for re-enabling counters are somewhat magic. inode counts are
- * chosen to be the same number as single on disk allocation chunk per CPU, and
- * free blocks is something far enough zero that we aren't going thrash when we
- * get near ENOSPC. We also need to supply a minimum we require per cpu to
- * prevent looping endlessly when xfs_alloc_space asks for more than will
- * be distributed to a single CPU but each CPU has enough blocks to be
- * reenabled.
- *
- * Note that we can be called when counters are already disabled.
- * xfs_icsb_disable_counter() optimises the counter locking in this case to
- * prevent locking every per-cpu counter needlessly.
- */
-
-#define XFS_ICSB_INO_CNTR_REENABLE (uint64_t)64
-#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
- (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
-STATIC void
-xfs_icsb_balance_counter_locked(
- xfs_mount_t *mp,
- xfs_sb_field_t field,
- int min_per_cpu)
-{
- uint64_t count, resid;
- int weight = num_online_cpus();
- uint64_t min = (uint64_t)min_per_cpu;
-
- /* disable counter and sync counter */
- xfs_icsb_disable_counter(mp, field);
-
- /* update counters - first CPU gets residual*/
- switch (field) {
- case XFS_SBS_ICOUNT:
- count = mp->m_sb.sb_icount;
- resid = do_div(count, weight);
- if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
- return;
- break;
- case XFS_SBS_IFREE:
- count = mp->m_sb.sb_ifree;
- resid = do_div(count, weight);
- if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
- return;
- break;
- case XFS_SBS_FDBLOCKS:
- count = mp->m_sb.sb_fdblocks;
- resid = do_div(count, weight);
- if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
- return;
- break;
- default:
- BUG();
- count = resid = 0; /* quiet, gcc */
- break;
- }
-
- xfs_icsb_enable_counter(mp, field, count, resid);
-}
-
-STATIC void
-xfs_icsb_balance_counter(
- xfs_mount_t *mp,
- xfs_sb_field_t fields,
- int min_per_cpu)
-{
- spin_lock(&mp->m_sb_lock);
- xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
- spin_unlock(&mp->m_sb_lock);
-}
-
-int
-xfs_icsb_modify_counters(
- xfs_mount_t *mp,
- xfs_sb_field_t field,
- int64_t delta,
- int rsvd)
-{
- xfs_icsb_cnts_t *icsbp;
- long long lcounter; /* long counter for 64 bit fields */
- int ret = 0;
-
- might_sleep();
-again:
- preempt_disable();
- icsbp = this_cpu_ptr(mp->m_sb_cnts);
-
- /*
- * if the counter is disabled, go to slow path
- */
- if (unlikely(xfs_icsb_counter_disabled(mp, field)))
- goto slow_path;
- xfs_icsb_lock_cntr(icsbp);
- if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
- xfs_icsb_unlock_cntr(icsbp);
- goto slow_path;
- }
-
- switch (field) {
- case XFS_SBS_ICOUNT:
- lcounter = icsbp->icsb_icount;
- lcounter += delta;
- if (unlikely(lcounter < 0))
- goto balance_counter;
- icsbp->icsb_icount = lcounter;
- break;
-
- case XFS_SBS_IFREE:
- lcounter = icsbp->icsb_ifree;
- lcounter += delta;
- if (unlikely(lcounter < 0))
- goto balance_counter;
- icsbp->icsb_ifree = lcounter;
- break;
-
- case XFS_SBS_FDBLOCKS:
- BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
-
- lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
- lcounter += delta;
- if (unlikely(lcounter < 0))
- goto balance_counter;
- icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
- break;
- default:
- BUG();
- break;
- }
- xfs_icsb_unlock_cntr(icsbp);
- preempt_enable();
- return 0;
-
-slow_path:
- preempt_enable();
-
- /*
- * serialise with a mutex so we don't burn lots of cpu on
- * the superblock lock. We still need to hold the superblock
- * lock, however, when we modify the global structures.
- */
- xfs_icsb_lock(mp);
-
- /*
- * Now running atomically.
- *
- * If the counter is enabled, someone has beaten us to rebalancing.
- * Drop the lock and try again in the fast path....
- */
- if (!(xfs_icsb_counter_disabled(mp, field))) {
- xfs_icsb_unlock(mp);
- goto again;
- }
-
- /*
- * The counter is currently disabled. Because we are
- * running atomically here, we know a rebalance cannot
- * be in progress. Hence we can go straight to operating
- * on the global superblock. We do not call xfs_mod_incore_sb()
- * here even though we need to get the m_sb_lock. Doing so
- * will cause us to re-enter this function and deadlock.
- * Hence we get the m_sb_lock ourselves and then call
- * xfs_mod_incore_sb_unlocked() as the unlocked path operates
- * directly on the global counters.
- */
- spin_lock(&mp->m_sb_lock);
- ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
- spin_unlock(&mp->m_sb_lock);
-
- /*
- * Now that we've modified the global superblock, we
- * may be able to re-enable the distributed counters
- * (e.g. lots of space just got freed). After that
- * we are done.
- */
- if (ret != -ENOSPC)
- xfs_icsb_balance_counter(mp, field, 0);
- xfs_icsb_unlock(mp);
- return ret;
-
-balance_counter:
- xfs_icsb_unlock_cntr(icsbp);
- preempt_enable();
-
- /*
- * We may have multiple threads here if multiple per-cpu
- * counters run dry at the same time. This will mean we can
- * do more balances than strictly necessary but it is not
- * the common slowpath case.
- */
- xfs_icsb_lock(mp);
-
- /*
- * running atomically.
- *
- * This will leave the counter in the correct state for future
- * accesses. After the rebalance, we simply try again and our retry
- * will either succeed through the fast path or slow path without
- * another balance operation being required.
- */
- xfs_icsb_balance_counter(mp, field, delta);
- xfs_icsb_unlock(mp);
- goto again;
-}
-
-#endif