Diffstat (limited to 'fs/xfs/xfs_dquot.c')
 fs/xfs/xfs_dquot.c | 542
 1 file changed, 346 insertions(+), 196 deletions(-)
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 8fb90da89787..612ca682a513 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -24,13 +24,14 @@
#include "xfs_log.h"
#include "xfs_bmap_btree.h"
#include "xfs_error.h"
+#include "xfs_health.h"
/*
* Lock order:
*
* ip->i_lock
* qi->qi_tree_lock
- * dquot->q_qlock (xfs_dqlock() and friends)
+ * dquot->q_qlock
* dquot->q_flush (xfs_dqflock() and friends)
* qi->qi_lru_lock
*
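Any code path that takes more than one of these locks must acquire them in the order listed above. A minimal sketch of a compliant (hypothetical) caller, taking q_qlock directly the way the rest of this patch now does:

	/* illustrative only; not part of this patch */
	xfs_ilock(ip, XFS_ILOCK_EXCL);		/* 1: ip->i_lock */
	mutex_lock(&qi->qi_tree_lock);		/* 2: quota radix tree lock */
	mutex_lock(&dqp->q_qlock);		/* 3: per-dquot lock */
	/* ... work on the dquot ... */
	mutex_unlock(&dqp->q_qlock);
	mutex_unlock(&qi->qi_tree_lock);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);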
@@ -44,6 +45,54 @@ static struct kmem_cache *xfs_dquot_cache;
static struct lock_class_key xfs_dquot_group_class;
static struct lock_class_key xfs_dquot_project_class;
+/* Record observations of quota corruption with the health tracking system. */
+static void
+xfs_dquot_mark_sick(
+ struct xfs_dquot *dqp)
+{
+ struct xfs_mount *mp = dqp->q_mount;
+
+ switch (dqp->q_type) {
+ case XFS_DQTYPE_USER:
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_UQUOTA);
+ break;
+ case XFS_DQTYPE_GROUP:
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_GQUOTA);
+ break;
+ case XFS_DQTYPE_PROJ:
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_PQUOTA);
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+}
+
+/*
+ * Detach the dquot buffer if it's still attached, because we can get called
+ * through dqpurge after a log shutdown. Caller must hold the dqflock or have
+ * otherwise isolated the dquot.
+ */
+void
+xfs_dquot_detach_buf(
+ struct xfs_dquot *dqp)
+{
+ struct xfs_dq_logitem *qlip = &dqp->q_logitem;
+ struct xfs_buf *bp = NULL;
+
+ spin_lock(&qlip->qli_lock);
+ if (qlip->qli_item.li_buf) {
+ bp = qlip->qli_item.li_buf;
+ qlip->qli_item.li_buf = NULL;
+ }
+ spin_unlock(&qlip->qli_lock);
+ if (bp) {
+ xfs_buf_lock(bp);
+ list_del_init(&qlip->qli_item.li_bio_list);
+ xfs_buf_relse(bp);
+ }
+}
+
/*
* This is called to free all the memory associated with a dquot
*/
@@ -52,8 +101,9 @@ xfs_qm_dqdestroy(
struct xfs_dquot *dqp)
{
ASSERT(list_empty(&dqp->q_lru));
+ ASSERT(dqp->q_logitem.qli_item.li_buf == NULL);
- kmem_free(dqp->q_logitem.qli_item.li_lv_shadow);
+ kvfree(dqp->q_logitem.qli_item.li_lv_shadow);
mutex_destroy(&dqp->q_qlock);
XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot);
@@ -172,14 +222,14 @@ xfs_qm_adjust_dqtimers(
/*
* initialize a buffer full of dquots and log the whole thing
*/
-STATIC void
+void
xfs_qm_init_dquot_blk(
struct xfs_trans *tp,
- struct xfs_mount *mp,
xfs_dqid_t id,
xfs_dqtype_t type,
struct xfs_buf *bp)
{
+ struct xfs_mount *mp = tp->t_mountp;
struct xfs_quotainfo *q = mp->m_quotainfo;
struct xfs_dqblk *d;
xfs_dqid_t curid;
@@ -253,6 +303,25 @@ xfs_qm_init_dquot_blk(
xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
}
+static void
+xfs_dquot_set_prealloc(
+ struct xfs_dquot_pre *pre,
+ const struct xfs_dquot_res *res)
+{
+ xfs_qcnt_t space;
+
+ pre->q_prealloc_hi_wmark = res->hardlimit;
+ pre->q_prealloc_lo_wmark = res->softlimit;
+
+ space = div_u64(pre->q_prealloc_hi_wmark, 100);
+ if (!pre->q_prealloc_lo_wmark)
+ pre->q_prealloc_lo_wmark = space * 95;
+
+ pre->q_low_space[XFS_QLOWSP_1_PCNT] = space;
+ pre->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
+ pre->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
+}
+
/*
* Initialize the dynamic speculative preallocation thresholds. The lo/hi
* watermarks correspond to the soft and hard limits by default. If a soft limit
@@ -261,22 +330,8 @@ xfs_qm_init_dquot_blk(
void
xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
{
- uint64_t space;
-
- dqp->q_prealloc_hi_wmark = dqp->q_blk.hardlimit;
- dqp->q_prealloc_lo_wmark = dqp->q_blk.softlimit;
- if (!dqp->q_prealloc_lo_wmark) {
- dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark;
- do_div(dqp->q_prealloc_lo_wmark, 100);
- dqp->q_prealloc_lo_wmark *= 95;
- }
-
- space = dqp->q_prealloc_hi_wmark;
-
- do_div(space, 100);
- dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space;
- dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
- dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
+ xfs_dquot_set_prealloc(&dqp->q_blk_prealloc, &dqp->q_blk);
+ xfs_dquot_set_prealloc(&dqp->q_rtb_prealloc, &dqp->q_rtb);
}
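To make the watermark arithmetic concrete: a hard limit of 1000 blocks with no soft limit yields a default low watermark of 950 blocks and low-space thresholds of 10, 30, and 50 blocks. A stand-alone sketch of the same computation (userspace C with assumed values; the kernel's div_u64() replaced by plain 64-bit division):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t hardlimit = 1000;	/* assumed hard limit, in blocks */
		uint64_t softlimit = 0;		/* no soft limit configured */
		uint64_t hi = hardlimit;	/* q_prealloc_hi_wmark */
		uint64_t lo = softlimit;	/* q_prealloc_lo_wmark */
		uint64_t space = hi / 100;	/* 1% of the hard limit */

		if (!lo)
			lo = space * 95;	/* default lo: 95% of hard limit */

		printf("hi=%llu lo=%llu low_space=[%llu %llu %llu]\n",
		       (unsigned long long)hi, (unsigned long long)lo,
		       (unsigned long long)space,
		       (unsigned long long)(space * 3),
		       (unsigned long long)(space * 5));
		/* prints: hi=1000 lo=950 low_space=[10 30 50] */
		return 0;
	}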
/*
@@ -317,11 +372,8 @@ xfs_dquot_disk_alloc(
goto err_cancel;
}
- error = xfs_iext_count_may_overflow(quotip, XFS_DATA_FORK,
+ error = xfs_iext_count_extend(tp, quotip, XFS_DATA_FORK,
XFS_IEXT_ADD_NOSPLIT_CNT);
- if (error == -EFBIG)
- error = xfs_iext_count_upgrade(tp, quotip,
- XFS_IEXT_ADD_NOSPLIT_CNT);
if (error)
goto err_cancel;
@@ -333,7 +385,6 @@ xfs_dquot_disk_alloc(
goto err_cancel;
ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
- ASSERT(nmaps == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
@@ -353,7 +404,7 @@ xfs_dquot_disk_alloc(
* Make a chunk of dquots out of this buffer and log
* the entire thing.
*/
- xfs_qm_init_dquot_blk(tp, mp, dqp->q_id, qtype, bp);
+ xfs_qm_init_dquot_blk(tp, dqp->q_id, qtype, bp);
xfs_buf_set_ref(bp, XFS_DQUOT_REF);
/*
@@ -451,6 +502,8 @@ xfs_dquot_disk_read(
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
mp->m_quotainfo->qi_dqchunklen, 0, &bp,
&xfs_dquot_buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_dquot_mark_sick(dqp);
if (error) {
ASSERT(bp == NULL);
return error;
@@ -562,7 +615,8 @@ xfs_dquot_from_disk(
struct xfs_dquot *dqp,
struct xfs_buf *bp)
{
- struct xfs_disk_dquot *ddqp = bp->b_addr + dqp->q_bufoffset;
+ struct xfs_dqblk *dqb = xfs_buf_offset(bp, dqp->q_bufoffset);
+ struct xfs_disk_dquot *ddqp = &dqb->dd_diskdq;
/*
* Ensure that we got the type and ID we were looking for.
@@ -573,6 +627,7 @@ xfs_dquot_from_disk(
"Metadata corruption detected at %pS, quota %u",
__this_address, dqp->q_id);
xfs_alert(bp->b_mount, "Unmount and run xfs_repair");
+ xfs_dquot_mark_sick(dqp);
return -EFSCORRUPTED;
}
@@ -746,10 +801,11 @@ xfs_dq_get_next_id(
static struct xfs_dquot *
xfs_qm_dqget_cache_lookup(
struct xfs_mount *mp,
- struct xfs_quotainfo *qi,
- struct radix_tree_root *tree,
- xfs_dqid_t id)
+ xfs_dqid_t id,
+ xfs_dqtype_t type)
{
+ struct xfs_quotainfo *qi = mp->m_quotainfo;
+ struct radix_tree_root *tree = xfs_dquot_tree(qi, type);
struct xfs_dquot *dqp;
restart:
@@ -761,16 +817,12 @@ restart:
return NULL;
}
- xfs_dqlock(dqp);
- if (dqp->q_flags & XFS_DQFLAG_FREEING) {
- xfs_dqunlock(dqp);
+ if (!lockref_get_not_dead(&dqp->q_lockref)) {
mutex_unlock(&qi->qi_tree_lock);
trace_xfs_dqget_freeing(dqp);
delay(1);
goto restart;
}
-
- dqp->q_nrefs++;
mutex_unlock(&qi->qi_tree_lock);
trace_xfs_dqget_hit(dqp);
@@ -781,37 +833,41 @@ restart:
/*
* Try to insert a new dquot into the in-core cache. If an error occurs the
* caller should throw away the dquot and start over. Otherwise, the dquot
- * is returned locked (and held by the cache) as if there had been a cache
- * hit.
+ * is returned (and held by the cache) as if there had been a cache hit.
+ *
+ * The insert needs to be done under memalloc_nofs context because the radix
+ * tree can do memory allocation during insert. The qi->qi_tree_lock is taken in
+ * memory reclaim when freeing unused dquots, so we cannot have the radix tree
+ * node allocation recursing into filesystem reclaim whilst we hold the
+ * qi_tree_lock.
*/
static int
xfs_qm_dqget_cache_insert(
struct xfs_mount *mp,
- struct xfs_quotainfo *qi,
- struct radix_tree_root *tree,
xfs_dqid_t id,
+ xfs_dqtype_t type,
struct xfs_dquot *dqp)
{
+ struct xfs_quotainfo *qi = mp->m_quotainfo;
+ struct radix_tree_root *tree = xfs_dquot_tree(qi, type);
+ unsigned int nofs_flags;
int error;
+ nofs_flags = memalloc_nofs_save();
mutex_lock(&qi->qi_tree_lock);
error = radix_tree_insert(tree, id, dqp);
if (unlikely(error)) {
- /* Duplicate found! Caller must try again. */
- WARN_ON(error != -EEXIST);
- mutex_unlock(&qi->qi_tree_lock);
trace_xfs_dqget_dup(dqp);
- return error;
+ goto out_unlock;
}
- /* Return a locked dquot to the caller, with a reference taken. */
- xfs_dqlock(dqp);
- dqp->q_nrefs = 1;
-
+ lockref_init(&dqp->q_lockref);
qi->qi_dquots++;
- mutex_unlock(&qi->qi_tree_lock);
- return 0;
+out_unlock:
+ mutex_unlock(&qi->qi_tree_lock);
+ memalloc_nofs_restore(nofs_flags);
+ return error;
}
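The memalloc_nofs_save()/memalloc_nofs_restore() pair is the standard guard here: every allocation made between the two calls is implicitly GFP_NOFS, so the radix tree node allocation cannot recurse into filesystem reclaim and deadlock on qi_tree_lock. The shape of the idiom, as a generic sketch (the lock name is hypothetical):

	unsigned int nofs_flags;

	nofs_flags = memalloc_nofs_save();	/* allocations now behave as GFP_NOFS */
	mutex_lock(&lock_also_taken_in_reclaim);
	/* ... allocation-prone work, e.g. radix_tree_insert() ... */
	mutex_unlock(&lock_also_taken_in_reclaim);
	memalloc_nofs_restore(nofs_flags);	/* restore caller's allocation context */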
/* Check our input parameters. */
@@ -841,7 +897,7 @@ xfs_qm_dqget_checks(
/*
* Given the file system, id, and type (UDQUOT/GDQUOT/PDQUOT), return a
- * locked dquot, doing an allocation (if requested) as needed.
+ * dquot, doing an allocation (if requested) as needed.
*/
int
xfs_qm_dqget(
@@ -851,8 +907,6 @@ xfs_qm_dqget(
bool can_alloc,
struct xfs_dquot **O_dqpp)
{
- struct xfs_quotainfo *qi = mp->m_quotainfo;
- struct radix_tree_root *tree = xfs_dquot_tree(qi, type);
struct xfs_dquot *dqp;
int error;
@@ -861,28 +915,30 @@ xfs_qm_dqget(
return error;
restart:
- dqp = xfs_qm_dqget_cache_lookup(mp, qi, tree, id);
- if (dqp) {
- *O_dqpp = dqp;
- return 0;
- }
+ dqp = xfs_qm_dqget_cache_lookup(mp, id, type);
+ if (dqp)
+ goto found;
error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp);
if (error)
return error;
- error = xfs_qm_dqget_cache_insert(mp, qi, tree, id, dqp);
+ error = xfs_qm_dqget_cache_insert(mp, id, type, dqp);
if (error) {
- /*
- * Duplicate found. Just throw away the new dquot and start
- * over.
- */
xfs_qm_dqdestroy(dqp);
- XFS_STATS_INC(mp, xs_qm_dquot_dups);
- goto restart;
+ if (error == -EEXIST) {
+ /*
+ * Duplicate found. Just throw away the new dquot and
+ * start over.
+ */
+ XFS_STATS_INC(mp, xs_qm_dquot_dups);
+ goto restart;
+ }
+ return error;
}
trace_xfs_dqget_miss(dqp);
+found:
*O_dqpp = dqp;
return 0;
}
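The overall control flow of xfs_qm_dqget() is the usual optimistic cache-fill pattern: look up, read from disk with no locks held, then retry the whole lookup if another thread inserted the same id first. In outline (a sketch with hypothetical helper names mirroring the code above):

	for (;;) {
		dqp = lookup_cache(id);			/* cache hit: done */
		if (dqp)
			return dqp;
		dqp = read_dquot_from_disk(id);		/* slow path, unlocked */
		error = insert_cache(id, dqp);
		if (!error)
			return dqp;			/* we won the insert race */
		destroy_dquot(dqp);			/* ours is surplus either way */
		if (error != -EEXIST)
			return ERR_PTR(error);		/* real failure */
		/* -EEXIST: someone beat us to the insert; start over */
	}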
@@ -937,30 +993,30 @@ xfs_qm_dqget_inode(
struct xfs_inode *ip,
xfs_dqtype_t type,
bool can_alloc,
- struct xfs_dquot **O_dqpp)
+ struct xfs_dquot **dqpp)
{
struct xfs_mount *mp = ip->i_mount;
- struct xfs_quotainfo *qi = mp->m_quotainfo;
- struct radix_tree_root *tree = xfs_dquot_tree(qi, type);
struct xfs_dquot *dqp;
xfs_dqid_t id;
int error;
+ ASSERT(!*dqpp);
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
+
error = xfs_qm_dqget_checks(mp, type);
if (error)
return error;
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
ASSERT(xfs_inode_dquot(ip, type) == NULL);
+ ASSERT(!xfs_is_metadir_inode(ip));
id = xfs_qm_id_for_quotatype(ip, type);
restart:
- dqp = xfs_qm_dqget_cache_lookup(mp, qi, tree, id);
- if (dqp) {
- *O_dqpp = dqp;
- return 0;
- }
+ dqp = xfs_qm_dqget_cache_lookup(mp, id, type);
+ if (dqp)
+ goto found;
/*
* Dquot cache miss. We don't want to keep the inode lock across
@@ -986,7 +1042,6 @@ restart:
if (dqp1) {
xfs_qm_dqdestroy(dqp);
dqp = dqp1;
- xfs_dqlock(dqp);
goto dqret;
}
} else {
@@ -995,21 +1050,26 @@ restart:
return -ESRCH;
}
- error = xfs_qm_dqget_cache_insert(mp, qi, tree, id, dqp);
+ error = xfs_qm_dqget_cache_insert(mp, id, type, dqp);
if (error) {
- /*
- * Duplicate found. Just throw away the new dquot and start
- * over.
- */
xfs_qm_dqdestroy(dqp);
- XFS_STATS_INC(mp, xs_qm_dquot_dups);
- goto restart;
+ if (error == -EEXIST) {
+ /*
+ * Duplicate found. Just throw away the new dquot and
+ * start over.
+ */
+ XFS_STATS_INC(mp, xs_qm_dquot_dups);
+ goto restart;
+ }
+ return error;
}
dqret:
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
trace_xfs_dqget_miss(dqp);
- *O_dqpp = dqp;
+found:
+ trace_xfs_dqattach_get(dqp);
+ *dqpp = dqp;
return 0;
}
@@ -1035,45 +1095,21 @@ xfs_qm_dqget_next(
else if (error != 0)
break;
+ mutex_lock(&dqp->q_qlock);
if (!XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
*dqpp = dqp;
return 0;
}
- xfs_qm_dqput(dqp);
+ mutex_unlock(&dqp->q_qlock);
+ xfs_qm_dqrele(dqp);
}
return error;
}
/*
- * Release a reference to the dquot (decrement ref-count) and unlock it.
- *
- * If there is a group quota attached to this dquot, carefully release that
- * too without tripping over deadlocks'n'stuff.
- */
-void
-xfs_qm_dqput(
- struct xfs_dquot *dqp)
-{
- ASSERT(dqp->q_nrefs > 0);
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
- trace_xfs_dqput(dqp);
-
- if (--dqp->q_nrefs == 0) {
- struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo;
- trace_xfs_dqput_free(dqp);
-
- if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
- XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused);
- }
- xfs_dqunlock(dqp);
-}
-
-/*
- * Release a dquot. Flush it if dirty, then dqput() it.
- * dquot must not be locked.
+ * Release a reference to the dquot.
*/
void
xfs_qm_dqrele(
@@ -1084,14 +1120,16 @@ xfs_qm_dqrele(
trace_xfs_dqrele(dqp);
- xfs_dqlock(dqp);
- /*
- * We don't care to flush it if the dquot is dirty here.
- * That will create stutters that we want to avoid.
- * Instead we do a delayed write when we try to reclaim
- * a dirty dquot. Also xfs_sync will take part of the burden...
- */
- xfs_qm_dqput(dqp);
+ if (lockref_put_or_lock(&dqp->q_lockref))
+ return;
+ if (!--dqp->q_lockref.count) {
+ struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo;
+
+ trace_xfs_dqrele_free(dqp);
+ if (list_lru_add_obj(&qi->qi_lru, &dqp->q_lru))
+ XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused);
+ }
+ spin_unlock(&dqp->q_lockref.lock);
}
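The rewritten release path leans on the lockref fast path: lockref_put_or_lock() drops the count with a cmpxchg when the result stays above zero and returns true; otherwise it returns false with q_lockref.lock held, so only the final reference pays for the spinlock and the LRU insertion. The generic pattern, sketched with a hypothetical object type:

	/* hypothetical object embedding a struct lockref named 'ref' */
	if (lockref_put_or_lock(&obj->ref))
		return;				/* fast path: count was > 1 */
	/* slow path: spinlock held, dropping the last reference */
	if (!--obj->ref.count)
		park_on_lru(obj);		/* hypothetical cleanup hook */
	spin_unlock(&obj->ref.lock);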
/*
@@ -1105,9 +1143,11 @@ static void
xfs_qm_dqflush_done(
struct xfs_log_item *lip)
{
- struct xfs_dq_logitem *qip = (struct xfs_dq_logitem *)lip;
- struct xfs_dquot *dqp = qip->qli_dquot;
+ struct xfs_dq_logitem *qlip =
+ container_of(lip, struct xfs_dq_logitem, qli_item);
+ struct xfs_dquot *dqp = qlip->qli_dquot;
struct xfs_ail *ailp = lip->li_ailp;
+ struct xfs_buf *bp = NULL;
xfs_lsn_t tail_lsn;
/*
@@ -1119,12 +1159,11 @@ xfs_qm_dqflush_done(
* holding the lock before removing the dquot from the AIL.
*/
if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) &&
- ((lip->li_lsn == qip->qli_flush_lsn) ||
+ (lip->li_lsn == qlip->qli_flush_lsn ||
test_bit(XFS_LI_FAILED, &lip->li_flags))) {
-
spin_lock(&ailp->ail_lock);
- xfs_clear_li_failed(lip);
- if (lip->li_lsn == qip->qli_flush_lsn) {
+ clear_bit(XFS_LI_FAILED, &lip->li_flags);
+ if (lip->li_lsn == qlip->qli_flush_lsn) {
/* xfs_ail_update_finish() drops the AIL lock */
tail_lsn = xfs_ail_delete_one(ailp, lip);
xfs_ail_update_finish(ailp, tail_lsn);
@@ -1134,6 +1173,20 @@ xfs_qm_dqflush_done(
}
/*
+ * If this dquot hasn't been dirtied since initiating the last dqflush,
+ * release the buffer reference. We already unlinked this dquot item
+ * from the buffer.
+ */
+ spin_lock(&qlip->qli_lock);
+ if (!qlip->qli_dirty) {
+ bp = lip->li_buf;
+ lip->li_buf = NULL;
+ }
+ spin_unlock(&qlip->qli_lock);
+ if (bp)
+ xfs_buf_rele(bp);
+
+ /*
* Release the dq's flush lock since we're done with it.
*/
xfs_dqfunlock(dqp);
@@ -1151,18 +1204,6 @@ xfs_buf_dquot_iodone(
}
}
-void
-xfs_buf_dquot_io_fail(
- struct xfs_buf *bp)
-{
- struct xfs_log_item *lip;
-
- spin_lock(&bp->b_mount->m_ail->ail_lock);
- list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
- xfs_set_li_failed(lip, bp);
- spin_unlock(&bp->b_mount->m_ail->ail_lock);
-}
-
/* Check incore dquot for errors before we flush. */
static xfs_failaddr_t
xfs_qm_dqflush_check(
@@ -1202,6 +1243,115 @@ xfs_qm_dqflush_check(
}
/*
+ * Get the buffer containing the on-disk dquot.
+ *
+ * Requires dquot flush lock, will clear the dirty flag, delete the quota log
+ * item from the AIL, and shut down the system if something goes wrong.
+ */
+static int
+xfs_dquot_read_buf(
+ struct xfs_trans *tp,
+ struct xfs_dquot *dqp,
+ struct xfs_buf **bpp)
+{
+ struct xfs_mount *mp = dqp->q_mount;
+ struct xfs_buf *bp = NULL;
+ int error;
+
+ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno,
+ mp->m_quotainfo->qi_dqchunklen, 0,
+ &bp, &xfs_dquot_buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_dquot_mark_sick(dqp);
+ if (error)
+ goto out_abort;
+
+ *bpp = bp;
+ return 0;
+
+out_abort:
+ dqp->q_flags &= ~XFS_DQFLAG_DIRTY;
+ xfs_trans_ail_delete(&dqp->q_logitem.qli_item, 0);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ return error;
+}
+
+/*
+ * Attach a dquot buffer to this dquot to avoid allocating a buffer during a
+ * dqflush, since dqflush can be called from reclaim context. Caller must hold
+ * the dqlock.
+ */
+int
+xfs_dquot_attach_buf(
+ struct xfs_trans *tp,
+ struct xfs_dquot *dqp)
+{
+ struct xfs_dq_logitem *qlip = &dqp->q_logitem;
+ struct xfs_log_item *lip = &qlip->qli_item;
+ int error;
+
+ spin_lock(&qlip->qli_lock);
+ if (!lip->li_buf) {
+ struct xfs_buf *bp = NULL;
+
+ spin_unlock(&qlip->qli_lock);
+ error = xfs_dquot_read_buf(tp, dqp, &bp);
+ if (error)
+ return error;
+
+ /*
+ * Hold the dquot buffer so that we retain our ref to it after
+ * detaching it from the transaction, then give that ref to the
+ * dquot log item so that the AIL does not have to read the
+ * dquot buffer to push this item.
+ */
+ xfs_buf_hold(bp);
+ xfs_trans_brelse(tp, bp);
+
+ spin_lock(&qlip->qli_lock);
+ lip->li_buf = bp;
+ }
+ qlip->qli_dirty = true;
+ spin_unlock(&qlip->qli_lock);
+
+ return 0;
+}
+
+/*
+ * Get a new reference to the dquot buffer attached to this dquot for a dqflush
+ * operation.
+ *
+ * Returns 0 and a NULL bp if none was attached to the dquot; 0 and a locked
+ * bp; or -EAGAIN if the buffer could not be locked.
+ */
+int
+xfs_dquot_use_attached_buf(
+ struct xfs_dquot *dqp,
+ struct xfs_buf **bpp)
+{
+ struct xfs_buf *bp = dqp->q_logitem.qli_item.li_buf;
+
+ /*
+ * A NULL buffer can happen if the dquot dirty flag was set but the
+ * filesystem shut down before transaction commit happened. In that
+ * case we're not going to flush anyway.
+ */
+ if (!bp) {
+ ASSERT(xfs_is_shutdown(dqp->q_mount));
+
+ *bpp = NULL;
+ return 0;
+ }
+
+ if (!xfs_buf_trylock(bp))
+ return -EAGAIN;
+
+ xfs_buf_hold(bp);
+ *bpp = bp;
+ return 0;
+}
+
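A flush-path caller of xfs_dquot_use_attached_buf() therefore handles three cases: -EAGAIN (buffer lock contended), success with a NULL buffer (shutdown race, nothing to flush), and success with a locked, held buffer to hand to the reworked xfs_qm_dqflush(). Roughly, as a hedged sketch of a hypothetical caller (the real flush sites live outside this file, and the labels are illustrative):

	struct xfs_buf	*bp = NULL;
	int		error;

	error = xfs_dquot_use_attached_buf(dqp, &bp);
	if (error == -EAGAIN)
		goto out_requeue;		/* contended; try this dquot later */
	if (!bp) {
		xfs_dqfunlock(dqp);		/* shut down; nothing to write */
		goto out;
	}
	error = xfs_qm_dqflush(dqp, bp);	/* new signature takes the buffer */
	xfs_buf_relse(bp);			/* drop our lock and hold */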
+/*
* Write a modified dquot to disk.
 * The caller must hold both the dquot lock and the flush lock.
* The flush lock will not be unlocked until the dquot reaches the disk,
@@ -1212,46 +1362,31 @@ xfs_qm_dqflush_check(
int
xfs_qm_dqflush(
struct xfs_dquot *dqp,
- struct xfs_buf **bpp)
+ struct xfs_buf *bp)
{
struct xfs_mount *mp = dqp->q_mount;
- struct xfs_log_item *lip = &dqp->q_logitem.qli_item;
- struct xfs_buf *bp;
+ struct xfs_dq_logitem *qlip = &dqp->q_logitem;
+ struct xfs_log_item *lip = &qlip->qli_item;
struct xfs_dqblk *dqblk;
xfs_failaddr_t fa;
int error;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
ASSERT(!completion_done(&dqp->q_flush));
+ ASSERT(atomic_read(&dqp->q_pincount) == 0);
trace_xfs_dqflush(dqp);
-
- *bpp = NULL;
-
- xfs_qm_dqunpin_wait(dqp);
-
- /*
- * Get the buffer containing the on-disk dquot
- */
- error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK,
- &bp, &xfs_dquot_buf_ops);
- if (error == -EAGAIN)
- goto out_unlock;
- if (error)
- goto out_abort;
-
fa = xfs_qm_dqflush_check(dqp);
if (fa) {
xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
dqp->q_id, fa);
- xfs_buf_relse(bp);
+ xfs_dquot_mark_sick(dqp);
error = -EFSCORRUPTED;
goto out_abort;
}
/* Flush the incore dquot to the ondisk buffer. */
- dqblk = bp->b_addr + dqp->q_bufoffset;
+ dqblk = xfs_buf_offset(bp, dqp->q_bufoffset);
xfs_dquot_to_disk(&dqblk->dd_diskdq, dqp);
/*
@@ -1259,8 +1394,15 @@ xfs_qm_dqflush(
*/
dqp->q_flags &= ~XFS_DQFLAG_DIRTY;
- xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
- &dqp->q_logitem.qli_item.li_lsn);
+ /*
+ * We hold the dquot lock, so nobody can dirty it while we're
+ * scheduling the write out. Clear the dirty-since-flush flag.
+ */
+ spin_lock(&qlip->qli_lock);
+ qlip->qli_dirty = false;
+ spin_unlock(&qlip->qli_lock);
+
+ xfs_trans_ail_copy_lsn(mp->m_ail, &qlip->qli_flush_lsn, &lip->li_lsn);
/*
* copy the lsn into the on-disk dquot now while we have the in memory
@@ -1272,7 +1414,7 @@ xfs_qm_dqflush(
* of a dquot without an up-to-date CRC getting to disk.
*/
if (xfs_has_crc(mp)) {
- dqblk->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
+ dqblk->dd_lsn = cpu_to_be64(lip->li_lsn);
xfs_update_cksum((char *)dqblk, sizeof(struct xfs_dqblk),
XFS_DQUOT_CRC_OFF);
}
@@ -1281,8 +1423,8 @@ xfs_qm_dqflush(
* Attach the dquot to the buffer so that we can remove this dquot from
* the AIL and release the flush lock once the dquot is synced to disk.
*/
- bp->b_flags |= _XBF_DQUOTS;
- list_add_tail(&dqp->q_logitem.qli_item.li_bio_list, &bp->b_li_list);
+ bp->b_iodone = xfs_buf_dquot_iodone;
+ list_add_tail(&lip->li_bio_list, &bp->b_li_list);
/*
* If the buffer is pinned then push on the log so we won't
@@ -1294,14 +1436,12 @@ xfs_qm_dqflush(
}
trace_xfs_dqflush_done(dqp);
- *bpp = bp;
return 0;
out_abort:
dqp->q_flags &= ~XFS_DQFLAG_DIRTY;
xfs_trans_ail_delete(lip, 0);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-out_unlock:
xfs_dqfunlock(dqp);
return error;
}
@@ -1333,6 +1473,47 @@ xfs_dqlock2(
}
}
+static int
+xfs_dqtrx_cmp(
+ const void *a,
+ const void *b)
+{
+ const struct xfs_dqtrx *qa = a;
+ const struct xfs_dqtrx *qb = b;
+
+ if (qa->qt_dquot->q_id > qb->qt_dquot->q_id)
+ return 1;
+ if (qa->qt_dquot->q_id < qb->qt_dquot->q_id)
+ return -1;
+ return 0;
+}
+
+void
+xfs_dqlockn(
+ struct xfs_dqtrx *q)
+{
+ unsigned int i;
+
+ BUILD_BUG_ON(XFS_QM_TRANS_MAXDQS > MAX_LOCKDEP_SUBCLASSES);
+
+ /* Sort in order of dquot id, do not allow duplicates */
+ for (i = 0; i < XFS_QM_TRANS_MAXDQS && q[i].qt_dquot != NULL; i++) {
+ unsigned int j;
+
+ for (j = 0; j < i; j++)
+ ASSERT(q[i].qt_dquot != q[j].qt_dquot);
+ }
+ if (i == 0)
+ return;
+
+ sort(q, i, sizeof(struct xfs_dqtrx), xfs_dqtrx_cmp, NULL);
+
+ mutex_lock(&q[0].qt_dquot->q_qlock);
+ for (i = 1; i < XFS_QM_TRANS_MAXDQS && q[i].qt_dquot != NULL; i++)
+ mutex_lock_nested(&q[i].qt_dquot->q_qlock,
+ XFS_QLOCK_NESTED + i - 1);
+}
+
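Sorting by dquot id before locking gives every thread the same global acquisition order, the classic defence against ABBA deadlocks; the lockdep subclass then only encodes nesting depth. The same idea as a stand-alone illustration (userspace C with toy types, and qsort in place of the kernel's sort()):

	#include <stdio.h>
	#include <stdlib.h>

	struct dq { unsigned int id; };		/* toy stand-in for xfs_dquot */

	static int dq_cmp(const void *a, const void *b)
	{
		const struct dq *qa = *(const struct dq * const *)a;
		const struct dq *qb = *(const struct dq * const *)b;

		if (qa->id > qb->id)
			return 1;
		if (qa->id < qb->id)
			return -1;
		return 0;
	}

	int main(void)
	{
		struct dq d1 = { 42 }, d2 = { 7 };
		struct dq *locks[] = { &d1, &d2 };

		/* every thread sorts first, so all lock in the order 7, then 42 */
		qsort(locks, 2, sizeof(locks[0]), dq_cmp);
		for (int i = 0; i < 2; i++)
			printf("lock dquot %u\n", locks[i]->id);	/* mutex_lock here */
		return 0;
	}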
int __init
xfs_qm_init(void)
{
@@ -1362,34 +1543,3 @@ xfs_qm_exit(void)
kmem_cache_destroy(xfs_dqtrx_cache);
kmem_cache_destroy(xfs_dquot_cache);
}
-
-/*
- * Iterate every dquot of a particular type. The caller must ensure that the
- * particular quota type is active. iter_fn can return negative error codes,
- * or -ECANCELED to indicate that it wants to stop iterating.
- */
-int
-xfs_qm_dqiterate(
- struct xfs_mount *mp,
- xfs_dqtype_t type,
- xfs_qm_dqiterate_fn iter_fn,
- void *priv)
-{
- struct xfs_dquot *dq;
- xfs_dqid_t id = 0;
- int error;
-
- do {
- error = xfs_qm_dqget_next(mp, id, type, &dq);
- if (error == -ENOENT)
- return 0;
- if (error)
- return error;
-
- error = iter_fn(dq, type, priv);
- id = dq->q_id;
- xfs_qm_dqput(dq);
- } while (error == 0 && id != 0);
-
- return error;
-}