Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r--  fs/xfs/xfs_log.c  434
 1 file changed, 206 insertions(+), 228 deletions(-)
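The caller-visible change in this diff is that the _xfs_log_force()/_xfs_log_force_lsn() variants go away and the plain xfs_log_force()/xfs_log_force_lsn() entry points now return the error themselves. A minimal before/after sketch of a caller, using only the signatures visible in the hunks below (mp, lsn and the local variables are illustrative, not taken from this file):

	struct xfs_mount	*mp = /* ... */ NULL;
	xfs_lsn_t		lsn = 0;
	int			log_flushed = 0;
	int			error;

	/*
	 * Before: the underscore variant returned the error and took an
	 * optional log_flushed out-parameter; xfs_log_force() was a void
	 * wrapper around it.
	 */
	error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);

	/*
	 * After: xfs_log_force() returns the error directly and drops the
	 * out-parameter, while xfs_log_force_lsn() keeps it for callers
	 * that need to know whether a flush actually happened.
	 */
	error = xfs_log_force(mp, XFS_LOG_SYNC);
	error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);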
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c5107c7bc4bf..b9c9c848146b 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -22,6 +22,7 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
+#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
@@ -608,6 +609,7 @@ xfs_log_mount(
xfs_daddr_t blk_offset,
int num_bblks)
{
+ bool fatal = xfs_sb_version_hascrc(&mp->m_sb);
int error = 0;
int min_logfsbs;
@@ -659,9 +661,20 @@ xfs_log_mount(
XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
XFS_MAX_LOG_BYTES);
error = -EINVAL;
+ } else if (mp->m_sb.sb_logsunit > 1 &&
+ mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) {
+ xfs_warn(mp,
+ "log stripe unit %u bytes must be a multiple of block size",
+ mp->m_sb.sb_logsunit);
+ error = -EINVAL;
+ fatal = true;
}
if (error) {
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ /*
+ * Log check errors are always fatal on v5; or whenever bad
+ * metadata leads to a crash.
+ */
+ if (fatal) {
xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!");
ASSERT(0);
goto out_free_log;
@@ -744,6 +757,7 @@ xfs_log_mount_finish(
{
int error = 0;
bool readonly = (mp->m_flags & XFS_MOUNT_RDONLY);
+ bool recovered = mp->m_log->l_flags & XLOG_RECOVERY_NEEDED;
if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
@@ -767,19 +781,34 @@ xfs_log_mount_finish(
* something to an unlinked inode, the irele won't cause
* premature truncation and freeing of the inode, which results
* in log recovery failure. We have to evict the unreferenced
- * lru inodes after clearing MS_ACTIVE because we don't
+ * lru inodes after clearing SB_ACTIVE because we don't
* otherwise clean up the lru if there's a subsequent failure in
* xfs_mountfs, which leads to us leaking the inodes if nothing
* else (e.g. quotacheck) references the inodes before the
* mount failure occurs.
*/
- mp->m_super->s_flags |= MS_ACTIVE;
+ mp->m_super->s_flags |= SB_ACTIVE;
error = xlog_recover_finish(mp->m_log);
if (!error)
xfs_log_work_queue(mp);
- mp->m_super->s_flags &= ~MS_ACTIVE;
+ mp->m_super->s_flags &= ~SB_ACTIVE;
evict_inodes(mp->m_super);
+ /*
+ * Drain the buffer LRU after log recovery. This is required for v4
+ * filesystems to avoid leaving around buffers with NULL verifier ops,
+ * but we do it unconditionally to make sure we're always in a clean
+ * cache state after mount.
+ *
+ * Don't push in the error case because the AIL may have pending intents
+ * that aren't removed until recovery is cancelled.
+ */
+ if (!error && recovered) {
+ xfs_log_force(mp, XFS_LOG_SYNC);
+ xfs_ail_push_all_sync(mp->m_ail);
+ }
+ xfs_wait_buftarg(mp->m_ddev_targp);
+
if (readonly)
mp->m_flags |= XFS_MOUNT_RDONLY;
@@ -840,7 +869,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
return 0;
}
- error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
+ error = xfs_log_force(mp, XFS_LOG_SYNC);
ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
#ifdef DEBUG
@@ -1018,6 +1047,7 @@ xfs_log_item_init(
INIT_LIST_HEAD(&item->li_ail);
INIT_LIST_HEAD(&item->li_cil);
+ INIT_LIST_HEAD(&item->li_bio_list);
}
/*
@@ -1119,7 +1149,7 @@ xlog_assign_tail_lsn_locked(
struct xfs_log_item *lip;
xfs_lsn_t tail_lsn;
- assert_spin_locked(&mp->m_ail->xa_lock);
+ assert_spin_locked(&mp->m_ail->ail_lock);
/*
* To make sure we always have a valid LSN for the log tail we keep
@@ -1142,9 +1172,9 @@ xlog_assign_tail_lsn(
{
xfs_lsn_t tail_lsn;
- spin_lock(&mp->m_ail->xa_lock);
+ spin_lock(&mp->m_ail->ail_lock);
tail_lsn = xlog_assign_tail_lsn_locked(mp);
- spin_unlock(&mp->m_ail->xa_lock);
+ spin_unlock(&mp->m_ail->ail_lock);
return tail_lsn;
}
@@ -1213,7 +1243,7 @@ xlog_space_left(
static void
xlog_iodone(xfs_buf_t *bp)
{
- struct xlog_in_core *iclog = bp->b_fspriv;
+ struct xlog_in_core *iclog = bp->b_log_item;
struct xlog *l = iclog->ic_log;
int aborted = 0;
@@ -1744,7 +1774,7 @@ STATIC int
xlog_bdstrat(
struct xfs_buf *bp)
{
- struct xlog_in_core *iclog = bp->b_fspriv;
+ struct xlog_in_core *iclog = bp->b_log_item;
xfs_buf_lock(bp);
if (iclog->ic_state & XLOG_STATE_IOERROR) {
@@ -1890,7 +1920,7 @@ xlog_sync(
}
bp->b_io_length = BTOBB(count);
- bp->b_fspriv = iclog;
+ bp->b_log_item = iclog;
bp->b_flags &= ~XBF_FLUSH;
bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
@@ -1929,7 +1959,7 @@ xlog_sync(
XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */
xfs_buf_associate_memory(bp,
(char *)&iclog->ic_header + count, split);
- bp->b_fspriv = iclog;
+ bp->b_log_item = iclog;
bp->b_flags &= ~XBF_FLUSH;
bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
@@ -2088,7 +2118,9 @@ xlog_print_trans(
/* dump core transaction and ticket info */
xfs_warn(mp, "transaction summary:");
- xfs_warn(mp, " flags = 0x%x", tp->t_flags);
+ xfs_warn(mp, " log res = %d", tp->t_log_res);
+ xfs_warn(mp, " log count = %d", tp->t_log_count);
+ xfs_warn(mp, " flags = 0x%x", tp->t_flags);
xlog_print_tic_res(mp, tp->t_ticket);
@@ -2213,7 +2245,7 @@ xlog_write_setup_ophdr(
break;
default:
xfs_warn(log->l_mp,
- "Bad XFS transaction clientid 0x%x in ticket 0x%p",
+ "Bad XFS transaction clientid 0x%x in ticket "PTR_FMT,
ophdr->oh_clientid, ticket);
return NULL;
}
@@ -2515,7 +2547,7 @@ next_lv:
if (lv)
vecp = lv->lv_iovecp;
}
- if (record_cnt == 0 && ordered == false) {
+ if (record_cnt == 0 && !ordered) {
if (!lv)
return 0;
break;
@@ -3272,269 +3304,215 @@ xlog_state_switch_iclogs(
* not in the active nor dirty state.
*/
int
-_xfs_log_force(
+xfs_log_force(
struct xfs_mount *mp,
- uint flags,
- int *log_flushed)
+ uint flags)
{
struct xlog *log = mp->m_log;
struct xlog_in_core *iclog;
xfs_lsn_t lsn;
XFS_STATS_INC(mp, xs_log_force);
+ trace_xfs_log_force(mp, 0, _RET_IP_);
xlog_cil_force(log);
spin_lock(&log->l_icloglock);
-
iclog = log->l_iclog;
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
- spin_unlock(&log->l_icloglock);
- return -EIO;
- }
+ if (iclog->ic_state & XLOG_STATE_IOERROR)
+ goto out_error;
- /* If the head iclog is not active nor dirty, we just attach
- * ourselves to the head and go to sleep.
- */
- if (iclog->ic_state == XLOG_STATE_ACTIVE ||
- iclog->ic_state == XLOG_STATE_DIRTY) {
+ if (iclog->ic_state == XLOG_STATE_DIRTY ||
+ (iclog->ic_state == XLOG_STATE_ACTIVE &&
+ atomic_read(&iclog->ic_refcnt) == 0 && iclog->ic_offset == 0)) {
/*
- * If the head is dirty or (active and empty), then
- * we need to look at the previous iclog. If the previous
- * iclog is active or dirty we are done. There is nothing
- * to sync out. Otherwise, we attach ourselves to the
+ * If the head is dirty or (active and empty), then we need to
+ * look at the previous iclog.
+ *
+ * If the previous iclog is active or dirty we are done. There
+ * is nothing to sync out. Otherwise, we attach ourselves to the
* previous iclog and go to sleep.
*/
- if (iclog->ic_state == XLOG_STATE_DIRTY ||
- (atomic_read(&iclog->ic_refcnt) == 0
- && iclog->ic_offset == 0)) {
- iclog = iclog->ic_prev;
- if (iclog->ic_state == XLOG_STATE_ACTIVE ||
- iclog->ic_state == XLOG_STATE_DIRTY)
- goto no_sleep;
- else
- goto maybe_sleep;
- } else {
- if (atomic_read(&iclog->ic_refcnt) == 0) {
- /* We are the only one with access to this
- * iclog. Flush it out now. There should
- * be a roundoff of zero to show that someone
- * has already taken care of the roundoff from
- * the previous sync.
- */
- atomic_inc(&iclog->ic_refcnt);
- lsn = be64_to_cpu(iclog->ic_header.h_lsn);
- xlog_state_switch_iclogs(log, iclog, 0);
- spin_unlock(&log->l_icloglock);
+ iclog = iclog->ic_prev;
+ if (iclog->ic_state == XLOG_STATE_ACTIVE ||
+ iclog->ic_state == XLOG_STATE_DIRTY)
+ goto out_unlock;
+ } else if (iclog->ic_state == XLOG_STATE_ACTIVE) {
+ if (atomic_read(&iclog->ic_refcnt) == 0) {
+ /*
+ * We are the only one with access to this iclog.
+ *
+ * Flush it out now. There should be a roundoff of zero
+ * to show that someone has already taken care of the
+ * roundoff from the previous sync.
+ */
+ atomic_inc(&iclog->ic_refcnt);
+ lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+ xlog_state_switch_iclogs(log, iclog, 0);
+ spin_unlock(&log->l_icloglock);
- if (xlog_state_release_iclog(log, iclog))
- return -EIO;
-
- if (log_flushed)
- *log_flushed = 1;
- spin_lock(&log->l_icloglock);
- if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn &&
- iclog->ic_state != XLOG_STATE_DIRTY)
- goto maybe_sleep;
- else
- goto no_sleep;
- } else {
- /* Someone else is writing to this iclog.
- * Use its call to flush out the data. However,
- * the other thread may not force out this LR,
- * so we mark it WANT_SYNC.
- */
- xlog_state_switch_iclogs(log, iclog, 0);
- goto maybe_sleep;
- }
- }
- }
+ if (xlog_state_release_iclog(log, iclog))
+ return -EIO;
- /* By the time we come around again, the iclog could've been filled
- * which would give it another lsn. If we have a new lsn, just
- * return because the relevant data has been flushed.
- */
-maybe_sleep:
- if (flags & XFS_LOG_SYNC) {
- /*
- * We must check if we're shutting down here, before
- * we wait, while we're holding the l_icloglock.
- * Then we check again after waking up, in case our
- * sleep was disturbed by a bad news.
- */
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
- spin_unlock(&log->l_icloglock);
- return -EIO;
+ spin_lock(&log->l_icloglock);
+ if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn ||
+ iclog->ic_state == XLOG_STATE_DIRTY)
+ goto out_unlock;
+ } else {
+ /*
+ * Someone else is writing to this iclog.
+ *
+ * Use its call to flush out the data. However, the
+ * other thread may not force out this LR, so we mark
+ * it WANT_SYNC.
+ */
+ xlog_state_switch_iclogs(log, iclog, 0);
}
- XFS_STATS_INC(mp, xs_log_force_sleep);
- xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
+ } else {
/*
- * No need to grab the log lock here since we're
- * only deciding whether or not to return EIO
- * and the memory read should be atomic.
+ * If the head iclog is not active nor dirty, we just attach
+ * ourselves to the head and go to sleep if necessary.
*/
- if (iclog->ic_state & XLOG_STATE_IOERROR)
- return -EIO;
- } else {
-
-no_sleep:
- spin_unlock(&log->l_icloglock);
+ ;
}
+
+ if (!(flags & XFS_LOG_SYNC))
+ goto out_unlock;
+
+ if (iclog->ic_state & XLOG_STATE_IOERROR)
+ goto out_error;
+ XFS_STATS_INC(mp, xs_log_force_sleep);
+ xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
+ if (iclog->ic_state & XLOG_STATE_IOERROR)
+ return -EIO;
return 0;
-}
-/*
- * Wrapper for _xfs_log_force(), to be used when caller doesn't care
- * about errors or whether the log was flushed or not. This is the normal
- * interface to use when trying to unpin items or move the log forward.
- */
-void
-xfs_log_force(
- xfs_mount_t *mp,
- uint flags)
-{
- trace_xfs_log_force(mp, 0, _RET_IP_);
- _xfs_log_force(mp, flags, NULL);
+out_unlock:
+ spin_unlock(&log->l_icloglock);
+ return 0;
+out_error:
+ spin_unlock(&log->l_icloglock);
+ return -EIO;
}
-/*
- * Force the in-core log to disk for a specific LSN.
- *
- * Find in-core log with lsn.
- * If it is in the DIRTY state, just return.
- * If it is in the ACTIVE state, move the in-core log into the WANT_SYNC
- * state and go to sleep or return.
- * If it is in any other state, go to sleep or return.
- *
- * Synchronous forces are implemented with a signal variable. All callers
- * to force a given lsn to disk will wait on a the sv attached to the
- * specific in-core log. When given in-core log finally completes its
- * write to disk, that thread will wake up all threads waiting on the
- * sv.
- */
-int
-_xfs_log_force_lsn(
+static int
+__xfs_log_force_lsn(
struct xfs_mount *mp,
xfs_lsn_t lsn,
uint flags,
- int *log_flushed)
+ int *log_flushed,
+ bool already_slept)
{
struct xlog *log = mp->m_log;
struct xlog_in_core *iclog;
- int already_slept = 0;
- ASSERT(lsn != 0);
-
- XFS_STATS_INC(mp, xs_log_force);
-
- lsn = xlog_cil_force_lsn(log, lsn);
- if (lsn == NULLCOMMITLSN)
- return 0;
-
-try_again:
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
- spin_unlock(&log->l_icloglock);
- return -EIO;
- }
+ if (iclog->ic_state & XLOG_STATE_IOERROR)
+ goto out_error;
- do {
- if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
- iclog = iclog->ic_next;
- continue;
- }
+ while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
+ iclog = iclog->ic_next;
+ if (iclog == log->l_iclog)
+ goto out_unlock;
+ }
- if (iclog->ic_state == XLOG_STATE_DIRTY) {
- spin_unlock(&log->l_icloglock);
- return 0;
- }
+ if (iclog->ic_state == XLOG_STATE_DIRTY)
+ goto out_unlock;
- if (iclog->ic_state == XLOG_STATE_ACTIVE) {
- /*
- * We sleep here if we haven't already slept (e.g.
- * this is the first time we've looked at the correct
- * iclog buf) and the buffer before us is going to
- * be sync'ed. The reason for this is that if we
- * are doing sync transactions here, by waiting for
- * the previous I/O to complete, we can allow a few
- * more transactions into this iclog before we close
- * it down.
- *
- * Otherwise, we mark the buffer WANT_SYNC, and bump
- * up the refcnt so we can release the log (which
- * drops the ref count). The state switch keeps new
- * transaction commits from using this buffer. When
- * the current commits finish writing into the buffer,
- * the refcount will drop to zero and the buffer will
- * go out then.
- */
- if (!already_slept &&
- (iclog->ic_prev->ic_state &
- (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
- ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
+ if (iclog->ic_state == XLOG_STATE_ACTIVE) {
+ /*
+ * We sleep here if we haven't already slept (e.g. this is the
+ * first time we've looked at the correct iclog buf) and the
+ * buffer before us is going to be sync'ed. The reason for this
+ * is that if we are doing sync transactions here, by waiting
+ * for the previous I/O to complete, we can allow a few more
+ * transactions into this iclog before we close it down.
+ *
+ * Otherwise, we mark the buffer WANT_SYNC, and bump up the
+ * refcnt so we can release the log (which drops the ref count).
+ * The state switch keeps new transaction commits from using
+ * this buffer. When the current commits finish writing into
+ * the buffer, the refcount will drop to zero and the buffer
+ * will go out then.
+ */
+ if (!already_slept &&
+ (iclog->ic_prev->ic_state &
+ (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
+ ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
- XFS_STATS_INC(mp, xs_log_force_sleep);
+ XFS_STATS_INC(mp, xs_log_force_sleep);
- xlog_wait(&iclog->ic_prev->ic_write_wait,
- &log->l_icloglock);
- already_slept = 1;
- goto try_again;
- }
- atomic_inc(&iclog->ic_refcnt);
- xlog_state_switch_iclogs(log, iclog, 0);
- spin_unlock(&log->l_icloglock);
- if (xlog_state_release_iclog(log, iclog))
- return -EIO;
- if (log_flushed)
- *log_flushed = 1;
- spin_lock(&log->l_icloglock);
+ xlog_wait(&iclog->ic_prev->ic_write_wait,
+ &log->l_icloglock);
+ return -EAGAIN;
}
+ atomic_inc(&iclog->ic_refcnt);
+ xlog_state_switch_iclogs(log, iclog, 0);
+ spin_unlock(&log->l_icloglock);
+ if (xlog_state_release_iclog(log, iclog))
+ return -EIO;
+ if (log_flushed)
+ *log_flushed = 1;
+ spin_lock(&log->l_icloglock);
+ }
- if ((flags & XFS_LOG_SYNC) && /* sleep */
- !(iclog->ic_state &
- (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
- /*
- * Don't wait on completion if we know that we've
- * gotten a log write error.
- */
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
- spin_unlock(&log->l_icloglock);
- return -EIO;
- }
- XFS_STATS_INC(mp, xs_log_force_sleep);
- xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
- /*
- * No need to grab the log lock here since we're
- * only deciding whether or not to return EIO
- * and the memory read should be atomic.
- */
- if (iclog->ic_state & XLOG_STATE_IOERROR)
- return -EIO;
- } else { /* just return */
- spin_unlock(&log->l_icloglock);
- }
+ if (!(flags & XFS_LOG_SYNC) ||
+ (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)))
+ goto out_unlock;
- return 0;
- } while (iclog != log->l_iclog);
+ if (iclog->ic_state & XLOG_STATE_IOERROR)
+ goto out_error;
+
+ XFS_STATS_INC(mp, xs_log_force_sleep);
+ xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
+ if (iclog->ic_state & XLOG_STATE_IOERROR)
+ return -EIO;
+ return 0;
+out_unlock:
spin_unlock(&log->l_icloglock);
return 0;
+out_error:
+ spin_unlock(&log->l_icloglock);
+ return -EIO;
}
/*
- * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care
- * about errors or whether the log was flushed or not. This is the normal
- * interface to use when trying to unpin items or move the log forward.
+ * Force the in-core log to disk for a specific LSN.
+ *
+ * Find in-core log with lsn.
+ * If it is in the DIRTY state, just return.
+ * If it is in the ACTIVE state, move the in-core log into the WANT_SYNC
+ * state and go to sleep or return.
+ * If it is in any other state, go to sleep or return.
+ *
+ * Synchronous forces are implemented with a wait queue. All callers trying
+ * to force a given lsn to disk must wait on the queue attached to the
+ * specific in-core log. When given in-core log finally completes its write
+ * to disk, that thread will wake up all threads waiting on the queue.
*/
-void
+int
xfs_log_force_lsn(
- xfs_mount_t *mp,
- xfs_lsn_t lsn,
- uint flags)
+ struct xfs_mount *mp,
+ xfs_lsn_t lsn,
+ uint flags,
+ int *log_flushed)
{
+ int ret;
+ ASSERT(lsn != 0);
+
+ XFS_STATS_INC(mp, xs_log_force);
trace_xfs_log_force(mp, lsn, _RET_IP_);
- _xfs_log_force_lsn(mp, lsn, flags, NULL);
+
+ lsn = xlog_cil_force_lsn(mp->m_log, lsn);
+ if (lsn == NULLCOMMITLSN)
+ return 0;
+
+ ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false);
+ if (ret == -EAGAIN)
+ ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true);
+ return ret;
}
/*
@@ -3734,7 +3712,7 @@ xlog_ticket_alloc(
* one of the iclogs. This uses backup pointers stored in a different
* part of the log in case we trash the log structure.
*/
-void
+STATIC void
xlog_verify_dest_ptr(
struct xlog *log,
void *ptr)
@@ -3895,7 +3873,7 @@ xlog_verify_iclog(
}
if (clientid != XFS_TRANSACTION && clientid != XFS_LOG)
xfs_warn(log->l_mp,
- "%s: invalid clientid %d op 0x%p offset 0x%lx",
+ "%s: invalid clientid %d op "PTR_FMT" offset 0x%lx",
__func__, clientid, ophead,
(unsigned long)field_offset);
@@ -4003,7 +3981,7 @@ xfs_log_force_umount(
* to guarantee this.
*/
if (!logerror)
- _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
+ xfs_log_force(mp, XFS_LOG_SYNC);
/*
* mark the filesystem and the as in a shutdown state and wake