Diffstat (limited to 'fs/btrfs/transaction.c')
 fs/btrfs/transaction.c | 101 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 97 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 03de89b45f27..1f1c25db6f6b 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -854,7 +854,37 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
static noinline void wait_for_commit(struct btrfs_transaction *commit,
const enum btrfs_trans_state min_state)
{
- wait_event(commit->commit_wait, commit->state >= min_state);
+ struct btrfs_fs_info *fs_info = commit->fs_info;
+ u64 transid = commit->transid;
+ bool put = false;
+
+ while (1) {
+ wait_event(commit->commit_wait, commit->state >= min_state);
+ if (put)
+ btrfs_put_transaction(commit);
+
+ if (min_state < TRANS_STATE_COMPLETED)
+ break;
+
+ /*
+ * A transaction isn't really completed until all of the
+ * previous transactions are completed, but with fsync we can
+ * end up with SUPER_COMMITTED transactions before a COMPLETED
+ * transaction. Wait for those.
+ */
+
+ spin_lock(&fs_info->trans_lock);
+ commit = list_first_entry_or_null(&fs_info->trans_list,
+ struct btrfs_transaction,
+ list);
+ if (!commit || commit->transid > transid) {
+ spin_unlock(&fs_info->trans_lock);
+ break;
+ }
+ refcount_inc(&commit->use_count);
+ put = true;
+ spin_unlock(&fs_info->trans_lock);
+ }
}

int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
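For illustration, the new loop reduces to: wait for our transaction to reach
min_state, and if we need TRANS_STATE_COMPLETED, keep re-checking the oldest
transaction still on fs_info->trans_list until nothing at or before our
transid remains. A minimal userspace sketch of that pattern, assuming POSIX
threads; every name here (txn, trans_list, commit_wait) is invented, and
holding the mutex across the condition wait stands in for the kernel's
wait_event() plus transaction refcounting:

#include <pthread.h>

enum txn_state { TXN_RUNNING, TXN_SUPER_COMMITTED, TXN_COMPLETED };

struct txn {
	unsigned long long transid;
	enum txn_state state;
	struct txn *next;		/* trans_list, oldest first */
};

static pthread_mutex_t trans_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t commit_wait = PTHREAD_COND_INITIALIZER;
static struct txn *trans_list;		/* head = oldest live transaction */

/*
 * Wait until @t reaches @min_state; for TXN_COMPLETED, additionally wait
 * until every older transaction has completed. This sketch assumes the
 * committer removes COMPLETED transactions from trans_list, broadcasts
 * commit_wait on every state change, and never frees entries (the kernel
 * instead takes a refcount on each transaction it waits on).
 */
static void wait_for_commit(struct txn *t, enum txn_state min_state)
{
	unsigned long long transid = t->transid;

	pthread_mutex_lock(&trans_lock);
	for (;;) {
		while (t->state < min_state)
			pthread_cond_wait(&commit_wait, &trans_lock);
		if (min_state < TXN_COMPLETED)
			break;
		/* Re-check from the oldest transaction still listed. */
		t = trans_list;
		if (!t || t->transid > transid)
			break;
	}
	pthread_mutex_unlock(&trans_lock);
}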
@@ -1320,6 +1350,32 @@ again:
}

+/*
+ * If we had a pending drop we need to see if there are any others left in our
+ * dead roots list, and if not clear our bit and wake any waiters.
+ */
+void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
+{
+ /*
+ * We put roots with an unfinished drop in progress at the front of the
+ * list, so if the first entry doesn't have UNFINISHED_DROP set we can
+ * wake everybody up.
+ */
+ spin_lock(&fs_info->trans_lock);
+ if (!list_empty(&fs_info->dead_roots)) {
+ struct btrfs_root *root = list_first_entry(&fs_info->dead_roots,
+ struct btrfs_root,
+ root_list);
+ if (test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state)) {
+ spin_unlock(&fs_info->trans_lock);
+ return;
+ }
+ }
+ spin_unlock(&fs_info->trans_lock);
+
+ btrfs_wake_unfinished_drop(fs_info);
+}
+
/*
* dead roots are old snapshots that need to be deleted. This allocates
* a dirty root struct and adds it into the list of dead roots that need to
* be deleted
@@ -1331,7 +1387,12 @@ void btrfs_add_dead_root(struct btrfs_root *root)
spin_lock(&fs_info->trans_lock);
if (list_empty(&root->root_list)) {
btrfs_grab_root(root);
- list_add_tail(&root->root_list, &fs_info->dead_roots);
+
+ /* We want to process the partially complete drops first. */
+ if (test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state))
+ list_add(&root->root_list, &fs_info->dead_roots);
+ else
+ list_add_tail(&root->root_list, &fs_info->dead_roots);
}
spin_unlock(&fs_info->trans_lock);
}
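The list discipline added to btrfs_add_dead_root() is what makes the single
head check in btrfs_maybe_wake_unfinished_drop() sufficient: unfinished drops
always sort before fresh dead roots. A small illustrative C sketch of that
invariant, with invented names and locking omitted:

#include <stdbool.h>
#include <stddef.h>

struct dead_root {
	bool unfinished_drop;		/* like BTRFS_ROOT_UNFINISHED_DROP */
	struct dead_root *next;
};

struct dead_list {
	struct dead_root *head;
	struct dead_root *tail;
};

/* Unfinished drops go to the front, fresh dead roots to the back. */
static void add_dead_root(struct dead_list *l, struct dead_root *r)
{
	if (r->unfinished_drop) {
		r->next = l->head;
		l->head = r;
		if (!l->tail)
			l->tail = r;
	} else {
		r->next = NULL;
		if (l->tail)
			l->tail->next = r;
		else
			l->head = r;
		l->tail = r;
	}
}

/* Because of the ordering above, one head check answers the question. */
static bool have_unfinished_drop(const struct dead_list *l)
{
	return l->head && l->head->unfinished_drop;
}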
@@ -1981,16 +2042,24 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
{
/*
- * We use writeback_inodes_sb here because if we used
+ * We use try_to_writeback_inodes_sb() here because if we used
* btrfs_start_delalloc_roots we would deadlock with fs freeze.
* We are currently holding the fs freeze lock; if we did an async flush
* we would call btrfs_join_transaction() and deadlock, because it needs
* to wait for the fs freeze lock. By flushing directly we benefit from
* already being in a transaction, so our join_transaction doesn't have
* to re-take the fs freeze lock.
+ *
+ * Note that try_to_writeback_inodes_sb() will only trigger writeback
+ * if it can read lock sb->s_umount. It will always be able to lock it,
+ * except when the filesystem is being unmounted or being frozen, but in
+ * those cases sync_filesystem() is called, which results in calling
+ * writeback_inodes_sb() while holding a write lock on sb->s_umount.
+ * Note that we don't call writeback_inodes_sb() directly, because it
+ * will emit a warning if sb->s_umount is not locked.
*/
if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
- writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
+ try_to_writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
return 0;
}
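The switch to try_to_writeback_inodes_sb() amounts to: take sb->s_umount for
reading only if it is immediately available, and skip the optional flush
otherwise, since an unmount or freeze holding it for writing performs its own
sync_filesystem(). A hypothetical userspace analogue using POSIX rwlocks:

#include <pthread.h>
#include <stdbool.h>

/* Returns true if the optional flush ran, false if it was safely skipped. */
static bool try_flush(pthread_rwlock_t *s_umount, void (*flush)(void))
{
	if (pthread_rwlock_tryrdlock(s_umount) != 0)
		return false;	/* freeze/unmount holds it; they flush anyway */
	flush();
	pthread_rwlock_unlock(s_umount);
	return true;
}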
@@ -2000,6 +2069,27 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
}

+/*
+ * Add the pending snapshot associated with the given transaction handle to
+ * the list of pending snapshots of its transaction. This must be called
+ * after the transaction commit has started and while holding
+ * fs_info->trans_lock.
+ * This serves to guarantee a caller of btrfs_commit_transaction() that it can
+ * safely free the pending snapshot pointer in case btrfs_commit_transaction()
+ * returns an error.
+ */
+static void add_pending_snapshot(struct btrfs_trans_handle *trans)
+{
+ struct btrfs_transaction *cur_trans = trans->transaction;
+
+ if (!trans->pending_snapshot)
+ return;
+
+ lockdep_assert_held(&trans->fs_info->trans_lock);
+ ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START);
+
+ list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
+}
+
int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
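The guarantee described in the comment is an ownership handoff: the caller
owns trans->pending_snapshot until it is published on the transaction's
pending list under fs_info->trans_lock; after that, only the path that
empties the list may free it, so a failing btrfs_commit_transaction() caller
never frees the snapshot twice. A simplified sketch with invented names,
using a plain singly linked list in place of the kernel's list_head:

#include <pthread.h>
#include <stdlib.h>

struct pending_snapshot {
	struct pending_snapshot *next;
};

struct transaction {
	pthread_mutex_t lock;		   /* stands in for fs_info->trans_lock */
	struct pending_snapshot *pending;  /* head of the pending list */
};

/* Publish @snap: from here on the transaction owns it, not the caller. */
static void add_pending_snapshot(struct transaction *t,
				 struct pending_snapshot *snap)
{
	pthread_mutex_lock(&t->lock);
	snap->next = t->pending;
	t->pending = snap;
	pthread_mutex_unlock(&t->lock);
}

/* Error path: free everything that was published, exactly once. */
static void free_pending_snapshots(struct transaction *t)
{
	pthread_mutex_lock(&t->lock);
	while (t->pending) {
		struct pending_snapshot *snap = t->pending;

		t->pending = snap->next;
		free(snap);
	}
	pthread_mutex_unlock(&t->lock);
}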
@@ -2073,6 +2163,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;

+ add_pending_snapshot(trans);
+
spin_unlock(&fs_info->trans_lock);
refcount_inc(&cur_trans->use_count);
@@ -2163,6 +2255,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* COMMIT_DOING so make sure to wait for num_writers to == 1 again.
*/
spin_lock(&fs_info->trans_lock);
+ add_pending_snapshot(trans);
cur_trans->state = TRANS_STATE_COMMIT_DOING;
spin_unlock(&fs_info->trans_lock);
wait_event(cur_trans->writer_wait,
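Taken together, the two call sites enforce one invariant: the snapshot is
attached under fs_info->trans_lock in the same critical section that either
observes state >= TRANS_STATE_COMMIT_START or performs the transition to
TRANS_STATE_COMMIT_DOING, so a committing transaction can never miss a
pending snapshot. A condensed, compilable sketch of that invariant (invented
names, a counter standing in for the list):

#include <pthread.h>
#include <stdbool.h>

enum state { RUNNING, COMMIT_START, COMMIT_DOING, COMPLETED };

struct txn {
	pthread_mutex_t lock;		/* stands in for fs_info->trans_lock */
	enum state state;
	int pending_snapshots;		/* counter instead of a real list */
};

/* Attach while holding the lock; mirrors add_pending_snapshot(). */
static void attach_snapshot(struct txn *t)
{
	t->pending_snapshots++;
}

/*
 * Both call sites attach in the same critical section that inspects or
 * changes the commit state, so a committer can never miss a snapshot.
 */
static bool commit(struct txn *t, bool have_snapshot)
{
	pthread_mutex_lock(&t->lock);
	if (t->state >= COMMIT_START) {
		/* Someone else is committing: hand the snapshot over. */
		if (have_snapshot)
			attach_snapshot(t);
		pthread_mutex_unlock(&t->lock);
		return false;		/* caller waits for that commit */
	}
	/* We are the committer: attach before publishing COMMIT_DOING. */
	if (have_snapshot)
		attach_snapshot(t);
	t->state = COMMIT_DOING;
	pthread_mutex_unlock(&t->lock);
	return true;
}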