path: root/fs/bcachefs
author	Kent Overstreet <kent.overstreet@gmail.com>	2021-09-07 21:25:32 -0400
committer	Kent Overstreet <kent.overstreet@linux.dev>	2023-10-22 17:09:12 -0400
commit	caaa66aa546a27e75fb3cf32df1906140f85f1c9 (patch)
tree	670ae5eefa4a486f94181b84108e9f562f1a7a88 /fs/bcachefs
parent	b301105b48d2805ca0e29b1b0f660cf2232511ee (diff)
bcachefs: Better approach to write vs. read lock deadlocks
Instead of unconditionally upgrading read locks to intent locks in
do_bch2_trans_commit(), this patch changes the path that takes write
locks to first trylock, and then, if trylock fails, check whether we
hold a conflicting read lock, restarting the transaction if necessary.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
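[Editor's note] A minimal, self-contained sketch of the pattern this commit adopts, for illustration only: it is not bcachefs code. POSIX rwlocks stand in for bcachefs's six locks, the read_locked flag stands in for the per-path nodes_locked vs. nodes_intent_locked bookkeeping, and every name below is hypothetical.

#include <pthread.h>
#include <stdbool.h>

struct node {
	pthread_rwlock_t lock;
	bool		 read_locked;	/* does this transaction hold a read lock here? */
};

/*
 * Take write locks on every node in the update; returns nonzero when
 * the caller must drop its locks and retry, standing in for the real
 * code's btree_trans_restart().
 */
static int trans_lock_write_sketch(struct node *nodes, unsigned nr)
{
	unsigned i, j;

	for (i = 0; i < nr; i++) {
		/* Optimistic path: take the write lock without blocking. */
		if (pthread_rwlock_trywrlock(&nodes[i].lock) == 0)
			continue;

		/*
		 * Trylock failed: if we still hold a read lock anywhere,
		 * another thread could be blocked on it while holding the
		 * lock we want, so unwind and restart rather than deadlock.
		 */
		for (j = 0; j < nr; j++)
			if (nodes[j].read_locked)
				goto restart;

		/* No conflicting read lock held: blocking here is safe. */
		pthread_rwlock_wrlock(&nodes[i].lock);
	}
	return 0;

restart:
	while (i--)
		pthread_rwlock_unlock(&nodes[i].lock);
	return -1;	/* caller restarts the transaction */
}

In the patch below, six_trylock_write() plays the role of the trywrlock, have_conflicting_read_lock() is the scan for held read locks, and trace_trans_restart_would_deadlock_write() records the restart before it is returned to the caller.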
Diffstat (limited to 'fs/bcachefs')
-rw-r--r--	fs/bcachefs/btree_update_leaf.c	108
-rw-r--r--	fs/bcachefs/trace.h	15
2 files changed, 82 insertions(+), 41 deletions(-)
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 576f0739fdbd..ab5cca892e1a 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -545,6 +545,54 @@ static inline void normalize_read_intent_locks(struct btree_trans *trans)
 	bch2_trans_verify_locks(trans);
 }
 
+static inline bool have_conflicting_read_lock(struct btree_trans *trans, struct btree_path *pos)
+{
+	struct btree_path *path;
+	unsigned i;
+
+	trans_for_each_path_inorder(trans, path, i) {
+		//if (path == pos)
+		//	break;
+
+		if (path->nodes_locked != path->nodes_intent_locked)
+			return true;
+	}
+
+	return false;
+}
+
+static inline int trans_lock_write(struct btree_trans *trans)
+{
+	struct btree_insert_entry *i;
+
+	trans_for_each_update(trans, i) {
+		if (same_leaf_as_prev(trans, i))
+			continue;
+
+		if (!six_trylock_write(&insert_l(i)->b->c.lock)) {
+			if (have_conflicting_read_lock(trans, i->path))
+				goto fail;
+
+			__btree_node_lock_type(trans->c, insert_l(i)->b,
+					       SIX_LOCK_write);
+		}
+
+		bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
+	}
+
+	return 0;
+fail:
+	while (--i >= trans->updates) {
+		if (same_leaf_as_prev(trans, i))
+			continue;
+
+		bch2_btree_node_unlock_write_inlined(trans, i->path, insert_l(i)->b);
+	}
+
+	trace_trans_restart_would_deadlock_write(trans->ip);
+	return btree_trans_restart(trans);
+}
+
 /*
  * Get journal reservation, take write locks, and attempt to do btree update(s):
  */
@@ -554,11 +602,26 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i;
-	struct btree_path *path;
 	struct bkey_s_c old;
 	int ret, u64s_delta = 0;
 
 	trans_for_each_update(trans, i) {
+		const char *invalid = bch2_bkey_invalid(c,
+				bkey_i_to_s_c(i->k), i->bkey_type);
+		if (invalid) {
+			char buf[200];
+
+			bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
+			bch_err(c, "invalid bkey %s on insert from %ps -> %ps: %s\n",
+				buf, (void *) trans->ip,
+				(void *) i->ip_allocated, invalid);
+			bch2_fatal_error(c);
+			return -EINVAL;
+		}
+		btree_insert_entry_checks(trans, i);
+	}
+
+	trans_for_each_update(trans, i) {
 		struct bkey u;
 
 		/*
@@ -599,48 +662,11 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
 	if (unlikely(ret))
 		return ret;
 
-	/*
-	 * Can't be holding any read locks when we go to take write locks:
-	 * another thread could be holding an intent lock on the same node we
-	 * have a read lock on, and it'll block trying to take a write lock
-	 * (because we hold a read lock) and it could be blocking us by holding
-	 * its own read lock (while we're trying to to take write locks).
-	 *
-	 * note - this must be done after bch2_trans_journal_preres_get_cold()
-	 * or anything else that might call bch2_trans_relock(), since that
-	 * would just retake the read locks:
-	 */
-	trans_for_each_path(trans, path)
-		if (path->nodes_locked != path->nodes_intent_locked &&
-		    !bch2_btree_path_upgrade(trans, path, path->level + 1)) {
-			trace_trans_restart_upgrade(trans->ip, trace_ip,
-						    path->btree_id, &path->pos);
-			return btree_trans_restart(trans);
-		}
-
-	trans_for_each_update(trans, i) {
-		const char *invalid = bch2_bkey_invalid(c,
-				bkey_i_to_s_c(i->k), i->bkey_type);
-		if (invalid) {
-			char buf[200];
-
-			bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
-			bch_err(c, "invalid bkey %s on insert from %ps -> %ps: %s\n",
-				buf, (void *) trans->ip,
-				(void *) i->ip_allocated, invalid);
-			bch2_fatal_error(c);
-			return -EINVAL;
-		}
-		btree_insert_entry_checks(trans, i);
-	}
-
 	normalize_read_intent_locks(trans);
 
-	trans_for_each_update(trans, i)
-		if (!same_leaf_as_prev(trans, i)) {
-			btree_node_lock_type(c, insert_l(i)->b, SIX_LOCK_write);
-			bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
-		}
+	ret = trans_lock_write(trans);
+	if (unlikely(ret))
+		return ret;
 
 	ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip);
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 960dcc8ce3e6..21d026277540 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -756,6 +756,21 @@ TRACE_EVENT(trans_restart_would_deadlock,
 		  __entry->want_pos_snapshot)
 );
 
+TRACE_EVENT(trans_restart_would_deadlock_write,
+	TP_PROTO(unsigned long trans_ip),
+	TP_ARGS(trans_ip),
+
+	TP_STRUCT__entry(
+		__field(unsigned long,	trans_ip	)
+	),
+
+	TP_fast_assign(
+		__entry->trans_ip = trans_ip;
+	),
+
+	TP_printk("%ps", (void *) __entry->trans_ip)
+);
+
 TRACE_EVENT(trans_restart_mem_realloced,
 	TP_PROTO(unsigned long trans_ip, unsigned long caller_ip,
 		 unsigned long bytes),