summaryrefslogtreecommitdiff
path: root/fs/bcachefs/btree_types.h
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/btree_types.h')
-rw-r--r--fs/bcachefs/btree_types.h307
1 files changed, 237 insertions, 70 deletions
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index e0c982a4195c..112170fd9c8f 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -138,6 +138,32 @@ struct btree {
struct list_head list;
};
+#define BCH_BTREE_CACHE_NOT_FREED_REASONS() \
+ x(cache_reserve) \
+ x(lock_intent) \
+ x(lock_write) \
+ x(dirty) \
+ x(read_in_flight) \
+ x(write_in_flight) \
+ x(noevict) \
+ x(write_blocked) \
+ x(will_make_reachable) \
+ x(access_bit)
+
+enum bch_btree_cache_not_freed_reasons {
+#define x(n) BCH_BTREE_CACHE_NOT_FREED_##n,
+ BCH_BTREE_CACHE_NOT_FREED_REASONS()
+#undef x
+ BCH_BTREE_CACHE_NOT_FREED_REASONS_NR,
+};
+
+struct btree_cache_list {
+ unsigned idx;
+ struct shrinker *shrink;
+ struct list_head list;
+ size_t nr;
+};
+
struct btree_cache {
struct rhashtable table;
bool table_init_done;
@@ -155,16 +181,19 @@ struct btree_cache {
* should never grow past ~2-3 nodes in practice.
*/
struct mutex lock;
- struct list_head live;
struct list_head freeable;
struct list_head freed_pcpu;
struct list_head freed_nonpcpu;
+ struct btree_cache_list live[2];
- /* Number of elements in live + freeable lists */
- unsigned used;
- unsigned reserve;
- atomic_t dirty;
- struct shrinker *shrink;
+ size_t nr_freeable;
+ size_t nr_reserve;
+ size_t nr_by_btree[BTREE_ID_NR];
+ atomic_long_t nr_dirty;
+
+ /* shrinker stats */
+ size_t nr_freed;
+ u64 not_freed[BCH_BTREE_CACHE_NOT_FREED_REASONS_NR];
/*
* If we need to allocate memory for a new btree node and that
@@ -177,8 +206,8 @@ struct btree_cache {
struct bbpos pinned_nodes_start;
struct bbpos pinned_nodes_end;
- u64 pinned_nodes_leaf_mask;
- u64 pinned_nodes_interior_mask;
+ /* btree id mask: 0 for leaves, 1 for interior */
+ u64 pinned_nodes_mask[2];
};
struct btree_node_iter {
@@ -187,36 +216,85 @@ struct btree_node_iter {
} data[MAX_BSETS];
};
+#define BTREE_ITER_FLAGS() \
+ x(slots) \
+ x(intent) \
+ x(prefetch) \
+ x(is_extents) \
+ x(not_extents) \
+ x(cached) \
+ x(with_key_cache) \
+ x(with_updates) \
+ x(with_journal) \
+ x(snapshot_field) \
+ x(all_snapshots) \
+ x(filter_snapshots) \
+ x(nopreserve) \
+ x(cached_nofill) \
+ x(key_cache_fill) \
+
+#define STR_HASH_FLAGS() \
+ x(must_create) \
+ x(must_replace)
+
+#define BTREE_UPDATE_FLAGS() \
+ x(internal_snapshot_node) \
+ x(nojournal) \
+ x(key_cache_reclaim)
+
+
/*
- * Iterate over all possible positions, synthesizing deleted keys for holes:
- */
-static const __maybe_unused u16 BTREE_ITER_SLOTS = 1 << 0;
-/*
- * Indicates that intent locks should be taken on leaf nodes, because we expect
- * to be doing updates:
- */
-static const __maybe_unused u16 BTREE_ITER_INTENT = 1 << 1;
-/*
- * Causes the btree iterator code to prefetch additional btree nodes from disk:
- */
-static const __maybe_unused u16 BTREE_ITER_PREFETCH = 1 << 2;
-/*
- * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
- * @pos or the first key strictly greater than @pos
+ * BTREE_TRIGGER_norun - don't run triggers at all
+ *
+ * BTREE_TRIGGER_transactional - we're running transactional triggers as part of
+ * a transaction commit: triggers may generate new updates
+ *
+ * BTREE_TRIGGER_atomic - we're running atomic triggers during a transaction
+ * commit: we have our journal reservation, we're holding btree node write
+ * locks, and we know the transaction is going to commit (returning an error
+ * here is a fatal error, causing us to go emergency read-only)
+ *
+ * BTREE_TRIGGER_gc - we're in gc/fsck: running triggers to recalculate e.g. disk usage
+ *
+ * BTREE_TRIGGER_insert - @new is entering the btree
+ * BTREE_TRIGGER_overwrite - @old is leaving the btree
*/
-static const __maybe_unused u16 BTREE_ITER_IS_EXTENTS = 1 << 3;
-static const __maybe_unused u16 BTREE_ITER_NOT_EXTENTS = 1 << 4;
-static const __maybe_unused u16 BTREE_ITER_CACHED = 1 << 5;
-static const __maybe_unused u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 6;
-static const __maybe_unused u16 BTREE_ITER_WITH_UPDATES = 1 << 7;
-static const __maybe_unused u16 BTREE_ITER_WITH_JOURNAL = 1 << 8;
-static const __maybe_unused u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 9;
-static const __maybe_unused u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 10;
-static const __maybe_unused u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 11;
-static const __maybe_unused u16 BTREE_ITER_NOPRESERVE = 1 << 12;
-static const __maybe_unused u16 BTREE_ITER_CACHED_NOFILL = 1 << 13;
-static const __maybe_unused u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 14;
-#define __BTREE_ITER_FLAGS_END 15
+#define BTREE_TRIGGER_FLAGS() \
+ x(norun) \
+ x(transactional) \
+ x(atomic) \
+ x(check_repair) \
+ x(gc) \
+ x(insert) \
+ x(overwrite) \
+ x(is_root)
+
+enum {
+#define x(n) BTREE_ITER_FLAG_BIT_##n,
+ BTREE_ITER_FLAGS()
+ STR_HASH_FLAGS()
+ BTREE_UPDATE_FLAGS()
+ BTREE_TRIGGER_FLAGS()
+#undef x
+};
+
+/* iter flags must fit in a u16: */
+//BUILD_BUG_ON(BTREE_ITER_FLAG_BIT_key_cache_fill > 15);
+
+enum btree_iter_update_trigger_flags {
+#define x(n) BTREE_ITER_##n = 1U << BTREE_ITER_FLAG_BIT_##n,
+ BTREE_ITER_FLAGS()
+#undef x
+#define x(n) STR_HASH_##n = 1U << BTREE_ITER_FLAG_BIT_##n,
+ STR_HASH_FLAGS()
+#undef x
+#define x(n) BTREE_UPDATE_##n = 1U << BTREE_ITER_FLAG_BIT_##n,
+ BTREE_UPDATE_FLAGS()
+#undef x
+#define x(n) BTREE_TRIGGER_##n = 1U << BTREE_ITER_FLAG_BIT_##n,
+ BTREE_TRIGGER_FLAGS()
+#undef x
+};
enum btree_path_uptodate {
BTREE_ITER_UPTODATE = 0,
@@ -286,7 +364,6 @@ static inline unsigned long btree_path_ip_allocated(struct btree_path *path)
* @nodes_intent_locked - bitmask indicating which locks are intent locks
*/
struct btree_iter {
- struct btree_trans *trans;
btree_path_idx_t path;
btree_path_idx_t update_path;
btree_path_idx_t key_cache_path;
@@ -307,7 +384,7 @@ struct btree_iter {
*/
struct bkey k;
- /* BTREE_ITER_WITH_JOURNAL: */
+ /* BTREE_ITER_with_journal: */
size_t journal_idx;
#ifdef TRACK_PATH_ALLOCATED
unsigned long ip_allocated;
@@ -322,17 +399,15 @@ struct bkey_cached {
unsigned long flags;
u16 u64s;
- bool valid;
- u32 btree_trans_barrier_seq;
struct bkey_cached_key key;
struct rhash_head hash;
- struct list_head list;
struct journal_entry_pin journal;
u64 seq;
struct bkey_i *k;
+ struct rcu_head rcu;
};
static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
@@ -344,6 +419,7 @@ static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
struct btree_insert_entry {
unsigned flags;
+ u8 sort_order;
u8 bkey_type;
enum btree_id btree_id:8;
u8 level:4;
@@ -398,6 +474,18 @@ struct btree_trans_paths {
struct btree_path paths[];
};
+struct trans_kmalloc_trace {
+ unsigned long ip;
+ size_t bytes;
+};
+typedef DARRAY(struct trans_kmalloc_trace) darray_trans_kmalloc_trace;
+
+struct btree_trans_subbuf {
+ u16 base;
+ u16 u64s;
+ u16 size;;
+};
+
struct btree_trans {
struct bch_fs *c;
@@ -409,15 +497,22 @@ struct btree_trans {
void *mem;
unsigned mem_top;
unsigned mem_bytes;
+ unsigned realloc_bytes_required;
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+ darray_trans_kmalloc_trace trans_kmalloc_trace;
+#endif
btree_path_idx_t nr_sorted;
btree_path_idx_t nr_paths;
btree_path_idx_t nr_paths_max;
+ btree_path_idx_t nr_updates;
u8 fn_idx;
- u8 nr_updates;
u8 lock_must_abort;
bool lock_may_not_fail:1;
bool srcu_held:1;
+ bool locked:1;
+ bool pf_memalloc_nofs:1;
+ bool write_locked:1;
bool used_mempool:1;
bool in_traverse_all:1;
bool paths_sorted:1;
@@ -425,13 +520,19 @@ struct btree_trans {
bool journal_transaction_names:1;
bool journal_replay_not_finished:1;
bool notrace_relock_fail:1;
- bool write_locked:1;
enum bch_errcode restarted:16;
u32 restart_count;
+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+ u32 restart_count_this_trans;
+#endif
u64 last_begin_time;
unsigned long last_begin_ip;
unsigned long last_restarted_ip;
+#ifdef CONFIG_BCACHEFS_DEBUG
+ bch_stacktrace last_restarted_trace;
+#endif
+ unsigned long last_unlock_ip;
unsigned long srcu_lock_time;
const char *fn;
@@ -440,9 +541,8 @@ struct btree_trans {
int srcu_idx;
/* update path: */
- u16 journal_entries_u64s;
- u16 journal_entries_size;
- struct jset_entry *journal_entries;
+ struct btree_trans_subbuf journal_entries;
+ struct btree_trans_subbuf accounting;
struct btree_trans_commit_hook *hooks;
struct journal_entry_pin *journal_pin;
@@ -455,8 +555,12 @@ struct btree_trans {
unsigned journal_u64s;
unsigned extra_disk_res; /* XXX kill */
- struct replicas_delta_list *fs_usage_deltas;
+ __BKEY_PADDED(btree_path_down, BKEY_BTREE_PTR_VAL_U64s_MAX);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
/* Entries before this are zeroed out on every bch2_trans_get() call */
struct list_head list;
@@ -514,7 +618,11 @@ enum btree_write_type {
x(dying) \
x(fake) \
x(need_rewrite) \
- x(never_write)
+ x(need_rewrite_error) \
+ x(need_rewrite_degraded) \
+ x(need_rewrite_ptr_written_zero) \
+ x(never_write) \
+ x(pinned)
enum btree_flags {
/* First bits for btree node write type */
@@ -537,6 +645,32 @@ static inline void clear_btree_node_ ## flag(struct btree *b) \
BTREE_FLAGS()
#undef x
+#define BTREE_NODE_REWRITE_REASON() \
+ x(none) \
+ x(unknown) \
+ x(error) \
+ x(degraded) \
+ x(ptr_written_zero)
+
+enum btree_node_rewrite_reason {
+#define x(n) BTREE_NODE_REWRITE_##n,
+ BTREE_NODE_REWRITE_REASON()
+#undef x
+};
+
+static inline enum btree_node_rewrite_reason btree_node_rewrite_reason(struct btree *b)
+{
+ if (btree_node_need_rewrite_ptr_written_zero(b))
+ return BTREE_NODE_REWRITE_ptr_written_zero;
+ if (btree_node_need_rewrite_degraded(b))
+ return BTREE_NODE_REWRITE_degraded;
+ if (btree_node_need_rewrite_error(b))
+ return BTREE_NODE_REWRITE_error;
+ if (btree_node_need_rewrite(b))
+ return BTREE_NODE_REWRITE_unknown;
+ return BTREE_NODE_REWRITE_none;
+}
+
static inline struct btree_write *btree_current_write(struct btree *b)
{
return b->writes + btree_node_write_idx(b);
@@ -556,13 +690,13 @@ static inline struct bset_tree *bset_tree_last(struct btree *b)
static inline void *
__btree_node_offset_to_ptr(const struct btree *b, u16 offset)
{
- return (void *) ((u64 *) b->data + 1 + offset);
+ return (void *) ((u64 *) b->data + offset);
}
static inline u16
__btree_node_ptr_to_offset(const struct btree *b, const void *p)
{
- u16 ret = (u64 *) p - 1 - (u64 *) b->data;
+ u16 ret = (u64 *) p - (u64 *) b->data;
EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p);
return ret;
@@ -687,58 +821,91 @@ const char *bch2_btree_node_type_str(enum btree_node_type);
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS)
-static inline bool btree_node_type_needs_gc(enum btree_node_type type)
+static inline bool btree_node_type_has_trans_triggers(enum btree_node_type type)
{
- return BTREE_NODE_TYPE_HAS_TRIGGERS & BIT_ULL(type);
+ return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS;
}
-static inline bool btree_node_type_is_extents(enum btree_node_type type)
+static inline bool btree_node_type_has_atomic_triggers(enum btree_node_type type)
{
- const unsigned mask = 0
-#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
+ return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS;
+}
+
+static inline bool btree_node_type_has_triggers(enum btree_node_type type)
+{
+ return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRIGGERS;
+}
+
+static inline bool btree_id_is_extents(enum btree_id btree)
+{
+ const u64 mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_extents)) << nr)
BCH_BTREE_IDS()
#undef x
;
- return (1U << type) & mask;
+ return BIT_ULL(btree) & mask;
}
-static inline bool btree_id_is_extents(enum btree_id btree)
+static inline bool btree_node_type_is_extents(enum btree_node_type type)
{
- return btree_node_type_is_extents(__btree_node_type(0, btree));
+ return type != BKEY_TYPE_btree && btree_id_is_extents(type - 1);
}
-static inline bool btree_type_has_snapshots(enum btree_id id)
+static inline bool btree_type_has_snapshots(enum btree_id btree)
{
- const unsigned mask = 0
-#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
+ const u64 mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_snapshots)) << nr)
BCH_BTREE_IDS()
#undef x
;
- return (1U << id) & mask;
+ return BIT_ULL(btree) & mask;
}
-static inline bool btree_type_has_snapshot_field(enum btree_id id)
+static inline bool btree_type_has_snapshot_field(enum btree_id btree)
{
- const unsigned mask = 0
-#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
+ const u64 mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_IS_snapshot_field|BTREE_IS_snapshots))) << nr)
BCH_BTREE_IDS()
#undef x
;
- return (1U << id) & mask;
+ return BIT_ULL(btree) & mask;
}
-static inline bool btree_type_has_ptrs(enum btree_id id)
+static inline bool btree_type_has_ptrs(enum btree_id btree)
{
- const unsigned mask = 0
-#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr)
+ const u64 mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_data)) << nr)
BCH_BTREE_IDS()
#undef x
;
- return (1U << id) & mask;
+ return BIT_ULL(btree) & mask;
+}
+
+static inline bool btree_type_uses_write_buffer(enum btree_id btree)
+{
+ const u64 mask = 0
+#define x(name, nr, flags, ...) |((!!((flags) & BTREE_IS_write_buffer)) << nr)
+ BCH_BTREE_IDS()
+#undef x
+ ;
+
+ return BIT_ULL(btree) & mask;
+}
+
+static inline u8 btree_trigger_order(enum btree_id btree)
+{
+ switch (btree) {
+ case BTREE_ID_alloc:
+ return U8_MAX;
+ case BTREE_ID_stripes:
+ return U8_MAX - 1;
+ default:
+ return btree;
+ }
}
struct btree_root {