Diffstat (limited to 'fs/bcachefs/btree_types.h')
-rw-r--r-- | fs/bcachefs/btree_types.h | 307
1 file changed, 237 insertions, 70 deletions
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index e0c982a4195c..112170fd9c8f 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -138,6 +138,32 @@ struct btree {
 	struct list_head	list;
 };
 
+#define BCH_BTREE_CACHE_NOT_FREED_REASONS()	\
+	x(cache_reserve)			\
+	x(lock_intent)				\
+	x(lock_write)				\
+	x(dirty)				\
+	x(read_in_flight)			\
+	x(write_in_flight)			\
+	x(noevict)				\
+	x(write_blocked)			\
+	x(will_make_reachable)			\
+	x(access_bit)
+
+enum bch_btree_cache_not_freed_reasons {
+#define x(n) BCH_BTREE_CACHE_NOT_FREED_##n,
+	BCH_BTREE_CACHE_NOT_FREED_REASONS()
+#undef x
+	BCH_BTREE_CACHE_NOT_FREED_REASONS_NR,
+};
+
+struct btree_cache_list {
+	unsigned		idx;
+	struct shrinker		*shrink;
+	struct list_head	list;
+	size_t			nr;
+};
+
 struct btree_cache {
 	struct rhashtable	table;
 	bool			table_init_done;
@@ -155,16 +181,19 @@ struct btree_cache {
 	 * should never grow past ~2-3 nodes in practice.
 	 */
 	struct mutex		lock;
-	struct list_head	live;
 	struct list_head	freeable;
 	struct list_head	freed_pcpu;
 	struct list_head	freed_nonpcpu;
+	struct btree_cache_list	live[2];
 
-	/* Number of elements in live + freeable lists */
-	unsigned		used;
-	unsigned		reserve;
-	atomic_t		dirty;
-	struct shrinker		*shrink;
+	size_t			nr_freeable;
+	size_t			nr_reserve;
+	size_t			nr_by_btree[BTREE_ID_NR];
+	atomic_long_t		nr_dirty;
+
+	/* shrinker stats */
+	size_t			nr_freed;
+	u64			not_freed[BCH_BTREE_CACHE_NOT_FREED_REASONS_NR];
 
 	/*
 	 * If we need to allocate memory for a new btree node and that
@@ -177,8 +206,8 @@ struct btree_cache {
 
 	struct bbpos		pinned_nodes_start;
 	struct bbpos		pinned_nodes_end;
-	u64			pinned_nodes_leaf_mask;
-	u64			pinned_nodes_interior_mask;
+	/* btree id mask: 0 for leaves, 1 for interior */
+	u64			pinned_nodes_mask[2];
 };
 
 struct btree_node_iter {
@@ -187,36 +216,85 @@ struct btree_node_iter {
 	}			data[MAX_BSETS];
 };
 
+#define BTREE_ITER_FLAGS()			\
+	x(slots)				\
+	x(intent)				\
+	x(prefetch)				\
+	x(is_extents)				\
+	x(not_extents)				\
+	x(cached)				\
+	x(with_key_cache)			\
+	x(with_updates)				\
+	x(with_journal)				\
+	x(snapshot_field)			\
+	x(all_snapshots)			\
+	x(filter_snapshots)			\
+	x(nopreserve)				\
+	x(cached_nofill)			\
+	x(key_cache_fill)			\
+
+#define STR_HASH_FLAGS()			\
+	x(must_create)				\
+	x(must_replace)
+
+#define BTREE_UPDATE_FLAGS()			\
+	x(internal_snapshot_node)		\
+	x(nojournal)				\
+	x(key_cache_reclaim)
+
+
 /*
- * Iterate over all possible positions, synthesizing deleted keys for holes:
- */
-static const __maybe_unused u16 BTREE_ITER_SLOTS		= 1 << 0;
-/*
- * Indicates that intent locks should be taken on leaf nodes, because we expect
- * to be doing updates:
- */
-static const __maybe_unused u16 BTREE_ITER_INTENT		= 1 << 1;
-/*
- * Causes the btree iterator code to prefetch additional btree nodes from disk:
- */
-static const __maybe_unused u16 BTREE_ITER_PREFETCH		= 1 << 2;
-/*
- * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
- * @pos or the first key strictly greater than @pos
+ * BTREE_TRIGGER_norun - don't run triggers at all
+ *
+ * BTREE_TRIGGER_transactional - we're running transactional triggers as part of
+ * a transaction commit: triggers may generate new updates
+ *
+ * BTREE_TRIGGER_atomic - we're running atomic triggers during a transaction
+ * commit: we have our journal reservation, we're holding btree node write
+ * locks, and we know the transaction is going to commit (returning an error
+ * here is a fatal error, causing us to go emergency read-only)
+ *
+ * BTREE_TRIGGER_gc - we're in gc/fsck: running triggers to recalculate e.g. disk usage
+ *
+ * BTREE_TRIGGER_insert - @new is entering the btree
+ * BTREE_TRIGGER_overwrite - @old is leaving the btree
  */
-static const __maybe_unused u16 BTREE_ITER_IS_EXTENTS		= 1 << 3;
-static const __maybe_unused u16 BTREE_ITER_NOT_EXTENTS		= 1 << 4;
-static const __maybe_unused u16 BTREE_ITER_CACHED		= 1 << 5;
-static const __maybe_unused u16 BTREE_ITER_WITH_KEY_CACHE	= 1 << 6;
-static const __maybe_unused u16 BTREE_ITER_WITH_UPDATES	= 1 << 7;
-static const __maybe_unused u16 BTREE_ITER_WITH_JOURNAL	= 1 << 8;
-static const __maybe_unused u16 __BTREE_ITER_ALL_SNAPSHOTS	= 1 << 9;
-static const __maybe_unused u16 BTREE_ITER_ALL_SNAPSHOTS	= 1 << 10;
-static const __maybe_unused u16 BTREE_ITER_FILTER_SNAPSHOTS	= 1 << 11;
-static const __maybe_unused u16 BTREE_ITER_NOPRESERVE		= 1 << 12;
-static const __maybe_unused u16 BTREE_ITER_CACHED_NOFILL	= 1 << 13;
-static const __maybe_unused u16 BTREE_ITER_KEY_CACHE_FILL	= 1 << 14;
-#define __BTREE_ITER_FLAGS_END 15
+#define BTREE_TRIGGER_FLAGS()			\
+	x(norun)				\
+	x(transactional)			\
+	x(atomic)				\
+	x(check_repair)				\
+	x(gc)					\
+	x(insert)				\
+	x(overwrite)				\
+	x(is_root)
+
+enum {
+#define x(n) BTREE_ITER_FLAG_BIT_##n,
+	BTREE_ITER_FLAGS()
+	STR_HASH_FLAGS()
+	BTREE_UPDATE_FLAGS()
+	BTREE_TRIGGER_FLAGS()
+#undef x
+};
+
+/* iter flags must fit in a u16: */
+//BUILD_BUG_ON(BTREE_ITER_FLAG_BIT_key_cache_fill > 15);
+
+enum btree_iter_update_trigger_flags {
+#define x(n) BTREE_ITER_##n	= 1U << BTREE_ITER_FLAG_BIT_##n,
+	BTREE_ITER_FLAGS()
+#undef x
+#define x(n) STR_HASH_##n	= 1U << BTREE_ITER_FLAG_BIT_##n,
+	STR_HASH_FLAGS()
+#undef x
+#define x(n) BTREE_UPDATE_##n	= 1U << BTREE_ITER_FLAG_BIT_##n,
+	BTREE_UPDATE_FLAGS()
+#undef x
+#define x(n) BTREE_TRIGGER_##n	= 1U << BTREE_ITER_FLAG_BIT_##n,
+	BTREE_TRIGGER_FLAGS()
+#undef x
+};
 
 enum btree_path_uptodate {
 	BTREE_ITER_UPTODATE		= 0,
@@ -286,7 +364,6 @@ static inline unsigned long btree_path_ip_allocated(struct btree_path *path)
  * @nodes_intent_locked - bitmask indicating which locks are intent locks
 */
 struct btree_iter {
-	struct btree_trans	*trans;
 	btree_path_idx_t	path;
 	btree_path_idx_t	update_path;
 	btree_path_idx_t	key_cache_path;
@@ -307,7 +384,7 @@ struct btree_iter {
 	 */
 	struct bkey		k;
 
-	/* BTREE_ITER_WITH_JOURNAL: */
+	/* BTREE_ITER_with_journal: */
 	size_t			journal_idx;
 #ifdef TRACK_PATH_ALLOCATED
 	unsigned long		ip_allocated;
@@ -322,17 +399,15 @@ struct bkey_cached {
 
 	unsigned long		flags;
 	u16			u64s;
-	bool			valid;
-	u32			btree_trans_barrier_seq;
 	struct bkey_cached_key	key;
 
 	struct rhash_head	hash;
-	struct list_head	list;
 
 	struct journal_entry_pin journal;
 	u64			seq;
 
 	struct bkey_i		*k;
+	struct rcu_head		rcu;
 };
 
 static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
@@ -344,6 +419,7 @@ static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
 
 struct btree_insert_entry {
 	unsigned		flags;
+	u8			sort_order;
 	u8			bkey_type;
 	enum btree_id		btree_id:8;
 	u8			level:4;
@@ -398,6 +474,18 @@ struct btree_trans_paths {
 	struct btree_path	paths[];
 };
 
+struct trans_kmalloc_trace {
+	unsigned long		ip;
+	size_t			bytes;
+};
+typedef DARRAY(struct trans_kmalloc_trace) darray_trans_kmalloc_trace;
+
+struct btree_trans_subbuf {
+	u16			base;
+	u16			u64s;
+	u16			size;
+};
+
 struct btree_trans {
 	struct bch_fs		*c;
 
@@ -409,15 +497,22 @@ struct btree_trans {
 	void			*mem;
 	unsigned		mem_top;
 	unsigned		mem_bytes;
+	unsigned		realloc_bytes_required;
+#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
+	darray_trans_kmalloc_trace trans_kmalloc_trace;
+#endif
 
 	btree_path_idx_t	nr_sorted;
 	btree_path_idx_t	nr_paths;
 	btree_path_idx_t	nr_paths_max;
+	btree_path_idx_t	nr_updates;
 	u8			fn_idx;
-	u8			nr_updates;
 	u8			lock_must_abort;
 	bool			lock_may_not_fail:1;
 	bool			srcu_held:1;
+	bool			locked:1;
+	bool			pf_memalloc_nofs:1;
+	bool			write_locked:1;
 	bool			used_mempool:1;
 	bool			in_traverse_all:1;
 	bool			paths_sorted:1;
@@ -425,13 +520,19 @@ struct btree_trans {
 	bool			journal_transaction_names:1;
 	bool			journal_replay_not_finished:1;
 	bool			notrace_relock_fail:1;
-	bool			write_locked:1;
 	enum bch_errcode	restarted:16;
 	u32			restart_count;
+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+	u32			restart_count_this_trans;
+#endif
 
 	u64			last_begin_time;
 	unsigned long		last_begin_ip;
 	unsigned long		last_restarted_ip;
+#ifdef CONFIG_BCACHEFS_DEBUG
+	bch_stacktrace		last_restarted_trace;
+#endif
+
 	unsigned long		last_unlock_ip;
 	unsigned long		srcu_lock_time;
 	const char		*fn;
@@ -440,9 +541,8 @@ struct btree_trans {
 	int			srcu_idx;
 
 	/* update path: */
-	u16			journal_entries_u64s;
-	u16			journal_entries_size;
-	struct jset_entry	*journal_entries;
+	struct btree_trans_subbuf journal_entries;
+	struct btree_trans_subbuf accounting;
 
 	struct btree_trans_commit_hook *hooks;
 	struct journal_entry_pin *journal_pin;
@@ -455,8 +555,12 @@ struct btree_trans {
 
 	unsigned		journal_u64s;
 	unsigned		extra_disk_res; /* XXX kill */
 
-	struct replicas_delta_list *fs_usage_deltas;
+	__BKEY_PADDED(btree_path_down, BKEY_BTREE_PTR_VAL_U64s_MAX);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	dep_map;
+#endif
 
 	/* Entries before this are zeroed out on every bch2_trans_get() call */
 	struct list_head	list;
@@ -514,7 +618,11 @@ enum btree_write_type {
 	x(dying)			\
 	x(fake)				\
 	x(need_rewrite)			\
-	x(never_write)
+	x(need_rewrite_error)		\
+	x(need_rewrite_degraded)	\
+	x(need_rewrite_ptr_written_zero) \
+	x(never_write)			\
+	x(pinned)
 
 enum btree_flags {
 	/* First bits for btree node write type */
@@ -537,6 +645,32 @@ static inline void clear_btree_node_ ## flag(struct btree *b)	\
 BTREE_FLAGS()
 #undef x
 
+#define BTREE_NODE_REWRITE_REASON()	\
+	x(none)				\
+	x(unknown)			\
+	x(error)			\
+	x(degraded)			\
+	x(ptr_written_zero)
+
+enum btree_node_rewrite_reason {
+#define x(n)	BTREE_NODE_REWRITE_##n,
+	BTREE_NODE_REWRITE_REASON()
+#undef x
+};
+
+static inline enum btree_node_rewrite_reason btree_node_rewrite_reason(struct btree *b)
+{
+	if (btree_node_need_rewrite_ptr_written_zero(b))
+		return BTREE_NODE_REWRITE_ptr_written_zero;
+	if (btree_node_need_rewrite_degraded(b))
+		return BTREE_NODE_REWRITE_degraded;
+	if (btree_node_need_rewrite_error(b))
+		return BTREE_NODE_REWRITE_error;
+	if (btree_node_need_rewrite(b))
+		return BTREE_NODE_REWRITE_unknown;
+	return BTREE_NODE_REWRITE_none;
+}
+
 static inline struct btree_write *btree_current_write(struct btree *b)
 {
 	return b->writes + btree_node_write_idx(b);
@@ -556,13 +690,13 @@ static inline struct bset_tree *bset_tree_last(struct btree *b)
 static inline void *
 __btree_node_offset_to_ptr(const struct btree *b, u16 offset)
 {
-	return (void *) ((u64 *) b->data + 1 + offset);
+	return (void *) ((u64 *) b->data + offset);
 }
 
 static inline u16
 __btree_node_ptr_to_offset(const struct btree *b, const void *p)
 {
-	u16 ret = (u64 *) p - 1 - (u64 *) b->data;
+	u16 ret = (u64 *) p - (u64 *) b->data;
 
 	EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p);
 	return ret;
@@ -687,58 +821,91 @@ const char *bch2_btree_node_type_str(enum btree_node_type);
 	(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS|	\
 	 BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS)
 
-static inline bool btree_node_type_needs_gc(enum btree_node_type type)
+static inline bool btree_node_type_has_trans_triggers(enum btree_node_type type)
 {
-	return BTREE_NODE_TYPE_HAS_TRIGGERS & BIT_ULL(type);
+	return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS;
 }
 
-static inline bool btree_node_type_is_extents(enum btree_node_type type)
+static inline bool btree_node_type_has_atomic_triggers(enum btree_node_type type)
 {
-	const unsigned mask = 0
-#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
+	return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS;
+}
+
+static inline bool btree_node_type_has_triggers(enum btree_node_type type)
+{
+	return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRIGGERS;
+}
+
+static inline bool btree_id_is_extents(enum btree_id btree)
+{
+	const u64 mask = 0
+#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_IS_extents)) << nr)
 	BCH_BTREE_IDS()
 #undef x
 	;
 
-	return (1U << type) & mask;
+	return BIT_ULL(btree) & mask;
 }
 
-static inline bool btree_id_is_extents(enum btree_id btree)
+static inline bool btree_node_type_is_extents(enum btree_node_type type)
 {
-	return btree_node_type_is_extents(__btree_node_type(0, btree));
+	return type != BKEY_TYPE_btree && btree_id_is_extents(type - 1);
 }
 
-static inline bool btree_type_has_snapshots(enum btree_id id)
+static inline bool btree_type_has_snapshots(enum btree_id btree)
 {
-	const unsigned mask = 0
-#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
+	const u64 mask = 0
+#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_IS_snapshots)) << nr)
 	BCH_BTREE_IDS()
 #undef x
 	;
 
-	return (1U << id) & mask;
+	return BIT_ULL(btree) & mask;
 }
 
-static inline bool btree_type_has_snapshot_field(enum btree_id id)
+static inline bool btree_type_has_snapshot_field(enum btree_id btree)
 {
-	const unsigned mask = 0
-#define x(name, nr, flags, ...)	|((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
+	const u64 mask = 0
+#define x(name, nr, flags, ...)	|((!!((flags) & (BTREE_IS_snapshot_field|BTREE_IS_snapshots))) << nr)
 	BCH_BTREE_IDS()
 #undef x
 	;
 
-	return (1U << id) & mask;
+	return BIT_ULL(btree) & mask;
 }
 
-static inline bool btree_type_has_ptrs(enum btree_id id)
+static inline bool btree_type_has_ptrs(enum btree_id btree)
 {
-	const unsigned mask = 0
-#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_ID_DATA)) << nr)
+	const u64 mask = 0
+#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_IS_data)) << nr)
 	BCH_BTREE_IDS()
 #undef x
 	;
 
-	return (1U << id) & mask;
+	return BIT_ULL(btree) & mask;
+}
+
+static inline bool btree_type_uses_write_buffer(enum btree_id btree)
+{
+	const u64 mask = 0
+#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_IS_write_buffer)) << nr)
+	BCH_BTREE_IDS()
+#undef x
+	;
+
+	return BIT_ULL(btree) & mask;
+}
+
+static inline u8 btree_trigger_order(enum btree_id btree)
+{
+	switch (btree) {
+	case BTREE_ID_alloc:
+		return U8_MAX;
+	case BTREE_ID_stripes:
+		return U8_MAX - 1;
+	default:
+		return btree;
+	}
 }
 
 struct btree_root {
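Note on the flag rework above: the hand-numbered "static const __maybe_unused u16 BTREE_ITER_*" constants are replaced by x-macro lists (BTREE_ITER_FLAGS(), STR_HASH_FLAGS(), BTREE_UPDATE_FLAGS(), BTREE_TRIGGER_FLAGS()) that are expanded twice, once into an anonymous enum of consecutive bit numbers and once into the flag values of enum btree_iter_update_trigger_flags. Below is a minimal, self-contained sketch of that two-pass x-macro pattern; it is illustrative only, not part of the patch, and the MY_FLAGS/MY_FLAG_BIT_*/MY_* names are hypothetical.

/* Two-pass x-macro flag generation, reduced to a standalone C program. */
#include <stdio.h>

#define MY_FLAGS()	\
	x(slots)	\
	x(intent)	\
	x(prefetch)

/* Pass 1: one enumerator per flag name, yielding consecutive bit numbers 0..n-1. */
enum {
#define x(n)	MY_FLAG_BIT_##n,
	MY_FLAGS()
#undef x
	MY_FLAG_BIT_NR,
};

/* Pass 2: the flag values themselves, derived from the bit numbers. */
enum my_flags {
#define x(n)	MY_##n = 1U << MY_FLAG_BIT_##n,
	MY_FLAGS()
#undef x
};

int main(void)
{
	/* Prints: 1 2 4 (3 flags) */
	printf("%d %d %d (%d flags)\n",
	       MY_slots, MY_intent, MY_prefetch, MY_FLAG_BIT_NR);
	return 0;
}

Adding a flag then only means adding one x(...) line to the list; the bit numbers and flag values stay consistent automatically, which is what lets the patch keep the iterator, str_hash, update and trigger flags in a single shared bit space.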