summaryrefslogtreecommitdiff
path: root/fs/btrfs/delayed-ref.h
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/delayed-ref.h')
-rw-r--r--fs/btrfs/delayed-ref.h368
1 files changed, 270 insertions, 98 deletions
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index d2af974f68a1..5ce940532144 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -6,20 +6,65 @@
#ifndef BTRFS_DELAYED_REF_H
#define BTRFS_DELAYED_REF_H
+#include <linux/types.h>
#include <linux/refcount.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <uapi/linux/btrfs_tree.h>
+#include "fs.h"
+#include "messages.h"
+
+struct btrfs_trans_handle;
+struct btrfs_fs_info;
/* these are the possible values of struct btrfs_delayed_ref_node->action */
-#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */
-#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */
-#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
-#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
+enum btrfs_delayed_ref_action {
+ /* Add one backref to the tree */
+ BTRFS_ADD_DELAYED_REF = 1,
+ /* Delete one backref from the tree */
+ BTRFS_DROP_DELAYED_REF,
+ /* Record a full extent allocation */
+ BTRFS_ADD_DELAYED_EXTENT,
+ /* Not changing ref count on head ref */
+ BTRFS_UPDATE_DELAYED_HEAD,
+} __packed;
+
+struct btrfs_data_ref {
+ /* For EXTENT_DATA_REF */
+
+ /* Inode which refers to this data extent */
+ u64 objectid;
+
+ /*
+ * file_offset - extent_offset
+ *
+ * file_offset is the key.offset of the EXTENT_DATA key.
+ * extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data.
+ */
+ u64 offset;
+};
+
+struct btrfs_tree_ref {
+ /*
+ * Level of this tree block.
+ *
+ * Shared for skinny (TREE_BLOCK_REF) and normal tree ref.
+ */
+ int level;
+
+ /* For non-skinny metadata, no special member needed */
+};
struct btrfs_delayed_ref_node {
struct rb_node ref_node;
/*
* If action is BTRFS_ADD_DELAYED_REF, also link this node to
* ref_head->ref_add_list, then we do not need to iterate the
- * whole ref_head->ref_list to find BTRFS_ADD_DELAYED_REF nodes.
+ * refs rbtree in the corresponding delayed ref head
+ * (struct btrfs_delayed_ref_head::ref_tree).
*/
struct list_head add_list;
@@ -32,6 +77,15 @@ struct btrfs_delayed_ref_node {
/* seq number to keep track of insertion order */
u64 seq;
+ /* The ref_root for this ref */
+ u64 ref_root;
+
+ /*
+ * The parent for this ref, if this isn't set the ref_root is the
+ * reference owner.
+ */
+ u64 parent;
+
/* ref count on this data structure */
refcount_t refs;
@@ -48,17 +102,17 @@ struct btrfs_delayed_ref_node {
unsigned int action:8;
unsigned int type:8;
- /* is this node still in the rbtree? */
- unsigned int is_head:1;
- unsigned int in_tree:1;
+
+ union {
+ struct btrfs_tree_ref tree_ref;
+ struct btrfs_data_ref data_ref;
+ };
};
struct btrfs_delayed_extent_op {
struct btrfs_disk_key key;
- u8 level;
bool update_key;
bool update_flags;
- bool is_data;
u64 flags_to_set;
};
@@ -71,20 +125,20 @@ struct btrfs_delayed_extent_op {
struct btrfs_delayed_ref_head {
u64 bytenr;
u64 num_bytes;
- refcount_t refs;
/*
* the mutex is held while running the refs, and it is also
* held when checking the sum of reference modifications.
*/
struct mutex mutex;
+ refcount_t refs;
+
+ /* Protects 'ref_tree' and 'ref_add_list'. */
spinlock_t lock;
struct rb_root_cached ref_tree;
/* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */
struct list_head ref_add_list;
- struct rb_node href_node;
-
struct btrfs_delayed_extent_op *extent_op;
/*
@@ -103,15 +157,19 @@ struct btrfs_delayed_ref_head {
int ref_mod;
/*
- * For qgroup reserved space freeing.
- *
- * ref_root and reserved will be recorded after
- * BTRFS_ADD_DELAYED_EXTENT is called.
- * And will be used to free reserved qgroup space at
- * run_delayed_refs() time.
+ * The root that triggered the allocation when must_insert_reserved is
+ * set to true.
+ */
+ u64 owning_root;
+
+ /*
+ * Track reserved bytes when setting must_insert_reserved. On success
+ * or cleanup, we will need to free the reservation.
*/
- u64 qgroup_ref_root;
- u64 qgroup_reserved;
+ u64 reserved_bytes;
+
+ /* Tree block level, for metadata only. */
+ u8 level;
/*
* when a new extent is allocated, it is just reserved in memory
@@ -125,57 +183,70 @@ struct btrfs_delayed_ref_head {
* we need to update the in ram accounting to properly reflect
* the free has happened.
*/
- unsigned int must_insert_reserved:1;
- unsigned int is_data:1;
- unsigned int is_system:1;
- unsigned int processing:1;
-};
+ bool must_insert_reserved;
-struct btrfs_delayed_tree_ref {
- struct btrfs_delayed_ref_node node;
- u64 root;
- u64 parent;
- int level;
+ bool is_data;
+ bool is_system;
+ bool processing;
+ /*
+ * Indicate if it's currently in the data structure that tracks head
+ * refs (struct btrfs_delayed_ref_root::head_refs).
+ */
+ bool tracked;
};
-struct btrfs_delayed_data_ref {
- struct btrfs_delayed_ref_node node;
- u64 root;
- u64 parent;
- u64 objectid;
- u64 offset;
+enum btrfs_delayed_ref_flags {
+ /* Indicate that we are flushing delayed refs for the commit */
+ BTRFS_DELAYED_REFS_FLUSHING,
};
struct btrfs_delayed_ref_root {
- /* head ref rbtree */
- struct rb_root_cached href_root;
-
- /* dirty extent records */
- struct rb_root dirty_extent_root;
+ /*
+ * Track head references.
+ * The keys correspond to the logical address of the extent ("bytenr")
+ * right shifted by fs_info->sectorsize_bits. This is both to get a more
+ * dense index space (optimizes xarray structure) and because indexes in
+ * xarrays are of "unsigned long" type, meaning they are 32 bits wide on
+ * 32 bits platforms, limiting the extent range to 4G which is too low
+ * and makes it unusable (truncated index values) on 32 bits platforms.
+ * Protected by the spinlock 'lock' defined below.
+ */
+ struct xarray head_refs;
- /* this spin lock protects the rbtree and the entries inside */
- spinlock_t lock;
+ /*
+ * Track dirty extent records.
+ * The keys correspond to the logical address of the extent ("bytenr")
+ * right shifted by fs_info->sectorsize_bits, for same reasons as above.
+ */
+ struct xarray dirty_extents;
- /* how many delayed ref updates we've queued, used by the
- * throttling code
+ /*
+ * Protects the xarray head_refs, its entries and the following fields:
+ * num_heads, num_heads_ready, pending_csums and run_delayed_start.
*/
- atomic_t num_entries;
+ spinlock_t lock;
- /* total number of head nodes in tree */
+ /* Total number of head refs, protected by the spinlock 'lock'. */
unsigned long num_heads;
- /* total number of head nodes ready for processing */
+ /*
+ * Total number of head refs ready for processing, protected by the
+ * spinlock 'lock'.
+ */
unsigned long num_heads_ready;
+ /*
+ * Track space reserved for deleting csums of data extents.
+ * Protected by the spinlock 'lock'.
+ */
u64 pending_csums;
+ unsigned long flags;
+
/*
- * set when the tree is flushing before a transaction commit,
- * used by the throttling code to decide if new updates need
- * to be run right away
+ * Track from which bytenr to start searching ref heads.
+ * Protected by the spinlock 'lock'.
*/
- int flushing;
-
u64 run_delayed_start;
/*
@@ -187,14 +258,91 @@ struct btrfs_delayed_ref_root {
u64 qgroup_to_skip;
};
+enum btrfs_ref_type {
+ BTRFS_REF_NOT_SET,
+ BTRFS_REF_DATA,
+ BTRFS_REF_METADATA,
+} __packed;
+
+struct btrfs_ref {
+ enum btrfs_ref_type type;
+ enum btrfs_delayed_ref_action action;
+
+ /*
+ * Whether this extent should go through qgroup record.
+ *
+ * Normally false, but for certain cases like delayed subtree scan,
+ * setting this flag can hugely reduce qgroup overhead.
+ */
+ bool skip_qgroup;
+
+ u64 bytenr;
+ u64 num_bytes;
+ u64 owning_root;
+
+ /*
+ * The root that owns the reference for this reference, this will be set
+ * or ->parent will be set, depending on what type of reference this is.
+ */
+ u64 ref_root;
+
+ /* Bytenr of the parent tree block */
+ u64 parent;
+ union {
+ struct btrfs_data_ref data_ref;
+ struct btrfs_tree_ref tree_ref;
+ };
+
+#ifdef CONFIG_BTRFS_DEBUG
+ /* Through which root is this modification. */
+ u64 real_root;
+#endif
+};
+
extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
-extern struct kmem_cache *btrfs_delayed_tree_ref_cachep;
-extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
+extern struct kmem_cache *btrfs_delayed_ref_node_cachep;
extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
int __init btrfs_delayed_ref_init(void);
void __cold btrfs_delayed_ref_exit(void);
+static inline u64 btrfs_calc_delayed_ref_bytes(const struct btrfs_fs_info *fs_info,
+ int num_delayed_refs)
+{
+ u64 num_bytes;
+
+ num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_delayed_refs);
+
+ /*
+ * We have to check the mount option here because we could be enabling
+ * the free space tree for the first time and don't have the compat_ro
+ * option set yet.
+ *
+ * We need extra reservations if we have the free space tree because
+ * we'll have to modify that tree as well.
+ */
+ if (btrfs_test_opt(fs_info, FREE_SPACE_TREE))
+ num_bytes *= 2;
+
+ return num_bytes;
+}
+
+static inline u64 btrfs_calc_delayed_ref_csum_bytes(const struct btrfs_fs_info *fs_info,
+ int num_csum_items)
+{
+ /*
+ * Deleting csum items does not result in new nodes/leaves and does not
+ * require changing the free space tree, only the csum tree, so this is
+ * all we need.
+ */
+ return btrfs_calc_metadata_size(fs_info, num_csum_items);
+}
+
+void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 mod_root,
+ bool skip_qgroup);
+void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ino, u64 offset,
+ u64 mod_root, bool skip_qgroup);
+
static inline struct btrfs_delayed_extent_op *
btrfs_alloc_delayed_extent_op(void)
{
@@ -208,24 +356,16 @@ btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op)
kmem_cache_free(btrfs_delayed_extent_op_cachep, op);
}
-static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
+void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref);
+
+static inline u64 btrfs_ref_head_to_space_flags(
+ struct btrfs_delayed_ref_head *head_ref)
{
- WARN_ON(refcount_read(&ref->refs) == 0);
- if (refcount_dec_and_test(&ref->refs)) {
- WARN_ON(ref->in_tree);
- switch (ref->type) {
- case BTRFS_TREE_BLOCK_REF_KEY:
- case BTRFS_SHARED_BLOCK_REF_KEY:
- kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
- break;
- case BTRFS_EXTENT_DATA_REF_KEY:
- case BTRFS_SHARED_DATA_REF_KEY:
- kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
- break;
- default:
- BUG();
- }
- }
+ if (head_ref->is_data)
+ return BTRFS_BLOCK_GROUP_DATA;
+ else if (head_ref->is_system)
+ return BTRFS_BLOCK_GROUP_SYSTEM;
+ return BTRFS_BLOCK_GROUP_METADATA;
}
static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head)
@@ -235,53 +375,85 @@ static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *hea
}
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 ref_root, int level, int action,
- struct btrfs_delayed_extent_op *extent_op,
- int *old_ref_mod, int *new_ref_mod);
+ struct btrfs_ref *generic_ref,
+ struct btrfs_delayed_extent_op *extent_op);
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes,
- u64 parent, u64 ref_root,
- u64 owner, u64 offset, u64 reserved, int action,
- int *old_ref_mod, int *new_ref_mod);
-int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes,
+ struct btrfs_ref *generic_ref,
+ u64 reserved);
+int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
+ u64 bytenr, u64 num_bytes, u8 level,
struct btrfs_delayed_extent_op *extent_op);
-void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head);
struct btrfs_delayed_ref_head *
-btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
+btrfs_find_delayed_ref_head(const struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
u64 bytenr);
-int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
- struct btrfs_delayed_ref_head *head);
static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
{
mutex_unlock(&head->mutex);
}
-void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
+void btrfs_delete_ref_head(const struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head);
struct btrfs_delayed_ref_head *btrfs_select_ref_head(
+ const struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs);
+void btrfs_unselect_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head);
+struct btrfs_delayed_ref_node *btrfs_select_delayed_ref(struct btrfs_delayed_ref_head *head);
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq);
-/*
- * helper functions to cast a node into its container
- */
-static inline struct btrfs_delayed_tree_ref *
-btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
+void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr_refs, int nr_csums);
+void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans);
+void btrfs_inc_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info);
+void btrfs_dec_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info);
+void btrfs_inc_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info);
+void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info);
+int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
+ enum btrfs_reserve_flush_enum flush);
+bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
+bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head,
+ u64 root, u64 parent);
+void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans);
+
+static inline u64 btrfs_delayed_ref_owner(const struct btrfs_delayed_ref_node *node)
+{
+ if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
+ node->type == BTRFS_SHARED_DATA_REF_KEY)
+ return node->data_ref.objectid;
+ return node->tree_ref.level;
+}
+
+static inline u64 btrfs_delayed_ref_offset(const struct btrfs_delayed_ref_node *node)
{
- return container_of(node, struct btrfs_delayed_tree_ref, node);
+ if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
+ node->type == BTRFS_SHARED_DATA_REF_KEY)
+ return node->data_ref.offset;
+ return 0;
}
-static inline struct btrfs_delayed_data_ref *
-btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node)
+static inline u8 btrfs_ref_type(const struct btrfs_ref *ref)
{
- return container_of(node, struct btrfs_delayed_data_ref, node);
+ ASSERT(ref->type == BTRFS_REF_DATA || ref->type == BTRFS_REF_METADATA);
+
+ if (ref->type == BTRFS_REF_DATA) {
+ if (ref->parent)
+ return BTRFS_SHARED_DATA_REF_KEY;
+ else
+ return BTRFS_EXTENT_DATA_REF_KEY;
+ } else {
+ if (ref->parent)
+ return BTRFS_SHARED_BLOCK_REF_KEY;
+ else
+ return BTRFS_TREE_BLOCK_REF_KEY;
+ }
+
+ return 0;
}
#endif