diff options
Diffstat (limited to 'fs/btrfs/delayed-ref.h')
| -rw-r--r-- | fs/btrfs/delayed-ref.h | 489 |
1 files changed, 330 insertions, 159 deletions
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 70b962cc177d..5ce940532144 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -1,31 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ -#ifndef __DELAYED_REF__ -#define __DELAYED_REF__ + +#ifndef BTRFS_DELAYED_REF_H +#define BTRFS_DELAYED_REF_H + +#include <linux/types.h> +#include <linux/refcount.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <uapi/linux/btrfs_tree.h> +#include "fs.h" +#include "messages.h" + +struct btrfs_trans_handle; +struct btrfs_fs_info; /* these are the possible values of struct btrfs_delayed_ref_node->action */ -#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ -#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ -#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ -#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */ +enum btrfs_delayed_ref_action { + /* Add one backref to the tree */ + BTRFS_ADD_DELAYED_REF = 1, + /* Delete one backref from the tree */ + BTRFS_DROP_DELAYED_REF, + /* Record a full extent allocation */ + BTRFS_ADD_DELAYED_EXTENT, + /* Not changing ref count on head ref */ + BTRFS_UPDATE_DELAYED_HEAD, +} __packed; + +struct btrfs_data_ref { + /* For EXTENT_DATA_REF */ + + /* Inode which refers to this data extent */ + u64 objectid; + + /* + * file_offset - extent_offset + * + * file_offset is the key.offset of the EXTENT_DATA key. + * extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data. + */ + u64 offset; +}; + +struct btrfs_tree_ref { + /* + * Level of this tree block. + * + * Shared for skinny (TREE_BLOCK_REF) and normal tree ref. + */ + int level; + + /* For non-skinny metadata, no special member needed */ +}; struct btrfs_delayed_ref_node { - struct rb_node rb_node; + struct rb_node ref_node; + /* + * If action is BTRFS_ADD_DELAYED_REF, also link this node to + * ref_head->ref_add_list, then we do not need to iterate the + * refs rbtree in the corresponding delayed ref head + * (struct btrfs_delayed_ref_head::ref_tree). + */ + struct list_head add_list; /* the starting bytenr of the extent */ u64 bytenr; @@ -36,8 +77,17 @@ struct btrfs_delayed_ref_node { /* seq number to keep track of insertion order */ u64 seq; + /* The ref_root for this ref */ + u64 ref_root; + + /* + * The parent for this ref, if this isn't set the ref_root is the + * reference owner. + */ + u64 parent; + /* ref count on this data structure */ - atomic_t refs; + refcount_t refs; /* * how many refs is this entry adding or deleting. For @@ -52,18 +102,18 @@ struct btrfs_delayed_ref_node { unsigned int action:8; unsigned int type:8; - /* is this node still in the rbtree? */ - unsigned int is_head:1; - unsigned int in_tree:1; + + union { + struct btrfs_tree_ref tree_ref; + struct btrfs_data_ref data_ref; + }; }; struct btrfs_delayed_extent_op { struct btrfs_disk_key key; + bool update_key; + bool update_flags; u64 flags_to_set; - int level; - unsigned int update_key:1; - unsigned int update_flags:1; - unsigned int is_data:1; }; /* @@ -73,17 +123,54 @@ struct btrfs_delayed_extent_op { * reference count modifications we've queued up. */ struct btrfs_delayed_ref_head { - struct btrfs_delayed_ref_node node; - + u64 bytenr; + u64 num_bytes; /* * the mutex is held while running the refs, and it is also * held when checking the sum of reference modifications. */ struct mutex mutex; - struct list_head cluster; + refcount_t refs; + + /* Protects 'ref_tree' and 'ref_add_list'. */ + spinlock_t lock; + struct rb_root_cached ref_tree; + /* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */ + struct list_head ref_add_list; struct btrfs_delayed_extent_op *extent_op; + + /* + * This is used to track the final ref_mod from all the refs associated + * with this head ref, this is not adjusted as delayed refs are run, + * this is meant to track if we need to do the csum accounting or not. + */ + int total_ref_mod; + + /* + * This is the current outstanding mod references for this bytenr. This + * is used with lookup_extent_info to get an accurate reference count + * for a bytenr, so it is adjusted as delayed refs are run so that any + * on disk reference count + ref_mod is accurate. + */ + int ref_mod; + + /* + * The root that triggered the allocation when must_insert_reserved is + * set to true. + */ + u64 owning_root; + + /* + * Track reserved bytes when setting must_insert_reserved. On success + * or cleanup, we will need to free the reservation. + */ + u64 reserved_bytes; + + /* Tree block level, for metadata only. */ + u8 level; + /* * when a new extent is allocated, it is just reserved in memory * The actual extent isn't inserted into the extent allocation tree @@ -96,68 +183,165 @@ struct btrfs_delayed_ref_head { * we need to update the in ram accounting to properly reflect * the free has happened. */ - unsigned int must_insert_reserved:1; - unsigned int is_data:1; -}; + bool must_insert_reserved; -struct btrfs_delayed_tree_ref { - struct btrfs_delayed_ref_node node; - u64 root; - u64 parent; - int level; + bool is_data; + bool is_system; + bool processing; + /* + * Indicate if it's currently in the data structure that tracks head + * refs (struct btrfs_delayed_ref_root::head_refs). + */ + bool tracked; }; -struct btrfs_delayed_data_ref { - struct btrfs_delayed_ref_node node; - u64 root; - u64 parent; - u64 objectid; - u64 offset; +enum btrfs_delayed_ref_flags { + /* Indicate that we are flushing delayed refs for the commit */ + BTRFS_DELAYED_REFS_FLUSHING, }; struct btrfs_delayed_ref_root { - struct rb_root root; + /* + * Track head references. + * The keys correspond to the logical address of the extent ("bytenr") + * right shifted by fs_info->sectorsize_bits. This is both to get a more + * dense index space (optimizes xarray structure) and because indexes in + * xarrays are of "unsigned long" type, meaning they are 32 bits wide on + * 32 bits platforms, limiting the extent range to 4G which is too low + * and makes it unusable (truncated index values) on 32 bits platforms. + * Protected by the spinlock 'lock' defined below. + */ + struct xarray head_refs; - /* this spin lock protects the rbtree and the entries inside */ - spinlock_t lock; + /* + * Track dirty extent records. + * The keys correspond to the logical address of the extent ("bytenr") + * right shifted by fs_info->sectorsize_bits, for same reasons as above. + */ + struct xarray dirty_extents; - /* how many delayed ref updates we've queued, used by the - * throttling code + /* + * Protects the xarray head_refs, its entries and the following fields: + * num_heads, num_heads_ready, pending_csums and run_delayed_start. */ - unsigned long num_entries; + spinlock_t lock; - /* total number of head nodes in tree */ + /* Total number of head refs, protected by the spinlock 'lock'. */ unsigned long num_heads; - /* total number of head nodes ready for processing */ + /* + * Total number of head refs ready for processing, protected by the + * spinlock 'lock'. + */ unsigned long num_heads_ready; /* - * bumped when someone is making progress on the delayed - * refs, so that other procs know they are just adding to - * contention intead of helping + * Track space reserved for deleting csums of data extents. + * Protected by the spinlock 'lock'. + */ + u64 pending_csums; + + unsigned long flags; + + /* + * Track from which bytenr to start searching ref heads. + * Protected by the spinlock 'lock'. */ - atomic_t procs_running_refs; - atomic_t ref_seq; - wait_queue_head_t wait; + u64 run_delayed_start; /* - * set when the tree is flushing before a transaction commit, - * used by the throttling code to decide if new updates need - * to be run right away + * To make qgroup to skip given root. + * This is for snapshot, as btrfs_qgroup_inherit() will manually + * modify counters for snapshot and its source, so we should skip + * the snapshot in new_root/old_roots or it will get calculated twice */ - int flushing; + u64 qgroup_to_skip; +}; - u64 run_delayed_start; +enum btrfs_ref_type { + BTRFS_REF_NOT_SET, + BTRFS_REF_DATA, + BTRFS_REF_METADATA, +} __packed; + +struct btrfs_ref { + enum btrfs_ref_type type; + enum btrfs_delayed_ref_action action; + + /* + * Whether this extent should go through qgroup record. + * + * Normally false, but for certain cases like delayed subtree scan, + * setting this flag can hugely reduce qgroup overhead. + */ + bool skip_qgroup; + + u64 bytenr; + u64 num_bytes; + u64 owning_root; + + /* + * The root that owns the reference for this reference, this will be set + * or ->parent will be set, depending on what type of reference this is. + */ + u64 ref_root; + + /* Bytenr of the parent tree block */ + u64 parent; + union { + struct btrfs_data_ref data_ref; + struct btrfs_tree_ref tree_ref; + }; + +#ifdef CONFIG_BTRFS_DEBUG + /* Through which root is this modification. */ + u64 real_root; +#endif }; extern struct kmem_cache *btrfs_delayed_ref_head_cachep; -extern struct kmem_cache *btrfs_delayed_tree_ref_cachep; -extern struct kmem_cache *btrfs_delayed_data_ref_cachep; +extern struct kmem_cache *btrfs_delayed_ref_node_cachep; extern struct kmem_cache *btrfs_delayed_extent_op_cachep; -int btrfs_delayed_ref_init(void); -void btrfs_delayed_ref_exit(void); +int __init btrfs_delayed_ref_init(void); +void __cold btrfs_delayed_ref_exit(void); + +static inline u64 btrfs_calc_delayed_ref_bytes(const struct btrfs_fs_info *fs_info, + int num_delayed_refs) +{ + u64 num_bytes; + + num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_delayed_refs); + + /* + * We have to check the mount option here because we could be enabling + * the free space tree for the first time and don't have the compat_ro + * option set yet. + * + * We need extra reservations if we have the free space tree because + * we'll have to modify that tree as well. + */ + if (btrfs_test_opt(fs_info, FREE_SPACE_TREE)) + num_bytes *= 2; + + return num_bytes; +} + +static inline u64 btrfs_calc_delayed_ref_csum_bytes(const struct btrfs_fs_info *fs_info, + int num_csum_items) +{ + /* + * Deleting csum items does not result in new nodes/leaves and does not + * require changing the free space tree, only the csum tree, so this is + * all we need. + */ + return btrfs_calc_metadata_size(fs_info, num_csum_items); +} + +void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 mod_root, + bool skip_qgroup); +void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ino, u64 offset, + u64 mod_root, bool skip_qgroup); static inline struct btrfs_delayed_extent_op * btrfs_alloc_delayed_extent_op(void) @@ -172,117 +356,104 @@ btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op) kmem_cache_free(btrfs_delayed_extent_op_cachep, op); } -static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) +void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref); + +static inline u64 btrfs_ref_head_to_space_flags( + struct btrfs_delayed_ref_head *head_ref) { - WARN_ON(atomic_read(&ref->refs) == 0); - if (atomic_dec_and_test(&ref->refs)) { - WARN_ON(ref->in_tree); - switch (ref->type) { - case BTRFS_TREE_BLOCK_REF_KEY: - case BTRFS_SHARED_BLOCK_REF_KEY: - kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); - break; - case BTRFS_EXTENT_DATA_REF_KEY: - case BTRFS_SHARED_DATA_REF_KEY: - kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); - break; - case 0: - kmem_cache_free(btrfs_delayed_ref_head_cachep, ref); - break; - default: - BUG(); - } - } + if (head_ref->is_data) + return BTRFS_BLOCK_GROUP_DATA; + else if (head_ref->is_system) + return BTRFS_BLOCK_GROUP_SYSTEM; + return BTRFS_BLOCK_GROUP_METADATA; +} + +static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head) +{ + if (refcount_dec_and_test(&head->refs)) + kmem_cache_free(btrfs_delayed_ref_head_cachep, head); } -int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, u64 parent, - u64 ref_root, int level, int action, - struct btrfs_delayed_extent_op *extent_op, - int for_cow); -int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, - u64 parent, u64 ref_root, - u64 owner, u64 offset, int action, - struct btrfs_delayed_extent_op *extent_op, - int for_cow); -int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, +int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, + struct btrfs_ref *generic_ref, + struct btrfs_delayed_extent_op *extent_op); +int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, + struct btrfs_ref *generic_ref, + u64 reserved); +int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, + u64 bytenr, u64 num_bytes, u8 level, struct btrfs_delayed_extent_op *extent_op); -void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, +void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head); struct btrfs_delayed_ref_head * -btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); -int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_head *head); +btrfs_find_delayed_ref_head(const struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + u64 bytenr); static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head) { mutex_unlock(&head->mutex); } +void btrfs_delete_ref_head(const struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head); -int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, - struct list_head *cluster, u64 search_start); -void btrfs_release_ref_cluster(struct list_head *cluster); - -int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_root *delayed_refs, - u64 seq); - -/* - * delayed refs with a ref_seq > 0 must be held back during backref walking. - * this only applies to items in one of the fs-trees. for_cow items never need - * to be held back, so they won't get a ref_seq number. - */ -static inline int need_ref_seq(int for_cow, u64 rootid) +struct btrfs_delayed_ref_head *btrfs_select_ref_head( + const struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs); +void btrfs_unselect_ref_head(struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head); +struct btrfs_delayed_ref_node *btrfs_select_delayed_ref(struct btrfs_delayed_ref_head *head); + +int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq); + +void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr_refs, int nr_csums); +void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans); +void btrfs_inc_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info); +void btrfs_dec_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info); +void btrfs_inc_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info); +void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info); +int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, + enum btrfs_reserve_flush_enum flush); +bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); +bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head, + u64 root, u64 parent); +void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans); + +static inline u64 btrfs_delayed_ref_owner(const struct btrfs_delayed_ref_node *node) { - if (for_cow) - return 0; - - if (rootid == BTRFS_FS_TREE_OBJECTID) - return 1; - - if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) - return 1; - - return 0; + if (node->type == BTRFS_EXTENT_DATA_REF_KEY || + node->type == BTRFS_SHARED_DATA_REF_KEY) + return node->data_ref.objectid; + return node->tree_ref.level; } -/* - * a node might live in a head or a regular ref, this lets you - * test for the proper type to use. - */ -static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node) +static inline u64 btrfs_delayed_ref_offset(const struct btrfs_delayed_ref_node *node) { - return node->is_head; + if (node->type == BTRFS_EXTENT_DATA_REF_KEY || + node->type == BTRFS_SHARED_DATA_REF_KEY) + return node->data_ref.offset; + return 0; } -/* - * helper functions to cast a node into its container - */ -static inline struct btrfs_delayed_tree_ref * -btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node) +static inline u8 btrfs_ref_type(const struct btrfs_ref *ref) { - WARN_ON(btrfs_delayed_ref_is_head(node)); - return container_of(node, struct btrfs_delayed_tree_ref, node); -} + ASSERT(ref->type == BTRFS_REF_DATA || ref->type == BTRFS_REF_METADATA); + + if (ref->type == BTRFS_REF_DATA) { + if (ref->parent) + return BTRFS_SHARED_DATA_REF_KEY; + else + return BTRFS_EXTENT_DATA_REF_KEY; + } else { + if (ref->parent) + return BTRFS_SHARED_BLOCK_REF_KEY; + else + return BTRFS_TREE_BLOCK_REF_KEY; + } -static inline struct btrfs_delayed_data_ref * -btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node) -{ - WARN_ON(btrfs_delayed_ref_is_head(node)); - return container_of(node, struct btrfs_delayed_data_ref, node); + return 0; } -static inline struct btrfs_delayed_ref_head * -btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node) -{ - WARN_ON(!btrfs_delayed_ref_is_head(node)); - return container_of(node, struct btrfs_delayed_ref_head, node); -} #endif |
