summaryrefslogtreecommitdiff
path: root/fs/btrfs/volumes.h
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/volumes.h')
-rw-r--r--fs/btrfs/volumes.h250
1 files changed, 202 insertions, 48 deletions
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2128a032c3b7..34b854c1a303 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -6,22 +6,46 @@
#ifndef BTRFS_VOLUMES_H
#define BTRFS_VOLUMES_H
+#include <linux/blk_types.h>
+#include <linux/blkdev.h>
+#include <linux/sizes.h>
+#include <linux/atomic.h>
#include <linux/sort.h>
-#include <linux/btrfs.h>
-#include "async-thread.h"
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/log2.h>
+#include <linux/kobject.h>
+#include <linux/refcount.h>
+#include <linux/completion.h>
+#include <linux/rbtree.h>
+#include <uapi/linux/btrfs.h>
+#include <uapi/linux/btrfs_tree.h>
#include "messages.h"
-#include "tree-checker.h"
-#include "rcu-string.h"
+#include "extent-io-tree.h"
+
+struct block_device;
+struct bdev_handle;
+struct btrfs_fs_info;
+struct btrfs_block_group;
+struct btrfs_trans_handle;
+struct btrfs_transaction;
+struct btrfs_zoned_device_info;
#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
+/*
+ * Arbitrary maximum size of one discard request to limit potentially long time
+ * spent in blkdev_issue_discard().
+ */
+#define BTRFS_MAX_DISCARD_CHUNK_SIZE (SZ_1G)
+
extern struct mutex uuid_mutex;
#define BTRFS_STRIPE_LEN SZ_64K
#define BTRFS_STRIPE_LEN_SHIFT (16)
#define BTRFS_STRIPE_LEN_MASK (BTRFS_STRIPE_LEN - 1)
-static_assert(const_ilog2(BTRFS_STRIPE_LEN) == BTRFS_STRIPE_LEN_SHIFT);
+static_assert(ilog2(BTRFS_STRIPE_LEN) == BTRFS_STRIPE_LEN_SHIFT);
/* Used by sanity check for btrfs_raid_types. */
#define const_ffs(n) (__builtin_ctzll(n) + 1)
@@ -34,8 +58,7 @@ static_assert(const_ilog2(BTRFS_STRIPE_LEN) == BTRFS_STRIPE_LEN_SHIFT);
*/
static_assert(const_ffs(BTRFS_BLOCK_GROUP_RAID0) <
const_ffs(BTRFS_BLOCK_GROUP_PROFILE_MASK & ~BTRFS_BLOCK_GROUP_RAID0));
-static_assert(const_ilog2(BTRFS_BLOCK_GROUP_RAID0) >
- ilog2(BTRFS_BLOCK_GROUP_TYPE_MASK));
+static_assert(ilog2(BTRFS_BLOCK_GROUP_RAID0) > ilog2(BTRFS_BLOCK_GROUP_TYPE_MASK));
/* ilog2() can handle both constants and variables */
#define BTRFS_BG_FLAG_TO_INDEX(profile) \
@@ -77,7 +100,10 @@ enum btrfs_raid_types {
#define BTRFS_DEV_STATE_FLUSH_SENT (4)
#define BTRFS_DEV_STATE_NO_READA (5)
-struct btrfs_zoned_device_info;
+/* Special value encoding failure to write primary super block. */
+#define BTRFS_SUPER_PRIMARY_WRITE_ERROR (INT_MAX / 2)
+
+struct btrfs_fs_devices;
struct btrfs_device {
struct list_head dev_list; /* device_list_mutex */
@@ -86,17 +112,16 @@ struct btrfs_device {
struct btrfs_fs_devices *fs_devices;
struct btrfs_fs_info *fs_info;
- struct rcu_string __rcu *name;
+ /* Device path or NULL if missing. */
+ const char __rcu *name;
u64 generation;
+ struct file *bdev_file;
struct block_device *bdev;
struct btrfs_zoned_device_info *zone_info;
- /* block device holder for blkdev_get/put */
- void *holder;
-
/*
* Device's major-minor number. Must be set even if the device is not
* opened (bdev == NULL), unless the device is missing.
@@ -129,6 +154,12 @@ struct btrfs_device {
/* type and info about this device */
u64 type;
+ /*
+ * Counter of super block write errors, values larger than
+ * BTRFS_SUPER_PRIMARY_WRITE_ERROR encode primary super block write failure.
+ */
+ atomic_t sb_write_errors;
+
/* minimal io size for this device */
u32 sector_size;
@@ -268,6 +299,9 @@ enum btrfs_chunk_allocation_policy {
BTRFS_CHUNK_ALLOC_ZONED,
};
+#define BTRFS_DEFAULT_RR_MIN_CONTIG_READ (SZ_256K)
+/* Keep in sync with raid_attr table, current maximum is RAID1C4. */
+#define BTRFS_RAID1_MAX_MIRRORS (4)
/*
* Read policies for mirrored block group profiles, read picks the stripe based
* on these policies.
@@ -275,9 +309,34 @@ enum btrfs_chunk_allocation_policy {
enum btrfs_read_policy {
/* Use process PID to choose the stripe */
BTRFS_READ_POLICY_PID,
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ /* Balancing RAID1 reads across all striped devices (round-robin). */
+ BTRFS_READ_POLICY_RR,
+ /* Read from a specific device. */
+ BTRFS_READ_POLICY_DEVID,
+#endif
BTRFS_NR_READ_POLICY,
};
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+/*
+ * Checksum mode - offload it to workqueues or do it synchronously in
+ * btrfs_submit_chunk().
+ */
+enum btrfs_offload_csum_mode {
+ /*
+ * Choose offloading checksum or do it synchronously automatically.
+ * Do it synchronously if the checksum is fast, or offload to workqueues
+ * otherwise.
+ */
+ BTRFS_OFFLOAD_CSUM_AUTO,
+ /* Always offload checksum to workqueues. */
+ BTRFS_OFFLOAD_CSUM_FORCE_ON,
+ /* Never offload checksum to workqueues. */
+ BTRFS_OFFLOAD_CSUM_FORCE_OFF,
+};
+#endif
+
struct btrfs_fs_devices {
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
@@ -290,6 +349,19 @@ struct btrfs_fs_devices {
* - Following shall be true at all times:
* - metadata_uuid == btrfs_header::fsid
* - metadata_uuid == btrfs_dev_item::fsid
+ *
+ * - Relations between fsid and metadata_uuid in sb and fs_devices:
+ * - Normal:
+ * fs_devices->fsid == fs_devices->metadata_uuid == sb->fsid
+ * sb->metadata_uuid == 0
+ *
+ * - When the BTRFS_FEATURE_INCOMPAT_METADATA_UUID flag is set:
+ * fs_devices->fsid == sb->fsid
+ * fs_devices->metadata_uuid == sb->metadata_uuid
+ *
+ * - When in-memory fs_devices->temp_fsid is true
+ * fs_devices->fsid = random
+ * fs_devices->metadata_uuid == sb->fsid
*/
u8 metadata_uuid[BTRFS_FSID_SIZE];
@@ -349,13 +421,26 @@ struct btrfs_fs_devices {
/* Count fs-devices opened. */
int opened;
+ /*
+ * Counter of the processes that are holding this fs_devices but not
+ * yet opened.
+ * This is for mounting handling, as we can only open the fs_devices
+ * after a super block is created. But we cannot take uuid_mutex
+ * during sget_fc(), thus we have to hold the fs_devices (meaning it
+ * cannot be released) until a super block is returned.
+ */
+ int holding;
+
/* Set when we find or add a device that doesn't have the nonrot flag set. */
bool rotating;
/* Devices support TRIM/discard commands. */
bool discardable;
- bool fsid_change;
/* The filesystem is a seed filesystem. */
bool seeding;
+ /* The mount needs to use a randomly generated fsid. */
+ bool temp_fsid;
+ /* Enable/disable the filesystem stats tracking. */
+ bool collect_fs_stats;
struct btrfs_fs_info *fs_info;
/* sysfs kobjects */
@@ -368,6 +453,20 @@ struct btrfs_fs_devices {
/* Policy used to read the mirrored stripes. */
enum btrfs_read_policy read_policy;
+
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ /*
+ * Minimum contiguous reads before switching to next device, the unit
+ * is one block/sectorsize.
+ */
+ u32 rr_min_contig_read;
+
+ /* Device to be used for reading in case of RAID1. */
+ u64 read_devid;
+
+ /* Checksum mode - offload it or do it synchronously. */
+ enum btrfs_offload_csum_mode offload_csum_mode;
+#endif
};
#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \
@@ -381,12 +480,11 @@ struct btrfs_fs_devices {
struct btrfs_io_stripe {
struct btrfs_device *dev;
- union {
- /* Block mapping */
- u64 physical;
- /* For the endio handler */
- struct btrfs_io_context *bioc;
- };
+ /* Block mapping. */
+ u64 physical;
+ bool rst_search_commit_root;
+ /* For the endio handler. */
+ struct btrfs_io_context *bioc;
};
struct btrfs_discard_stripe {
@@ -396,7 +494,7 @@ struct btrfs_discard_stripe {
};
/*
- * Context for IO subsmission for device stripe.
+ * Context for IO submission for device stripe.
*
* - Track the unfinished mirrors for mirror based profiles
* Mirror based profiles are SINGLE/DUP/RAID1/RAID10.
@@ -414,10 +512,17 @@ struct btrfs_discard_stripe {
struct btrfs_io_context {
refcount_t refs;
struct btrfs_fs_info *fs_info;
- u64 map_type; /* get from map_lookup->type */
+ /* Taken from struct btrfs_chunk_map::type. */
+ u64 map_type;
struct bio *orig_bio;
atomic_t error;
u16 max_errors;
+ bool use_rst;
+
+ u64 logical;
+ u64 size;
+ /* Raid stripe tree ordered entry. */
+ struct list_head rst_ordered_entry;
/*
* The total number of stripes, including the extra duplicated
@@ -512,21 +617,33 @@ struct btrfs_raid_attr {
extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES];
-struct map_lookup {
+struct btrfs_chunk_map {
+ struct rb_node rb_node;
+ /* For mount time dev extent verification. */
+ int verified_stripes;
+ refcount_t refs;
+ u64 start;
+ u64 chunk_len;
+ u64 stripe_size;
u64 type;
int io_align;
int io_width;
int num_stripes;
int sub_stripes;
- int verified_stripes; /* For mount time dev extent verification */
struct btrfs_io_stripe stripes[];
};
-#define map_lookup_size(n) (sizeof(struct map_lookup) + \
- (sizeof(struct btrfs_io_stripe) * (n)))
+#define btrfs_chunk_map_size(n) (sizeof(struct btrfs_chunk_map) + \
+ (sizeof(struct btrfs_io_stripe) * (n)))
+
+static inline void btrfs_free_chunk_map(struct btrfs_chunk_map *map)
+{
+ if (map && refcount_dec_and_test(&map->refs)) {
+ ASSERT(RB_EMPTY_NODE(&map->rb_node));
+ kfree(map);
+ }
+}
-struct btrfs_balance_args;
-struct btrfs_balance_progress;
struct btrfs_balance_control {
struct btrfs_balance_args data;
struct btrfs_balance_args meta;
@@ -544,6 +661,11 @@ struct btrfs_dev_lookup_args {
u64 devid;
u8 *uuid;
u8 *fsid;
+ /*
+ * If devt is specified, all other members will be ignored as it is
+ * enough to uniquely locate a device.
+ */
+ dev_t devt;
bool missing;
};
@@ -559,7 +681,7 @@ enum btrfs_map_op {
BTRFS_MAP_GET_READ_MIRRORS,
};
-static inline enum btrfs_map_op btrfs_op(struct bio *bio)
+static inline enum btrfs_map_op btrfs_op(const struct bio *bio)
{
switch (bio_op(bio)) {
case REQ_OP_WRITE:
@@ -581,7 +703,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
}
/*
- * Do the type safe converstion from stripe_nr to offset inside the chunk.
+ * Do the type safe conversion from stripe_nr to offset inside the chunk.
*
* @stripe_nr is u32, with left shift it can overflow u32 for chunks larger
* than 4G. This does the proper type cast to avoid overflow.
@@ -596,8 +718,7 @@ void btrfs_put_bioc(struct btrfs_io_context *bioc);
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret,
- struct btrfs_io_stripe *smap, int *mirror_num_ret,
- int need_raid_map);
+ struct btrfs_io_stripe *smap, int *mirror_num_ret);
int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
struct btrfs_io_stripe *smap, u64 logical,
u32 length, int mirror_num);
@@ -607,11 +728,12 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
- u64 type);
-void btrfs_mapping_tree_free(struct extent_map_tree *tree);
+ struct btrfs_space_info *space_info,
+ u64 type);
+void btrfs_mapping_tree_free(struct btrfs_fs_info *fs_info);
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
blk_mode_t flags, void *holder);
-struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags);
+struct btrfs_device *btrfs_scan_one_device(const char *path, bool mount_arg_dev);
int btrfs_forget_devices(dev_t devt);
void btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices);
@@ -629,7 +751,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args);
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
struct btrfs_dev_lookup_args *args,
- struct block_device **bdev, void **holder);
+ struct file **bdev_file);
void __exit btrfs_cleanup_fs_uuids(void);
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
int btrfs_grow_device(struct btrfs_trans_handle *trans,
@@ -645,10 +767,9 @@ void btrfs_describe_block_groups(u64 flags, char *buf, u32 size_buf);
int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
-int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset);
+int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset,
+ bool verbose);
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
-int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
-int btrfs_uuid_scan_kthread(void *data);
bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
@@ -659,17 +780,28 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans);
void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev);
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev);
-int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
- u64 logical, u64 len);
unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
u64 logical);
-u64 btrfs_calc_stripe_length(const struct extent_map *em);
+u64 btrfs_calc_stripe_length(const struct btrfs_chunk_map *map);
int btrfs_nr_parity_stripes(u64 type);
int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans,
struct btrfs_block_group *bg);
int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset);
-struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
- u64 logical, u64 length);
+
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+struct btrfs_chunk_map *btrfs_alloc_chunk_map(int num_stripes, gfp_t gfp);
+int btrfs_add_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map);
+#endif
+
+struct btrfs_chunk_map *btrfs_find_chunk_map(struct btrfs_fs_info *fs_info,
+ u64 logical, u64 length);
+struct btrfs_chunk_map *btrfs_find_chunk_map_nolock(struct btrfs_fs_info *fs_info,
+ u64 logical, u64 length);
+struct btrfs_chunk_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
+ u64 logical, u64 length);
+void btrfs_remove_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map);
+struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
+ int copy_num, bool drop_cache);
void btrfs_release_disk_super(struct btrfs_super_block *super);
static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
@@ -728,7 +860,26 @@ static inline const char *btrfs_dev_name(const struct btrfs_device *device)
if (!device || test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
return "<missing disk>";
else
- return rcu_str_deref(device->name);
+ return rcu_dereference(device->name);
+}
+
+static inline void btrfs_warn_unknown_chunk_allocation(enum btrfs_chunk_allocation_policy pol)
+{
+ WARN_ONCE(1, "unknown allocation policy %d, fallback to regular", pol);
+}
+
+static inline void btrfs_fs_devices_inc_holding(struct btrfs_fs_devices *fs_devices)
+{
+ lockdep_assert_held(&uuid_mutex);
+ ASSERT(fs_devices->holding >= 0);
+ fs_devices->holding++;
+}
+
+static inline void btrfs_fs_devices_dec_holding(struct btrfs_fs_devices *fs_devices)
+{
+ lockdep_assert_held(&uuid_mutex);
+ ASSERT(fs_devices->holding > 0);
+ fs_devices->holding--;
}
void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
@@ -736,9 +887,7 @@ void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
struct list_head * __attribute_const__ btrfs_get_fs_uuids(void);
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
struct btrfs_device *failing_dev);
-void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
- struct block_device *bdev,
- const char *device_path);
+void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct btrfs_device *device);
enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags);
int btrfs_bg_type_to_factor(u64 flags);
@@ -747,6 +896,11 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);
bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
-u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb);
+const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb);
+
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
+ u64 logical, u16 total_stripes);
+#endif
#endif