summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-10-30 10:42:06 -1000
committerLinus Torvalds <torvalds@linux-foundation.org>2023-10-30 10:42:06 -1000
commitd5acbc60fafbe0fc94c552ce916dd592cd4c6371 (patch)
treec2d70058845399ebcf894e551e6b0c053dd3e836 /include
parent8829687a4ac1d484639425a691da46f6e361aec1 (diff)
parentc6e8f898f56fae2cb5bc4396bec480f23cd8b066 (diff)
Merge tag 'for-6.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "New features: - raid-stripe-tree New tree for logical file extent mapping where the physical mapping may not match on multiple devices. This is now used in zoned mode to implement RAID0/RAID1* profiles, but can be used in non-zoned mode as well. The support for RAID56 is in development and will eventually fix the problems with the current implementation. This is a backward incompatible feature and has to be enabled at mkfs time. - simple quota accounting (squota) A simplified mode of qgroup that accounts all space on the initial extent owners (a subvolume), the snapshots are then cheap to create and delete. The deletion of snapshots in fully accounting qgroups is a known CPU/IO performance bottleneck. The squota is not suitable for the general use case but works well for containers where the original subvolume exists for the whole time. This is a backward incompatible feature as it needs extending some structures, but can be enabled on an existing filesystem. - temporary filesystem fsid (temp_fsid) The fsid identifies a filesystem and is hard coded in the structures, which disallows mounting the same fsid found on different devices. For a single device filesystem this is not strictly necessary, a new temporary fsid can be generated on mount e.g. after a device is cloned. This will be used by Steam Deck for root partition A/B testing, or can be used for VM root images. Other user visible changes: - filesystems with partially finished metadata_uuid conversion cannot be mounted anymore and the uuid fixup has to be done by btrfs-progs (btrfstune). Performance improvements: - reduce reservations for checksum deletions (with enabled free space tree by factor of 4), on a sample workload on file with many extents the deletion time decreased by 12% - make extent state merges more efficient during insertions, reduce rb-tree iterations (run time of critical functions reduced by 5%) Core changes: - the integrity check functionality has been removed, this was a debugging feature and removal does not affect other integrity checks like checksums or tree-checker - space reservation changes: - more efficient delayed ref reservations, this avoids building up too much work or overusing or exhausting the global block reserve in some situations - move delayed refs reservation to the transaction start time, this prevents some ENOSPC corner cases related to exhaustion of global reserve - improvements in reducing excessive reservations for block group items - adjust overcommit logic in near full situations, account for one more chunk to eventually allocate metadata chunk, this is mostly relevant for small filesystems (<10GiB) - single device filesystems are scanned but not registered (except seed devices), this allows temp_fsid to work - qgroup iterations do not need GFP_ATOMIC allocations anymore - cleanups, refactoring, reduced data structure size, function parameter simplifications, error handling fixes" * tag 'for-6.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (156 commits) btrfs: open code timespec64 in struct btrfs_inode btrfs: remove redundant log root tree index assignment during log sync btrfs: remove redundant initialization of variable dirty in btrfs_update_time() btrfs: sysfs: show temp_fsid feature btrfs: disable the device add feature for temp-fsid btrfs: disable the seed feature for temp-fsid btrfs: update comment for temp-fsid, fsid, and metadata_uuid btrfs: remove pointless empty log context list check when syncing log btrfs: update comment for struct btrfs_inode::lock btrfs: remove pointless barrier from btrfs_sync_file() btrfs: add and use helpers for reading and writing last_trans_committed btrfs: add and use helpers for reading and writing fs_info->generation btrfs: add and use helpers for reading and writing log_transid btrfs: add and use helpers for reading and writing last_log_commit btrfs: support cloned-device mount capability btrfs: add helper function find_fsid_by_disk btrfs: stop reserving excessive space for block group item insertions btrfs: stop reserving excessive space for block group item updates btrfs: reorder btrfs_inode to fill gaps btrfs: open code btrfs_ordered_inode_tree in btrfs_inode ...
Diffstat (limited to 'include')
-rw-r--r--include/trace/events/btrfs.h83
-rw-r--r--include/uapi/linux/btrfs.h3
-rw-r--r--include/uapi/linux/btrfs_tree.h60
3 files changed, 140 insertions, 6 deletions
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index b2db2c2f1c57..279a7a0c90c0 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -1561,7 +1561,6 @@ DECLARE_EVENT_CLASS(btrfs__work,
__field( const void *, wq )
__field( const void *, func )
__field( const void *, ordered_func )
- __field( const void *, ordered_free )
__field( const void *, normal_work )
),
@@ -1570,14 +1569,12 @@ DECLARE_EVENT_CLASS(btrfs__work,
__entry->wq = work->wq;
__entry->func = work->func;
__entry->ordered_func = work->ordered_func;
- __entry->ordered_free = work->ordered_free;
__entry->normal_work = &work->normal_work;
),
- TP_printk_btrfs("work=%p (normal_work=%p) wq=%p func=%ps ordered_func=%p "
- "ordered_free=%p",
+ TP_printk_btrfs("work=%p (normal_work=%p) wq=%p func=%ps ordered_func=%p",
__entry->work, __entry->normal_work, __entry->wq,
- __entry->func, __entry->ordered_func, __entry->ordered_free)
+ __entry->func, __entry->ordered_func)
);
/*
@@ -2497,6 +2494,82 @@ DEFINE_EVENT(btrfs_raid56_bio, raid56_write,
TP_ARGS(rbio, bio, trace_info)
);
+TRACE_EVENT(btrfs_insert_one_raid_extent,
+
+ TP_PROTO(const struct btrfs_fs_info *fs_info, u64 logical, u64 length,
+ int num_stripes),
+
+ TP_ARGS(fs_info, logical, length, num_stripes),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, logical )
+ __field( u64, length )
+ __field( int, num_stripes )
+ ),
+
+ TP_fast_assign_btrfs(fs_info,
+ __entry->logical = logical;
+ __entry->length = length;
+ __entry->num_stripes = num_stripes;
+ ),
+
+ TP_printk_btrfs("logical=%llu length=%llu num_stripes=%d",
+ __entry->logical, __entry->length,
+ __entry->num_stripes)
+);
+
+TRACE_EVENT(btrfs_raid_extent_delete,
+
+ TP_PROTO(const struct btrfs_fs_info *fs_info, u64 start, u64 end,
+ u64 found_start, u64 found_end),
+
+ TP_ARGS(fs_info, start, end, found_start, found_end),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, start )
+ __field( u64, end )
+ __field( u64, found_start )
+ __field( u64, found_end )
+ ),
+
+ TP_fast_assign_btrfs(fs_info,
+ __entry->start = start;
+ __entry->end = end;
+ __entry->found_start = found_start;
+ __entry->found_end = found_end;
+ ),
+
+ TP_printk_btrfs("start=%llu end=%llu found_start=%llu found_end=%llu",
+ __entry->start, __entry->end, __entry->found_start,
+ __entry->found_end)
+);
+
+TRACE_EVENT(btrfs_get_raid_extent_offset,
+
+ TP_PROTO(const struct btrfs_fs_info *fs_info, u64 logical, u64 length,
+ u64 physical, u64 devid),
+
+ TP_ARGS(fs_info, logical, length, physical, devid),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, logical )
+ __field( u64, length )
+ __field( u64, physical )
+ __field( u64, devid )
+ ),
+
+ TP_fast_assign_btrfs(fs_info,
+ __entry->logical = logical;
+ __entry->length = length;
+ __entry->physical = physical;
+ __entry->devid = devid;
+ ),
+
+ TP_printk_btrfs("logical=%llu length=%llu physical=%llu devid=%llu",
+ __entry->logical, __entry->length, __entry->physical,
+ __entry->devid)
+);
+
#endif /* _TRACE_BTRFS_H */
/* This part must be outside protection */
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index dbb8b96da50d..7c29d82db9ee 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -333,6 +333,8 @@ struct btrfs_ioctl_fs_info_args {
#define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11)
#define BTRFS_FEATURE_INCOMPAT_ZONED (1ULL << 12)
#define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 (1ULL << 13)
+#define BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE (1ULL << 14)
+#define BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA (1ULL << 16)
struct btrfs_ioctl_feature_flags {
__u64 compat_flags;
@@ -753,6 +755,7 @@ struct btrfs_ioctl_get_dev_stats {
#define BTRFS_QUOTA_CTL_ENABLE 1
#define BTRFS_QUOTA_CTL_DISABLE 2
#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED 3
+#define BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA 4
struct btrfs_ioctl_quota_ctl_args {
__u64 cmd;
__u64 status;
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index fc3c32186d7e..c25fc9614594 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -73,6 +73,9 @@
/* Holds the block group items for extent tree v2. */
#define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL
+/* Tracks RAID stripes in block groups. */
+#define BTRFS_RAID_STRIPE_TREE_OBJECTID 12ULL
+
/* device stats in the device tree */
#define BTRFS_DEV_STATS_OBJECTID 0ULL
@@ -231,6 +234,14 @@
#define BTRFS_SHARED_DATA_REF_KEY 184
/*
+ * Special inline ref key which stores the id of the subvolume which originally
+ * created the extent. This subvolume owns the extent permanently from the
+ * perspective of simple quotas. Needed to know which subvolume to free quota
+ * usage from when the extent is deleted.
+ */
+#define BTRFS_EXTENT_OWNER_REF_KEY 188
+
+/*
* block groups give us hints into the extent allocation trees. Which
* blocks are free etc etc
*/
@@ -261,6 +272,8 @@
#define BTRFS_DEV_ITEM_KEY 216
#define BTRFS_CHUNK_ITEM_KEY 228
+#define BTRFS_RAID_STRIPE_KEY 230
+
/*
* Records the overall state of the qgroups.
* There's only one instance of this key present,
@@ -719,6 +732,30 @@ struct btrfs_free_space_header {
__le64 num_bitmaps;
} __attribute__ ((__packed__));
+struct btrfs_raid_stride {
+ /* The id of device this raid extent lives on. */
+ __le64 devid;
+ /* The physical location on disk. */
+ __le64 physical;
+} __attribute__ ((__packed__));
+
+/* The stripe_extent::encoding, 1:1 mapping of enum btrfs_raid_types. */
+#define BTRFS_STRIPE_RAID0 1
+#define BTRFS_STRIPE_RAID1 2
+#define BTRFS_STRIPE_DUP 3
+#define BTRFS_STRIPE_RAID10 4
+#define BTRFS_STRIPE_RAID5 5
+#define BTRFS_STRIPE_RAID6 6
+#define BTRFS_STRIPE_RAID1C3 7
+#define BTRFS_STRIPE_RAID1C4 8
+
+struct btrfs_stripe_extent {
+ __u8 encoding;
+ __u8 reserved[7];
+ /* An array of raid strides this stripe is composed of. */
+ struct btrfs_raid_stride strides[];
+} __attribute__ ((__packed__));
+
#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
@@ -787,6 +824,10 @@ struct btrfs_shared_data_ref {
__le32 count;
} __attribute__ ((__packed__));
+struct btrfs_extent_owner_ref {
+ __le64 root_id;
+} __attribute__ ((__packed__));
+
struct btrfs_extent_inline_ref {
__u8 type;
__le64 offset;
@@ -1204,9 +1245,17 @@ static inline __u16 btrfs_qgroup_level(__u64 qgroupid)
*/
#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2)
+/*
+ * Whether or not this filesystem is using simple quotas. Not exactly the
+ * incompat bit, because we support using simple quotas, disabling it, then
+ * going back to full qgroup quotas.
+ */
+#define BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE (1ULL << 3)
+
#define BTRFS_QGROUP_STATUS_FLAGS_MASK (BTRFS_QGROUP_STATUS_FLAG_ON | \
BTRFS_QGROUP_STATUS_FLAG_RESCAN | \
- BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)
+ BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT | \
+ BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE)
#define BTRFS_QGROUP_STATUS_VERSION 1
@@ -1228,6 +1277,15 @@ struct btrfs_qgroup_status_item {
* of the scan. It contains a logical address
*/
__le64 rescan;
+
+ /*
+ * The generation when quotas were last enabled. Used by simple quotas to
+ * avoid decrementing when freeing an extent that was written before
+ * enable.
+ *
+ * Set only if flags contain BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE.
+ */
+ __le64 enable_gen;
} __attribute__ ((__packed__));
struct btrfs_qgroup_info_item {