summaryrefslogtreecommitdiff
path: root/fs/ext4/mballoc.h
diff options
context:
space:
mode:
authorHarshad Shirwadkar <harshadshirwadkar@gmail.com>2021-04-01 10:21:27 -0700
committerTheodore Ts'o <tytso@mit.edu>2021-04-09 11:34:59 -0400
commit196e402adf2e4cd66f101923409f1970ec5f1af3 (patch)
tree28a178b4e874a5789fb208c7e4e1c3fc3b93a078 /fs/ext4/mballoc.h
parent4b68f6df105966f04f45f1eca0561b86f2b3551d (diff)
ext4: improve cr 0 / cr 1 group scanning
Instead of traversing through groups linearly, scan groups in specific orders at cr 0 and cr 1. At cr 0, we want to find groups that have the largest free order >= the order of the request. So, with this patch, we maintain lists for each possible order and insert each group into a list based on the largest free order in its buddy bitmap. During cr 0 allocation, we traverse these lists in the increasing order of largest free orders. This allows us to find a group with the best available cr 0 match in constant time. If nothing can be found, we fallback to cr 1 immediately. At CR1, the story is slightly different. We want to traverse in the order of increasing average fragment size. For CR1, we maintain a rb tree of groupinfos which is sorted by average fragment size. Instead of traversing linearly, at CR1, we traverse in the order of increasing average fragment size, starting at the most optimal group. This brings down cr 1 search complexity to log(num groups). For cr >= 2, we just perform the linear search as before. Also, in case of lock contention, we intermittently fallback to linear search even in CR 0 and CR 1 cases. This allows us to proceed during the allocation path even in case of high contention. There is an opportunity to do optimization at CR2 too. That's because at CR2 we only consider groups where bb_free counter (number of free blocks) is greater than the request extent size. That's left as future work. All the changes introduced in this patch are protected under a new mount option "mb_optimize_scan". With this patchset, following experiment was performed: Created a highly fragmented disk of size 65TB. The disk had no contiguous 2M regions. Following command was run consecutively for 3 times: time dd if=/dev/urandom of=file bs=2M count=10 Here are the results with and without cr 0/1 optimizations introduced in this patch: |---------+------------------------------+---------------------------| | | Without CR 0/1 Optimizations | With CR 0/1 Optimizations | |---------+------------------------------+---------------------------| | 1st run | 5m1.871s | 2m47.642s | | 2nd run | 2m28.390s | 0m0.611s | | 3rd run | 2m26.530s | 0m1.255s | |---------+------------------------------+---------------------------| Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com> Reported-by: kernel test robot <lkp@intel.com> Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Reviewed-by: Andreas Dilger <adilger@dilger.ca> Link: https://lore.kernel.org/r/20210401172129.189766-6-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/mballoc.h')
-rw-r--r--fs/ext4/mballoc.h17
1 files changed, 16 insertions, 1 deletions
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 68111a10cfee..02585e3cbcad 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -79,6 +79,18 @@
#define MB_DEFAULT_MAX_INODE_PREALLOC 512
/*
+ * Number of groups to search linearly before performing group scanning
+ * optimization.
+ */
+#define MB_DEFAULT_LINEAR_LIMIT 4
+
+/*
+ * Minimum number of groups that should be present in the file system to perform
+ * group scanning optimizations.
+ */
+#define MB_DEFAULT_LINEAR_SCAN_THRESHOLD 16
+
+/*
* Number of valid buddy orders
*/
#define MB_NUM_ORDERS(sb) ((sb)->s_blocksize_bits + 2)
@@ -166,11 +178,14 @@ struct ext4_allocation_context {
/* copy of the best found extent taken before preallocation efforts */
struct ext4_free_extent ac_f_ex;
+ ext4_group_t ac_last_optimal_group;
+ __u32 ac_groups_considered;
+ __u32 ac_flags; /* allocation hints */
__u16 ac_groups_scanned;
+ __u16 ac_groups_linear_remaining;
__u16 ac_found;
__u16 ac_tail;
__u16 ac_buddy;
- __u16 ac_flags; /* allocation hints */
__u8 ac_status;
__u8 ac_criteria;
__u8 ac_2order; /* if request is to allocate 2^N blocks and