summaryrefslogtreecommitdiff
path: root/fs/btrfs/compression.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-01-29 14:04:23 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-01-29 14:04:23 -0800
commit31466f3ed710e5761077190809e694f55aed5deb (patch)
tree82b1313807242796e74a29d27282fc11f30f7cd0 /fs/btrfs/compression.c
parent6787dc24b72b88404ae652c914014e51ddf1c4fa (diff)
parent3acbcbfc8f06d4ade2aab2ebba0a2542a05ce90c (diff)
Merge tag 'for-4.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "Features or user visible changes: - fallocate: implement zero range mode - avoid losing data raid profile when deleting a device - tree item checker: more checks for directory items and xattrs Notable fixes: - raid56 recovery: don't use cached stripes, that could be potentially changed and a later RMW or recovery would lead to corruptions or failures - let raid56 try harder to rebuild damaged data, reading from all stripes if necessary - fix scrub to repair raid56 in a similar way as in the case above Other: - cleanups: device freeing, removed some call indirections, redundant bio_put/_get, unused parameters, refactorings and renames - RCU list traversal fixups - simplify mount callchain, remove recursing back when mounting a subvolume - plug for fsync, may improve bio merging on multiple devices - compression heurisic: replace heap sort with radix sort, gains some performance - add extent map selftests, buffered write vs dio" * tag 'for-4.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (155 commits) btrfs: drop devid as device_list_add() arg btrfs: get device pointer from device_list_add() btrfs: set the total_devices in device_list_add() btrfs: move pr_info into device_list_add btrfs: make btrfs_free_stale_devices() to match the path btrfs: rename btrfs_free_stale_devices() arg to skip_dev btrfs: make btrfs_free_stale_devices() argument optional btrfs: make btrfs_free_stale_device() to iterate all stales btrfs: no need to check for btrfs_fs_devices::seeding btrfs: Use IS_ALIGNED in btrfs_truncate_block instead of opencoding it Btrfs: noinline merge_extent_mapping Btrfs: add WARN_ONCE to detect unexpected error from merge_extent_mapping Btrfs: extent map selftest: dio write vs dio read Btrfs: extent map selftest: buffered write vs dio read Btrfs: add extent map selftests Btrfs: move extent map specific code to extent_map.c Btrfs: add helper for em merge logic Btrfs: fix unexpected EEXIST from btrfs_get_extent Btrfs: fix incorrect block_len in merge_extent_mapping btrfs: Remove unused readahead spinlock ...
Diffstat (limited to 'fs/btrfs/compression.c')
-rw-r--r--fs/btrfs/compression.c137
1 files changed, 118 insertions, 19 deletions
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 75610d23d197..07d049c0c20f 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -33,7 +33,6 @@
#include <linux/bit_spinlock.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
-#include <linux/sort.h>
#include <linux/log2.h>
#include "ctree.h"
#include "disk-io.h"
@@ -45,6 +44,21 @@
#include "extent_io.h"
#include "extent_map.h"
+static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
+
+const char* btrfs_compress_type2str(enum btrfs_compression_type type)
+{
+ switch (type) {
+ case BTRFS_COMPRESS_ZLIB:
+ case BTRFS_COMPRESS_LZO:
+ case BTRFS_COMPRESS_ZSTD:
+ case BTRFS_COMPRESS_NONE:
+ return btrfs_compress_types[type];
+ }
+
+ return NULL;
+}
+
static int btrfs_decompress_bio(struct compressed_bio *cb);
static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
@@ -348,8 +362,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
page->mapping = NULL;
if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
- bio_get(bio);
-
/*
* inc the count before we submit the bio so
* we know the end IO handler won't happen before
@@ -372,8 +384,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_endio(bio);
}
- bio_put(bio);
-
bio = btrfs_bio_alloc(bdev, first_byte);
bio->bi_opf = REQ_OP_WRITE | write_flags;
bio->bi_private = cb;
@@ -389,7 +399,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
first_byte += PAGE_SIZE;
cond_resched();
}
- bio_get(bio);
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
@@ -405,7 +414,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_endio(bio);
}
- bio_put(bio);
return 0;
}
@@ -638,8 +646,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
page->mapping = NULL;
if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
- bio_get(comp_bio);
-
ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
@@ -666,8 +672,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_endio(comp_bio);
}
- bio_put(comp_bio);
-
comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);
comp_bio->bi_private = cb;
@@ -677,7 +681,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
}
cur_disk_byte += PAGE_SIZE;
}
- bio_get(comp_bio);
ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
@@ -693,7 +696,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_endio(comp_bio);
}
- bio_put(comp_bio);
return 0;
fail2:
@@ -752,6 +754,8 @@ struct heuristic_ws {
u32 sample_size;
/* Buckets store counters for each byte value */
struct bucket_item *bucket;
+ /* Sorting buffer */
+ struct bucket_item *bucket_b;
struct list_head list;
};
@@ -763,6 +767,7 @@ static void free_heuristic_ws(struct list_head *ws)
kvfree(workspace->sample);
kfree(workspace->bucket);
+ kfree(workspace->bucket_b);
kfree(workspace);
}
@@ -782,6 +787,10 @@ static struct list_head *alloc_heuristic_ws(void)
if (!ws->bucket)
goto fail;
+ ws->bucket_b = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket_b), GFP_KERNEL);
+ if (!ws->bucket_b)
+ goto fail;
+
INIT_LIST_HEAD(&ws->list);
return &ws->list;
fail:
@@ -1278,13 +1287,103 @@ static u32 shannon_entropy(struct heuristic_ws *ws)
return entropy_sum * 100 / entropy_max;
}
-/* Compare buckets by size, ascending */
-static int bucket_comp_rev(const void *lv, const void *rv)
+#define RADIX_BASE 4U
+#define COUNTERS_SIZE (1U << RADIX_BASE)
+
+static u8 get4bits(u64 num, int shift) {
+ u8 low4bits;
+
+ num >>= shift;
+ /* Reverse order */
+ low4bits = (COUNTERS_SIZE - 1) - (num % COUNTERS_SIZE);
+ return low4bits;
+}
+
+/*
+ * Use 4 bits as radix base
+ * Use 16 u32 counters for calculating new possition in buf array
+ *
+ * @array - array that will be sorted
+ * @array_buf - buffer array to store sorting results
+ * must be equal in size to @array
+ * @num - array size
+ */
+static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
+ int num)
{
- const struct bucket_item *l = (const struct bucket_item *)lv;
- const struct bucket_item *r = (const struct bucket_item *)rv;
+ u64 max_num;
+ u64 buf_num;
+ u32 counters[COUNTERS_SIZE];
+ u32 new_addr;
+ u32 addr;
+ int bitlen;
+ int shift;
+ int i;
- return r->count - l->count;
+ /*
+ * Try avoid useless loop iterations for small numbers stored in big
+ * counters. Example: 48 33 4 ... in 64bit array
+ */
+ max_num = array[0].count;
+ for (i = 1; i < num; i++) {
+ buf_num = array[i].count;
+ if (buf_num > max_num)
+ max_num = buf_num;
+ }
+
+ buf_num = ilog2(max_num);
+ bitlen = ALIGN(buf_num, RADIX_BASE * 2);
+
+ shift = 0;
+ while (shift < bitlen) {
+ memset(counters, 0, sizeof(counters));
+
+ for (i = 0; i < num; i++) {
+ buf_num = array[i].count;
+ addr = get4bits(buf_num, shift);
+ counters[addr]++;
+ }
+
+ for (i = 1; i < COUNTERS_SIZE; i++)
+ counters[i] += counters[i - 1];
+
+ for (i = num - 1; i >= 0; i--) {
+ buf_num = array[i].count;
+ addr = get4bits(buf_num, shift);
+ counters[addr]--;
+ new_addr = counters[addr];
+ array_buf[new_addr] = array[i];
+ }
+
+ shift += RADIX_BASE;
+
+ /*
+ * Normal radix expects to move data from a temporary array, to
+ * the main one. But that requires some CPU time. Avoid that
+ * by doing another sort iteration to original array instead of
+ * memcpy()
+ */
+ memset(counters, 0, sizeof(counters));
+
+ for (i = 0; i < num; i ++) {
+ buf_num = array_buf[i].count;
+ addr = get4bits(buf_num, shift);
+ counters[addr]++;
+ }
+
+ for (i = 1; i < COUNTERS_SIZE; i++)
+ counters[i] += counters[i - 1];
+
+ for (i = num - 1; i >= 0; i--) {
+ buf_num = array_buf[i].count;
+ addr = get4bits(buf_num, shift);
+ counters[addr]--;
+ new_addr = counters[addr];
+ array[new_addr] = array_buf[i];
+ }
+
+ shift += RADIX_BASE;
+ }
}
/*
@@ -1314,7 +1413,7 @@ static int byte_core_set_size(struct heuristic_ws *ws)
struct bucket_item *bucket = ws->bucket;
/* Sort in reverse order */
- sort(bucket, BUCKET_SIZE, sizeof(*bucket), &bucket_comp_rev, NULL);
+ radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE);
for (i = 0; i < BYTE_CORE_SET_LOW; i++)
coreset_sum += bucket[i].count;