Diffstat (limited to 'fs/btrfs/compression.c')
 -rw-r--r--  fs/btrfs/compression.c | 720
 1 file changed, 469 insertions(+), 251 deletions(-)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 8818ed5c390f..6b3357287b42 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -20,12 +20,11 @@ #include <linux/slab.h> #include <linux/sched/mm.h> #include <linux/log2.h> +#include <linux/shrinker.h> #include <crypto/hash.h> #include "misc.h" #include "ctree.h" #include "fs.h" -#include "disk-io.h" -#include "transaction.h" #include "btrfs_inode.h" #include "bio.h" #include "ordered-data.h" @@ -33,8 +32,7 @@ #include "extent_io.h" #include "extent_map.h" #include "subpage.h" -#include "zoned.h" -#include "file-item.h" +#include "messages.h" #include "super.h" static struct bio_set btrfs_compressed_bioset; @@ -69,9 +67,7 @@ static struct compressed_bio *alloc_compressed_bio(struct btrfs_inode *inode, bbio = btrfs_bio(bio_alloc_bioset(NULL, BTRFS_MAX_COMPRESSED_PAGES, op, GFP_NOFS, &btrfs_compressed_bioset)); - btrfs_bio_init(bbio, inode->root->fs_info, end_io, NULL); - bbio->inode = inode; - bbio->file_offset = start; + btrfs_bio_init(bbio, inode, start, end_io, NULL); return to_compressed_bio(bbio); } @@ -92,20 +88,20 @@ bool btrfs_compress_is_valid_type(const char *str, size_t len) } static int compression_compress_pages(int type, struct list_head *ws, - struct address_space *mapping, u64 start, struct page **pages, - unsigned long *out_pages, unsigned long *total_in, - unsigned long *total_out) + struct btrfs_inode *inode, u64 start, + struct folio **folios, unsigned long *out_folios, + unsigned long *total_in, unsigned long *total_out) { switch (type) { case BTRFS_COMPRESS_ZLIB: - return zlib_compress_pages(ws, mapping, start, pages, - out_pages, total_in, total_out); + return zlib_compress_folios(ws, inode, start, folios, + out_folios, total_in, total_out); case BTRFS_COMPRESS_LZO: - return lzo_compress_pages(ws, mapping, start, pages, - out_pages, total_in, total_out); + return lzo_compress_folios(ws, inode, start, folios, + out_folios, total_in, total_out); case BTRFS_COMPRESS_ZSTD: - return zstd_compress_pages(ws, mapping, start, pages, - out_pages, total_in, total_out); + return zstd_compress_folios(ws, inode, start, folios, + out_folios, total_in, total_out); case BTRFS_COMPRESS_NONE: default: /* @@ -117,7 +113,7 @@ static int compression_compress_pages(int type, struct list_head *ws, * Not a big deal, just need to inform caller that we * haven't allocated any pages yet. 
*/ - *out_pages = 0; + *out_folios = 0; return -E2BIG; } } @@ -140,16 +136,16 @@ static int compression_decompress_bio(struct list_head *ws, } static int compression_decompress(int type, struct list_head *ws, - const u8 *data_in, struct page *dest_page, - unsigned long start_byte, size_t srclen, size_t destlen) + const u8 *data_in, struct folio *dest_folio, + unsigned long dest_pgoff, size_t srclen, size_t destlen) { switch (type) { - case BTRFS_COMPRESS_ZLIB: return zlib_decompress(ws, data_in, dest_page, - start_byte, srclen, destlen); - case BTRFS_COMPRESS_LZO: return lzo_decompress(ws, data_in, dest_page, - start_byte, srclen, destlen); - case BTRFS_COMPRESS_ZSTD: return zstd_decompress(ws, data_in, dest_page, - start_byte, srclen, destlen); + case BTRFS_COMPRESS_ZLIB: return zlib_decompress(ws, data_in, dest_folio, + dest_pgoff, srclen, destlen); + case BTRFS_COMPRESS_LZO: return lzo_decompress(ws, data_in, dest_folio, + dest_pgoff, srclen, destlen); + case BTRFS_COMPRESS_ZSTD: return zstd_decompress(ws, data_in, dest_folio, + dest_pgoff, srclen, destlen); case BTRFS_COMPRESS_NONE: default: /* @@ -160,16 +156,118 @@ static int compression_decompress(int type, struct list_head *ws, } } -static void btrfs_free_compressed_pages(struct compressed_bio *cb) +static void btrfs_free_compressed_folios(struct compressed_bio *cb) { - for (unsigned int i = 0; i < cb->nr_pages; i++) - put_page(cb->compressed_pages[i]); - kfree(cb->compressed_pages); + for (unsigned int i = 0; i < cb->nr_folios; i++) + btrfs_free_compr_folio(cb->compressed_folios[i]); + kfree(cb->compressed_folios); } static int btrfs_decompress_bio(struct compressed_bio *cb); -static void end_compressed_bio_read(struct btrfs_bio *bbio) +/* + * Global cache of last unused pages for compression/decompression. + */ +static struct btrfs_compr_pool { + struct shrinker *shrinker; + spinlock_t lock; + struct list_head list; + int count; + int thresh; +} compr_pool; + +static unsigned long btrfs_compr_pool_count(struct shrinker *sh, struct shrink_control *sc) +{ + int ret; + + /* + * We must not read the values more than once if 'ret' gets expanded in + * the return statement so we don't accidentally return a negative + * number, even if the first condition finds it positive. + */ + ret = READ_ONCE(compr_pool.count) - READ_ONCE(compr_pool.thresh); + + return ret > 0 ? ret : 0; +} + +static unsigned long btrfs_compr_pool_scan(struct shrinker *sh, struct shrink_control *sc) +{ + LIST_HEAD(remove); + struct list_head *tmp, *next; + int freed; + + if (compr_pool.count == 0) + return SHRINK_STOP; + + /* For now, just simply drain the whole list. */ + spin_lock(&compr_pool.lock); + list_splice_init(&compr_pool.list, &remove); + freed = compr_pool.count; + compr_pool.count = 0; + spin_unlock(&compr_pool.lock); + + list_for_each_safe(tmp, next, &remove) { + struct page *page = list_entry(tmp, struct page, lru); + + ASSERT(page_ref_count(page) == 1); + put_page(page); + } + + return freed; +} + +/* + * Common wrappers for page allocation from compression wrappers + */ +struct folio *btrfs_alloc_compr_folio(struct btrfs_fs_info *fs_info) +{ + struct folio *folio = NULL; + + /* For bs > ps cases, no cached folio pool for now. 
*/ + if (fs_info->block_min_order) + goto alloc; + + spin_lock(&compr_pool.lock); + if (compr_pool.count > 0) { + folio = list_first_entry(&compr_pool.list, struct folio, lru); + list_del_init(&folio->lru); + compr_pool.count--; + } + spin_unlock(&compr_pool.lock); + + if (folio) + return folio; + +alloc: + return folio_alloc(GFP_NOFS, fs_info->block_min_order); +} + +void btrfs_free_compr_folio(struct folio *folio) +{ + bool do_free = false; + + /* The folio is from bs > ps fs, no cached pool for now. */ + if (folio_order(folio)) + goto free; + + spin_lock(&compr_pool.lock); + if (compr_pool.count > compr_pool.thresh) { + do_free = true; + } else { + list_add(&folio->lru, &compr_pool.list); + compr_pool.count++; + } + spin_unlock(&compr_pool.lock); + + if (!do_free) + return; + +free: + ASSERT(folio_ref_count(folio) == 1); + folio_put(folio); +} + +static void end_bbio_compressed_read(struct btrfs_bio *bbio) { struct compressed_bio *cb = to_compressed_bio(bbio); blk_status_t status = bbio->bio.bi_status; @@ -177,7 +275,7 @@ static void end_compressed_bio_read(struct btrfs_bio *bbio) if (!status) status = errno_to_blk_status(btrfs_decompress_bio(cb)); - btrfs_free_compressed_pages(cb); + btrfs_free_compressed_folios(cb); btrfs_bio_end_io(cb->orig_bbio, status); bio_put(&bbio->bio); } @@ -189,16 +287,16 @@ static void end_compressed_bio_read(struct btrfs_bio *bbio) static noinline void end_compressed_writeback(const struct compressed_bio *cb) { struct inode *inode = &cb->bbio.inode->vfs_inode; - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - unsigned long index = cb->start >> PAGE_SHIFT; - unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT; + struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); + pgoff_t index = cb->start >> PAGE_SHIFT; + const pgoff_t end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT; struct folio_batch fbatch; - const int errno = blk_status_to_errno(cb->bbio.bio.bi_status); int i; int ret; - if (errno) - mapping_set_error(inode->i_mapping, errno); + ret = blk_status_to_errno(cb->bbio.bio.bi_status); + if (ret) + mapping_set_error(inode->i_mapping, ret); folio_batch_init(&fbatch); while (index <= end_index) { @@ -211,30 +309,14 @@ static noinline void end_compressed_writeback(const struct compressed_bio *cb) for (i = 0; i < ret; i++) { struct folio *folio = fbatch.folios[i]; - btrfs_page_clamp_clear_writeback(fs_info, &folio->page, - cb->start, cb->len); + btrfs_folio_clamp_clear_writeback(fs_info, folio, + cb->start, cb->len); } folio_batch_release(&fbatch); } /* the inode may be gone now */ } -static void btrfs_finish_compressed_write_work(struct work_struct *work) -{ - struct compressed_bio *cb = - container_of(work, struct compressed_bio, write_end_work); - - btrfs_finish_ordered_extent(cb->bbio.ordered, NULL, cb->start, cb->len, - cb->bbio.bio.bi_status == BLK_STS_OK); - - if (cb->writeback) - end_compressed_writeback(cb); - /* Note, our inode could be gone now */ - - btrfs_free_compressed_pages(cb); - bio_put(&cb->bbio.bio); -} - /* * Do the cleanup once all the compressed pages hit the disk. This will clear * writeback on the file pages and free the compressed pages. @@ -242,26 +324,36 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work) * This also calls the writeback end hooks for the file pages so that metadata * and checksums can be updated in the file. 
*/ -static void end_compressed_bio_write(struct btrfs_bio *bbio) +static void end_bbio_compressed_write(struct btrfs_bio *bbio) { struct compressed_bio *cb = to_compressed_bio(bbio); - struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info; - queue_work(fs_info->compressed_write_workers, &cb->write_end_work); + btrfs_finish_ordered_extent(cb->bbio.ordered, NULL, cb->start, cb->len, + cb->bbio.bio.bi_status == BLK_STS_OK); + + if (cb->writeback) + end_compressed_writeback(cb); + /* Note, our inode could be gone now. */ + btrfs_free_compressed_folios(cb); + bio_put(&cb->bbio.bio); } -static void btrfs_add_compressed_bio_pages(struct compressed_bio *cb) +static void btrfs_add_compressed_bio_folios(struct compressed_bio *cb) { struct bio *bio = &cb->bbio.bio; u32 offset = 0; + unsigned int findex = 0; while (offset < cb->compressed_len) { - u32 len = min_t(u32, cb->compressed_len - offset, PAGE_SIZE); + struct folio *folio = cb->compressed_folios[findex]; + u32 len = min_t(u32, cb->compressed_len - offset, folio_size(folio)); + int ret; /* Maximum compressed extent is smaller than bio size limit. */ - __bio_add_page(bio, cb->compressed_pages[offset >> PAGE_SHIFT], - len, 0); + ret = bio_add_folio(bio, folio, len, 0); + ASSERT(ret); offset += len; + findex++; } } @@ -275,12 +367,12 @@ static void btrfs_add_compressed_bio_pages(struct compressed_bio *cb) * the end io hooks. */ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered, - struct page **compressed_pages, - unsigned int nr_pages, + struct folio **compressed_folios, + unsigned int nr_folios, blk_opf_t write_flags, bool writeback) { - struct btrfs_inode *inode = BTRFS_I(ordered->inode); + struct btrfs_inode *inode = ordered->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; struct compressed_bio *cb; @@ -289,19 +381,18 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered, cb = alloc_compressed_bio(inode, ordered->file_offset, REQ_OP_WRITE | write_flags, - end_compressed_bio_write); + end_bbio_compressed_write); cb->start = ordered->file_offset; cb->len = ordered->num_bytes; - cb->compressed_pages = compressed_pages; + cb->compressed_folios = compressed_folios; cb->compressed_len = ordered->disk_num_bytes; cb->writeback = writeback; - INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work); - cb->nr_pages = nr_pages; + cb->nr_folios = nr_folios; cb->bbio.bio.bi_iter.bi_sector = ordered->disk_bytenr >> SECTOR_SHIFT; cb->bbio.ordered = ordered; - btrfs_add_compressed_bio_pages(cb); + btrfs_add_compressed_bio_folios(cb); - btrfs_submit_bio(&cb->bbio, 0); + btrfs_submit_bbio(&cb->bbio, 0); } /* @@ -320,13 +411,13 @@ static noinline int add_ra_bio_pages(struct inode *inode, struct compressed_bio *cb, int *memstall, unsigned long *pflags) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - unsigned long end_index; + struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); + pgoff_t end_index; struct bio *orig_bio = &cb->orig_bbio->bio; u64 cur = cb->orig_bbio->file_offset + orig_bio->bi_iter.bi_size; u64 isize = i_size_read(inode); int ret; - struct page *page; + struct folio *folio; struct extent_map *em; struct address_space *mapping = inode->i_mapping; struct extent_map_tree *em_tree; @@ -346,22 +437,30 @@ static noinline int add_ra_bio_pages(struct inode *inode, * This makes readahead less effective, so here disable readahead for * subpage for now, until full compressed write is supported. 
*/ - if (btrfs_sb(inode->i_sb)->sectorsize < PAGE_SIZE) + if (fs_info->sectorsize < PAGE_SIZE) + return 0; + + /* For bs > ps cases, we don't support readahead for compressed folios for now. */ + if (fs_info->block_min_order) return 0; end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; while (cur < compressed_end) { - u64 page_end; - u64 pg_index = cur >> PAGE_SHIFT; + pgoff_t page_end; + pgoff_t pg_index = cur >> PAGE_SHIFT; u32 add_size; if (pg_index > end_index) break; - page = xa_load(&mapping->i_pages, pg_index); - if (page && !xa_is_value(page)) { - sectors_missed += (PAGE_SIZE - offset_in_page(cur)) >> + folio = filemap_get_folio(mapping, pg_index); + if (!IS_ERR(folio)) { + u64 folio_sz = folio_size(folio); + u64 offset = offset_in_folio(folio, cur); + + folio_put(folio); + sectors_missed += (folio_sz - offset) >> fs_info->sectorsize_bits; /* Beyond threshold, no need to continue */ @@ -372,38 +471,38 @@ static noinline int add_ra_bio_pages(struct inode *inode, * Jump to next page start as we already have page for * current offset. */ - cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE; + cur += (folio_sz - offset); continue; } - page = __page_cache_alloc(mapping_gfp_constraint(mapping, - ~__GFP_FS)); - if (!page) + folio = filemap_alloc_folio(mapping_gfp_constraint(mapping, ~__GFP_FS), + 0, NULL); + if (!folio) break; - if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) { - put_page(page); + if (filemap_add_folio(mapping, folio, pg_index, GFP_NOFS)) { /* There is already a page, skip to page end */ - cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE; + cur += folio_size(folio); + folio_put(folio); continue; } - if (!*memstall && PageWorkingset(page)) { + if (!*memstall && folio_test_workingset(folio)) { psi_memstall_enter(pflags); *memstall = 1; } - ret = set_page_extent_mapped(page); + ret = set_folio_extent_mapped(folio); if (ret < 0) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); break; } - page_end = (pg_index << PAGE_SHIFT) + PAGE_SIZE - 1; - lock_extent(tree, cur, page_end, NULL); + page_end = (pg_index << PAGE_SHIFT) + folio_size(folio) - 1; + btrfs_lock_extent(tree, cur, page_end, NULL); read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, cur, page_end + 1 - cur); + em = btrfs_lookup_extent_mapping(em_tree, cur, page_end + 1 - cur); read_unlock(&em_tree->lock); /* @@ -412,32 +511,33 @@ static noinline int add_ra_bio_pages(struct inode *inode, * to this compressed extent on disk. 
*/ if (!em || cur < em->start || - (cur + fs_info->sectorsize > extent_map_end(em)) || - (em->block_start >> SECTOR_SHIFT) != orig_bio->bi_iter.bi_sector) { - free_extent_map(em); - unlock_extent(tree, cur, page_end, NULL); - unlock_page(page); - put_page(page); + (cur + fs_info->sectorsize > btrfs_extent_map_end(em)) || + (btrfs_extent_map_block_start(em) >> SECTOR_SHIFT) != + orig_bio->bi_iter.bi_sector) { + btrfs_free_extent_map(em); + btrfs_unlock_extent(tree, cur, page_end, NULL); + folio_unlock(folio); + folio_put(folio); break; } - free_extent_map(em); + add_size = min(em->start + em->len, page_end + 1) - cur; + btrfs_free_extent_map(em); + btrfs_unlock_extent(tree, cur, page_end, NULL); - if (page->index == end_index) { - size_t zero_offset = offset_in_page(isize); + if (folio_contains(folio, end_index)) { + size_t zero_offset = offset_in_folio(folio, isize); if (zero_offset) { int zeros; - zeros = PAGE_SIZE - zero_offset; - memzero_page(page, zero_offset, zeros); + zeros = folio_size(folio) - zero_offset; + folio_zero_range(folio, zero_offset, zeros); } } - add_size = min(em->start + em->len, page_end + 1) - cur; - ret = bio_add_page(orig_bio, page, add_size, offset_in_page(cur)); - if (ret != add_size) { - unlock_extent(tree, cur, page_end, NULL); - unlock_page(page); - put_page(page); + if (!bio_add_folio(orig_bio, folio, add_size, + offset_in_folio(folio, cur))) { + folio_unlock(folio); + folio_put(folio); break; } /* @@ -446,8 +546,8 @@ static noinline int add_ra_bio_pages(struct inode *inode, * subpage::readers and to unlock the page. */ if (fs_info->sectorsize < PAGE_SIZE) - btrfs_subpage_start_reader(fs_info, page, cur, add_size); - put_page(page); + btrfs_folio_set_lock(fs_info, folio, cur, add_size); + folio_put(folio); cur += add_size; } return 0; @@ -477,45 +577,47 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio) struct extent_map *em; unsigned long pflags; int memstall = 0; - blk_status_t ret; - int ret2; + blk_status_t status; + int ret; /* we need the actual starting offset of this extent in the file */ read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize); + em = btrfs_lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize); read_unlock(&em_tree->lock); if (!em) { - ret = BLK_STS_IOERR; + status = BLK_STS_IOERR; goto out; } - ASSERT(em->compress_type != BTRFS_COMPRESS_NONE); - compressed_len = em->block_len; + ASSERT(btrfs_extent_map_is_compressed(em)); + compressed_len = em->disk_num_bytes; cb = alloc_compressed_bio(inode, file_offset, REQ_OP_READ, - end_compressed_bio_read); + end_bbio_compressed_read); - cb->start = em->orig_start; + cb->start = em->start - em->offset; em_len = em->len; em_start = em->start; cb->len = bbio->bio.bi_iter.bi_size; cb->compressed_len = compressed_len; - cb->compress_type = em->compress_type; + cb->compress_type = btrfs_extent_map_compression(em); cb->orig_bbio = bbio; + cb->bbio.csum_search_commit_root = bbio->csum_search_commit_root; - free_extent_map(em); + btrfs_free_extent_map(em); - cb->nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE); - cb->compressed_pages = kcalloc(cb->nr_pages, sizeof(struct page *), GFP_NOFS); - if (!cb->compressed_pages) { - ret = BLK_STS_RESOURCE; + cb->nr_folios = DIV_ROUND_UP(compressed_len, btrfs_min_folio_size(fs_info)); + cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct folio *), GFP_NOFS); + if (!cb->compressed_folios) { + status = BLK_STS_RESOURCE; goto out_free_bio; } - ret2 = btrfs_alloc_page_array(cb->nr_pages, 
cb->compressed_pages); - if (ret2) { - ret = BLK_STS_RESOURCE; + ret = btrfs_alloc_folio_array(cb->nr_folios, fs_info->block_min_order, + cb->compressed_folios); + if (ret) { + status = BLK_STS_RESOURCE; goto out_free_compressed_pages; } @@ -525,20 +627,20 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio) /* include any pages we added in add_ra-bio_pages */ cb->len = bbio->bio.bi_iter.bi_size; cb->bbio.bio.bi_iter.bi_sector = bbio->bio.bi_iter.bi_sector; - btrfs_add_compressed_bio_pages(cb); + btrfs_add_compressed_bio_folios(cb); if (memstall) psi_memstall_leave(&pflags); - btrfs_submit_bio(&cb->bbio, 0); + btrfs_submit_bbio(&cb->bbio, 0); return; out_free_compressed_pages: - kfree(cb->compressed_pages); + kfree(cb->compressed_folios); out_free_bio: bio_put(&cb->bbio.bio); out: - btrfs_bio_end_io(bbio, ret); + btrfs_bio_end_io(bbio, status); } /* @@ -588,8 +690,6 @@ struct heuristic_ws { struct list_head list; }; -static struct workspace_manager heuristic_wsm; - static void free_heuristic_ws(struct list_head *ws) { struct heuristic_ws *workspace; @@ -602,7 +702,7 @@ static void free_heuristic_ws(struct list_head *ws) kfree(workspace); } -static struct list_head *alloc_heuristic_ws(unsigned int level) +static struct list_head *alloc_heuristic_ws(struct btrfs_fs_info *fs_info) { struct heuristic_ws *ws; @@ -629,11 +729,9 @@ fail: return ERR_PTR(-ENOMEM); } -const struct btrfs_compress_op btrfs_heuristic_compress = { - .workspace_manager = &heuristic_wsm, -}; +const struct btrfs_compress_levels btrfs_heuristic_compress = { 0 }; -static const struct btrfs_compress_op * const btrfs_compress_op[] = { +static const struct btrfs_compress_levels * const btrfs_compress_levels[] = { /* The heuristic is represented as compression type 0 */ &btrfs_heuristic_compress, &btrfs_zlib_compress, @@ -641,13 +739,13 @@ static const struct btrfs_compress_op * const btrfs_compress_op[] = { &btrfs_zstd_compress, }; -static struct list_head *alloc_workspace(int type, unsigned int level) +static struct list_head *alloc_workspace(struct btrfs_fs_info *fs_info, int type, int level) { switch (type) { - case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws(level); - case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(level); - case BTRFS_COMPRESS_LZO: return lzo_alloc_workspace(level); - case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(level); + case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws(fs_info); + case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(fs_info, level); + case BTRFS_COMPRESS_LZO: return lzo_alloc_workspace(fs_info); + case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(fs_info, level); default: /* * This can't happen, the type is validated several times @@ -673,44 +771,58 @@ static void free_workspace(int type, struct list_head *ws) } } -static void btrfs_init_workspace_manager(int type) +static int alloc_workspace_manager(struct btrfs_fs_info *fs_info, + enum btrfs_compression_type type) { - struct workspace_manager *wsm; + struct workspace_manager *gwsm; struct list_head *workspace; - wsm = btrfs_compress_op[type]->workspace_manager; - INIT_LIST_HEAD(&wsm->idle_ws); - spin_lock_init(&wsm->ws_lock); - atomic_set(&wsm->total_ws, 0); - init_waitqueue_head(&wsm->ws_wait); + ASSERT(fs_info->compr_wsm[type] == NULL); + gwsm = kzalloc(sizeof(*gwsm), GFP_KERNEL); + if (!gwsm) + return -ENOMEM; + + INIT_LIST_HEAD(&gwsm->idle_ws); + spin_lock_init(&gwsm->ws_lock); + atomic_set(&gwsm->total_ws, 0); + init_waitqueue_head(&gwsm->ws_wait); + fs_info->compr_wsm[type] = gwsm; /* * 
Preallocate one workspace for each compression type so we can * guarantee forward progress in the worst case */ - workspace = alloc_workspace(type, 0); + workspace = alloc_workspace(fs_info, type, 0); if (IS_ERR(workspace)) { - pr_warn( - "BTRFS: cannot preallocate compression workspace, will try later\n"); + btrfs_warn(fs_info, + "cannot preallocate compression workspace for %s, will try later", + btrfs_compress_type2str(type)); } else { - atomic_set(&wsm->total_ws, 1); - wsm->free_ws = 1; - list_add(workspace, &wsm->idle_ws); + atomic_set(&gwsm->total_ws, 1); + gwsm->free_ws = 1; + list_add(workspace, &gwsm->idle_ws); } + return 0; } -static void btrfs_cleanup_workspace_manager(int type) +static void free_workspace_manager(struct btrfs_fs_info *fs_info, + enum btrfs_compression_type type) { - struct workspace_manager *wsman; struct list_head *ws; - - wsman = btrfs_compress_op[type]->workspace_manager; - while (!list_empty(&wsman->idle_ws)) { - ws = wsman->idle_ws.next; + struct workspace_manager *gwsm = fs_info->compr_wsm[type]; + + /* ZSTD uses its own workspace manager, should enter here. */ + ASSERT(type != BTRFS_COMPRESS_ZSTD && type < BTRFS_NR_COMPRESS_TYPES); + if (!gwsm) + return; + fs_info->compr_wsm[type] = NULL; + while (!list_empty(&gwsm->idle_ws)) { + ws = gwsm->idle_ws.next; list_del(ws); free_workspace(type, ws); - atomic_dec(&wsman->total_ws); + atomic_dec(&gwsm->total_ws); } + kfree(gwsm); } /* @@ -719,9 +831,9 @@ static void btrfs_cleanup_workspace_manager(int type) * Preallocation makes a forward progress guarantees and we do not return * errors. */ -struct list_head *btrfs_get_workspace(int type, unsigned int level) +struct list_head *btrfs_get_workspace(struct btrfs_fs_info *fs_info, int type, int level) { - struct workspace_manager *wsm; + struct workspace_manager *wsm = fs_info->compr_wsm[type]; struct list_head *workspace; int cpus = num_online_cpus(); unsigned nofs_flag; @@ -731,7 +843,7 @@ struct list_head *btrfs_get_workspace(int type, unsigned int level) wait_queue_head_t *ws_wait; int *free_ws; - wsm = btrfs_compress_op[type]->workspace_manager; + ASSERT(wsm); idle_ws = &wsm->idle_ws; ws_lock = &wsm->ws_lock; total_ws = &wsm->total_ws; @@ -767,7 +879,7 @@ again: * context of btrfs_compress_bio/btrfs_compress_pages */ nofs_flag = memalloc_nofs_save(); - workspace = alloc_workspace(type, level); + workspace = alloc_workspace(fs_info, type, level); memalloc_nofs_restore(nofs_flag); if (IS_ERR(workspace)) { @@ -789,22 +901,22 @@ again: /* once per minute */ 60 * HZ, /* no burst */ 1); - if (__ratelimit(&_rs)) { - pr_warn("BTRFS: no compression workspaces, low memory, retrying\n"); - } + if (__ratelimit(&_rs)) + btrfs_warn(fs_info, + "no compression workspaces, low memory, retrying"); } goto again; } return workspace; } -static struct list_head *get_workspace(int type, int level) +static struct list_head *get_workspace(struct btrfs_fs_info *fs_info, int type, int level) { switch (type) { - case BTRFS_COMPRESS_NONE: return btrfs_get_workspace(type, level); - case BTRFS_COMPRESS_ZLIB: return zlib_get_workspace(level); - case BTRFS_COMPRESS_LZO: return btrfs_get_workspace(type, level); - case BTRFS_COMPRESS_ZSTD: return zstd_get_workspace(level); + case BTRFS_COMPRESS_NONE: return btrfs_get_workspace(fs_info, type, level); + case BTRFS_COMPRESS_ZLIB: return zlib_get_workspace(fs_info, level); + case BTRFS_COMPRESS_LZO: return btrfs_get_workspace(fs_info, type, level); + case BTRFS_COMPRESS_ZSTD: return zstd_get_workspace(fs_info, level); default: /* * This can't 
happen, the type is validated several times @@ -818,21 +930,21 @@ static struct list_head *get_workspace(int type, int level) * put a workspace struct back on the list or free it if we have enough * idle ones sitting around */ -void btrfs_put_workspace(int type, struct list_head *ws) +void btrfs_put_workspace(struct btrfs_fs_info *fs_info, int type, struct list_head *ws) { - struct workspace_manager *wsm; + struct workspace_manager *gwsm = fs_info->compr_wsm[type]; struct list_head *idle_ws; spinlock_t *ws_lock; atomic_t *total_ws; wait_queue_head_t *ws_wait; int *free_ws; - wsm = btrfs_compress_op[type]->workspace_manager; - idle_ws = &wsm->idle_ws; - ws_lock = &wsm->ws_lock; - total_ws = &wsm->total_ws; - ws_wait = &wsm->ws_wait; - free_ws = &wsm->free_ws; + ASSERT(gwsm); + idle_ws = &gwsm->idle_ws; + ws_lock = &gwsm->ws_lock; + total_ws = &gwsm->total_ws; + ws_wait = &gwsm->ws_wait; + free_ws = &gwsm->free_ws; spin_lock(ws_lock); if (*free_ws <= num_online_cpus()) { @@ -849,13 +961,13 @@ wake: cond_wake_up(ws_wait); } -static void put_workspace(int type, struct list_head *ws) +static void put_workspace(struct btrfs_fs_info *fs_info, int type, struct list_head *ws) { switch (type) { - case BTRFS_COMPRESS_NONE: return btrfs_put_workspace(type, ws); - case BTRFS_COMPRESS_ZLIB: return btrfs_put_workspace(type, ws); - case BTRFS_COMPRESS_LZO: return btrfs_put_workspace(type, ws); - case BTRFS_COMPRESS_ZSTD: return zstd_put_workspace(ws); + case BTRFS_COMPRESS_NONE: return btrfs_put_workspace(fs_info, type, ws); + case BTRFS_COMPRESS_ZLIB: return btrfs_put_workspace(fs_info, type, ws); + case BTRFS_COMPRESS_LZO: return btrfs_put_workspace(fs_info, type, ws); + case BTRFS_COMPRESS_ZSTD: return zstd_put_workspace(fs_info, ws); default: /* * This can't happen, the type is validated several times @@ -869,19 +981,52 @@ static void put_workspace(int type, struct list_head *ws) * Adjust @level according to the limits of the compression algorithm or * fallback to default */ -static unsigned int btrfs_compress_set_level(int type, unsigned level) +static int btrfs_compress_set_level(unsigned int type, int level) { - const struct btrfs_compress_op *ops = btrfs_compress_op[type]; + const struct btrfs_compress_levels *levels = btrfs_compress_levels[type]; if (level == 0) - level = ops->default_level; + level = levels->default_level; else - level = min(level, ops->max_level); + level = clamp(level, levels->min_level, levels->max_level); return level; } /* + * Check whether the @level is within the valid range for the given type. + */ +bool btrfs_compress_level_valid(unsigned int type, int level) +{ + const struct btrfs_compress_levels *levels = btrfs_compress_levels[type]; + + return levels->min_level <= level && level <= levels->max_level; +} + +/* Wrapper around find_get_page(), with extra error message. */ +int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start, + struct folio **in_folio_ret) +{ + struct folio *in_folio; + + /* + * The compressed write path should have the folio locked already, thus + * we only need to grab one reference. 
+ */ + in_folio = filemap_get_folio(mapping, start >> PAGE_SHIFT); + if (IS_ERR(in_folio)) { + struct btrfs_inode *inode = BTRFS_I(mapping->host); + + btrfs_crit(inode->root->fs_info, + "failed to get page cache, root %lld ino %llu file offset %llu", + btrfs_root_id(inode->root), btrfs_ino(inode), start); + return -ENOENT; + } + *in_folio_ret = in_folio; + return 0; +} + +/* * Given an address space and start and length, compress the bytes into @pages * that are allocated on demand. * @@ -890,45 +1035,46 @@ static unsigned int btrfs_compress_set_level(int type, unsigned level) * - compression algo are 0-3 * - the level are bits 4-7 * - * @out_pages is an in/out parameter, holds maximum number of pages to allocate - * and returns number of actually allocated pages + * @out_folios is an in/out parameter, holds maximum number of folios to allocate + * and returns number of actually allocated folios * * @total_in is used to return the number of bytes actually read. It * may be smaller than the input length if we had to exit early because we - * ran out of room in the pages array or because we cross the + * ran out of room in the folios array or because we cross the * max_out threshold. * * @total_out is an in/out parameter, must be set to the input length and will * be also used to return the total number of compressed bytes */ -int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping, - u64 start, struct page **pages, - unsigned long *out_pages, - unsigned long *total_in, - unsigned long *total_out) +int btrfs_compress_folios(unsigned int type, int level, struct btrfs_inode *inode, + u64 start, struct folio **folios, unsigned long *out_folios, + unsigned long *total_in, unsigned long *total_out) { - int type = btrfs_compress_type(type_level); - int level = btrfs_compress_level(type_level); + struct btrfs_fs_info *fs_info = inode->root->fs_info; + const unsigned long orig_len = *total_out; struct list_head *workspace; int ret; level = btrfs_compress_set_level(type, level); - workspace = get_workspace(type, level); - ret = compression_compress_pages(type, workspace, mapping, start, pages, - out_pages, total_in, total_out); - put_workspace(type, workspace); + workspace = get_workspace(fs_info, type, level); + ret = compression_compress_pages(type, workspace, inode, start, folios, + out_folios, total_in, total_out); + /* The total read-in bytes should be no larger than the input. */ + ASSERT(*total_in <= orig_len); + put_workspace(fs_info, type, workspace); return ret; } static int btrfs_decompress_bio(struct compressed_bio *cb) { + struct btrfs_fs_info *fs_info = cb_to_fs_info(cb); struct list_head *workspace; int ret; int type = cb->compress_type; - workspace = get_workspace(type, 0); + workspace = get_workspace(fs_info, type, 0); ret = compression_decompress_bio(workspace, cb); - put_workspace(type, workspace); + put_workspace(fs_info, type, workspace); if (!ret) zero_fill_bio(&cb->orig_bbio->bio); @@ -938,45 +1084,113 @@ static int btrfs_decompress_bio(struct compressed_bio *cb) /* * a less complex decompression routine. Our compressed data fits in a * single page, and we want to read a single page out of it. - * start_byte tells us the offset into the compressed data we're interested in + * dest_pgoff tells us the offset into the destination folio where we write the + * decompressed data. 
*/ -int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page, - unsigned long start_byte, size_t srclen, size_t destlen) +int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio, + unsigned long dest_pgoff, size_t srclen, size_t destlen) { + struct btrfs_fs_info *fs_info = folio_to_fs_info(dest_folio); struct list_head *workspace; + const u32 sectorsize = fs_info->sectorsize; int ret; - workspace = get_workspace(type, 0); - ret = compression_decompress(type, workspace, data_in, dest_page, - start_byte, srclen, destlen); - put_workspace(type, workspace); + /* + * The full destination folio range should not exceed the folio size. + * And the @destlen should not exceed sectorsize, as this is only called for + * inline file extents, which should not exceed sectorsize. + */ + ASSERT(dest_pgoff + destlen <= folio_size(dest_folio) && destlen <= sectorsize); + + workspace = get_workspace(fs_info, type, 0); + ret = compression_decompress(type, workspace, data_in, dest_folio, + dest_pgoff, srclen, destlen); + put_workspace(fs_info, type, workspace); return ret; } +int btrfs_alloc_compress_wsm(struct btrfs_fs_info *fs_info) +{ + int ret; + + ret = alloc_workspace_manager(fs_info, BTRFS_COMPRESS_NONE); + if (ret < 0) + goto error; + ret = alloc_workspace_manager(fs_info, BTRFS_COMPRESS_ZLIB); + if (ret < 0) + goto error; + ret = alloc_workspace_manager(fs_info, BTRFS_COMPRESS_LZO); + if (ret < 0) + goto error; + ret = zstd_alloc_workspace_manager(fs_info); + if (ret < 0) + goto error; + return 0; +error: + btrfs_free_compress_wsm(fs_info); + return ret; +} + +void btrfs_free_compress_wsm(struct btrfs_fs_info *fs_info) +{ + free_workspace_manager(fs_info, BTRFS_COMPRESS_NONE); + free_workspace_manager(fs_info, BTRFS_COMPRESS_ZLIB); + free_workspace_manager(fs_info, BTRFS_COMPRESS_LZO); + zstd_free_workspace_manager(fs_info); +} + int __init btrfs_init_compress(void) { if (bioset_init(&btrfs_compressed_bioset, BIO_POOL_SIZE, offsetof(struct compressed_bio, bbio.bio), BIOSET_NEED_BVECS)) return -ENOMEM; - btrfs_init_workspace_manager(BTRFS_COMPRESS_NONE); - btrfs_init_workspace_manager(BTRFS_COMPRESS_ZLIB); - btrfs_init_workspace_manager(BTRFS_COMPRESS_LZO); - zstd_init_workspace_manager(); + + compr_pool.shrinker = shrinker_alloc(SHRINKER_NONSLAB, "btrfs-compr-pages"); + if (!compr_pool.shrinker) + return -ENOMEM; + + spin_lock_init(&compr_pool.lock); + INIT_LIST_HEAD(&compr_pool.list); + compr_pool.count = 0; + /* 128K / 4K = 32, for 8 threads is 256 pages. */ + compr_pool.thresh = BTRFS_MAX_COMPRESSED / PAGE_SIZE * 8; + compr_pool.shrinker->count_objects = btrfs_compr_pool_count; + compr_pool.shrinker->scan_objects = btrfs_compr_pool_scan; + compr_pool.shrinker->batch = 32; + compr_pool.shrinker->seeks = DEFAULT_SEEKS; + shrinker_register(compr_pool.shrinker); + return 0; } void __cold btrfs_exit_compress(void) { - btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_NONE); - btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_ZLIB); - btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_LZO); - zstd_cleanup_workspace_manager(); + /* For now scan drains all pages and does not touch the parameters. */ + btrfs_compr_pool_scan(NULL, NULL); + shrinker_free(compr_pool.shrinker); + bioset_exit(&btrfs_compressed_bioset); } /* + * The bvec is a single page bvec from a bio that contains folios from a filemap. + * + * Since the folio may be a large one, and if the bv_page is not a head page of + * a large folio, then page->index is unreliable. 
+ * + * Thus we need this helper to grab the proper file offset. + */ +static u64 file_offset_from_bvec(const struct bio_vec *bvec) +{ + const struct page *page = bvec->bv_page; + const struct folio *folio = page_folio(page); + + return (page_pgoff(folio, page) << PAGE_SHIFT) + bvec->bv_offset; +} + +/* * Copy decompressed data from working buffer to pages. * * @buf: The decompressed data buffer @@ -1021,13 +1235,14 @@ int btrfs_decompress_buf2page(const char *buf, u32 buf_len, u32 copy_start; /* Offset inside the full decompressed extent */ u32 bvec_offset; + void *kaddr; bvec = bio_iter_iovec(orig_bio, orig_bio->bi_iter); /* * cb->start may underflow, but subtracting that value can still * give us correct offset inside the full decompressed extent. */ - bvec_offset = page_offset(bvec.bv_page) + bvec.bv_offset - cb->start; + bvec_offset = file_offset_from_bvec(&bvec) - cb->start; /* Haven't reached the bvec range, exit */ if (decompressed + buf_len <= bvec_offset) @@ -1043,10 +1258,12 @@ int btrfs_decompress_buf2page(const char *buf, u32 buf_len, * @buf + @buf_len. */ ASSERT(copy_start - decompressed < buf_len); - memcpy_to_page(bvec.bv_page, bvec.bv_offset, - buf + copy_start - decompressed, copy_len); - cur_offset += copy_len; + kaddr = bvec_kmap_local(&bvec); + memcpy(kaddr, buf + copy_start - decompressed, copy_len); + kunmap_local(kaddr); + + cur_offset += copy_len; bio_advance(orig_bio, copy_len); /* Finished the bio */ if (!orig_bio->bi_iter.bi_size) @@ -1076,7 +1293,7 @@ int btrfs_decompress_buf2page(const char *buf, u32 buf_len, #define ENTROPY_LVL_HIGH (80) /* - * For increasead precision in shannon_entropy calculation, + * For increased precision in shannon_entropy calculation, * let's do pow(n, M) to save more digits after comma: * * - maximum int bit length is 64 @@ -1302,7 +1519,7 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end, struct heuristic_ws *ws) { struct page *page; - u64 index, index_end; + pgoff_t index, index_end; u32 i, curr_sample_pos; u8 *in_data; @@ -1353,11 +1570,6 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end, /* * Compression heuristic. * - * For now is's a naive and optimistic 'return true', we'll extend the logic to - * quickly (compared to direct compression) detect data characteristics - * (compressible/incompressible) to avoid wasting CPU time on incompressible - * data. - * * The following types of analysis can be performed: * - detect mostly zero data * - detect data with low "byte set" size (text, etc) @@ -1365,9 +1577,10 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end, * * Return non-zero if the compression should be done, 0 otherwise. 
*/ -int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end) +int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end) { - struct list_head *ws_list = get_workspace(0, 0); + struct btrfs_fs_info *fs_info = inode->root->fs_info; + struct list_head *ws_list = get_workspace(fs_info, 0, 0); struct heuristic_ws *ws; u32 i; u8 byte; @@ -1375,7 +1588,7 @@ int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end) ws = list_entry(ws_list, struct heuristic_ws, list); - heuristic_collect_sample(inode, start, end, ws); + heuristic_collect_sample(&inode->vfs_inode, start, end, ws); if (sample_repeated_patterns(ws)) { ret = 1; @@ -1436,29 +1649,34 @@ int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end) } out: - put_workspace(0, ws_list); + put_workspace(fs_info, 0, ws_list); return ret; } /* - * Convert the compression suffix (eg. after "zlib" starting with ":") to - * level, unrecognized string will set the default level + * Convert the compression suffix (eg. after "zlib" starting with ":") to level. + * + * If the resulting level exceeds the algo's supported levels, it will be clamped. + * + * Return <0 if no valid string can be found. + * Return 0 if everything is fine. */ -unsigned int btrfs_compress_str2level(unsigned int type, const char *str) +int btrfs_compress_str2level(unsigned int type, const char *str, int *level_ret) { - unsigned int level = 0; + int level = 0; int ret; - if (!type) + if (!type) { + *level_ret = btrfs_compress_set_level(type, level); return 0; + } if (str[0] == ':') { - ret = kstrtouint(str + 1, 10, &level); + ret = kstrtoint(str + 1, 10, &level); if (ret) - level = 0; + return ret; } - level = btrfs_compress_set_level(type, level); - - return level; + *level_ret = btrfs_compress_set_level(type, level); + return 0; } |
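The compr_pool introduced in this diff caches recently freed compression pages on a spinlock-protected list up to a threshold, and registers a shrinker so the cache is drained under memory pressure. Below is a minimal standalone sketch of that pattern, not the kernel code itself: a pthread mutex and plain malloc/free stand in for the spinlock, folios and struct shrinker, and all names and the threshold value are illustrative.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct buf {
	struct buf *next;
	char data[4096];
};

/* Userspace analogue of compr_pool: a bounded free list plus a drain hook. */
static struct {
	pthread_mutex_t lock;
	struct buf *list;
	int count;
	int thresh;
} pool = { PTHREAD_MUTEX_INITIALIZER, NULL, 0, 32 };

/* Analogue of btrfs_alloc_compr_folio(): reuse a cached buffer if one exists. */
static struct buf *pool_alloc(void)
{
	struct buf *b = NULL;

	pthread_mutex_lock(&pool.lock);
	if (pool.count > 0) {
		b = pool.list;
		pool.list = b->next;
		pool.count--;
	}
	pthread_mutex_unlock(&pool.lock);

	return b ? b : malloc(sizeof(*b));
}

/* Analogue of btrfs_free_compr_folio(): cache until the threshold, then free. */
static void pool_free(struct buf *b)
{
	bool cached = false;

	pthread_mutex_lock(&pool.lock);
	if (pool.count <= pool.thresh) {
		b->next = pool.list;
		pool.list = b;
		pool.count++;
		cached = true;
	}
	pthread_mutex_unlock(&pool.lock);

	if (!cached)
		free(b);
}

/* Analogue of btrfs_compr_pool_scan(): drop the whole cache when asked. */
static int pool_drain(void)
{
	struct buf *head;
	int freed;

	pthread_mutex_lock(&pool.lock);
	head = pool.list;
	freed = pool.count;
	pool.list = NULL;
	pool.count = 0;
	pthread_mutex_unlock(&pool.lock);

	while (head) {
		struct buf *next = head->next;

		free(head);
		head = next;
	}
	return freed;
}

int main(void)
{
	struct buf *a = pool_alloc();
	struct buf *b = pool_alloc();

	pool_free(a);
	pool_free(b);
	printf("drained %d cached buffers\n", pool_drain());
	return 0;
}

As in the kernel version, allocation and free stay O(1) under the lock, and all reclaim work is deferred to the drain callback.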
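The level handling also changes shape in this diff: btrfs_compress_set_level() now clamps a nonzero level into a per-algorithm [min_level, max_level] range (level 0 still selects the default), btrfs_compress_level_valid() checks that range, and btrfs_compress_str2level() reports parse errors instead of silently falling back. The sketch below models only that policy; the bounds are invented for illustration and are not the real zlib/lzo/zstd limits.

#include <stdbool.h>
#include <stdio.h>

struct compress_levels {
	int min_level;
	int default_level;
	int max_level;
};

/* Hypothetical bounds, standing in for one algorithm's btrfs_compress_levels. */
static const struct compress_levels demo_levels = { 1, 3, 9 };

/* Level 0 means "use the default"; anything else is clamped into range. */
static int set_level(const struct compress_levels *l, int level)
{
	if (level == 0)
		return l->default_level;
	if (level < l->min_level)
		return l->min_level;
	if (level > l->max_level)
		return l->max_level;
	return level;
}

static bool level_valid(const struct compress_levels *l, int level)
{
	return level >= l->min_level && level <= l->max_level;
}

int main(void)
{
	printf("0 -> %d, 15 -> %d, valid(15) = %d\n",
	       set_level(&demo_levels, 0),
	       set_level(&demo_levels, 15),
	       level_valid(&demo_levels, 15));
	return 0;
}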
