summaryrefslogtreecommitdiff
path: root/fs/btrfs/zstd.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/zstd.c')
-rw-r--r--fs/btrfs/zstd.c682
1 files changed, 493 insertions, 189 deletions
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index af6ec59972f5..c9cddcfa337b 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -6,25 +6,35 @@
*/
#include <linux/bio.h>
+#include <linux/bitmap.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/sched/mm.h>
#include <linux/pagemap.h>
#include <linux/refcount.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/zstd.h>
+#include "misc.h"
+#include "fs.h"
+#include "btrfs_inode.h"
#include "compression.h"
+#include "super.h"
#define ZSTD_BTRFS_MAX_WINDOWLOG 17
-#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
+#define ZSTD_BTRFS_MAX_INPUT (1U << ZSTD_BTRFS_MAX_WINDOWLOG)
#define ZSTD_BTRFS_DEFAULT_LEVEL 3
+#define ZSTD_BTRFS_MIN_LEVEL -15
+#define ZSTD_BTRFS_MAX_LEVEL 15
+/* 307s to avoid pathologically clashing with transaction commit */
+#define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)
-static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len)
+static zstd_parameters zstd_get_btrfs_parameters(int level,
+ size_t src_len)
{
- ZSTD_parameters params = ZSTD_getParams(ZSTD_BTRFS_DEFAULT_LEVEL,
- src_len, 0);
+ zstd_parameters params = zstd_get_params(level, src_len);
if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
@@ -36,12 +46,320 @@ struct workspace {
void *mem;
size_t size;
char *buf;
+ int level;
+ int req_level;
+ unsigned long last_used; /* jiffies */
struct list_head list;
- ZSTD_inBuffer in_buf;
- ZSTD_outBuffer out_buf;
+ struct list_head lru_list;
+ zstd_in_buffer in_buf;
+ zstd_out_buffer out_buf;
+ zstd_parameters params;
};
-static void zstd_free_workspace(struct list_head *ws)
+/*
+ * Zstd Workspace Management
+ *
+ * Zstd workspaces have different memory requirements depending on the level.
+ * The zstd workspaces are managed by having individual lists for each level
+ * and a global lru. Forward progress is maintained by protecting a max level
+ * workspace.
+ *
+ * Getting a workspace is done by using the bitmap to identify the levels that
+ * have available workspaces and scans up. This lets us recycle higher level
+ * workspaces because of the monotonic memory guarantee. A workspace's
+ * last_used is only updated if it is being used by the corresponding memory
+ * level. Putting a workspace involves adding it back to the appropriate places
+ * and adding it back to the lru if necessary.
+ *
+ * A timer is used to reclaim workspaces if they have not been used for
+ * ZSTD_BTRFS_RECLAIM_JIFFIES. This helps keep only active workspaces around.
+ * The upper bound is provided by the workqueue limit which is 2 (percpu limit).
+ */
+
+struct zstd_workspace_manager {
+ spinlock_t lock;
+ struct list_head lru_list;
+ struct list_head idle_ws[ZSTD_BTRFS_MAX_LEVEL];
+ unsigned long active_map;
+ wait_queue_head_t wait;
+ struct timer_list timer;
+};
+
+static size_t zstd_ws_mem_sizes[ZSTD_BTRFS_MAX_LEVEL];
+
+static inline struct workspace *list_to_workspace(struct list_head *list)
+{
+ return container_of(list, struct workspace, list);
+}
+
+static inline int clip_level(int level)
+{
+ return max(0, level - 1);
+}
+
+/*
+ * Timer callback to free unused workspaces.
+ *
+ * @t: timer
+ *
+ * This scans the lru_list and attempts to reclaim any workspace that hasn't
+ * been used for ZSTD_BTRFS_RECLAIM_JIFFIES.
+ *
+ * The context is softirq and does not need the _bh locking primitives.
+ */
+static void zstd_reclaim_timer_fn(struct timer_list *timer)
+{
+ struct zstd_workspace_manager *zwsm =
+ container_of(timer, struct zstd_workspace_manager, timer);
+ unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
+ struct list_head *pos, *next;
+
+ spin_lock(&zwsm->lock);
+
+ if (list_empty(&zwsm->lru_list)) {
+ spin_unlock(&zwsm->lock);
+ return;
+ }
+
+ list_for_each_prev_safe(pos, next, &zwsm->lru_list) {
+ struct workspace *victim = container_of(pos, struct workspace,
+ lru_list);
+ int level;
+
+ if (time_after(victim->last_used, reclaim_threshold))
+ break;
+
+ /* workspace is in use */
+ if (victim->req_level)
+ continue;
+
+ level = victim->level;
+ list_del(&victim->lru_list);
+ list_del(&victim->list);
+ zstd_free_workspace(&victim->list);
+
+ if (list_empty(&zwsm->idle_ws[level]))
+ clear_bit(level, &zwsm->active_map);
+
+ }
+
+ if (!list_empty(&zwsm->lru_list))
+ mod_timer(&zwsm->timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
+
+ spin_unlock(&zwsm->lock);
+}
+
+/*
+ * Calculate monotonic memory bounds.
+ *
+ * It is possible based on the level configurations that a higher level
+ * workspace uses less memory than a lower level workspace. In order to reuse
+ * workspaces, this must be made a monotonic relationship. This precomputes
+ * the required memory for each level and enforces the monotonicity between
+ * level and memory required.
+ */
+static void zstd_calc_ws_mem_sizes(void)
+{
+ size_t max_size = 0;
+ int level;
+
+ for (level = ZSTD_BTRFS_MIN_LEVEL; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
+ if (level == 0)
+ continue;
+ zstd_parameters params =
+ zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
+ size_t level_size =
+ max_t(size_t,
+ zstd_cstream_workspace_bound(&params.cParams),
+ zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT));
+
+ max_size = max_t(size_t, max_size, level_size);
+ /* Use level 1 workspace size for all the fast mode negative levels. */
+ zstd_ws_mem_sizes[clip_level(level)] = max_size;
+ }
+}
+
+int zstd_alloc_workspace_manager(struct btrfs_fs_info *fs_info)
+{
+ struct zstd_workspace_manager *zwsm;
+ struct list_head *ws;
+
+ ASSERT(fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] == NULL);
+ zwsm = kzalloc(sizeof(*zwsm), GFP_KERNEL);
+ if (!zwsm)
+ return -ENOMEM;
+ zstd_calc_ws_mem_sizes();
+ spin_lock_init(&zwsm->lock);
+ init_waitqueue_head(&zwsm->wait);
+ timer_setup(&zwsm->timer, zstd_reclaim_timer_fn, 0);
+
+ INIT_LIST_HEAD(&zwsm->lru_list);
+ for (int i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
+ INIT_LIST_HEAD(&zwsm->idle_ws[i]);
+ fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] = zwsm;
+
+ ws = zstd_alloc_workspace(fs_info, ZSTD_BTRFS_MAX_LEVEL);
+ if (IS_ERR(ws)) {
+ btrfs_warn(NULL, "cannot preallocate zstd compression workspace");
+ } else {
+ set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &zwsm->active_map);
+ list_add(ws, &zwsm->idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
+ }
+ return 0;
+}
+
+void zstd_free_workspace_manager(struct btrfs_fs_info *fs_info)
+{
+ struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
+ struct workspace *workspace;
+
+ if (!zwsm)
+ return;
+ fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] = NULL;
+ spin_lock_bh(&zwsm->lock);
+ for (int i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
+ while (!list_empty(&zwsm->idle_ws[i])) {
+ workspace = container_of(zwsm->idle_ws[i].next,
+ struct workspace, list);
+ list_del(&workspace->list);
+ list_del(&workspace->lru_list);
+ zstd_free_workspace(&workspace->list);
+ }
+ }
+ spin_unlock_bh(&zwsm->lock);
+ timer_delete_sync(&zwsm->timer);
+ kfree(zwsm);
+}
+
+/*
+ * Find workspace for given level.
+ *
+ * @level: compression level
+ *
+ * This iterates over the set bits in the active_map beginning at the requested
+ * compression level. This lets us utilize already allocated workspaces before
+ * allocating a new one. If the workspace is of a larger size, it is used, but
+ * the place in the lru_list and last_used times are not updated. This is to
+ * offer the opportunity to reclaim the workspace in favor of allocating an
+ * appropriately sized one in the future.
+ */
+static struct list_head *zstd_find_workspace(struct btrfs_fs_info *fs_info, int level)
+{
+ struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
+ struct list_head *ws;
+ struct workspace *workspace;
+ int i = clip_level(level);
+
+ ASSERT(zwsm);
+ spin_lock_bh(&zwsm->lock);
+ for_each_set_bit_from(i, &zwsm->active_map, ZSTD_BTRFS_MAX_LEVEL) {
+ if (!list_empty(&zwsm->idle_ws[i])) {
+ ws = zwsm->idle_ws[i].next;
+ workspace = list_to_workspace(ws);
+ list_del_init(ws);
+ /* keep its place if it's a lower level using this */
+ workspace->req_level = level;
+ if (clip_level(level) == workspace->level)
+ list_del(&workspace->lru_list);
+ if (list_empty(&zwsm->idle_ws[i]))
+ clear_bit(i, &zwsm->active_map);
+ spin_unlock_bh(&zwsm->lock);
+ return ws;
+ }
+ }
+ spin_unlock_bh(&zwsm->lock);
+
+ return NULL;
+}
+
+/*
+ * Zstd get_workspace for level.
+ *
+ * @level: compression level
+ *
+ * If @level is 0, then any compression level can be used. Therefore, we begin
+ * scanning from 1. We first scan through possible workspaces and then after
+ * attempt to allocate a new workspace. If we fail to allocate one due to
+ * memory pressure, go to sleep waiting for the max level workspace to free up.
+ */
+struct list_head *zstd_get_workspace(struct btrfs_fs_info *fs_info, int level)
+{
+ struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
+ struct list_head *ws;
+ unsigned int nofs_flag;
+
+ ASSERT(zwsm);
+
+ /* level == 0 means we can use any workspace */
+ if (!level)
+ level = 1;
+
+again:
+ ws = zstd_find_workspace(fs_info, level);
+ if (ws)
+ return ws;
+
+ nofs_flag = memalloc_nofs_save();
+ ws = zstd_alloc_workspace(fs_info, level);
+ memalloc_nofs_restore(nofs_flag);
+
+ if (IS_ERR(ws)) {
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&zwsm->wait, &wait, TASK_UNINTERRUPTIBLE);
+ schedule();
+ finish_wait(&zwsm->wait, &wait);
+
+ goto again;
+ }
+
+ return ws;
+}
+
+/*
+ * Zstd put_workspace.
+ *
+ * @ws: list_head for the workspace
+ *
+ * When putting back a workspace, we only need to update the LRU if we are of
+ * the requested compression level. Here is where we continue to protect the
+ * max level workspace or update last_used accordingly. If the reclaim timer
+ * isn't set, it is also set here. Only the max level workspace tries and wakes
+ * up waiting workspaces.
+ */
+void zstd_put_workspace(struct btrfs_fs_info *fs_info, struct list_head *ws)
+{
+ struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
+ struct workspace *workspace = list_to_workspace(ws);
+
+ ASSERT(zwsm);
+ spin_lock_bh(&zwsm->lock);
+
+ /* A node is only taken off the lru if we are the corresponding level */
+ if (clip_level(workspace->req_level) == workspace->level) {
+ /* Hide a max level workspace from reclaim */
+ if (list_empty(&zwsm->idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
+ INIT_LIST_HEAD(&workspace->lru_list);
+ } else {
+ workspace->last_used = jiffies;
+ list_add(&workspace->lru_list, &zwsm->lru_list);
+ if (!timer_pending(&zwsm->timer))
+ mod_timer(&zwsm->timer,
+ jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
+ }
+ }
+
+ set_bit(workspace->level, &zwsm->active_map);
+ list_add(&workspace->list, &zwsm->idle_ws[workspace->level]);
+ workspace->req_level = 0;
+
+ spin_unlock_bh(&zwsm->lock);
+
+ if (workspace->level == clip_level(ZSTD_BTRFS_MAX_LEVEL))
+ cond_wake_up(&zwsm->wait);
+}
+
+void zstd_free_workspace(struct list_head *ws)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
@@ -50,25 +368,27 @@ static void zstd_free_workspace(struct list_head *ws)
kfree(workspace);
}
-static struct list_head *zstd_alloc_workspace(void)
+struct list_head *zstd_alloc_workspace(struct btrfs_fs_info *fs_info, int level)
{
- ZSTD_parameters params =
- zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT);
+ const u32 blocksize = fs_info->sectorsize;
struct workspace *workspace;
workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
if (!workspace)
return ERR_PTR(-ENOMEM);
- workspace->size = max_t(size_t,
- ZSTD_CStreamWorkspaceBound(params.cParams),
- ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT));
- workspace->mem = kvmalloc(workspace->size, GFP_KERNEL);
- workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ /* Use level 1 workspace size for all the fast mode negative levels. */
+ workspace->size = zstd_ws_mem_sizes[clip_level(level)];
+ workspace->level = clip_level(level);
+ workspace->req_level = level;
+ workspace->last_used = jiffies;
+ workspace->mem = kvmalloc(workspace->size, GFP_KERNEL | __GFP_NOWARN);
+ workspace->buf = kmalloc(blocksize, GFP_KERNEL);
if (!workspace->mem || !workspace->buf)
goto fail;
INIT_LIST_HEAD(&workspace->list);
+ INIT_LIST_HEAD(&workspace->lru_list);
return &workspace->list;
fail:
@@ -76,72 +396,82 @@ fail:
return ERR_PTR(-ENOMEM);
}
-static int zstd_compress_pages(struct list_head *ws,
- struct address_space *mapping,
- u64 start,
- struct page **pages,
- unsigned long *out_pages,
- unsigned long *total_in,
- unsigned long *total_out)
+int zstd_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
+ u64 start, struct folio **folios, unsigned long *out_folios,
+ unsigned long *total_in, unsigned long *total_out)
{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct workspace *workspace = list_entry(ws, struct workspace, list);
- ZSTD_CStream *stream;
+ struct address_space *mapping = inode->vfs_inode.i_mapping;
+ zstd_cstream *stream;
int ret = 0;
- int nr_pages = 0;
- struct page *in_page = NULL; /* The current page to read */
- struct page *out_page = NULL; /* The current page to write to */
+ int nr_folios = 0;
+ struct folio *in_folio = NULL; /* The current folio to read. */
+ struct folio *out_folio = NULL; /* The current folio to write to. */
unsigned long tot_in = 0;
unsigned long tot_out = 0;
unsigned long len = *total_out;
- const unsigned long nr_dest_pages = *out_pages;
- unsigned long max_out = nr_dest_pages * PAGE_SIZE;
- ZSTD_parameters params = zstd_get_btrfs_parameters(len);
-
- *out_pages = 0;
+ const unsigned long nr_dest_folios = *out_folios;
+ const u64 orig_end = start + len;
+ const u32 blocksize = fs_info->sectorsize;
+ const u32 min_folio_size = btrfs_min_folio_size(fs_info);
+ unsigned long max_out = nr_dest_folios * min_folio_size;
+ unsigned int cur_len;
+
+ workspace->params = zstd_get_btrfs_parameters(workspace->req_level, len);
+ *out_folios = 0;
*total_out = 0;
*total_in = 0;
/* Initialize the stream */
- stream = ZSTD_initCStream(params, len, workspace->mem,
+ stream = zstd_init_cstream(&workspace->params, len, workspace->mem,
workspace->size);
- if (!stream) {
- pr_warn("BTRFS: ZSTD_initCStream failed\n");
+ if (unlikely(!stream)) {
+ btrfs_err(fs_info,
+ "zstd compression init level %d failed, root %llu inode %llu offset %llu",
+ workspace->req_level, btrfs_root_id(inode->root),
+ btrfs_ino(inode), start);
ret = -EIO;
goto out;
}
/* map in the first page of input data */
- in_page = find_get_page(mapping, start >> PAGE_SHIFT);
- workspace->in_buf.src = kmap(in_page);
+ ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
+ if (ret < 0)
+ goto out;
+ cur_len = btrfs_calc_input_length(in_folio, orig_end, start);
+ workspace->in_buf.src = kmap_local_folio(in_folio, offset_in_folio(in_folio, start));
workspace->in_buf.pos = 0;
- workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
-
+ workspace->in_buf.size = cur_len;
/* Allocate and map in the output buffer */
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
- if (out_page == NULL) {
+ out_folio = btrfs_alloc_compr_folio(fs_info);
+ if (out_folio == NULL) {
ret = -ENOMEM;
goto out;
}
- pages[nr_pages++] = out_page;
- workspace->out_buf.dst = kmap(out_page);
+ folios[nr_folios++] = out_folio;
+ workspace->out_buf.dst = folio_address(out_folio);
workspace->out_buf.pos = 0;
- workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
+ workspace->out_buf.size = min_t(size_t, max_out, min_folio_size);
while (1) {
size_t ret2;
- ret2 = ZSTD_compressStream(stream, &workspace->out_buf,
+ ret2 = zstd_compress_stream(stream, &workspace->out_buf,
&workspace->in_buf);
- if (ZSTD_isError(ret2)) {
- pr_debug("BTRFS: ZSTD_compressStream returned %d\n",
- ZSTD_getErrorCode(ret2));
+ if (unlikely(zstd_is_error(ret2))) {
+ btrfs_warn(fs_info,
+"zstd compression level %d failed, error %d root %llu inode %llu offset %llu",
+ workspace->req_level, zstd_get_error_code(ret2),
+ btrfs_root_id(inode->root), btrfs_ino(inode),
+ start);
ret = -EIO;
goto out;
}
/* Check to see if we are making it bigger */
- if (tot_in + workspace->in_buf.pos > 8192 &&
+ if (tot_in + workspace->in_buf.pos > blocksize * 2 &&
tot_in + workspace->in_buf.pos <
tot_out + workspace->out_buf.pos) {
ret = -E2BIG;
@@ -157,24 +487,21 @@ static int zstd_compress_pages(struct list_head *ws,
/* Check if we need more output space */
if (workspace->out_buf.pos == workspace->out_buf.size) {
- tot_out += PAGE_SIZE;
- max_out -= PAGE_SIZE;
- kunmap(out_page);
- if (nr_pages == nr_dest_pages) {
- out_page = NULL;
+ tot_out += min_folio_size;
+ max_out -= min_folio_size;
+ if (nr_folios == nr_dest_folios) {
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
- if (out_page == NULL) {
+ out_folio = btrfs_alloc_compr_folio(fs_info);
+ if (out_folio == NULL) {
ret = -ENOMEM;
goto out;
}
- pages[nr_pages++] = out_page;
- workspace->out_buf.dst = kmap(out_page);
+ folios[nr_folios++] = out_folio;
+ workspace->out_buf.dst = folio_address(out_folio);
workspace->out_buf.pos = 0;
- workspace->out_buf.size = min_t(size_t, max_out,
- PAGE_SIZE);
+ workspace->out_buf.size = min_t(size_t, max_out, min_folio_size);
}
/* We've reached the end of the input */
@@ -185,25 +512,32 @@ static int zstd_compress_pages(struct list_head *ws,
/* Check if we need more input */
if (workspace->in_buf.pos == workspace->in_buf.size) {
- tot_in += PAGE_SIZE;
- kunmap(in_page);
- put_page(in_page);
-
- start += PAGE_SIZE;
- len -= PAGE_SIZE;
- in_page = find_get_page(mapping, start >> PAGE_SHIFT);
- workspace->in_buf.src = kmap(in_page);
+ tot_in += workspace->in_buf.size;
+ kunmap_local(workspace->in_buf.src);
+ workspace->in_buf.src = NULL;
+ folio_put(in_folio);
+ start += cur_len;
+ len -= cur_len;
+ ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
+ if (ret < 0)
+ goto out;
+ cur_len = btrfs_calc_input_length(in_folio, orig_end, start);
+ workspace->in_buf.src = kmap_local_folio(in_folio,
+ offset_in_folio(in_folio, start));
workspace->in_buf.pos = 0;
- workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
+ workspace->in_buf.size = cur_len;
}
}
while (1) {
size_t ret2;
- ret2 = ZSTD_endStream(stream, &workspace->out_buf);
- if (ZSTD_isError(ret2)) {
- pr_debug("BTRFS: ZSTD_endStream returned %d\n",
- ZSTD_getErrorCode(ret2));
+ ret2 = zstd_end_stream(stream, &workspace->out_buf);
+ if (unlikely(zstd_is_error(ret2))) {
+ btrfs_err(fs_info,
+"zstd compression end level %d failed, error %d root %llu inode %llu offset %llu",
+ workspace->req_level, zstd_get_error_code(ret2),
+ btrfs_root_id(inode->root), btrfs_ino(inode),
+ start);
ret = -EIO;
goto out;
}
@@ -217,23 +551,21 @@ static int zstd_compress_pages(struct list_head *ws,
goto out;
}
- tot_out += PAGE_SIZE;
- max_out -= PAGE_SIZE;
- kunmap(out_page);
- if (nr_pages == nr_dest_pages) {
- out_page = NULL;
+ tot_out += min_folio_size;
+ max_out -= min_folio_size;
+ if (nr_folios == nr_dest_folios) {
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
- if (out_page == NULL) {
+ out_folio = btrfs_alloc_compr_folio(fs_info);
+ if (out_folio == NULL) {
ret = -ENOMEM;
goto out;
}
- pages[nr_pages++] = out_page;
- workspace->out_buf.dst = kmap(out_page);
+ folios[nr_folios++] = out_folio;
+ workspace->out_buf.dst = folio_address(out_folio);
workspace->out_buf.pos = 0;
- workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
+ workspace->out_buf.size = min_t(size_t, max_out, min_folio_size);
}
if (tot_out >= tot_in) {
@@ -245,55 +577,61 @@ static int zstd_compress_pages(struct list_head *ws,
*total_in = tot_in;
*total_out = tot_out;
out:
- *out_pages = nr_pages;
- /* Cleanup */
- if (in_page) {
- kunmap(in_page);
- put_page(in_page);
+ *out_folios = nr_folios;
+ if (workspace->in_buf.src) {
+ kunmap_local(workspace->in_buf.src);
+ folio_put(in_folio);
}
- if (out_page)
- kunmap(out_page);
return ret;
}
-static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
+int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
+ struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
struct workspace *workspace = list_entry(ws, struct workspace, list);
- struct page **pages_in = cb->compressed_pages;
- u64 disk_start = cb->start;
- struct bio *orig_bio = cb->orig_bio;
+ struct folio **folios_in = cb->compressed_folios;
size_t srclen = cb->compressed_len;
- ZSTD_DStream *stream;
+ zstd_dstream *stream;
int ret = 0;
- unsigned long page_in_index = 0;
- unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
+ const u32 blocksize = fs_info->sectorsize;
+ const unsigned int min_folio_size = btrfs_min_folio_size(fs_info);
+ unsigned long folio_in_index = 0;
+ unsigned long total_folios_in = DIV_ROUND_UP(srclen, min_folio_size);
unsigned long buf_start;
unsigned long total_out = 0;
- stream = ZSTD_initDStream(
+ stream = zstd_init_dstream(
ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
- if (!stream) {
- pr_debug("BTRFS: ZSTD_initDStream failed\n");
+ if (unlikely(!stream)) {
+ struct btrfs_inode *inode = cb->bbio.inode;
+
+ btrfs_err(inode->root->fs_info,
+ "zstd decompression init failed, root %llu inode %llu offset %llu",
+ btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
ret = -EIO;
goto done;
}
- workspace->in_buf.src = kmap(pages_in[page_in_index]);
+ workspace->in_buf.src = kmap_local_folio(folios_in[folio_in_index], 0);
workspace->in_buf.pos = 0;
- workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
+ workspace->in_buf.size = min_t(size_t, srclen, min_folio_size);
workspace->out_buf.dst = workspace->buf;
workspace->out_buf.pos = 0;
- workspace->out_buf.size = PAGE_SIZE;
+ workspace->out_buf.size = blocksize;
while (1) {
size_t ret2;
- ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
+ ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
&workspace->in_buf);
- if (ZSTD_isError(ret2)) {
- pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
- ZSTD_getErrorCode(ret2));
+ if (unlikely(zstd_is_error(ret2))) {
+ struct btrfs_inode *inode = cb->bbio.inode;
+
+ btrfs_err(inode->root->fs_info,
+ "zstd decompression failed, error %d root %llu inode %llu offset %llu",
+ zstd_get_error_code(ret2), btrfs_root_id(inode->root),
+ btrfs_ino(inode), cb->start);
ret = -EIO;
goto done;
}
@@ -302,7 +640,7 @@ static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
workspace->out_buf.pos = 0;
ret = btrfs_decompress_buf2page(workspace->out_buf.dst,
- buf_start, total_out, disk_start, orig_bio);
+ total_out - buf_start, cb, buf_start);
if (ret == 0)
break;
@@ -314,120 +652,86 @@ static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
break;
if (workspace->in_buf.pos == workspace->in_buf.size) {
- kunmap(pages_in[page_in_index++]);
- if (page_in_index >= total_pages_in) {
+ kunmap_local(workspace->in_buf.src);
+ folio_in_index++;
+ if (unlikely(folio_in_index >= total_folios_in)) {
workspace->in_buf.src = NULL;
ret = -EIO;
goto done;
}
- srclen -= PAGE_SIZE;
- workspace->in_buf.src = kmap(pages_in[page_in_index]);
+ srclen -= min_folio_size;
+ workspace->in_buf.src =
+ kmap_local_folio(folios_in[folio_in_index], 0);
workspace->in_buf.pos = 0;
- workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
+ workspace->in_buf.size = min_t(size_t, srclen, min_folio_size);
}
}
ret = 0;
- zero_fill_bio(orig_bio);
done:
if (workspace->in_buf.src)
- kunmap(pages_in[page_in_index]);
+ kunmap_local(workspace->in_buf.src);
return ret;
}
-static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
- struct page *dest_page,
- unsigned long start_byte,
- size_t srclen, size_t destlen)
+int zstd_decompress(struct list_head *ws, const u8 *data_in,
+ struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
+ size_t destlen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
- ZSTD_DStream *stream;
+ struct btrfs_fs_info *fs_info = btrfs_sb(folio_inode(dest_folio)->i_sb);
+ const u32 sectorsize = fs_info->sectorsize;
+ zstd_dstream *stream;
int ret = 0;
- size_t ret2;
- unsigned long total_out = 0;
- unsigned long pg_offset = 0;
- char *kaddr;
+ unsigned long to_copy = 0;
- stream = ZSTD_initDStream(
+ stream = zstd_init_dstream(
ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
- if (!stream) {
- pr_warn("BTRFS: ZSTD_initDStream failed\n");
+ if (unlikely(!stream)) {
+ struct btrfs_inode *inode = folio_to_inode(dest_folio);
+
+ btrfs_err(inode->root->fs_info,
+ "zstd decompression init failed, root %llu inode %llu offset %llu",
+ btrfs_root_id(inode->root), btrfs_ino(inode),
+ folio_pos(dest_folio));
ret = -EIO;
goto finish;
}
- destlen = min_t(size_t, destlen, PAGE_SIZE);
-
workspace->in_buf.src = data_in;
workspace->in_buf.pos = 0;
workspace->in_buf.size = srclen;
workspace->out_buf.dst = workspace->buf;
workspace->out_buf.pos = 0;
- workspace->out_buf.size = PAGE_SIZE;
-
- ret2 = 1;
- while (pg_offset < destlen
- && workspace->in_buf.pos < workspace->in_buf.size) {
- unsigned long buf_start;
- unsigned long buf_offset;
- unsigned long bytes;
-
- /* Check if the frame is over and we still need more input */
- if (ret2 == 0) {
- pr_debug("BTRFS: ZSTD_decompressStream ended early\n");
- ret = -EIO;
- goto finish;
- }
- ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
- &workspace->in_buf);
- if (ZSTD_isError(ret2)) {
- pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
- ZSTD_getErrorCode(ret2));
- ret = -EIO;
- goto finish;
- }
-
- buf_start = total_out;
- total_out += workspace->out_buf.pos;
- workspace->out_buf.pos = 0;
-
- if (total_out <= start_byte)
- continue;
-
- if (total_out > start_byte && buf_start < start_byte)
- buf_offset = start_byte - buf_start;
- else
- buf_offset = 0;
-
- bytes = min_t(unsigned long, destlen - pg_offset,
- workspace->out_buf.size - buf_offset);
-
- kaddr = kmap_atomic(dest_page);
- memcpy(kaddr + pg_offset, workspace->out_buf.dst + buf_offset,
- bytes);
- kunmap_atomic(kaddr);
-
- pg_offset += bytes;
+ workspace->out_buf.size = sectorsize;
+
+ /*
+ * Since both input and output buffers should not exceed one sector,
+ * one call should end the decompression.
+ */
+ ret = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf);
+ if (unlikely(zstd_is_error(ret))) {
+ struct btrfs_inode *inode = folio_to_inode(dest_folio);
+
+ btrfs_err(inode->root->fs_info,
+ "zstd decompression failed, error %d root %llu inode %llu offset %llu",
+ zstd_get_error_code(ret), btrfs_root_id(inode->root),
+ btrfs_ino(inode), folio_pos(dest_folio));
+ goto finish;
}
- ret = 0;
+ to_copy = workspace->out_buf.pos;
+ memcpy_to_folio(dest_folio, dest_pgoff, workspace->out_buf.dst, to_copy);
finish:
- if (pg_offset < destlen) {
- kaddr = kmap_atomic(dest_page);
- memset(kaddr + pg_offset, 0, destlen - pg_offset);
- kunmap_atomic(kaddr);
+ /* Error or early end. */
+ if (unlikely(to_copy < destlen)) {
+ ret = -EIO;
+ folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy);
}
return ret;
}
-static void zstd_set_level(struct list_head *ws, unsigned int type)
-{
-}
-
-const struct btrfs_compress_op btrfs_zstd_compress = {
- .alloc_workspace = zstd_alloc_workspace,
- .free_workspace = zstd_free_workspace,
- .compress_pages = zstd_compress_pages,
- .decompress_bio = zstd_decompress_bio,
- .decompress = zstd_decompress,
- .set_level = zstd_set_level,
+const struct btrfs_compress_levels btrfs_zstd_compress = {
+ .min_level = ZSTD_BTRFS_MIN_LEVEL,
+ .max_level = ZSTD_BTRFS_MAX_LEVEL,
+ .default_level = ZSTD_BTRFS_DEFAULT_LEVEL,
};