summaryrefslogtreecommitdiff
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
authorChris Mason <clm@fb.com>2019-07-10 12:28:17 -0700
committerDavid Sterba <dsterba@suse.com>2019-11-18 12:46:53 +0100
commitec39f7696ccfac85b3eea41eba7d6f747ee4ce8d (patch)
tree9a2da4377b0d00e0c553bbed9362cb21dc2e738f /fs/btrfs/inode.c
parent1d53c9e6723022b12e4a5ed4b141f67c834b7f6f (diff)
Btrfs: use REQ_CGROUP_PUNT for worker thread submitted bios
Async CRCs and compression submit IO through helper threads, which means they have IO priority inversions when cgroup IO controllers are in use. This flags all of the writes submitted by btrfs helper threads as REQ_CGROUP_PUNT. submit_bio() will punt these to dedicated per-blkcg work items to avoid the priority inversion. For the compression code, we take a reference on the wbc's blkg css and pass it down to the async workers. For the async CRCs, the bio already has the correct css, we just need to tell the block layer to use REQ_CGROUP_PUNT. Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Chris Mason <clm@fb.com> Modified-and-reviewed-by: Tejun Heo <tj@kernel.org> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c31
1 files changed, 28 insertions, 3 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 861105e11b37..7ed0fe2dd5ba 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -368,6 +368,7 @@ struct async_chunk {
u64 end;
unsigned int write_flags;
struct list_head extents;
+ struct cgroup_subsys_state *blkcg_css;
struct btrfs_work work;
atomic_t *pending;
};
@@ -880,7 +881,8 @@ retry:
ins.objectid,
ins.offset, async_extent->pages,
async_extent->nr_pages,
- async_chunk->write_flags)) {
+ async_chunk->write_flags,
+ async_chunk->blkcg_css)) {
struct page *p = async_extent->pages[0];
const u64 start = async_extent->start;
const u64 end = start + async_extent->ram_size - 1;
@@ -1198,6 +1200,8 @@ static noinline void async_cow_free(struct btrfs_work *work)
async_chunk = container_of(work, struct async_chunk, work);
if (async_chunk->inode)
btrfs_add_delayed_iput(async_chunk->inode);
+ if (async_chunk->blkcg_css)
+ css_put(async_chunk->blkcg_css);
/*
* Since the pointer to 'pending' is at the beginning of the array of
* async_chunk's, freeing it ensures the whole array has been freed.
@@ -1206,12 +1210,15 @@ static noinline void async_cow_free(struct btrfs_work *work)
kvfree(async_chunk->pending);
}
-static int cow_file_range_async(struct inode *inode, struct page *locked_page,
+static int cow_file_range_async(struct inode *inode,
+ struct writeback_control *wbc,
+ struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written,
unsigned int write_flags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
struct async_cow *ctx;
struct async_chunk *async_chunk;
unsigned long nr_pages;
@@ -1279,12 +1286,30 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
* to unlock it.
*/
if (locked_page) {
+ /*
+ * Depending on the compressibility, the pages might or
+ * might not go through async. We want all of them to
+ * be accounted against wbc once. Let's do it here
+ * before the paths diverge. wbc accounting is used
+ * only for foreign writeback detection and doesn't
+ * need full accuracy. Just account the whole thing
+ * against the first page.
+ */
+ wbc_account_cgroup_owner(wbc, locked_page,
+ cur_end - start);
async_chunk[i].locked_page = locked_page;
locked_page = NULL;
} else {
async_chunk[i].locked_page = NULL;
}
+ if (blkcg_css != blkcg_root_css) {
+ css_get(blkcg_css);
+ async_chunk[i].blkcg_css = blkcg_css;
+ } else {
+ async_chunk[i].blkcg_css = NULL;
+ }
+
btrfs_init_work(&async_chunk[i].work, async_cow_start,
async_cow_submit, async_cow_free);
@@ -1727,7 +1752,7 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
} else {
set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
&BTRFS_I(inode)->runtime_flags);
- ret = cow_file_range_async(inode, locked_page, start, end,
+ ret = cow_file_range_async(inode, wbc, locked_page, start, end,
page_started, nr_written,
write_flags);
}