diff options
Diffstat (limited to 'fs/btrfs/file-item.c')
| -rw-r--r-- | fs/btrfs/file-item.c | 209 |
1 files changed, 127 insertions, 82 deletions
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 886749b39672..14e5257f0f04 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -18,6 +18,7 @@ #include "fs.h" #include "accessors.h" #include "file-item.h" +#include "volumes.h" #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) * 2) / \ @@ -46,7 +47,7 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size) { u64 start, end, i_size; - int ret; + bool found; spin_lock(&inode->lock); i_size = new_i_size ?: i_size_read(&inode->vfs_inode); @@ -55,9 +56,9 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz goto out_unlock; } - ret = find_contiguous_extent_bit(inode->file_extent_tree, 0, &start, - &end, EXTENT_DIRTY); - if (!ret && start == 0) + found = btrfs_find_contiguous_extent_bit(inode->file_extent_tree, 0, &start, + &end, EXTENT_DIRTY); + if (found && start == 0) i_size = min(i_size, end + 1); else i_size = 0; @@ -91,8 +92,8 @@ int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start, ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize)); - return set_extent_bit(inode->file_extent_tree, start, start + len - 1, - EXTENT_DIRTY, NULL); + return btrfs_set_extent_bit(inode->file_extent_tree, start, start + len - 1, + EXTENT_DIRTY, NULL); } /* @@ -121,8 +122,8 @@ int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start, ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) || len == (u64)-1); - return clear_extent_bit(inode->file_extent_tree, start, - start + len - 1, EXTENT_DIRTY, NULL); + return btrfs_clear_extent_bit(inode->file_extent_tree, start, + start + len - 1, EXTENT_DIRTY, NULL); } static size_t bytes_to_csum_size(const struct btrfs_fs_info *fs_info, u32 bytes) @@ -163,20 +164,21 @@ int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans, int ret = 0; struct btrfs_file_extent_item *item; struct btrfs_key file_key; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct extent_buffer *leaf; path = btrfs_alloc_path(); if (!path) return -ENOMEM; + file_key.objectid = objectid; - file_key.offset = pos; file_key.type = BTRFS_EXTENT_DATA_KEY; + file_key.offset = pos; ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(*item)); if (ret < 0) - goto out; + return ret; leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); @@ -191,9 +193,6 @@ int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_encryption(leaf, item, 0); btrfs_set_file_extent_other_encoding(leaf, item, 0); - btrfs_mark_buffer_dirty(trans, leaf); -out: - btrfs_free_path(path); return ret; } @@ -214,8 +213,8 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans, int csums_in_item; file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; - file_key.offset = bytenr; file_key.type = BTRFS_EXTENT_CSUM_KEY; + file_key.offset = bytenr; ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); if (ret < 0) goto fail; @@ -261,8 +260,8 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int cow = mod != 0; file_key.objectid = objectid; - file_key.offset = offset; file_key.type = BTRFS_EXTENT_DATA_KEY; + file_key.offset = offset; return btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); } @@ -338,23 +337,23 @@ out: * * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise. */ -blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio) +int btrfs_lookup_bio_sums(struct btrfs_bio *bbio) { struct btrfs_inode *inode = bbio->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; struct bio *bio = &bbio->bio; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); const u32 sectorsize = fs_info->sectorsize; const u32 csum_size = fs_info->csum_size; u32 orig_len = bio->bi_iter.bi_size; u64 orig_disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT; const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits; - blk_status_t ret = BLK_STS_OK; + int ret = 0; u32 bio_offset = 0; if ((inode->flags & BTRFS_INODE_NODATASUM) || test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state)) - return BLK_STS_OK; + return 0; /* * This function is only called for read bio. @@ -371,14 +370,12 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio) ASSERT(bio_op(bio) == REQ_OP_READ); path = btrfs_alloc_path(); if (!path) - return BLK_STS_RESOURCE; + return -ENOMEM; if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { - bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS); - if (!bbio->csum) { - btrfs_free_path(path); - return BLK_STS_RESOURCE; - } + bbio->csum = kvcalloc(nblocks, csum_size, GFP_NOFS); + if (!bbio->csum) + return -ENOMEM; } else { bbio->csum = bbio->csum_inline; } @@ -397,8 +394,38 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio) * between reading the free space cache and updating the csum tree. */ if (btrfs_is_free_space_inode(inode)) { - path->search_commit_root = 1; - path->skip_locking = 1; + path->search_commit_root = true; + path->skip_locking = true; + } + + /* + * If we are searching for a csum of an extent from a past + * transaction, we can search in the commit root and reduce + * lock contention on the csum tree extent buffers. + * + * This is important because that lock is an rwsem which gets + * pretty heavy write load under memory pressure and sustained + * csum overwrites, unlike the commit_root_sem. (Memory pressure + * makes us writeback the nodes multiple times per transaction, + * which makes us cow them each time, taking the write lock.) + * + * Due to how rwsem is implemented, there is a possible + * priority inversion where the readers holding the lock don't + * get scheduled (say they're in a cgroup stuck in heavy reclaim) + * which then blocks writers, including transaction commit. By + * using a semaphore with fewer writers (only a commit switching + * the roots), we make this issue less likely. + * + * Note that we don't rely on btrfs_search_slot to lock the + * commit root csum. We call search_slot multiple times, which would + * create a potential race where a commit comes in between searches + * while we are not holding the commit_root_sem, and we get csums + * from across transactions. + */ + if (bbio->csum_search_commit_root) { + path->search_commit_root = true; + path->skip_locking = true; + down_read(&fs_info->commit_root_sem); } while (bio_offset < orig_len) { @@ -410,9 +437,9 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio) count = search_csum_tree(fs_info, path, cur_disk_bytenr, orig_len - bio_offset, csum_dst); if (count < 0) { - ret = errno_to_blk_status(count); + ret = count; if (bbio->csum != bbio->csum_inline) - kfree(bbio->csum); + kvfree(bbio->csum); bbio->csum = NULL; break; } @@ -431,12 +458,12 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio) memset(csum_dst, 0, csum_size); count = 1; - if (btrfs_root_id(inode->root) == BTRFS_DATA_RELOC_TREE_OBJECTID) { + if (btrfs_is_data_reloc_root(inode->root)) { u64 file_offset = bbio->file_offset + bio_offset; - set_extent_bit(&inode->io_tree, file_offset, - file_offset + sectorsize - 1, - EXTENT_NODATASUM, NULL); + btrfs_set_extent_bit(&inode->io_tree, file_offset, + file_offset + sectorsize - 1, + EXTENT_NODATASUM, NULL); } else { btrfs_warn_rl(fs_info, "csum hole found for disk bytenr range [%llu, %llu)", @@ -446,7 +473,8 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio) bio_offset += count * sectorsize; } - btrfs_free_path(path); + if (bbio->csum_search_commit_root) + up_read(&fs_info->commit_root_sem); return ret; } @@ -486,8 +514,8 @@ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end, path->nowait = nowait; key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; - key.offset = start; key.type = BTRFS_EXTENT_CSUM_KEY; + key.offset = start; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) @@ -737,23 +765,55 @@ fail: return ret; } +static void csum_one_bio(struct btrfs_bio *bbio, struct bvec_iter *src) +{ + struct btrfs_inode *inode = bbio->inode; + struct btrfs_fs_info *fs_info = inode->root->fs_info; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); + struct bio *bio = &bbio->bio; + struct btrfs_ordered_sum *sums = bbio->sums; + struct bvec_iter iter = *src; + phys_addr_t paddr; + const u32 blocksize = fs_info->sectorsize; + const u32 step = min(blocksize, PAGE_SIZE); + const u32 nr_steps = blocksize / step; + phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE]; + u32 offset = 0; + int index = 0; + + shash->tfm = fs_info->csum_shash; + + btrfs_bio_for_each_block(paddr, bio, &iter, step) { + paddrs[(offset / step) % nr_steps] = paddr; + offset += step; + + if (IS_ALIGNED(offset, blocksize)) { + btrfs_calculate_block_csum_pages(fs_info, paddrs, sums->sums + index); + index += fs_info->csum_size; + } + } +} + +static void csum_one_bio_work(struct work_struct *work) +{ + struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, csum_work); + + ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE); + ASSERT(bbio->async_csum == true); + csum_one_bio(bbio, &bbio->csum_saved_iter); + complete(&bbio->csum_done); +} + /* * Calculate checksums of the data contained inside a bio. */ -blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio) +int btrfs_csum_one_bio(struct btrfs_bio *bbio, bool async) { struct btrfs_ordered_extent *ordered = bbio->ordered; struct btrfs_inode *inode = bbio->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); struct bio *bio = &bbio->bio; struct btrfs_ordered_sum *sums; - char *data; - struct bvec_iter iter; - struct bio_vec bvec; - int index; - unsigned int blockcount; - int i; unsigned nofs_flag; nofs_flag = memalloc_nofs_save(); @@ -762,35 +822,23 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio) memalloc_nofs_restore(nofs_flag); if (!sums) - return BLK_STS_RESOURCE; + return -ENOMEM; + sums->logical = bbio->orig_logical; sums->len = bio->bi_iter.bi_size; INIT_LIST_HEAD(&sums->list); - - sums->logical = bio->bi_iter.bi_sector << SECTOR_SHIFT; - index = 0; - - shash->tfm = fs_info->csum_shash; - - bio_for_each_segment(bvec, bio, iter) { - blockcount = BTRFS_BYTES_TO_BLKS(fs_info, - bvec.bv_len + fs_info->sectorsize - - 1); - - for (i = 0; i < blockcount; i++) { - data = bvec_kmap_local(&bvec); - crypto_shash_digest(shash, - data + (i * fs_info->sectorsize), - fs_info->sectorsize, - sums->sums + index); - kunmap_local(data); - index += fs_info->csum_size; - } - - } - bbio->sums = sums; btrfs_add_ordered_sum(ordered, sums); + + if (!async) { + csum_one_bio(bbio, &bbio->bio.bi_iter); + return 0; + } + init_completion(&bbio->csum_done); + bbio->async_csum = true; + bbio->csum_saved_iter = bbio->bio.bi_iter; + INIT_WORK(&bbio->csum_work, csum_one_bio_work); + schedule_work(&bbio->csum_work); return 0; } @@ -799,11 +847,11 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio) * record the updated logical address on Zone Append completion. * Allocate just the structure with an empty sums array here for that case. */ -blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio) +int btrfs_alloc_dummy_sum(struct btrfs_bio *bbio) { bbio->sums = kmalloc(sizeof(*bbio->sums), GFP_NOFS); if (!bbio->sums) - return BLK_STS_RESOURCE; + return -ENOMEM; bbio->sums->len = bbio->bio.bi_iter.bi_size; bbio->sums->logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; btrfs_add_ordered_sum(bbio->ordered, bbio->sums); @@ -876,7 +924,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 len) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; u64 end_byte = bytenr + len; u64 csum_end; @@ -894,8 +942,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, while (1) { key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; - key.offset = end_byte - 1; key.type = BTRFS_EXTENT_CSUM_KEY; + key.offset = end_byte - 1; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) { @@ -998,7 +1046,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, * item changed size or key */ ret = btrfs_split_item(trans, root, path, &key, offset); - if (ret && ret != -EAGAIN) { + if (unlikely(ret && ret != -EAGAIN)) { btrfs_abort_transaction(trans, ret); break; } @@ -1012,7 +1060,6 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, } btrfs_release_path(path); } - btrfs_free_path(path); return ret; } @@ -1054,7 +1101,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key file_key; struct btrfs_key found_key; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_csum_item *item; struct btrfs_csum_item *item_end; struct extent_buffer *leaf = NULL; @@ -1076,8 +1123,8 @@ again: found_next = 0; bytenr = sums->logical + total_bytes; file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; - file_key.offset = bytenr; file_key.type = BTRFS_EXTENT_CSUM_KEY; + file_key.offset = bytenr; item = btrfs_lookup_csum(trans, root, path, bytenr, 1); if (!IS_ERR(item)) { @@ -1130,10 +1177,10 @@ again: } btrfs_release_path(path); - path->search_for_extension = 1; + path->search_for_extension = true; ret = btrfs_search_slot(trans, root, &file_key, path, csum_size, 1); - path->search_for_extension = 0; + path->search_for_extension = false; if (ret < 0) goto out; @@ -1259,14 +1306,12 @@ found: ins_size /= csum_size; total_bytes += ins_size * fs_info->sectorsize; - btrfs_mark_buffer_dirty(trans, path->nodes[0]); if (total_bytes < sums->len) { btrfs_release_path(path); cond_resched(); goto again; } out: - btrfs_free_path(path); return ret; } @@ -1304,7 +1349,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); em->offset = btrfs_file_extent_offset(leaf, fi); if (compress_type != BTRFS_COMPRESS_NONE) { - extent_map_set_compression(em, compress_type); + btrfs_extent_map_set_compression(em, compress_type); } else { /* * Older kernels can create regular non-hole data @@ -1324,7 +1369,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, em->start = 0; em->len = fs_info->sectorsize; em->offset = 0; - extent_map_set_compression(em, compress_type); + btrfs_extent_map_set_compression(em, compress_type); } else { btrfs_err(fs_info, "unknown file extent item type %d, inode %llu, offset %llu, " |
