diff options
Diffstat (limited to 'fs/ext4/readpage.c')
| -rw-r--r-- | fs/ext4/readpage.c | 348 |
1 files changed, 237 insertions, 111 deletions
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 40a5497b0f60..e7f2350c725b 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/readpage.c * @@ -6,8 +7,8 @@ * * This was originally taken from fs/mpage.c * - * The intent is the ext4_mpage_readpages() function here is intended - * to replace mpage_readpages() in the general case, not just for + * The ext4_mpage_readpages() function here is intended to + * replace mpage_readahead() in the general case, not just for * encrypted files. It has some limitations (see below), where it * will fall back to read_block_full_page(), but these limitations * should only be hit when page_size != block_size. @@ -42,17 +43,108 @@ #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/pagevec.h> -#include <linux/cleancache.h> #include "ext4.h" -static inline bool ext4_bio_encrypted(struct bio *bio) +#define NUM_PREALLOC_POST_READ_CTXS 128 + +static struct kmem_cache *bio_post_read_ctx_cache; +static mempool_t *bio_post_read_ctx_pool; + +/* postprocessing steps for read bios */ +enum bio_post_read_step { + STEP_INITIAL = 0, + STEP_DECRYPT, + STEP_VERITY, + STEP_MAX, +}; + +struct bio_post_read_ctx { + struct bio *bio; + struct work_struct work; + unsigned int cur_step; + unsigned int enabled_steps; +}; + +static void __read_end_io(struct bio *bio) +{ + struct folio_iter fi; + + bio_for_each_folio_all(fi, bio) + folio_end_read(fi.folio, bio->bi_status == 0); + if (bio->bi_private) + mempool_free(bio->bi_private, bio_post_read_ctx_pool); + bio_put(bio); +} + +static void bio_post_read_processing(struct bio_post_read_ctx *ctx); + +static void decrypt_work(struct work_struct *work) +{ + struct bio_post_read_ctx *ctx = + container_of(work, struct bio_post_read_ctx, work); + struct bio *bio = ctx->bio; + + if (fscrypt_decrypt_bio(bio)) + bio_post_read_processing(ctx); + else + __read_end_io(bio); +} + +static void verity_work(struct work_struct *work) +{ + struct bio_post_read_ctx *ctx = + container_of(work, struct bio_post_read_ctx, work); + struct bio *bio = ctx->bio; + + /* + * fsverity_verify_bio() may call readahead() again, and although verity + * will be disabled for that, decryption may still be needed, causing + * another bio_post_read_ctx to be allocated. So to guarantee that + * mempool_alloc() never deadlocks we must free the current ctx first. + * This is safe because verity is the last post-read step. + */ + BUILD_BUG_ON(STEP_VERITY + 1 != STEP_MAX); + mempool_free(ctx, bio_post_read_ctx_pool); + bio->bi_private = NULL; + + fsverity_verify_bio(bio); + + __read_end_io(bio); +} + +static void bio_post_read_processing(struct bio_post_read_ctx *ctx) { -#ifdef CONFIG_EXT4_FS_ENCRYPTION - return unlikely(bio->bi_private != NULL); -#else - return false; -#endif + /* + * We use different work queues for decryption and for verity because + * verity may require reading metadata pages that need decryption, and + * we shouldn't recurse to the same workqueue. + */ + switch (++ctx->cur_step) { + case STEP_DECRYPT: + if (ctx->enabled_steps & (1 << STEP_DECRYPT)) { + INIT_WORK(&ctx->work, decrypt_work); + fscrypt_enqueue_decrypt_work(&ctx->work); + return; + } + ctx->cur_step++; + fallthrough; + case STEP_VERITY: + if (ctx->enabled_steps & (1 << STEP_VERITY)) { + INIT_WORK(&ctx->work, verity_work); + fsverity_enqueue_verify_work(&ctx->work); + return; + } + ctx->cur_step++; + fallthrough; + default: + __read_end_io(ctx->bio); + } +} + +static bool bio_post_read_required(struct bio *bio) +{ + return bio->bi_private && !bio->bi_status; } /* @@ -60,7 +152,7 @@ static inline bool ext4_bio_encrypted(struct bio *bio) * * The mpage code never puts partial pages into a BIO (except for end-of-file). * If a page does not map to a contiguous run of blocks then it simply falls - * back to block_read_full_page(). + * back to block_read_full_folio(). * * Why is this? If a page's completion depends on a number of different BIOs * which can complete in any order (or at the same time) then determining the @@ -69,77 +161,98 @@ static inline bool ext4_bio_encrypted(struct bio *bio) */ static void mpage_end_io(struct bio *bio) { - struct bio_vec *bv; - int i; - - if (ext4_bio_encrypted(bio)) { - if (bio->bi_status) { - fscrypt_release_ctx(bio->bi_private); - } else { - fscrypt_decrypt_bio_pages(bio->bi_private, bio); - return; - } + if (bio_post_read_required(bio)) { + struct bio_post_read_ctx *ctx = bio->bi_private; + + ctx->cur_step = STEP_INITIAL; + bio_post_read_processing(ctx); + return; } - bio_for_each_segment_all(bv, bio, i) { - struct page *page = bv->bv_page; - - if (!bio->bi_status) { - SetPageUptodate(page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - unlock_page(page); + __read_end_io(bio); +} + +static inline bool ext4_need_verity(const struct inode *inode, pgoff_t idx) +{ + return fsverity_active(inode) && + idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); +} + +static void ext4_set_bio_post_read_ctx(struct bio *bio, + const struct inode *inode, + pgoff_t first_idx) +{ + unsigned int post_read_steps = 0; + + if (fscrypt_inode_uses_fs_layer_crypto(inode)) + post_read_steps |= 1 << STEP_DECRYPT; + + if (ext4_need_verity(inode, first_idx)) + post_read_steps |= 1 << STEP_VERITY; + + if (post_read_steps) { + /* Due to the mempool, this never fails. */ + struct bio_post_read_ctx *ctx = + mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); + + ctx->bio = bio; + ctx->enabled_steps = post_read_steps; + bio->bi_private = ctx; } +} - bio_put(bio); +static inline loff_t ext4_readpage_limit(struct inode *inode) +{ + if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) + return inode->i_sb->s_maxbytes; + + return i_size_read(inode); } -int ext4_mpage_readpages(struct address_space *mapping, - struct list_head *pages, struct page *page, - unsigned nr_pages) +int ext4_mpage_readpages(struct inode *inode, + struct readahead_control *rac, struct folio *folio) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; - - struct inode *inode = mapping->host; const unsigned blkbits = inode->i_blkbits; - const unsigned blocks_per_page = PAGE_SIZE >> blkbits; const unsigned blocksize = 1 << blkbits; + sector_t next_block; sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; - sector_t blocks[MAX_BUF_PER_PAGE]; + sector_t first_block; unsigned page_block; struct block_device *bdev = inode->i_sb->s_bdev; int length; unsigned relative_block = 0; struct ext4_map_blocks map; + unsigned int nr_pages, folio_pages; map.m_pblk = 0; map.m_lblk = 0; map.m_len = 0; map.m_flags = 0; - for (; nr_pages; nr_pages--) { + nr_pages = rac ? readahead_count(rac) : folio_nr_pages(folio); + for (; nr_pages; nr_pages -= folio_pages) { int fully_mapped = 1; - unsigned first_hole = blocks_per_page; - - prefetchw(&page->flags); - if (pages) { - page = list_entry(pages->prev, struct page, lru); - list_del(&page->lru); - if (add_to_page_cache_lru(page, mapping, page->index, - readahead_gfp_mask(mapping))) - goto next_page; - } + unsigned int first_hole; + unsigned int blocks_per_folio; + + if (rac) + folio = readahead_folio(rac); - if (page_has_buffers(page)) + folio_pages = folio_nr_pages(folio); + prefetchw(&folio->flags); + + if (folio_buffers(folio)) goto confused; - block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); - last_block = block_in_file + nr_pages * blocks_per_page; - last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; + blocks_per_folio = folio_size(folio) >> blkbits; + first_hole = blocks_per_folio; + block_in_file = next_block = EXT4_PG_TO_LBLK(inode, folio->index); + last_block = EXT4_PG_TO_LBLK(inode, folio->index + nr_pages); + last_block_in_file = (ext4_readpage_limit(inode) + + blocksize - 1) >> blkbits; if (last_block > last_block_in_file) last_block = last_block_in_file; page_block = 0; @@ -153,16 +266,15 @@ int ext4_mpage_readpages(struct address_space *mapping, unsigned map_offset = block_in_file - map.m_lblk; unsigned last = map.m_len - map_offset; + first_block = map.m_pblk + map_offset; for (relative_block = 0; ; relative_block++) { if (relative_block == last) { /* needed? */ map.m_flags &= ~EXT4_MAP_MAPPED; break; } - if (page_block == blocks_per_page) + if (page_block == blocks_per_folio) break; - blocks[page_block] = map.m_pblk + map_offset + - relative_block; page_block++; block_in_file++; } @@ -170,124 +282,138 @@ int ext4_mpage_readpages(struct address_space *mapping, /* * Then do more ext4_map_blocks() calls until we are - * done with this page. + * done with this folio. */ - while (page_block < blocks_per_page) { + while (page_block < blocks_per_folio) { if (block_in_file < last_block) { map.m_lblk = block_in_file; map.m_len = last_block - block_in_file; if (ext4_map_blocks(NULL, inode, &map, 0) < 0) { set_error_page: - SetPageError(page); - zero_user_segment(page, 0, - PAGE_SIZE); - unlock_page(page); + folio_zero_segment(folio, 0, + folio_size(folio)); + folio_unlock(folio); goto next_page; } } if ((map.m_flags & EXT4_MAP_MAPPED) == 0) { fully_mapped = 0; - if (first_hole == blocks_per_page) + if (first_hole == blocks_per_folio) first_hole = page_block; page_block++; block_in_file++; continue; } - if (first_hole != blocks_per_page) + if (first_hole != blocks_per_folio) goto confused; /* hole -> non-hole */ /* Contiguous blocks? */ - if (page_block && blocks[page_block-1] != map.m_pblk-1) + if (!page_block) + first_block = map.m_pblk; + else if (first_block + page_block != map.m_pblk) goto confused; for (relative_block = 0; ; relative_block++) { if (relative_block == map.m_len) { /* needed? */ map.m_flags &= ~EXT4_MAP_MAPPED; break; - } else if (page_block == blocks_per_page) + } else if (page_block == blocks_per_folio) break; - blocks[page_block] = map.m_pblk+relative_block; page_block++; block_in_file++; } } - if (first_hole != blocks_per_page) { - zero_user_segment(page, first_hole << blkbits, - PAGE_SIZE); + if (first_hole != blocks_per_folio) { + folio_zero_segment(folio, first_hole << blkbits, + folio_size(folio)); if (first_hole == 0) { - SetPageUptodate(page); - unlock_page(page); - goto next_page; + if (ext4_need_verity(inode, folio->index) && + !fsverity_verify_folio(folio)) + goto set_error_page; + folio_end_read(folio, true); + continue; } } else if (fully_mapped) { - SetPageMappedToDisk(page); - } - if (fully_mapped && blocks_per_page == 1 && - !PageUptodate(page) && cleancache_get_page(page) == 0) { - SetPageUptodate(page); - goto confused; + folio_set_mappedtodisk(folio); } /* - * This page will go to BIO. Do we need to send this + * This folio will go to BIO. Do we need to send this * BIO off first? */ - if (bio && (last_block_in_bio != blocks[0] - 1)) { + if (bio && (last_block_in_bio != first_block - 1 || + !fscrypt_mergeable_bio(bio, inode, next_block))) { submit_and_realloc: submit_bio(bio); bio = NULL; } if (bio == NULL) { - struct fscrypt_ctx *ctx = NULL; - - if (ext4_encrypted_inode(inode) && - S_ISREG(inode->i_mode)) { - ctx = fscrypt_get_ctx(inode, GFP_NOFS); - if (IS_ERR(ctx)) - goto set_error_page; - } - bio = bio_alloc(GFP_KERNEL, - min_t(int, nr_pages, BIO_MAX_PAGES)); - if (!bio) { - if (ctx) - fscrypt_release_ctx(ctx); - goto set_error_page; - } - bio->bi_bdev = bdev; - bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); + /* + * bio_alloc will _always_ be able to allocate a bio if + * __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset(). + */ + bio = bio_alloc(bdev, bio_max_segs(nr_pages), + REQ_OP_READ, GFP_KERNEL); + fscrypt_set_bio_crypt_ctx(bio, inode, next_block, + GFP_KERNEL); + ext4_set_bio_post_read_ctx(bio, inode, folio->index); + bio->bi_iter.bi_sector = first_block << (blkbits - 9); bio->bi_end_io = mpage_end_io; - bio->bi_private = ctx; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + if (rac) + bio->bi_opf |= REQ_RAHEAD; } length = first_hole << blkbits; - if (bio_add_page(bio, page, length, 0) < length) + if (!bio_add_folio(bio, folio, length, 0)) goto submit_and_realloc; if (((map.m_flags & EXT4_MAP_BOUNDARY) && (relative_block == map.m_len)) || - (first_hole != blocks_per_page)) { + (first_hole != blocks_per_folio)) { submit_bio(bio); bio = NULL; } else - last_block_in_bio = blocks[blocks_per_page - 1]; - goto next_page; + last_block_in_bio = first_block + blocks_per_folio - 1; + continue; confused: if (bio) { submit_bio(bio); bio = NULL; } - if (!PageUptodate(page)) - block_read_full_page(page, ext4_get_block); + if (!folio_test_uptodate(folio)) + block_read_full_folio(folio, ext4_get_block); else - unlock_page(page); - next_page: - if (pages) - put_page(page); + folio_unlock(folio); +next_page: + ; /* A label shall be followed by a statement until C23 */ } - BUG_ON(pages && !list_empty(pages)); if (bio) submit_bio(bio); return 0; } + +int __init ext4_init_post_read_processing(void) +{ + bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, SLAB_RECLAIM_ACCOUNT); + + if (!bio_post_read_ctx_cache) + goto fail; + bio_post_read_ctx_pool = + mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS, + bio_post_read_ctx_cache); + if (!bio_post_read_ctx_pool) + goto fail_free_cache; + return 0; + +fail_free_cache: + kmem_cache_destroy(bio_post_read_ctx_cache); +fail: + return -ENOMEM; +} + +void ext4_exit_post_read_processing(void) +{ + mempool_destroy(bio_post_read_ctx_pool); + kmem_cache_destroy(bio_post_read_ctx_cache); +} |
