diff options
Diffstat (limited to 'fs/ext4/page-io.c')
| -rw-r--r-- | fs/ext4/page-io.c | 531 |
1 files changed, 311 insertions, 220 deletions
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 48786cdb5e6c..39abfeec5f36 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/page-io.c * @@ -8,7 +9,6 @@ #include <linux/fs.h> #include <linux/time.h> -#include <linux/jbd2.h> #include <linux/highuid.h> #include <linux/pagemap.h> #include <linux/quotaops.h> @@ -18,31 +18,69 @@ #include <linux/pagevec.h> #include <linux/mpage.h> #include <linux/namei.h> -#include <linux/aio.h> #include <linux/uio.h> #include <linux/bio.h> #include <linux/workqueue.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/mm.h> +#include <linux/sched/mm.h> #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" static struct kmem_cache *io_end_cachep; +static struct kmem_cache *io_end_vec_cachep; int __init ext4_init_pageio(void) { io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT); if (io_end_cachep == NULL) return -ENOMEM; + + io_end_vec_cachep = KMEM_CACHE(ext4_io_end_vec, 0); + if (io_end_vec_cachep == NULL) { + kmem_cache_destroy(io_end_cachep); + return -ENOMEM; + } return 0; } void ext4_exit_pageio(void) { kmem_cache_destroy(io_end_cachep); + kmem_cache_destroy(io_end_vec_cachep); +} + +struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end) +{ + struct ext4_io_end_vec *io_end_vec; + + io_end_vec = kmem_cache_zalloc(io_end_vec_cachep, GFP_NOFS); + if (!io_end_vec) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&io_end_vec->list); + list_add_tail(&io_end_vec->list, &io_end->list_vec); + return io_end_vec; +} + +static void ext4_free_io_end_vec(ext4_io_end_t *io_end) +{ + struct ext4_io_end_vec *io_end_vec, *tmp; + + if (list_empty(&io_end->list_vec)) + return; + list_for_each_entry_safe(io_end_vec, tmp, &io_end->list_vec, list) { + list_del(&io_end_vec->list); + kmem_cache_free(io_end_vec_cachep, io_end_vec); + } +} + +struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end) +{ + BUG_ON(list_empty(&io_end->list_vec)); + return list_last_entry(&io_end->list_vec, struct ext4_io_end_vec, list); } /* @@ -54,40 +92,39 @@ void ext4_exit_pageio(void) */ static void buffer_io_error(struct buffer_head *bh) { - char b[BDEVNAME_SIZE]; - printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", - bdevname(bh->b_bdev, b), + printk_ratelimited(KERN_ERR "Buffer I/O error on device %pg, logical block %llu\n", + bh->b_bdev, (unsigned long long)bh->b_blocknr); } static void ext4_finish_bio(struct bio *bio) { - int i; - int error = !test_bit(BIO_UPTODATE, &bio->bi_flags); + struct folio_iter fi; - for (i = 0; i < bio->bi_vcnt; i++) { - struct bio_vec *bvec = &bio->bi_io_vec[i]; - struct page *page = bvec->bv_page; + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; + struct folio *io_folio = NULL; struct buffer_head *bh, *head; - unsigned bio_start = bvec->bv_offset; - unsigned bio_end = bio_start + bvec->bv_len; + size_t bio_start = fi.offset; + size_t bio_end = bio_start + fi.length; unsigned under_io = 0; unsigned long flags; - if (!page) - continue; + if (fscrypt_is_bounce_folio(folio)) { + io_folio = folio; + folio = fscrypt_pagecache_folio(folio); + } - if (error) { - SetPageError(page); - set_bit(AS_EIO, &page->mapping->flags); + if (bio->bi_status) { + int err = blk_status_to_errno(bio->bi_status); + mapping_set_error(folio->mapping, err); } - bh = head = page_buffers(page); + bh = head = folio_buffers(folio); /* - * We check all buffers in the page under BH_Uptodate_Lock + * We check all buffers in the folio under b_uptodate_lock * to avoid races with other end io clearing async_write flags */ - local_irq_save(flags); - bit_spin_lock(BH_Uptodate_Lock, &head->b_state); + spin_lock_irqsave(&head->b_uptodate_lock, flags); do { if (bh_offset(bh) < bio_start || bh_offset(bh) + bh->b_size > bio_end) { @@ -96,13 +133,16 @@ static void ext4_finish_bio(struct bio *bio) continue; } clear_buffer_async_write(bh); - if (error) + if (bio->bi_status) { + set_buffer_write_io_error(bh); buffer_io_error(bh); + } } while ((bh = bh->b_this_page) != head); - bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); - local_irq_restore(flags); - if (!under_io) - end_page_writeback(page); + spin_unlock_irqrestore(&head->b_uptodate_lock, flags); + if (!under_io) { + fscrypt_free_bounce_page(&io_folio->page); + folio_end_writeback(folio); + } } } @@ -114,62 +154,60 @@ static void ext4_release_io_end(ext4_io_end_t *io_end) BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); WARN_ON(io_end->handle); - if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count)) - wake_up_all(ext4_ioend_wq(io_end->inode)); - for (bio = io_end->bio; bio; bio = next_bio) { next_bio = bio->bi_private; ext4_finish_bio(bio); bio_put(bio); } - if (io_end->flag & EXT4_IO_END_DIRECT) - inode_dio_done(io_end->inode); - if (io_end->iocb) - aio_complete(io_end->iocb, io_end->result, 0); + ext4_free_io_end_vec(io_end); kmem_cache_free(io_end_cachep, io_end); } -static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) -{ - struct inode *inode = io_end->inode; - - io_end->flag &= ~EXT4_IO_END_UNWRITTEN; - /* Wake up anyone waiting on unwritten extent conversion */ - if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) - wake_up_all(ext4_ioend_wq(inode)); -} - /* - * Check a range of space and convert unwritten extents to written. Note that + * On successful IO, check a range of space and convert unwritten extents to + * written. On IO failure, check if journal abort is needed. Note that * we are protected from truncate touching same part of extent tree by the * fact that truncate code waits for all DIO to finish (thus exclusion from * direct IO is achieved) and also waits for PageWriteback bits. Thus we * cannot get to ext4_ext_truncate() before all IOs overlapping that range are * completed (happens from ext4_free_ioend()). */ -static int ext4_end_io(ext4_io_end_t *io) +static int ext4_end_io_end(ext4_io_end_t *io_end) { - struct inode *inode = io->inode; - loff_t offset = io->offset; - ssize_t size = io->size; - handle_t *handle = io->handle; + struct inode *inode = io_end->inode; + handle_t *handle = io_end->handle; + struct super_block *sb = inode->i_sb; int ret = 0; - ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," + ext4_debug("ext4_end_io_nolock: io_end 0x%p from inode %lu,list->next 0x%p," "list->prev 0x%p\n", - io, inode->i_ino, io->list.next, io->list.prev); + io_end, inode->i_ino, io_end->list.next, io_end->list.prev); - io->handle = NULL; /* Following call will use up the handle */ - ret = ext4_convert_unwritten_extents(handle, inode, offset, size); - if (ret < 0) { - ext4_msg(inode->i_sb, KERN_EMERG, + /* + * Do not convert the unwritten extents if data writeback fails, + * or stale data may be exposed. + */ + io_end->handle = NULL; /* Following call will use up the handle */ + if (unlikely(io_end->flag & EXT4_IO_END_FAILED)) { + ret = -EIO; + if (handle) + jbd2_journal_free_reserved(handle); + + if (test_opt(sb, DATA_ERR_ABORT)) + jbd2_journal_abort(EXT4_SB(sb)->s_journal, ret); + } else { + ret = ext4_convert_unwritten_io_end_vec(handle, io_end); + } + if (ret < 0 && !ext4_emergency_state(sb) && + io_end->flag & EXT4_IO_END_UNWRITTEN) { + ext4_msg(sb, KERN_EMERG, "failed to convert unwritten extents to written " "extents -- potential data loss! " - "(inode %lu, offset %llu, size %zd, error %d)", - inode->i_ino, offset, size, ret); + "(inode %lu, error %d)", inode->i_ino, ret); } - ext4_clear_io_unwritten_flag(io); - ext4_release_io_end(io); + + ext4_clear_io_unwritten_flag(io_end); + ext4_release_io_end(io_end); return ret; } @@ -177,52 +215,63 @@ static void dump_completed_IO(struct inode *inode, struct list_head *head) { #ifdef EXT4FS_DEBUG struct list_head *cur, *before, *after; - ext4_io_end_t *io, *io0, *io1; + ext4_io_end_t *io_end, *io_end0, *io_end1; if (list_empty(head)) return; ext4_debug("Dump inode %lu completed io list\n", inode->i_ino); - list_for_each_entry(io, head, list) { - cur = &io->list; + list_for_each_entry(io_end, head, list) { + cur = &io_end->list; before = cur->prev; - io0 = container_of(before, ext4_io_end_t, list); + io_end0 = container_of(before, ext4_io_end_t, list); after = cur->next; - io1 = container_of(after, ext4_io_end_t, list); + io_end1 = container_of(after, ext4_io_end_t, list); ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", - io, inode->i_ino, io0, io1); + io_end, inode->i_ino, io_end0, io_end1); } #endif } +static bool ext4_io_end_defer_completion(ext4_io_end_t *io_end) +{ + if (io_end->flag & EXT4_IO_END_UNWRITTEN && + !list_empty(&io_end->list_vec)) + return true; + if (test_opt(io_end->inode->i_sb, DATA_ERR_ABORT) && + io_end->flag & EXT4_IO_END_FAILED && + !ext4_emergency_state(io_end->inode->i_sb)) + return true; + return false; +} + /* Add the io_end to per-inode completed end_io list. */ static void ext4_add_complete_io(ext4_io_end_t *io_end) { struct ext4_inode_info *ei = EXT4_I(io_end->inode); + struct ext4_sb_info *sbi = EXT4_SB(io_end->inode->i_sb); struct workqueue_struct *wq; unsigned long flags; - BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); + /* Only reserved conversions or pending IO errors will enter here. */ + WARN_ON(!(io_end->flag & EXT4_IO_END_DEFER_COMPLETION)); + WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN && + !io_end->handle && sbi->s_journal); + WARN_ON(!io_end->bio); + spin_lock_irqsave(&ei->i_completed_io_lock, flags); - if (io_end->handle) { - wq = EXT4_SB(io_end->inode->i_sb)->rsv_conversion_wq; - if (list_empty(&ei->i_rsv_conversion_list)) - queue_work(wq, &ei->i_rsv_conversion_work); - list_add_tail(&io_end->list, &ei->i_rsv_conversion_list); - } else { - wq = EXT4_SB(io_end->inode->i_sb)->unrsv_conversion_wq; - if (list_empty(&ei->i_unrsv_conversion_list)) - queue_work(wq, &ei->i_unrsv_conversion_work); - list_add_tail(&io_end->list, &ei->i_unrsv_conversion_list); - } + wq = sbi->rsv_conversion_wq; + if (list_empty(&ei->i_rsv_conversion_list)) + queue_work(wq, &ei->i_rsv_conversion_work); + list_add_tail(&io_end->list, &ei->i_rsv_conversion_list); spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); } static int ext4_do_flush_completed_IO(struct inode *inode, struct list_head *head) { - ext4_io_end_t *io; + ext4_io_end_t *io_end; struct list_head unwritten; unsigned long flags; struct ext4_inode_info *ei = EXT4_I(inode); @@ -234,11 +283,11 @@ static int ext4_do_flush_completed_IO(struct inode *inode, spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); while (!list_empty(&unwritten)) { - io = list_entry(unwritten.next, ext4_io_end_t, list); - BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN)); - list_del_init(&io->list); + io_end = list_entry(unwritten.next, ext4_io_end_t, list); + BUG_ON(!(io_end->flag & EXT4_IO_END_DEFER_COMPLETION)); + list_del_init(&io_end->list); - err = ext4_end_io(io); + err = ext4_end_io_end(io_end); if (unlikely(!ret && err)) ret = err; } @@ -246,7 +295,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode, } /* - * work on completed IO, to convert unwritten extents to extents + * Used to convert unwritten extents to written extents upon IO completion, + * or used to abort the journal upon IO errors. */ void ext4_end_io_rsv_work(struct work_struct *work) { @@ -255,93 +305,93 @@ void ext4_end_io_rsv_work(struct work_struct *work) ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_rsv_conversion_list); } -void ext4_end_io_unrsv_work(struct work_struct *work) -{ - struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info, - i_unrsv_conversion_work); - ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_unrsv_conversion_list); -} - ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) { - ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags); - if (io) { - atomic_inc(&EXT4_I(inode)->i_ioend_count); - io->inode = inode; - INIT_LIST_HEAD(&io->list); - atomic_set(&io->count, 1); + ext4_io_end_t *io_end = kmem_cache_zalloc(io_end_cachep, flags); + + if (io_end) { + io_end->inode = inode; + INIT_LIST_HEAD(&io_end->list); + INIT_LIST_HEAD(&io_end->list_vec); + refcount_set(&io_end->count, 1); } - return io; + return io_end; } void ext4_put_io_end_defer(ext4_io_end_t *io_end) { - if (atomic_dec_and_test(&io_end->count)) { - if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) { - ext4_release_io_end(io_end); - return; - } - ext4_add_complete_io(io_end); + if (refcount_dec_and_test(&io_end->count)) { + if (ext4_io_end_defer_completion(io_end)) + return ext4_add_complete_io(io_end); + + ext4_release_io_end(io_end); } } int ext4_put_io_end(ext4_io_end_t *io_end) { - int err = 0; - - if (atomic_dec_and_test(&io_end->count)) { - if (io_end->flag & EXT4_IO_END_UNWRITTEN) { - err = ext4_convert_unwritten_extents(io_end->handle, - io_end->inode, io_end->offset, - io_end->size); - io_end->handle = NULL; - ext4_clear_io_unwritten_flag(io_end); - } + if (refcount_dec_and_test(&io_end->count)) { + if (ext4_io_end_defer_completion(io_end)) + return ext4_end_io_end(io_end); + ext4_release_io_end(io_end); } - return err; + return 0; } ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) { - atomic_inc(&io_end->count); + refcount_inc(&io_end->count); return io_end; } -static void ext4_end_bio(struct bio *bio, int error) +/* BIO completion function for page writeback */ +static void ext4_end_bio(struct bio *bio) { ext4_io_end_t *io_end = bio->bi_private; - sector_t bi_sector = bio->bi_sector; + sector_t bi_sector = bio->bi_iter.bi_sector; - BUG_ON(!io_end); + if (WARN_ONCE(!io_end, "io_end is NULL: %pg: sector %Lu len %u err %d\n", + bio->bi_bdev, + (long long) bio->bi_iter.bi_sector, + (unsigned) bio_sectors(bio), + bio->bi_status)) { + ext4_finish_bio(bio); + bio_put(bio); + return; + } bio->bi_end_io = NULL; - if (test_bit(BIO_UPTODATE, &bio->bi_flags)) - error = 0; - if (io_end->flag & EXT4_IO_END_UNWRITTEN) { + if (bio->bi_status) { + struct inode *inode = io_end->inode; + + ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu " + "starting block %llu)", + bio->bi_status, inode->i_ino, + (unsigned long long) + bi_sector >> (inode->i_blkbits - 9)); + io_end->flag |= EXT4_IO_END_FAILED; + mapping_set_error(inode->i_mapping, + blk_status_to_errno(bio->bi_status)); + } + + if (ext4_io_end_defer_completion(io_end)) { /* * Link bio into list hanging from io_end. We have to do it * atomically as bio completions can be racing against each * other. */ bio->bi_private = xchg(&io_end->bio, bio); + ext4_put_io_end_defer(io_end); } else { + /* + * Drop io_end reference early. Inode can get freed once + * we finish the bio. + */ + ext4_put_io_end_defer(io_end); ext4_finish_bio(bio); bio_put(bio); } - - if (error) { - struct inode *inode = io_end->inode; - - ext4_warning(inode->i_sb, "I/O error writing to inode %lu " - "(offset %llu size %ld starting block %llu)", - inode->i_ino, - (unsigned long long) io_end->offset, - (long) io_end->size, - (unsigned long long) - bi_sector >> (inode->i_blkbits - 9)); - } - ext4_put_io_end_defer(io_end); } void ext4_io_submit(struct ext4_io_submit *io) @@ -349,10 +399,9 @@ void ext4_io_submit(struct ext4_io_submit *io) struct bio *bio = io->io_bio; if (bio) { - bio_get(io->io_bio); - submit_bio(io->io_op, io->io_bio); - BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); - bio_put(io->io_bio); + if (io->io_wbc->sync_mode == WB_SYNC_ALL) + io->io_bio->bi_opf |= REQ_SYNC; + submit_bio(io->io_bio); } io->io_bio = NULL; } @@ -360,94 +409,88 @@ void ext4_io_submit(struct ext4_io_submit *io) void ext4_io_submit_init(struct ext4_io_submit *io, struct writeback_control *wbc) { - io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); + io->io_wbc = wbc; io->io_bio = NULL; io->io_end = NULL; } -static int io_submit_init_bio(struct ext4_io_submit *io, - struct buffer_head *bh) +static void io_submit_init_bio(struct ext4_io_submit *io, + struct buffer_head *bh) { - int nvecs = bio_get_nr_vecs(bh->b_bdev); struct bio *bio; - bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); - if (!bio) - return -ENOMEM; - bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio->bi_bdev = bh->b_bdev; + /* + * bio_alloc will _always_ be able to allocate a bio if + * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset(). + */ + bio = bio_alloc(bh->b_bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOIO); + fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); + bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio->bi_end_io = ext4_end_bio; bio->bi_private = ext4_get_io_end(io->io_end); io->io_bio = bio; io->io_next_block = bh->b_blocknr; - return 0; + wbc_init_bio(io->io_wbc, bio); } -static int io_submit_add_bh(struct ext4_io_submit *io, - struct inode *inode, - struct buffer_head *bh) +static void io_submit_add_bh(struct ext4_io_submit *io, + struct inode *inode, + struct folio *folio, + struct folio *io_folio, + struct buffer_head *bh) { - int ret; - - if (io->io_bio && bh->b_blocknr != io->io_next_block) { + if (io->io_bio && (bh->b_blocknr != io->io_next_block || + !fscrypt_mergeable_bio_bh(io->io_bio, bh))) { submit_and_retry: ext4_io_submit(io); } if (io->io_bio == NULL) { - ret = io_submit_init_bio(io, bh); - if (ret) - return ret; + io_submit_init_bio(io, bh); + io->io_bio->bi_write_hint = inode->i_write_hint; } - ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); - if (ret != bh->b_size) + if (!bio_add_folio(io->io_bio, io_folio, bh->b_size, bh_offset(bh))) goto submit_and_retry; + wbc_account_cgroup_owner(io->io_wbc, folio, bh->b_size); io->io_next_block++; - return 0; } -int ext4_bio_write_page(struct ext4_io_submit *io, - struct page *page, - int len, - struct writeback_control *wbc) +int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio, + size_t len) { - struct inode *inode = page->mapping->host; - unsigned block_start, blocksize; + struct folio *io_folio = folio; + struct inode *inode = folio->mapping->host; + unsigned block_start; struct buffer_head *bh, *head; int ret = 0; - int nr_submitted = 0; - - blocksize = 1 << inode->i_blkbits; - - BUG_ON(!PageLocked(page)); - BUG_ON(PageWriteback(page)); + int nr_to_submit = 0; + struct writeback_control *wbc = io->io_wbc; + bool keep_towrite = false; - set_page_writeback(page); - ClearPageError(page); + BUG_ON(!folio_test_locked(folio)); + BUG_ON(folio_test_writeback(folio)); /* + * Comments copied from block_write_full_folio: + * + * The folio straddles i_size. It must be zeroed out on each and every + * writepage invocation because it may be mmapped. "A file is mapped + * in multiples of the page size. For a file that is not a multiple of + * the page size, the remaining memory is zeroed when mapped, and + * writes to that region are not written out to the file." + */ + if (len < folio_size(folio)) + folio_zero_segment(folio, len, folio_size(folio)); + /* * In the first loop we prepare and mark buffers to submit. We have to - * mark all buffers in the page before submitting so that - * end_page_writeback() cannot be called from ext4_bio_end_io() when IO + * mark all buffers in the folio before submitting so that + * folio_end_writeback() cannot be called from ext4_end_bio() when IO * on the first buffer finishes and we are still working on submitting * the second buffer. */ - bh = head = page_buffers(page); + bh = head = folio_buffers(folio); do { block_start = bh_offset(bh); if (block_start >= len) { - /* - * Comments copied from block_write_full_page_endio: - * - * The page straddles i_size. It must be zeroed out on - * each and every writepage invocation because it may - * be mmapped. "A file is mapped in multiples of the - * page size. For a file that is not a multiple of - * the page size, the remaining memory is zeroed when - * mapped, and writes to that region are not written - * out to the file." - */ - zero_user_segment(page, block_start, - block_start + blocksize); clear_buffer_dirty(bh); set_buffer_uptodate(bh); continue; @@ -457,46 +500,94 @@ int ext4_bio_write_page(struct ext4_io_submit *io, /* A hole? We can safely clear the dirty bit */ if (!buffer_mapped(bh)) clear_buffer_dirty(bh); - if (io->io_bio) - ext4_io_submit(io); + /* + * Keeping dirty some buffer we cannot write? Make sure + * to redirty the folio and keep TOWRITE tag so that + * racing WB_SYNC_ALL writeback does not skip the folio. + * This happens e.g. when doing writeout for + * transaction commit or when journalled data is not + * yet committed. + */ + if (buffer_dirty(bh) || + (buffer_jbd(bh) && buffer_jbddirty(bh))) { + if (!folio_test_dirty(folio)) + folio_redirty_for_writepage(wbc, folio); + keep_towrite = true; + } continue; } - if (buffer_new(bh)) { + if (buffer_new(bh)) clear_buffer_new(bh); - unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); - } set_buffer_async_write(bh); + clear_buffer_dirty(bh); + nr_to_submit++; } while ((bh = bh->b_this_page) != head); + /* Nothing to submit? Just unlock the folio... */ + if (!nr_to_submit) + return 0; + + bh = head = folio_buffers(folio); + + /* + * If any blocks are being written to an encrypted file, encrypt them + * into a bounce page. For simplicity, just encrypt until the last + * block which might be needed. This may cause some unneeded blocks + * (e.g. holes) to be unnecessarily encrypted, but this is rare and + * can't happen in the common case of blocksize == PAGE_SIZE. + */ + if (fscrypt_inode_uses_fs_layer_crypto(inode)) { + gfp_t gfp_flags = GFP_NOFS; + unsigned int enc_bytes = round_up(len, i_blocksize(inode)); + struct page *bounce_page; + + /* + * Since bounce page allocation uses a mempool, we can only use + * a waiting mask (i.e. request guaranteed allocation) on the + * first page of the bio. Otherwise it can deadlock. + */ + if (io->io_bio) + gfp_flags = GFP_NOWAIT; + retry_encrypt: + bounce_page = fscrypt_encrypt_pagecache_blocks(folio, + enc_bytes, 0, gfp_flags); + if (IS_ERR(bounce_page)) { + ret = PTR_ERR(bounce_page); + if (ret == -ENOMEM && + (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) { + gfp_t new_gfp_flags = GFP_NOFS; + if (io->io_bio) + ext4_io_submit(io); + else + new_gfp_flags |= __GFP_NOFAIL; + memalloc_retry_wait(gfp_flags); + gfp_flags = new_gfp_flags; + goto retry_encrypt; + } + + printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret); + folio_redirty_for_writepage(wbc, folio); + do { + if (buffer_async_write(bh)) { + clear_buffer_async_write(bh); + set_buffer_dirty(bh); + } + bh = bh->b_this_page; + } while (bh != head); + + return ret; + } + io_folio = page_folio(bounce_page); + } + + __folio_start_writeback(folio, keep_towrite); + /* Now submit buffers to write */ - bh = head = page_buffers(page); do { if (!buffer_async_write(bh)) continue; - ret = io_submit_add_bh(io, inode, bh); - if (ret) { - /* - * We only get here on ENOMEM. Not much else - * we can do but mark the page as dirty, and - * better luck next time. - */ - redirty_page_for_writepage(wbc, page); - break; - } - nr_submitted++; - clear_buffer_dirty(bh); + io_submit_add_bh(io, inode, folio, io_folio, bh); } while ((bh = bh->b_this_page) != head); - /* Error stopped previous loop? Clean up buffers... */ - if (ret) { - do { - clear_buffer_async_write(bh); - bh = bh->b_this_page; - } while (bh != head); - } - unlock_page(page); - /* Nothing submitted - we have to end page writeback */ - if (!nr_submitted) - end_page_writeback(page); - return ret; + return 0; } |
