diff options
Diffstat (limited to 'fs/iomap.c')
-rw-r--r-- | fs/iomap.c | 92 |
1 files changed, 58 insertions, 34 deletions
diff --git a/fs/iomap.c b/fs/iomap.c index be61cf742b5e..afd163586aa0 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -65,6 +65,8 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags, return ret; if (WARN_ON(iomap.offset > pos)) return -EIO; + if (WARN_ON(iomap.length == 0)) + return -EIO; /* * Cut down the length to the one actually provided by the filesystem, @@ -350,8 +352,8 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset, static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes, struct iomap *iomap) { - sector_t sector = iomap->blkno + - (((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9); + sector_t sector = (iomap->addr + + (pos & PAGE_MASK) - iomap->offset) >> 9; return __dax_zero_page_range(iomap->bdev, iomap->dax_dev, sector, offset, bytes); @@ -510,11 +512,12 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi, flags |= FIEMAP_EXTENT_MERGED; if (iomap->flags & IOMAP_F_SHARED) flags |= FIEMAP_EXTENT_SHARED; + if (iomap->flags & IOMAP_F_DATA_INLINE) + flags |= FIEMAP_EXTENT_DATA_INLINE; return fiemap_fill_next_extent(fi, iomap->offset, - iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0, + iomap->addr != IOMAP_NULL_ADDR ? iomap->addr : 0, iomap->length, flags); - } static loff_t @@ -714,23 +717,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) { struct kiocb *iocb = dio->iocb; struct inode *inode = file_inode(iocb->ki_filp); + loff_t offset = iocb->ki_pos; ssize_t ret; - /* - * Try again to invalidate clean pages which might have been cached by - * non-direct readahead, or faulted in by get_user_pages() if the source - * of the write was an mmap'ed region of the file we're writing. Either - * one is a pretty crazy thing to do, so we don't support it 100%. If - * this invalidation fails, tough, the write still worked... - */ - if (!dio->error && - (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { - ret = invalidate_inode_pages2_range(inode->i_mapping, - iocb->ki_pos >> PAGE_SHIFT, - (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT); - WARN_ON_ONCE(ret); - } - if (dio->end_io) { ret = dio->end_io(iocb, dio->error ? dio->error : dio->size, @@ -742,12 +731,34 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) if (likely(!ret)) { ret = dio->size; /* check for short read */ - if (iocb->ki_pos + ret > dio->i_size && + if (offset + ret > dio->i_size && !(dio->flags & IOMAP_DIO_WRITE)) - ret = dio->i_size - iocb->ki_pos; + ret = dio->i_size - offset; iocb->ki_pos += ret; } + /* + * Try again to invalidate clean pages which might have been cached by + * non-direct readahead, or faulted in by get_user_pages() if the source + * of the write was an mmap'ed region of the file we're writing. Either + * one is a pretty crazy thing to do, so we don't support it 100%. If + * this invalidation fails, tough, the write still worked... + * + * And this page cache invalidation has to be after dio->end_io(), as + * some filesystems convert unwritten extents to real allocations in + * end_io() when necessary, otherwise a racing buffer read would cache + * zeros from unwritten extents. + */ + if (!dio->error && + (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { + int err; + err = invalidate_inode_pages2_range(inode->i_mapping, + offset >> PAGE_SHIFT, + (offset + dio->size - 1) >> PAGE_SHIFT); + if (err) + dio_warn_stale_pagecache(iocb->ki_filp); + } + inode_dio_end(file_inode(iocb->ki_filp)); kfree(dio); @@ -823,7 +834,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos, bio = bio_alloc(GFP_KERNEL, 1); bio_set_dev(bio, iomap->bdev); bio->bi_iter.bi_sector = - iomap->blkno + ((pos - iomap->offset) >> 9); + (iomap->addr + pos - iomap->offset) >> 9; bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io; @@ -848,6 +859,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, struct bio *bio; bool need_zeroout = false; int nr_pages, ret; + size_t copied = 0; if ((pos | length | align) & ((1 << blkbits) - 1)) return -EINVAL; @@ -859,7 +871,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, /*FALLTHRU*/ case IOMAP_UNWRITTEN: if (!(dio->flags & IOMAP_DIO_WRITE)) { - iov_iter_zero(length, dio->submit.iter); + length = iov_iter_zero(length, dio->submit.iter); dio->size += length; return length; } @@ -896,13 +908,16 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, } do { - if (dio->error) + size_t n; + if (dio->error) { + iov_iter_revert(dio->submit.iter, copied); return 0; + } bio = bio_alloc(GFP_KERNEL, nr_pages); bio_set_dev(bio, iomap->bdev); bio->bi_iter.bi_sector = - iomap->blkno + ((pos - iomap->offset) >> 9); + (iomap->addr + pos - iomap->offset) >> 9; bio->bi_write_hint = dio->iocb->ki_hint; bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io; @@ -910,20 +925,24 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, ret = bio_iov_iter_get_pages(bio, &iter); if (unlikely(ret)) { bio_put(bio); - return ret; + return copied ? copied : ret; } + n = bio->bi_iter.bi_size; if (dio->flags & IOMAP_DIO_WRITE) { bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE); - task_io_account_write(bio->bi_iter.bi_size); + task_io_account_write(n); } else { bio_set_op_attrs(bio, REQ_OP_READ, 0); if (dio->flags & IOMAP_DIO_DIRTY) bio_set_pages_dirty(bio); } - dio->size += bio->bi_iter.bi_size; - pos += bio->bi_iter.bi_size; + iov_iter_advance(dio->submit.iter, n); + + dio->size += n; + pos += n; + copied += n; nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES); @@ -939,9 +958,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, if (pad) iomap_dio_zero(dio, iomap, pos, fs_block_size - pad); } - - iov_iter_advance(dio->submit.iter, length); - return length; + return copied; } ssize_t @@ -1004,9 +1021,16 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (ret) goto out_free_dio; + /* + * Try to invalidate cache pages for the range we're direct + * writing. If this invalidation fails, tough, the write will + * still work, but racing two incompatible write paths is a + * pretty crazy thing to do, so we don't support it 100%. + */ ret = invalidate_inode_pages2_range(mapping, start >> PAGE_SHIFT, end >> PAGE_SHIFT); - WARN_ON_ONCE(ret); + if (ret) + dio_warn_stale_pagecache(iocb->ki_filp); ret = 0; if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) && @@ -1049,7 +1073,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (!(iocb->ki_flags & IOCB_HIPRI) || !dio->submit.last_queue || - !blk_mq_poll(dio->submit.last_queue, + !blk_poll(dio->submit.last_queue, dio->submit.cookie)) io_schedule(); } |