summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_aops.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_aops.c')
-rw-r--r--fs/xfs/xfs_aops.c145
1 files changed, 86 insertions, 59 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index f18e5932aec4..0ab824f574ed 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -209,7 +209,8 @@ xfs_setfilesize_trans_alloc(
struct xfs_trans *tp;
int error;
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0,
+ XFS_TRANS_NOFS, &tp);
if (error)
return error;
@@ -390,6 +391,19 @@ xfs_map_blocks(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
+ /*
+ * Truncate can race with writeback since writeback doesn't take the
+ * iolock and truncate decreases the file size before it starts
+ * truncating the pages between new_size and old_size. Therefore, we
+ * can end up in the situation where writeback gets a CoW fork mapping
+ * but the truncate makes the mapping invalid and we end up in here
+ * trying to get a new mapping. Bail out here so that we simply never
+ * get a valid mapping and so we drop the write altogether. The page
+ * truncation will kill the contents anyway.
+ */
+ if (type == XFS_IO_COW && offset > i_size_read(inode))
+ return 0;
+
ASSERT(type != XFS_IO_COW);
if (type == XFS_IO_UNWRITTEN)
bmapi_flags |= XFS_BMAPI_IGSTATE;
@@ -399,7 +413,7 @@ xfs_map_blocks(
(ip->i_df.if_flags & XFS_IFEXTENTS));
ASSERT(offset <= mp->m_super->s_maxbytes);
- if (offset + count > mp->m_super->s_maxbytes)
+ if (offset > mp->m_super->s_maxbytes - count)
count = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -446,6 +460,19 @@ xfs_imap_valid(
{
offset >>= inode->i_blkbits;
+ /*
+ * We have to make sure the cached mapping is within EOF to protect
+ * against eofblocks trimming on file release leaving us with a stale
+ * mapping. Otherwise, a page for a subsequent file extending buffered
+ * write could get picked up by this writeback cycle and written to the
+ * wrong blocks.
+ *
+ * Note that what we really want here is a generic mapping invalidation
+ * mechanism to protect us from arbitrary extent modifying contexts, not
+ * just eofblocks.
+ */
+ xfs_trim_extent_eof(imap, XFS_I(inode));
+
return offset >= imap->br_startoff &&
offset < imap->br_startoff + imap->br_blockcount;
}
@@ -735,6 +762,14 @@ xfs_vm_invalidatepage(
{
trace_xfs_invalidatepage(page->mapping->host, page, offset,
length);
+
+ /*
+ * If we are invalidating the entire page, clear the dirty state from it
+ * so that we can check for attempts to release dirty cached pages in
+ * xfs_vm_releasepage().
+ */
+ if (offset == 0 && length >= PAGE_SIZE)
+ cancel_dirty_page(page);
block_invalidatepage(page, offset, length);
}
@@ -770,7 +805,7 @@ xfs_aops_discard_page(
goto out_invalidate;
xfs_alert(ip->i_mount,
- "page discard on page %p, inode 0x%llx, offset %llu.",
+ "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.",
page, ip->i_ino, offset);
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -875,13 +910,13 @@ xfs_writepage_map(
struct writeback_control *wbc,
struct inode *inode,
struct page *page,
- loff_t offset,
- uint64_t end_offset)
+ uint64_t end_offset)
{
LIST_HEAD(submit_list);
struct xfs_ioend *ioend, *next;
struct buffer_head *bh, *head;
ssize_t len = i_blocksize(inode);
+ uint64_t offset;
int error = 0;
int count = 0;
int uptodate = 1;
@@ -1125,7 +1160,7 @@ xfs_do_writepage(
end_offset = offset;
}
- return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
+ return xfs_writepage_map(wpc, wbc, inode, page, end_offset);
redirty:
redirty_page_for_writepage(wbc, page);
@@ -1160,16 +1195,22 @@ xfs_vm_writepages(
int ret;
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
- if (dax_mapping(mapping))
- return dax_writeback_mapping_range(mapping,
- xfs_find_bdev_for_inode(mapping->host), wbc);
-
ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
if (wpc.ioend)
ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
return ret;
}
+STATIC int
+xfs_dax_writepages(
+ struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
+ return dax_writeback_mapping_range(mapping,
+ xfs_find_bdev_for_inode(mapping->host), wbc);
+}
+
/*
* Called to move a page into cleanable state - and from there
* to be released. The page should already be clean. We always
@@ -1190,25 +1231,27 @@ xfs_vm_releasepage(
* mm accommodates an old ext3 case where clean pages might not have had
* the dirty bit cleared. Thus, it can send actual dirty pages to
* ->releasepage() via shrink_active_list(). Conversely,
- * block_invalidatepage() can send pages that are still marked dirty
- * but otherwise have invalidated buffers.
+ * block_invalidatepage() can send pages that are still marked dirty but
+ * otherwise have invalidated buffers.
*
* We want to release the latter to avoid unnecessary buildup of the
- * LRU, skip the former and warn if we've left any lingering
- * delalloc/unwritten buffers on clean pages. Skip pages with delalloc
- * or unwritten buffers and warn if the page is not dirty. Otherwise
- * try to release the buffers.
+ * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages
+ * that are entirely invalidated and need to be released. Hence the
+ * only time we should get dirty pages here is through
+ * shrink_active_list() and so we can simply skip those now.
+ *
+ * warn if we've left any lingering delalloc/unwritten buffers on clean
+ * or invalidated pages we are about to release.
*/
+ if (PageDirty(page))
+ return 0;
+
xfs_count_page_state(page, &delalloc, &unwritten);
- if (delalloc) {
- WARN_ON_ONCE(!PageDirty(page));
+ if (WARN_ON_ONCE(delalloc))
return 0;
- }
- if (unwritten) {
- WARN_ON_ONCE(!PageDirty(page));
+ if (WARN_ON_ONCE(unwritten))
return 0;
- }
return try_to_free_buffers(page);
}
@@ -1242,7 +1285,7 @@ xfs_map_trim_size(
if (mapping_size > size)
mapping_size = size;
if (offset < i_size_read(inode) &&
- offset + mapping_size >= i_size_read(inode)) {
+ (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) {
/* limit mapping to block that spans EOF */
mapping_size = roundup_64(i_size_read(inode) - offset,
i_blocksize(inode));
@@ -1289,26 +1332,25 @@ xfs_get_blocks(
lockmode = xfs_ilock_data_map_shared(ip);
ASSERT(offset <= mp->m_super->s_maxbytes);
- if (offset + size > mp->m_super->s_maxbytes)
+ if (offset > mp->m_super->s_maxbytes - size)
size = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
- error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
- &imap, &nimaps, XFS_BMAPI_ENTIRE);
+ error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+ &nimaps, 0);
if (error)
goto out_unlock;
-
- if (nimaps) {
- trace_xfs_get_blocks_found(ip, offset, size,
- imap.br_state == XFS_EXT_UNWRITTEN ?
- XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap);
- xfs_iunlock(ip, lockmode);
- } else {
+ if (!nimaps) {
trace_xfs_get_blocks_notfound(ip, offset, size);
goto out_unlock;
}
+ trace_xfs_get_blocks_found(ip, offset, size,
+ imap.br_state == XFS_EXT_UNWRITTEN ?
+ XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap);
+ xfs_iunlock(ip, lockmode);
+
/* trim mapping down to size requested */
xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
@@ -1331,17 +1373,6 @@ out_unlock:
return error;
}
-STATIC ssize_t
-xfs_vm_direct_IO(
- struct kiocb *iocb,
- struct iov_iter *iter)
-{
- /*
- * We just need the method present so that open/fcntl allow direct I/O.
- */
- return -EINVAL;
-}
-
STATIC sector_t
xfs_vm_bmap(
struct address_space *mapping,
@@ -1354,7 +1385,7 @@ xfs_vm_bmap(
/*
* The swap code (ab-)uses ->bmap to get a block mapping and then
- * bypasseѕ the file system for actual I/O. We really can't allow
+ * bypasses the file system for actual I/O. We really can't allow
* that on reflinks inodes, so we have to skip out here. And yes,
* 0 is the magic code for a bmap error.
*
@@ -1436,19 +1467,8 @@ xfs_vm_set_page_dirty(
newly_dirty = !TestSetPageDirty(page);
spin_unlock(&mapping->private_lock);
- if (newly_dirty) {
- /* sigh - __set_page_dirty() is static, so copy it here, too */
- unsigned long flags;
-
- spin_lock_irqsave(&mapping->tree_lock, flags);
- if (page->mapping) { /* Race with truncate? */
- WARN_ON_ONCE(!PageUptodate(page));
- account_page_dirtied(page, mapping);
- radix_tree_tag_set(&mapping->page_tree,
- page_index(page), PAGECACHE_TAG_DIRTY);
- }
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
- }
+ if (newly_dirty)
+ __set_page_dirty(page, mapping, 1);
unlock_page_memcg(page);
if (newly_dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -1464,8 +1484,15 @@ const struct address_space_operations xfs_address_space_operations = {
.releasepage = xfs_vm_releasepage,
.invalidatepage = xfs_vm_invalidatepage,
.bmap = xfs_vm_bmap,
- .direct_IO = xfs_vm_direct_IO,
+ .direct_IO = noop_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
};
+
+const struct address_space_operations xfs_dax_aops = {
+ .writepages = xfs_dax_writepages,
+ .direct_IO = noop_direct_IO,
+ .set_page_dirty = noop_set_page_dirty,
+ .invalidatepage = noop_invalidatepage,
+};