diff options
Diffstat (limited to 'fs/ocfs2/aops.c')
| -rw-r--r-- | fs/ocfs2/aops.c | 599 |
1 files changed, 292 insertions, 307 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 832c1759a09a..76c86f1c2b1c 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1,22 +1,6 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * +// SPDX-License-Identifier: GPL-2.0-or-later +/* * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> @@ -25,7 +9,6 @@ #include <linux/pagemap.h> #include <asm/byteorder.h> #include <linux/swap.h> -#include <linux/pipe_fs_i.h> #include <linux/mpage.h> #include <linux/quotaops.h> #include <linux/blkdev.h> @@ -63,7 +46,6 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh = NULL; struct buffer_head *buffer_cache_bh = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - void *kaddr; trace_ocfs2_symlink_get_block( (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -108,17 +90,11 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, * could've happened. Since we've got a reference on * the bh, even if it commits while we're doing the * copy, the data is still good. */ - if (buffer_jbd(buffer_cache_bh) - && ocfs2_inode_is_new(inode)) { - kaddr = kmap_atomic(bh_result->b_page); - if (!kaddr) { - mlog(ML_ERROR, "couldn't kmap!\n"); - goto bail; - } - memcpy(kaddr + (bh_result->b_size * iblock), - buffer_cache_bh->b_data, - bh_result->b_size); - kunmap_atomic(kaddr); + if (buffer_jbd(buffer_cache_bh) && ocfs2_inode_is_new(inode)) { + memcpy_to_folio(bh_result->b_folio, + bh_result->b_size * iblock, + buffer_cache_bh->b_data, + bh_result->b_size); set_buffer_uptodate(bh_result); } brelse(buffer_cache_bh); @@ -173,9 +149,8 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock, err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count, &ext_flags); if (err) { - mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, " - "%llu, NULL)\n", err, inode, (unsigned long long)iblock, - (unsigned long long)p_blkno); + mlog(ML_ERROR, "get_blocks() failed, inode: 0x%p, " + "block: %llu\n", inode, (unsigned long long)iblock); goto bail; } @@ -233,10 +208,9 @@ bail: return err; } -int ocfs2_read_inline_data(struct inode *inode, struct page *page, +int ocfs2_read_inline_data(struct inode *inode, struct folio *folio, struct buffer_head *di_bh) { - void *kaddr; loff_t size; struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; @@ -248,7 +222,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, size = i_size_read(inode); - if (size > PAGE_SIZE || + if (size > folio_size(folio) || size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) { ocfs2_error(inode->i_sb, "Inode %llu has with inline data has bad size: %Lu\n", @@ -257,25 +231,18 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, return -EROFS; } - kaddr = kmap_atomic(page); - if (size) - memcpy(kaddr, di->id2.i_data.id_data, size); - /* Clear the remaining part of the page */ - memset(kaddr + size, 0, PAGE_SIZE - size); - flush_dcache_page(page); - kunmap_atomic(kaddr); - - SetPageUptodate(page); + folio_fill_tail(folio, 0, di->id2.i_data.id_data, size); + folio_mark_uptodate(folio); return 0; } -static int ocfs2_readpage_inline(struct inode *inode, struct page *page) +static int ocfs2_readpage_inline(struct inode *inode, struct folio *folio) { int ret; struct buffer_head *di_bh = NULL; - BUG_ON(!PageLocked(page)); + BUG_ON(!folio_test_locked(folio)); BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); ret = ocfs2_read_inode_block(inode, &di_bh); @@ -284,25 +251,24 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page) goto out; } - ret = ocfs2_read_inline_data(inode, page, di_bh); + ret = ocfs2_read_inline_data(inode, folio, di_bh); out: - unlock_page(page); + folio_unlock(folio); brelse(di_bh); return ret; } -static int ocfs2_readpage(struct file *file, struct page *page) +static int ocfs2_read_folio(struct file *file, struct folio *folio) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct ocfs2_inode_info *oi = OCFS2_I(inode); - loff_t start = (loff_t)page->index << PAGE_SHIFT; + loff_t start = folio_pos(folio); int ret, unlock = 1; - trace_ocfs2_readpage((unsigned long long)oi->ip_blkno, - (page ? page->index : 0)); + trace_ocfs2_readpage((unsigned long long)oi->ip_blkno, folio->index); - ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page); + ret = ocfs2_inode_lock_with_folio(inode, NULL, 0, folio); if (ret != 0) { if (ret == AOP_TRUNCATED_PAGE) unlock = 0; @@ -312,11 +278,11 @@ static int ocfs2_readpage(struct file *file, struct page *page) if (down_read_trylock(&oi->ip_alloc_sem) == 0) { /* - * Unlock the page and cycle ip_alloc_sem so that we don't + * Unlock the folio and cycle ip_alloc_sem so that we don't * busyloop waiting for ip_alloc_sem to unlock */ ret = AOP_TRUNCATED_PAGE; - unlock_page(page); + folio_unlock(folio); unlock = 0; down_read(&oi->ip_alloc_sem); up_read(&oi->ip_alloc_sem); @@ -324,26 +290,26 @@ static int ocfs2_readpage(struct file *file, struct page *page) } /* - * i_size might have just been updated as we grabed the meta lock. We + * i_size might have just been updated as we grabbed the meta lock. We * might now be discovering a truncate that hit on another node. - * block_read_full_page->get_block freaks out if it is asked to read + * block_read_full_folio->get_block freaks out if it is asked to read * beyond the end of a file, so we check here. Callers * (generic_file_read, vm_ops->fault) are clever enough to check i_size - * and notice that the page they just read isn't needed. + * and notice that the folio they just read isn't needed. * * XXX sys_readahead() seems to get that wrong? */ if (start >= i_size_read(inode)) { - zero_user(page, 0, PAGE_SIZE); - SetPageUptodate(page); + folio_zero_segment(folio, 0, folio_size(folio)); + folio_mark_uptodate(folio); ret = 0; goto out_alloc; } if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) - ret = ocfs2_readpage_inline(inode, page); + ret = ocfs2_readpage_inline(inode, folio); else - ret = block_read_full_page(page, ocfs2_get_block); + ret = block_read_full_folio(folio, ocfs2_get_block); unlock = 0; out_alloc: @@ -352,7 +318,7 @@ out_inode_unlock: ocfs2_inode_unlock(inode, 0); out: if (unlock) - unlock_page(page); + folio_unlock(folio); return ret; } @@ -365,14 +331,11 @@ out: * grow out to a tree. If need be, detecting boundary extents could * trivially be added in a future version of ocfs2_get_block(). */ -static int ocfs2_readpages(struct file *filp, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void ocfs2_readahead(struct readahead_control *rac) { - int ret, err = -EIO; - struct inode *inode = mapping->host; + int ret; + struct inode *inode = rac->mapping->host; struct ocfs2_inode_info *oi = OCFS2_I(inode); - loff_t start; - struct page *last; /* * Use the nonblocking flag for the dlm code to avoid page @@ -380,56 +343,48 @@ static int ocfs2_readpages(struct file *filp, struct address_space *mapping, */ ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK); if (ret) - return err; + return; - if (down_read_trylock(&oi->ip_alloc_sem) == 0) { - ocfs2_inode_unlock(inode, 0); - return err; - } + if (down_read_trylock(&oi->ip_alloc_sem) == 0) + goto out_unlock; /* * Don't bother with inline-data. There isn't anything * to read-ahead in that case anyway... */ if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) - goto out_unlock; + goto out_up; /* * Check whether a remote node truncated this file - we just * drop out in that case as it's not worth handling here. */ - last = lru_to_page(pages); - start = (loff_t)last->index << PAGE_SHIFT; - if (start >= i_size_read(inode)) - goto out_unlock; + if (readahead_pos(rac) >= i_size_read(inode)) + goto out_up; - err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block); + mpage_readahead(rac, ocfs2_get_block); -out_unlock: +out_up: up_read(&oi->ip_alloc_sem); +out_unlock: ocfs2_inode_unlock(inode, 0); - - return err; } /* Note: Because we don't support holes, our allocation has * already happened (allocation writes zeros to the file data) * so we don't have to worry about ordered writes in - * ocfs2_writepage. + * ocfs2_writepages. * - * ->writepage is called during the process of invalidating the page cache + * ->writepages is called during the process of invalidating the page cache * during blocked lock processing. It can't block on any cluster locks * to during block mapping. It's relying on the fact that the block * mapping can't have disappeared under the dirty pages that it is * being asked to write back. */ -static int ocfs2_writepage(struct page *page, struct writeback_control *wbc) +static int ocfs2_writepages(struct address_space *mapping, + struct writeback_control *wbc) { - trace_ocfs2_writepage( - (unsigned long long)OCFS2_I(page->mapping->host)->ip_blkno, - page->index); - - return block_write_full_page(page, ocfs2_get_block, wbc); + return mpage_writepages(mapping, wbc, ocfs2_get_block); } /* Taken from ext3. We don't necessarily need the full blown @@ -522,11 +477,11 @@ bail: return status; } -static int ocfs2_releasepage(struct page *page, gfp_t wait) +static bool ocfs2_release_folio(struct folio *folio, gfp_t wait) { - if (!page_has_buffers(page)) - return 0; - return try_to_free_buffers(page); + if (!folio_buffers(folio)) + return false; + return try_to_free_buffers(folio); } static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, @@ -564,7 +519,7 @@ static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, * * from == to == 0 is code for "zero the entire cluster region" */ -static void ocfs2_clear_page_regions(struct page *page, +static void ocfs2_clear_folio_regions(struct folio *folio, struct ocfs2_super *osb, u32 cpos, unsigned from, unsigned to) { @@ -573,7 +528,7 @@ static void ocfs2_clear_page_regions(struct page *page, ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end); - kaddr = kmap_atomic(page); + kaddr = kmap_local_folio(folio, 0); if (from || to) { if (from > cluster_start) @@ -584,20 +539,20 @@ static void ocfs2_clear_page_regions(struct page *page, memset(kaddr + cluster_start, 0, cluster_end - cluster_start); } - kunmap_atomic(kaddr); + kunmap_local(kaddr); } /* * Nonsparse file systems fully allocate before we get to the write * code. This prevents ocfs2_write() from tagging the write as an - * allocating one, which means ocfs2_map_page_blocks() might try to + * allocating one, which means ocfs2_map_folio_blocks() might try to * read-in the blocks at the tail of our file. Avoid reading them by * testing i_size against each block offset. */ -static int ocfs2_should_read_blk(struct inode *inode, struct page *page, +static int ocfs2_should_read_blk(struct inode *inode, struct folio *folio, unsigned int block_start) { - u64 offset = page_offset(page) + block_start; + u64 offset = folio_pos(folio) + block_start; if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) return 1; @@ -615,7 +570,7 @@ static int ocfs2_should_read_blk(struct inode *inode, struct page *page, * * This will also skip zeroing, which is handled externally. */ -int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, +int ocfs2_map_folio_blocks(struct folio *folio, u64 *p_blkno, struct inode *inode, unsigned int from, unsigned int to, int new) { @@ -624,10 +579,10 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, unsigned int block_end, block_start; unsigned int bsize = i_blocksize(inode); - if (!page_has_buffers(page)) - create_empty_buffers(page, bsize, 0); + head = folio_buffers(folio); + if (!head) + head = create_empty_buffers(folio, bsize, 0); - head = page_buffers(page); for (bh = head, block_start = 0; bh != head || !block_start; bh = bh->b_this_page, block_start += bsize) { block_end = block_start + bsize; @@ -639,7 +594,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, * they may belong to unallocated clusters. */ if (block_start >= to || block_end <= from) { - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) set_buffer_uptodate(bh); continue; } @@ -656,14 +611,13 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, clean_bdev_bh_alias(bh); } - if (PageUptodate(page)) { - if (!buffer_uptodate(bh)) - set_buffer_uptodate(bh); + if (folio_test_uptodate(folio)) { + set_buffer_uptodate(bh); } else if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_new(bh) && - ocfs2_should_read_blk(inode, page, block_start) && + ocfs2_should_read_blk(inode, folio, block_start) && (block_start < from || block_end > to)) { - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + bh_read_nowait(bh, 0); *wait_bh++=bh; } @@ -695,7 +649,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, if (block_start >= to) break; - zero_user(page, block_start, bh->b_size); + folio_zero_range(folio, block_start, bh->b_size); set_buffer_uptodate(bh); mark_buffer_dirty(bh); @@ -759,24 +713,24 @@ struct ocfs2_write_ctxt { unsigned int w_large_pages; /* - * Pages involved in this write. + * Folios involved in this write. * - * w_target_page is the page being written to by the user. + * w_target_folio is the folio being written to by the user. * - * w_pages is an array of pages which always contains - * w_target_page, and in the case of an allocating write with + * w_folios is an array of folios which always contains + * w_target_folio, and in the case of an allocating write with * page_size < cluster size, it will contain zero'd and mapped - * pages adjacent to w_target_page which need to be written + * pages adjacent to w_target_folio which need to be written * out in so that future reads from that region will get * zero's. */ - unsigned int w_num_pages; - struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; - struct page *w_target_page; + unsigned int w_num_folios; + struct folio *w_folios[OCFS2_MAX_CTXT_PAGES]; + struct folio *w_target_folio; /* * w_target_locked is used for page_mkwrite path indicating no unlocking - * against w_target_page in ocfs2_write_end_nolock. + * against w_target_folio in ocfs2_write_end_nolock. */ unsigned int w_target_locked:1; @@ -801,40 +755,40 @@ struct ocfs2_write_ctxt { unsigned int w_unwritten_count; }; -void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) +void ocfs2_unlock_and_free_folios(struct folio **folios, int num_folios) { int i; - for(i = 0; i < num_pages; i++) { - if (pages[i]) { - unlock_page(pages[i]); - mark_page_accessed(pages[i]); - put_page(pages[i]); - } + for(i = 0; i < num_folios; i++) { + if (!folios[i]) + continue; + folio_unlock(folios[i]); + folio_mark_accessed(folios[i]); + folio_put(folios[i]); } } -static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc) +static void ocfs2_unlock_folios(struct ocfs2_write_ctxt *wc) { int i; /* * w_target_locked is only set to true in the page_mkwrite() case. * The intent is to allow us to lock the target page from write_begin() - * to write_end(). The caller must hold a ref on w_target_page. + * to write_end(). The caller must hold a ref on w_target_folio. */ if (wc->w_target_locked) { - BUG_ON(!wc->w_target_page); - for (i = 0; i < wc->w_num_pages; i++) { - if (wc->w_target_page == wc->w_pages[i]) { - wc->w_pages[i] = NULL; + BUG_ON(!wc->w_target_folio); + for (i = 0; i < wc->w_num_folios; i++) { + if (wc->w_target_folio == wc->w_folios[i]) { + wc->w_folios[i] = NULL; break; } } - mark_page_accessed(wc->w_target_page); - put_page(wc->w_target_page); + folio_mark_accessed(wc->w_target_folio); + folio_put(wc->w_target_folio); } - ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); + ocfs2_unlock_and_free_folios(wc->w_folios, wc->w_num_folios); } static void ocfs2_free_unwritten_list(struct inode *inode, @@ -856,7 +810,7 @@ static void ocfs2_free_write_ctxt(struct inode *inode, struct ocfs2_write_ctxt *wc) { ocfs2_free_unwritten_list(inode, &wc->w_unwritten_list); - ocfs2_unlock_pages(wc); + ocfs2_unlock_folios(wc); brelse(wc->w_di_bh); kfree(wc); } @@ -899,29 +853,30 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, * and dirty so they'll be written out (in order to prevent uninitialised * block data from leaking). And clear the new bit. */ -static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to) +static void ocfs2_zero_new_buffers(struct folio *folio, size_t from, size_t to) { unsigned int block_start, block_end; struct buffer_head *head, *bh; - BUG_ON(!PageLocked(page)); - if (!page_has_buffers(page)) + BUG_ON(!folio_test_locked(folio)); + head = folio_buffers(folio); + if (!head) return; - bh = head = page_buffers(page); + bh = head; block_start = 0; do { block_end = block_start + bh->b_size; if (buffer_new(bh)) { if (block_end > from && block_start < to) { - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { unsigned start, end; start = max(from, block_start); end = min(to, block_end); - zero_user_segment(page, start, end); + folio_zero_segment(folio, start, end); set_buffer_uptodate(bh); } @@ -946,28 +901,26 @@ static void ocfs2_write_failure(struct inode *inode, int i; unsigned from = user_pos & (PAGE_SIZE - 1), to = user_pos + user_len; - struct page *tmppage; - if (wc->w_target_page) - ocfs2_zero_new_buffers(wc->w_target_page, from, to); + if (wc->w_target_folio) + ocfs2_zero_new_buffers(wc->w_target_folio, from, to); - for(i = 0; i < wc->w_num_pages; i++) { - tmppage = wc->w_pages[i]; + for (i = 0; i < wc->w_num_folios; i++) { + struct folio *folio = wc->w_folios[i]; - if (tmppage && page_has_buffers(tmppage)) { + if (folio && folio_buffers(folio)) { if (ocfs2_should_order_data(inode)) - ocfs2_jbd2_file_inode(wc->w_handle, inode); + ocfs2_jbd2_inode_add_write(wc->w_handle, inode, + user_pos, user_len); - block_commit_write(tmppage, from, to); + block_commit_write(folio, from, to); } } } -static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, - struct ocfs2_write_ctxt *wc, - struct page *page, u32 cpos, - loff_t user_pos, unsigned user_len, - int new) +static int ocfs2_prepare_folio_for_write(struct inode *inode, u64 *p_blkno, + struct ocfs2_write_ctxt *wc, struct folio *folio, u32 cpos, + loff_t user_pos, unsigned user_len, int new) { int ret; unsigned int map_from = 0, map_to = 0; @@ -980,20 +933,19 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, /* treat the write as new if the a hole/lseek spanned across * the page boundary. */ - new = new | ((i_size_read(inode) <= page_offset(page)) && - (page_offset(page) <= user_pos)); + new = new | ((i_size_read(inode) <= folio_pos(folio)) && + (folio_pos(folio) <= user_pos)); - if (page == wc->w_target_page) { + if (folio == wc->w_target_folio) { map_from = user_pos & (PAGE_SIZE - 1); map_to = map_from + user_len; if (new) - ret = ocfs2_map_page_blocks(page, p_blkno, inode, - cluster_start, cluster_end, - new); + ret = ocfs2_map_folio_blocks(folio, p_blkno, inode, + cluster_start, cluster_end, new); else - ret = ocfs2_map_page_blocks(page, p_blkno, inode, - map_from, map_to, new); + ret = ocfs2_map_folio_blocks(folio, p_blkno, inode, + map_from, map_to, new); if (ret) { mlog_errno(ret); goto out; @@ -1007,7 +959,7 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, } } else { /* - * If we haven't allocated the new page yet, we + * If we haven't allocated the new folio yet, we * shouldn't be writing it out without copying user * data. This is likely a math error from the caller. */ @@ -1016,8 +968,8 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, map_from = cluster_start; map_to = cluster_end; - ret = ocfs2_map_page_blocks(page, p_blkno, inode, - cluster_start, cluster_end, new); + ret = ocfs2_map_folio_blocks(folio, p_blkno, inode, + cluster_start, cluster_end, new); if (ret) { mlog_errno(ret); goto out; @@ -1025,20 +977,20 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, } /* - * Parts of newly allocated pages need to be zero'd. + * Parts of newly allocated folios need to be zero'd. * * Above, we have also rewritten 'to' and 'from' - as far as * the rest of the function is concerned, the entire cluster - * range inside of a page needs to be written. + * range inside of a folio needs to be written. * - * We can skip this if the page is up to date - it's already + * We can skip this if the folio is uptodate - it's already * been zero'd from being read in as a hole. */ - if (new && !PageUptodate(page)) - ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb), + if (new && !folio_test_uptodate(folio)) + ocfs2_clear_folio_regions(folio, OCFS2_SB(inode->i_sb), cpos, user_data_from, user_data_to); - flush_dcache_page(page); + flush_dcache_folio(folio); out: return ret; @@ -1047,11 +999,9 @@ out: /* * This function will only grab one clusters worth of pages. */ -static int ocfs2_grab_pages_for_write(struct address_space *mapping, - struct ocfs2_write_ctxt *wc, - u32 cpos, loff_t user_pos, - unsigned user_len, int new, - struct page *mmap_page) +static int ocfs2_grab_folios_for_write(struct address_space *mapping, + struct ocfs2_write_ctxt *wc, u32 cpos, loff_t user_pos, + unsigned user_len, int new, struct folio *mmap_folio) { int ret = 0, i; unsigned long start, target_index, end_index, index; @@ -1068,7 +1018,7 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, * last page of the write. */ if (new) { - wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb); + wc->w_num_folios = ocfs2_pages_per_cluster(inode->i_sb); start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos); /* * We need the index *past* the last page we could possibly @@ -1078,15 +1028,15 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, last_byte = max(user_pos + user_len, i_size_read(inode)); BUG_ON(last_byte < 1); end_index = ((last_byte - 1) >> PAGE_SHIFT) + 1; - if ((start + wc->w_num_pages) > end_index) - wc->w_num_pages = end_index - start; + if ((start + wc->w_num_folios) > end_index) + wc->w_num_folios = end_index - start; } else { - wc->w_num_pages = 1; + wc->w_num_folios = 1; start = target_index; } end_index = (user_pos + user_len - 1) >> PAGE_SHIFT; - for(i = 0; i < wc->w_num_pages; i++) { + for(i = 0; i < wc->w_num_folios; i++) { index = start + i; if (index >= target_index && index <= end_index && @@ -1096,37 +1046,39 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, * and wants us to directly use the page * passed in. */ - lock_page(mmap_page); + folio_lock(mmap_folio); /* Exit and let the caller retry */ - if (mmap_page->mapping != mapping) { - WARN_ON(mmap_page->mapping); - unlock_page(mmap_page); + if (mmap_folio->mapping != mapping) { + WARN_ON(mmap_folio->mapping); + folio_unlock(mmap_folio); ret = -EAGAIN; goto out; } - get_page(mmap_page); - wc->w_pages[i] = mmap_page; + folio_get(mmap_folio); + wc->w_folios[i] = mmap_folio; wc->w_target_locked = true; } else if (index >= target_index && index <= end_index && wc->w_type == OCFS2_WRITE_DIRECT) { /* Direct write has no mapping page. */ - wc->w_pages[i] = NULL; + wc->w_folios[i] = NULL; continue; } else { - wc->w_pages[i] = find_or_create_page(mapping, index, - GFP_NOFS); - if (!wc->w_pages[i]) { - ret = -ENOMEM; + wc->w_folios[i] = __filemap_get_folio(mapping, index, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, + GFP_NOFS); + if (IS_ERR(wc->w_folios[i])) { + ret = PTR_ERR(wc->w_folios[i]); mlog_errno(ret); + wc->w_folios[i] = NULL; goto out; } } - wait_for_stable_page(wc->w_pages[i]); + folio_wait_stable(wc->w_folios[i]); if (index == target_index) - wc->w_target_page = wc->w_pages[i]; + wc->w_target_folio = wc->w_folios[i]; } out: if (ret) @@ -1210,19 +1162,18 @@ static int ocfs2_write_cluster(struct address_space *mapping, if (!should_zero) p_blkno += (user_pos >> inode->i_sb->s_blocksize_bits) & (u64)(bpc - 1); - for(i = 0; i < wc->w_num_pages; i++) { + for (i = 0; i < wc->w_num_folios; i++) { int tmpret; /* This is the direct io target page. */ - if (wc->w_pages[i] == NULL) { - p_blkno++; + if (wc->w_folios[i] == NULL) { + p_blkno += (1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits)); continue; } - tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc, - wc->w_pages[i], cpos, - user_pos, user_len, - should_zero); + tmpret = ocfs2_prepare_folio_for_write(inode, &p_blkno, wc, + wc->w_folios[i], cpos, user_pos, user_len, + should_zero); if (tmpret) { mlog_errno(tmpret); if (ret == 0) @@ -1501,7 +1452,7 @@ static int ocfs2_write_begin_inline(struct address_space *mapping, { int ret; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct page *page; + struct folio *folio; handle_t *handle; struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; @@ -1512,19 +1463,21 @@ static int ocfs2_write_begin_inline(struct address_space *mapping, goto out; } - page = find_or_create_page(mapping, 0, GFP_NOFS); - if (!page) { + folio = __filemap_get_folio(mapping, 0, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS); + if (IS_ERR(folio)) { ocfs2_commit_trans(osb, handle); - ret = -ENOMEM; + ret = PTR_ERR(folio); mlog_errno(ret); goto out; } /* - * If we don't set w_num_pages then this page won't get unlocked + * If we don't set w_num_folios then this folio won't get unlocked * and freed on cleanup of the write context. */ - wc->w_pages[0] = wc->w_target_page = page; - wc->w_num_pages = 1; + wc->w_target_folio = folio; + wc->w_folios[0] = folio; + wc->w_num_folios = 1; ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh, OCFS2_JOURNAL_ACCESS_WRITE); @@ -1538,8 +1491,8 @@ static int ocfs2_write_begin_inline(struct address_space *mapping, if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)) ocfs2_set_inode_data_inline(inode, di); - if (!PageUptodate(page)) { - ret = ocfs2_read_inline_data(inode, page, wc->w_di_bh); + if (!folio_test_uptodate(folio)) { + ret = ocfs2_read_inline_data(inode, folio, wc->w_di_bh); if (ret) { ocfs2_commit_trans(osb, handle); @@ -1562,9 +1515,8 @@ int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size) } static int ocfs2_try_to_write_inline_data(struct address_space *mapping, - struct inode *inode, loff_t pos, - unsigned len, struct page *mmap_page, - struct ocfs2_write_ctxt *wc) + struct inode *inode, loff_t pos, size_t len, + struct folio *mmap_folio, struct ocfs2_write_ctxt *wc) { int ret, written = 0; loff_t end = pos + len; @@ -1579,7 +1531,7 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping, * Handle inodes which already have inline data 1st. */ if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { - if (mmap_page == NULL && + if (mmap_folio == NULL && ocfs2_size_fits_inline_data(wc->w_di_bh, end)) goto do_inline_write; @@ -1603,7 +1555,7 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping, * Check whether the write can fit. */ di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; - if (mmap_page || + if (mmap_folio || end > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) return 0; @@ -1670,9 +1622,9 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh, } int ocfs2_write_begin_nolock(struct address_space *mapping, - loff_t pos, unsigned len, ocfs2_write_type_t type, - struct page **pagep, void **fsdata, - struct buffer_head *di_bh, struct page *mmap_page) + loff_t pos, unsigned len, ocfs2_write_type_t type, + struct folio **foliop, void **fsdata, + struct buffer_head *di_bh, struct folio *mmap_folio) { int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS; unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0; @@ -1695,7 +1647,7 @@ try_again: if (ocfs2_supports_inline_data(osb)) { ret = ocfs2_try_to_write_inline_data(mapping, inode, pos, len, - mmap_page, wc); + mmap_folio, wc); if (ret == 1) { ret = 0; goto success; @@ -1747,7 +1699,7 @@ try_again: (unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode), le32_to_cpu(di->i_clusters), - pos, len, type, mmap_page, + pos, len, type, mmap_folio, clusters_to_alloc, extents_to_split); /* @@ -1818,26 +1770,26 @@ try_again: } /* - * Fill our page array first. That way we've grabbed enough so + * Fill our folio array first. That way we've grabbed enough so * that we can zero and flush if we error after adding the * extent. */ - ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len, - cluster_of_pages, mmap_page); - if (ret && ret != -EAGAIN) { - mlog_errno(ret); - goto out_quota; - } + ret = ocfs2_grab_folios_for_write(mapping, wc, wc->w_cpos, pos, len, + cluster_of_pages, mmap_folio); + if (ret) { + /* + * ocfs2_grab_folios_for_write() returns -EAGAIN if it + * could not lock the target folio. In this case, we exit + * with no error and no target folio. This will trigger + * the caller, page_mkwrite(), to re-try the operation. + */ + if (type == OCFS2_WRITE_MMAP && ret == -EAGAIN) { + BUG_ON(wc->w_target_folio); + ret = 0; + goto out_quota; + } - /* - * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock - * the target page. In this case, we exit with no error and no target - * page. This will trigger the caller, page_mkwrite(), to re-try - * the operation. - */ - if (ret == -EAGAIN) { - BUG_ON(wc->w_target_page); - ret = 0; + mlog_errno(ret); goto out_quota; } @@ -1854,8 +1806,8 @@ try_again: ocfs2_free_alloc_context(meta_ac); success: - if (pagep) - *pagep = wc->w_target_page; + if (foliop) + *foliop = wc->w_target_folio; *fsdata = wc; return 0; out_quota: @@ -1874,7 +1826,7 @@ out: * to VM code. */ if (wc->w_target_locked) - unlock_page(mmap_page); + folio_unlock(mmap_folio); ocfs2_free_write_ctxt(inode, wc); @@ -1905,9 +1857,10 @@ out: return ret; } -static int ocfs2_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) +static int ocfs2_write_begin(const struct kiocb *iocb, + struct address_space *mapping, + loff_t pos, unsigned len, + struct folio **foliop, void **fsdata) { int ret; struct buffer_head *di_bh = NULL; @@ -1922,14 +1875,14 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, /* * Take alloc sem here to prevent concurrent lookups. That way * the mapping, zeroing and tree manipulation within - * ocfs2_write() will be safe against ->readpage(). This + * ocfs2_write() will be safe against ->read_folio(). This * should also serve to lock out allocation from a shared * writeable region. */ down_write(&OCFS2_I(inode)->ip_alloc_sem); ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_BUFFER, - pagep, fsdata, di_bh, NULL); + foliop, fsdata, di_bh, NULL); if (ret) { mlog_errno(ret); goto out_fail; @@ -1953,18 +1906,15 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos, struct ocfs2_dinode *di, struct ocfs2_write_ctxt *wc) { - void *kaddr; - if (unlikely(*copied < len)) { - if (!PageUptodate(wc->w_target_page)) { + if (!folio_test_uptodate(wc->w_target_folio)) { *copied = 0; return; } } - kaddr = kmap_atomic(wc->w_target_page); - memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied); - kunmap_atomic(kaddr); + memcpy_from_folio(di->id2.i_data.id_data + pos, wc->w_target_folio, + pos, *copied); trace_ocfs2_write_end_inline( (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -1973,17 +1923,16 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos, le16_to_cpu(di->i_dyn_features)); } -int ocfs2_write_end_nolock(struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, void *fsdata) +int ocfs2_write_end_nolock(struct address_space *mapping, loff_t pos, + unsigned len, unsigned copied, void *fsdata) { int i, ret; - unsigned from, to, start = pos & (PAGE_SIZE - 1); + size_t from, to, start = pos & (PAGE_SIZE - 1); struct inode *inode = mapping->host; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_write_ctxt *wc = fsdata; struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; handle_t *handle = wc->w_handle; - struct page *tmppage; BUG_ON(!list_empty(&wc->w_unwritten_list)); @@ -2002,29 +1951,44 @@ int ocfs2_write_end_nolock(struct address_space *mapping, goto out_write_size; } - if (unlikely(copied < len) && wc->w_target_page) { - if (!PageUptodate(wc->w_target_page)) + if (unlikely(copied < len) && wc->w_target_folio) { + loff_t new_isize; + + if (!folio_test_uptodate(wc->w_target_folio)) copied = 0; - ocfs2_zero_new_buffers(wc->w_target_page, start+copied, - start+len); + new_isize = max_t(loff_t, i_size_read(inode), pos + copied); + if (new_isize > folio_pos(wc->w_target_folio)) + ocfs2_zero_new_buffers(wc->w_target_folio, start+copied, + start+len); + else { + /* + * When folio is fully beyond new isize (data copy + * failed), do not bother zeroing the folio. Invalidate + * it instead so that writeback does not get confused + * put page & buffer dirty bits into inconsistent + * state. + */ + block_invalidate_folio(wc->w_target_folio, 0, + folio_size(wc->w_target_folio)); + } } - if (wc->w_target_page) - flush_dcache_page(wc->w_target_page); + if (wc->w_target_folio) + flush_dcache_folio(wc->w_target_folio); - for(i = 0; i < wc->w_num_pages; i++) { - tmppage = wc->w_pages[i]; + for (i = 0; i < wc->w_num_folios; i++) { + struct folio *folio = wc->w_folios[i]; - /* This is the direct io target page. */ - if (tmppage == NULL) + /* This is the direct io target folio */ + if (folio == NULL) continue; - if (tmppage == wc->w_target_page) { + if (folio == wc->w_target_folio) { from = wc->w_target_from; to = wc->w_target_to; - BUG_ON(from > PAGE_SIZE || - to > PAGE_SIZE || + BUG_ON(from > folio_size(folio) || + to > folio_size(folio) || to < from); } else { /* @@ -2033,13 +1997,17 @@ int ocfs2_write_end_nolock(struct address_space *mapping, * to flush their entire range. */ from = 0; - to = PAGE_SIZE; + to = folio_size(folio); } - if (page_has_buffers(tmppage)) { - if (handle && ocfs2_should_order_data(inode)) - ocfs2_jbd2_file_inode(handle, inode); - block_commit_write(tmppage, from, to); + if (folio_buffers(folio)) { + if (handle && ocfs2_should_order_data(inode)) { + loff_t start_byte = folio_pos(folio) + from; + loff_t length = to - from; + ocfs2_jbd2_inode_add_write(handle, inode, + start_byte, length); + } + block_commit_write(folio, from, to); } } @@ -2053,10 +2021,11 @@ out_write_size: } inode->i_blocks = ocfs2_inode_sector_count(inode); di->i_size = cpu_to_le64((u64)i_size_read(inode)); - inode->i_mtime = inode->i_ctime = current_time(inode); - di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); - di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); - ocfs2_update_inode_fsync_trans(handle, inode, 1); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); + di->i_mtime = di->i_ctime = cpu_to_le64(inode_get_mtime_sec(inode)); + di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode)); + if (handle) + ocfs2_update_inode_fsync_trans(handle, inode, 1); } if (handle) ocfs2_journal_dirty(handle, wc->w_di_bh); @@ -2067,7 +2036,7 @@ out: * this lock and will ask for the page lock when flushing the data. * put it here to preserve the unlock order. */ - ocfs2_unlock_pages(wc); + ocfs2_unlock_folios(wc); if (handle) ocfs2_commit_trans(osb, handle); @@ -2080,9 +2049,10 @@ out: return copied; } -static int ocfs2_write_end(struct file *file, struct address_space *mapping, +static int ocfs2_write_end(const struct kiocb *iocb, + struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct folio *folio, void *fsdata) { int ret; struct inode *inode = mapping->host; @@ -2153,13 +2123,30 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, struct ocfs2_dio_write_ctxt *dwc = NULL; struct buffer_head *di_bh = NULL; u64 p_blkno; - loff_t pos = iblock << inode->i_sb->s_blocksize_bits; + unsigned int i_blkbits = inode->i_sb->s_blocksize_bits; + loff_t pos = iblock << i_blkbits; + sector_t endblk = (i_size_read(inode) - 1) >> i_blkbits; unsigned len, total_len = bh_result->b_size; int ret = 0, first_get_block = 0; len = osb->s_clustersize - (pos & (osb->s_clustersize - 1)); len = min(total_len, len); + /* + * bh_result->b_size is count in get_more_blocks according to write + * "pos" and "end", we need map twice to return different buffer state: + * 1. area in file size, not set NEW; + * 2. area out file size, set NEW. + * + * iblock endblk + * |--------|---------|---------|--------- + * |<-------area in file------->| + */ + + if ((iblock <= endblk) && + ((iblock + ((len - 1) >> i_blkbits)) > endblk)) + len = (endblk - iblock + 1) << i_blkbits; + mlog(0, "get block of %lu at %llu:%u req %u\n", inode->i_ino, pos, len, total_len); @@ -2243,6 +2230,9 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, if (desc->c_needs_zero) set_buffer_new(bh_result); + if (iblock > endblk) + set_buffer_new(bh_result); + /* May sleep in end_io. It should not happen in a irq context. So defer * it to dio work queue. */ set_buffer_defer_completion(bh_result); @@ -2269,8 +2259,6 @@ unlock: ocfs2_inode_unlock(inode, 1); brelse(di_bh); out: - if (ret < 0) - ret = -EIO; return ret; } @@ -2290,7 +2278,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode, struct ocfs2_alloc_context *meta_ac = NULL; handle_t *handle = NULL; loff_t end = offset + bytes; - int ret = 0, credits = 0, locked = 0; + int ret = 0, credits = 0; ocfs2_init_dealloc_ctxt(&dealloc); @@ -2301,13 +2289,6 @@ static int ocfs2_dio_end_io_write(struct inode *inode, !dwc->dw_orphaned) goto out; - /* ocfs2_file_write_iter will get i_mutex, so we need not lock if we - * are in that context. */ - if (dwc->dw_writer_pid != task_pid_nr(current)) { - inode_lock(inode); - locked = 1; - } - ret = ocfs2_inode_lock(inode, &di_bh, 1); if (ret < 0) { mlog_errno(ret); @@ -2316,7 +2297,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode, down_write(&oi->ip_alloc_sem); - /* Delete orphan before acquire i_mutex. */ + /* Delete orphan before acquire i_rwsem. */ if (dwc->dw_orphaned) { BUG_ON(dwc->dw_writer_pid != task_pid_nr(current)); @@ -2361,6 +2342,11 @@ static int ocfs2_dio_end_io_write(struct inode *inode, } list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) { + ret = ocfs2_assure_trans_credits(handle, credits); + if (ret < 0) { + mlog_errno(ret); + break; + } ret = ocfs2_mark_extent_written(inode, &et, handle, ue->ue_cpos, 1, ue->ue_phys, @@ -2388,8 +2374,6 @@ out: if (meta_ac) ocfs2_free_alloc_context(meta_ac); ocfs2_run_deallocs(osb, &dealloc); - if (locked) - inode_unlock(inode); ocfs2_dio_free_write_ctx(inode, dwc); return ret; @@ -2456,20 +2440,21 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, get_block, - ocfs2_dio_end_io, NULL, 0); + ocfs2_dio_end_io, 0); } const struct address_space_operations ocfs2_aops = { - .readpage = ocfs2_readpage, - .readpages = ocfs2_readpages, - .writepage = ocfs2_writepage, + .dirty_folio = block_dirty_folio, + .read_folio = ocfs2_read_folio, + .readahead = ocfs2_readahead, + .writepages = ocfs2_writepages, .write_begin = ocfs2_write_begin, .write_end = ocfs2_write_end, .bmap = ocfs2_bmap, .direct_IO = ocfs2_direct_IO, - .invalidatepage = block_invalidatepage, - .releasepage = ocfs2_releasepage, - .migratepage = buffer_migrate_page, + .invalidate_folio = block_invalidate_folio, + .release_folio = ocfs2_release_folio, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, - .error_remove_page = generic_error_remove_page, + .error_remove_folio = generic_error_remove_folio, }; |
