diff options
Diffstat (limited to 'fs/nilfs2/btnode.c')
| -rw-r--r-- | fs/nilfs2/btnode.c | 255 |
1 files changed, 158 insertions, 97 deletions
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index a35ae35e6932..568367129092 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -1,25 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * btnode.c - NILFS B-tree node cache + * NILFS B-tree node cache * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * This file was originally written by Seiji Kihara <kihara@osrg.net> - * and fully revised by Ryusuke Konishi <ryusuke@osrg.net> for - * stabilization and simplification. + * Originally written by Seiji Kihara. + * Fully revised by Ryusuke Konishi for stabilization and simplification. * */ @@ -34,6 +20,24 @@ #include "page.h" #include "btnode.h" + +/** + * nilfs_init_btnc_inode - initialize B-tree node cache inode + * @btnc_inode: inode to be initialized + * + * nilfs_init_btnc_inode() sets up an inode for B-tree node cache. + */ +void nilfs_init_btnc_inode(struct inode *btnc_inode) +{ + struct nilfs_inode_info *ii = NILFS_I(btnc_inode); + + btnc_inode->i_mode = S_IFREG; + ii->i_flags = 0; + memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap)); + mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS); + btnc_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops; +} + void nilfs_btnode_cache_clear(struct address_space *btnc) { invalidate_mapping_pages(btnc, 0, -1); @@ -43,44 +47,58 @@ void nilfs_btnode_cache_clear(struct address_space *btnc) struct buffer_head * nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) { - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; struct buffer_head *bh; - bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); + bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); if (unlikely(!bh)) - return NULL; + return ERR_PTR(-ENOMEM); if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || buffer_dirty(bh))) { - brelse(bh); - BUG(); + /* + * The block buffer at the specified new address was already + * in use. This can happen if it is a virtual block number + * and has been reallocated due to corruption of the bitmap + * used to manage its allocation state (if not, the buffer + * clearing of an abandoned b-tree node is missing somewhere). + */ + nilfs_error(inode->i_sb, + "state inconsistency probably due to duplicate use of b-tree node block address %llu (ino=%lu)", + (unsigned long long)blocknr, inode->i_ino); + goto failed; } - memset(bh->b_data, 0, 1 << inode->i_blkbits); - bh->b_bdev = inode->i_sb->s_bdev; + memset(bh->b_data, 0, i_blocksize(inode)); bh->b_blocknr = blocknr; set_buffer_mapped(bh); set_buffer_uptodate(bh); - unlock_page(bh->b_page); - page_cache_release(bh->b_page); + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); return bh; + +failed: + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); + brelse(bh); + return ERR_PTR(-EIO); } int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, - sector_t pblocknr, int mode, + sector_t pblocknr, blk_opf_t opf, struct buffer_head **pbh, sector_t *submit_ptr) { struct buffer_head *bh; - struct inode *inode = NILFS_BTNC_I(btnc); - struct page *page; + struct inode *inode = btnc->host; + struct folio *folio; int err; - bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); + bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); if (unlikely(!bh)) return -ENOMEM; err = -EEXIST; /* internal code */ - page = bh->b_page; + folio = bh->b_folio; if (buffer_uptodate(bh) || buffer_dirty(bh)) goto found; @@ -100,13 +118,13 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, } } - if (mode == READA) { + if (opf & REQ_RAHEAD) { if (pblocknr != *submit_ptr + 1 || !trylock_buffer(bh)) { err = -EBUSY; /* internal code */ brelse(bh); goto out_locked; } - } else { /* mode == READ */ + } else { /* opf == REQ_OP_READ */ lock_buffer(bh); } if (buffer_uptodate(bh)) { @@ -115,11 +133,10 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, goto found; } set_buffer_mapped(bh); - bh->b_bdev = inode->i_sb->s_bdev; bh->b_blocknr = pblocknr; /* set block address for read */ bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(mode, bh); + submit_bh(opf, bh); bh->b_blocknr = blocknr; /* set back to the given block address */ *submit_ptr = pblocknr; err = 0; @@ -127,8 +144,8 @@ found: *pbh = bh; out_locked: - unlock_page(page); - page_cache_release(page); + folio_unlock(folio); + folio_put(folio); return err; } @@ -142,36 +159,58 @@ out_locked: void nilfs_btnode_delete(struct buffer_head *bh) { struct address_space *mapping; - struct page *page = bh->b_page; - pgoff_t index = page_index(page); + struct folio *folio = bh->b_folio; + pgoff_t index = folio->index; int still_dirty; - page_cache_get(page); - lock_page(page); - wait_on_page_writeback(page); + folio_get(folio); + folio_lock(folio); + folio_wait_writeback(folio); nilfs_forget_buffer(bh); - still_dirty = PageDirty(page); - mapping = page->mapping; - unlock_page(page); - page_cache_release(page); + still_dirty = folio_test_dirty(folio); + mapping = folio->mapping; + folio_unlock(folio); + folio_put(folio); if (!still_dirty && mapping) invalidate_inode_pages2_range(mapping, index, index); } /** - * nilfs_btnode_prepare_change_key - * prepare to move contents of the block for old key to one of new key. - * the old buffer will not be removed, but might be reused for new buffer. - * it might return -ENOMEM because of memory allocation errors, - * and might return -EIO because of disk read errors. + * nilfs_btnode_prepare_change_key - prepare to change the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_prepare_change_key() prepares to move the contents of the + * b-tree node block of the old key given in the "oldkey" member of @ctxt to + * the position of the new key given in the "newkey" member of @ctxt in the + * page cache @btnc. Here, the key of the block is an index in units of + * blocks, and if the page and block sizes match, it matches the page index + * in the page cache. + * + * If the page size and block size match, this function attempts to move the + * entire folio, and in preparation for this, inserts the original folio into + * the new index of the cache. If this insertion fails or if the page size + * and block size are different, it falls back to a copy preparation using + * nilfs_btnode_create_block(), inserts a new block at the position + * corresponding to "newkey", and stores the buffer head pointer in the + * "newbh" member of @ctxt. + * + * Note that the current implementation does not support folio sizes larger + * than the page size. + * + * Return: 0 on success, or one of the following negative error codes on + * failure: + * * %-EIO - I/O error (metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_btnode_prepare_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) { struct buffer_head *obh, *nbh; - struct inode *inode = NILFS_BTNC_I(btnc); + struct inode *inode = btnc->host; __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; int err; @@ -181,89 +220,94 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, obh = ctxt->bh; ctxt->newbh = NULL; - if (inode->i_blkbits == PAGE_CACHE_SHIFT) { - lock_page(obh->b_page); - /* - * We cannot call radix_tree_preload for the kernels older - * than 2.6.23, because it is not exported for modules. - */ + if (inode->i_blkbits == PAGE_SHIFT) { + struct folio *ofolio = obh->b_folio; + folio_lock(ofolio); retry: - err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); - if (err) - goto failed_unlock; - /* BUG_ON(oldkey != obh->b_page->index); */ - if (unlikely(oldkey != obh->b_page->index)) - NILFS_PAGE_BUG(obh->b_page, + /* BUG_ON(oldkey != obh->b_folio->index); */ + if (unlikely(oldkey != ofolio->index)) + NILFS_FOLIO_BUG(ofolio, "invalid oldkey %lld (newkey=%lld)", (unsigned long long)oldkey, (unsigned long long)newkey); - spin_lock_irq(&btnc->tree_lock); - err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); - spin_unlock_irq(&btnc->tree_lock); + xa_lock_irq(&btnc->i_pages); + err = __xa_insert(&btnc->i_pages, newkey, ofolio, GFP_NOFS); + xa_unlock_irq(&btnc->i_pages); /* - * Note: page->index will not change to newkey until + * Note: folio->index will not change to newkey until * nilfs_btnode_commit_change_key() will be called. - * To protect the page in intermediate state, the page lock + * To protect the folio in intermediate state, the folio lock * is held. */ - radix_tree_preload_end(); if (!err) return 0; - else if (err != -EEXIST) + else if (err != -EBUSY) goto failed_unlock; err = invalidate_inode_pages2_range(btnc, newkey, newkey); if (!err) goto retry; /* fallback to copy mode */ - unlock_page(obh->b_page); + folio_unlock(ofolio); } nbh = nilfs_btnode_create_block(btnc, newkey); - if (!nbh) - return -ENOMEM; + if (IS_ERR(nbh)) + return PTR_ERR(nbh); BUG_ON(nbh == obh); ctxt->newbh = nbh; return 0; failed_unlock: - unlock_page(obh->b_page); + folio_unlock(obh->b_folio); return err; } /** - * nilfs_btnode_commit_change_key - * commit the change_key operation prepared by prepare_change_key(). + * nilfs_btnode_commit_change_key - commit the change of the search key of + * a b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_commit_change_key() executes the key change based on the + * context @ctxt prepared by nilfs_btnode_prepare_change_key(). If no valid + * block buffer is prepared in "newbh" of @ctxt (i.e., a full folio move), + * this function removes the folio from the old index and completes the move. + * Otherwise, it copies the block data and inherited flag states of "oldbh" + * to "newbh" and clears the "oldbh" from the cache. In either case, the + * relocated buffer is marked as dirty. + * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. */ void nilfs_btnode_commit_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) { struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh; __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; - struct page *opage; + struct folio *ofolio; if (oldkey == newkey) return; if (nbh == NULL) { /* blocksize == pagesize */ - opage = obh->b_page; - if (unlikely(oldkey != opage->index)) - NILFS_PAGE_BUG(opage, + ofolio = obh->b_folio; + if (unlikely(oldkey != ofolio->index)) + NILFS_FOLIO_BUG(ofolio, "invalid oldkey %lld (newkey=%lld)", (unsigned long long)oldkey, (unsigned long long)newkey); mark_buffer_dirty(obh); - spin_lock_irq(&btnc->tree_lock); - radix_tree_delete(&btnc->page_tree, oldkey); - radix_tree_tag_set(&btnc->page_tree, newkey, - PAGECACHE_TAG_DIRTY); - spin_unlock_irq(&btnc->tree_lock); + xa_lock_irq(&btnc->i_pages); + __xa_erase(&btnc->i_pages, oldkey); + __xa_set_mark(&btnc->i_pages, newkey, PAGECACHE_TAG_DIRTY); + xa_unlock_irq(&btnc->i_pages); - opage->index = obh->b_blocknr = newkey; - unlock_page(opage); + ofolio->index = obh->b_blocknr = newkey; + folio_unlock(ofolio); } else { nilfs_copy_buffer(nbh, obh); mark_buffer_dirty(nbh); @@ -275,8 +319,19 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, } /** - * nilfs_btnode_abort_change_key - * abort the change_key operation prepared by prepare_change_key(). + * nilfs_btnode_abort_change_key - abort the change of the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_abort_change_key() cancels the key change associated with the + * context @ctxt prepared via nilfs_btnode_prepare_change_key() and performs + * any necessary cleanup. If no valid block buffer is prepared in "newbh" of + * @ctxt, this function removes the folio from the destination index and aborts + * the move. Otherwise, it clears "newbh" from the cache. + * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. */ void nilfs_btnode_abort_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) @@ -288,10 +343,16 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc, return; if (nbh == NULL) { /* blocksize == pagesize */ - spin_lock_irq(&btnc->tree_lock); - radix_tree_delete(&btnc->page_tree, newkey); - spin_unlock_irq(&btnc->tree_lock); - unlock_page(ctxt->bh->b_page); - } else - brelse(nbh); + xa_erase_irq(&btnc->i_pages, newkey); + folio_unlock(ctxt->bh->b_folio); + } else { + /* + * When canceling a buffer that a prepare operation has + * allocated to copy a node block to another location, use + * nilfs_btnode_delete() to initialize and release the buffer + * so that the buffer flags will not be in an inconsistent + * state when it is reallocated. + */ + nilfs_btnode_delete(nbh); + } } |
