Diffstat (limited to 'fs/gfs2')
44 files changed, 3209 insertions, 3277 deletions
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 03c966840422..7bd231d16d4a 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -1,9 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only config GFS2_FS tristate "GFS2 file system support" + select BUFFER_HEAD select FS_POSIX_ACL select CRC32 - select LIBCRC32C select QUOTACTL select FS_IOMAP help diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 3dcde4912413..443640e6fb9c 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -109,7 +109,7 @@ out: return error; } -int gfs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { struct inode *inode = d_inode(dentry); @@ -135,14 +135,14 @@ int gfs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, mode = inode->i_mode; if (type == ACL_TYPE_ACCESS && acl) { - ret = posix_acl_update_mode(&init_user_ns, inode, &mode, &acl); + ret = posix_acl_update_mode(&nop_mnt_idmap, inode, &mode, &acl); if (ret) goto unlock; } ret = __gfs2_set_acl(inode, acl, type); if (!ret && mode != inode->i_mode) { - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); inode->i_mode = mode; mark_inode_dirty(inode); } diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index b8de8c148f5c..82f5b09c04e6 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -11,9 +11,9 @@ #define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12) -extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu); -extern int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); -extern int gfs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, - struct posix_acl *acl, int type); +struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu); +int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); +int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, + struct posix_acl *acl, int type); #endif /* __ACL_DOT_H__ */ diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index e782b4f1d104..e79ad087512a 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -37,27 +37,6 @@ #include "aops.h" -void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, - unsigned int from, unsigned int len) -{ - struct buffer_head *head = page_buffers(page); - unsigned int bsize = head->b_size; - struct buffer_head *bh; - unsigned int to = from + len; - unsigned int start, end; - - for (bh = head, start = 0; bh != head || !start; - bh = bh->b_this_page, start = end) { - end = start + bsize; - if (end <= from) - continue; - if (start >= to) - break; - set_buffer_uptodate(bh); - gfs2_trans_add_data(ip->i_gl, bh); - } -} - /** * gfs2_get_block_noalloc - Fills in a buffer head with details about a block * @inode: The inode @@ -82,90 +61,90 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, } /** - * gfs2_write_jdata_page - gfs2 jdata-specific version of block_write_full_page - * @page: The page to write + * gfs2_write_jdata_folio - gfs2 jdata-specific version of block_write_full_folio + * @folio: The folio to write * @wbc: The writeback control * - * This is the same as calling block_write_full_page, but it also + * This is the same as calling block_write_full_folio, but it also * writes pages outside of i_size */ -static int gfs2_write_jdata_page(struct page *page, +static int gfs2_write_jdata_folio(struct folio *folio, struct writeback_control *wbc) { - struct inode * const inode = page->mapping->host; + struct inode * 
const inode = folio->mapping->host; loff_t i_size = i_size_read(inode); - const pgoff_t end_index = i_size >> PAGE_SHIFT; - unsigned offset; /* - * The page straddles i_size. It must be zeroed out on each and every + * The folio straddles i_size. It must be zeroed out on each and every * writepage invocation because it may be mmapped. "A file is mapped * in multiples of the page size. For a file that is not a multiple of - * the page size, the remaining memory is zeroed when mapped, and + * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - offset = i_size & (PAGE_SIZE - 1); - if (page->index == end_index && offset) - zero_user_segment(page, offset, PAGE_SIZE); + if (folio_pos(folio) < i_size && i_size < folio_next_pos(folio)) + folio_zero_segment(folio, offset_in_folio(folio, i_size), + folio_size(folio)); - return __block_write_full_page(inode, page, gfs2_get_block_noalloc, wbc, - end_buffer_async_write); + return __block_write_full_folio(inode, folio, gfs2_get_block_noalloc, + wbc); } /** - * __gfs2_jdata_writepage - The core of jdata writepage - * @page: The page to write + * __gfs2_jdata_write_folio - The core of jdata writepage + * @folio: The folio to write * @wbc: The writeback control * - * This is shared between writepage and writepages and implements the - * core of the writepage operation. If a transaction is required then - * PageChecked will have been set and the transaction will have + * Implements the core of write back. If a transaction is required then + * the checked flag will have been set and the transaction will have * already been started before this is called. */ - -static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) +static int __gfs2_jdata_write_folio(struct folio *folio, + struct writeback_control *wbc) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - if (PageChecked(page)) { - ClearPageChecked(page); - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - BIT(BH_Dirty)|BIT(BH_Uptodate)); + if (folio_test_checked(folio)) { + folio_clear_checked(folio); + if (!folio_buffers(folio)) { + create_empty_buffers(folio, + inode->i_sb->s_blocksize, + BIT(BH_Dirty)|BIT(BH_Uptodate)); } - gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize); + gfs2_trans_add_databufs(ip->i_gl, folio, 0, folio_size(folio)); } - return gfs2_write_jdata_page(page, wbc); + return gfs2_write_jdata_folio(folio, wbc); } /** - * gfs2_jdata_writepage - Write complete page - * @page: Page to write + * gfs2_jdata_writeback - Write jdata folios to the log + * @mapping: The mapping to write * @wbc: The writeback control * * Returns: errno - * */ - -static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) +int gfs2_jdata_writeback(struct address_space *mapping, struct writeback_control *wbc) { - struct inode *inode = page->mapping->host; + struct inode *inode = mapping->host; struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_sbd *sdp = GFS2_SB(mapping->host); + struct folio *folio = NULL; + int error; - if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) - goto out; - if (PageChecked(page) || current->journal_info) - goto out_ignore; - return __gfs2_jdata_writepage(page, wbc); + BUG_ON(current->journal_info); + if (gfs2_assert_withdraw(sdp, 
ip->i_gl->gl_state == LM_ST_EXCLUSIVE)) + return 0; -out_ignore: - redirty_page_for_writepage(wbc, page); -out: - unlock_page(page); - return 0; + while ((folio = writeback_iter(mapping, wbc, folio, &error))) { + if (folio_test_checked(folio)) { + folio_redirty_for_writepage(wbc, folio); + folio_unlock(folio); + continue; + } + error = __gfs2_jdata_write_folio(folio, wbc); + } + + return error; } /** @@ -179,99 +158,99 @@ static int gfs2_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); - struct iomap_writepage_ctx wpc = { }; + struct iomap_writepage_ctx wpc = { + .inode = mapping->host, + .wbc = wbc, + .ops = &gfs2_writeback_ops, + }; int ret; /* - * Even if we didn't write any pages here, we might still be holding + * Even if we didn't write enough pages here, we might still be holding * dirty pages in the ail. We forcibly flush the ail because we don't * want balance_dirty_pages() to loop indefinitely trying to write out * pages held in the ail that it can't find. */ - ret = iomap_writepages(mapping, wbc, &wpc, &gfs2_writeback_ops); - if (ret == 0) + ret = iomap_writepages(&wpc); + if (ret == 0 && wbc->nr_to_write > 0) set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags); return ret; } /** - * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages + * gfs2_write_jdata_batch - Write back a folio batch's worth of folios * @mapping: The mapping * @wbc: The writeback control - * @pvec: The vector of pages - * @nr_pages: The number of pages to write + * @fbatch: The batch of folios * @done_index: Page index * * Returns: non-zero if loop should terminate, zero otherwise */ -static int gfs2_write_jdata_pagevec(struct address_space *mapping, +static int gfs2_write_jdata_batch(struct address_space *mapping, struct writeback_control *wbc, - struct pagevec *pvec, - int nr_pages, + struct folio_batch *fbatch, pgoff_t *done_index) { struct inode *inode = mapping->host; struct gfs2_sbd *sdp = GFS2_SB(inode); - unsigned nrblocks = nr_pages * (PAGE_SIZE >> inode->i_blkbits); + unsigned nrblocks; int i; int ret; + size_t size = 0; + int nr_folios = folio_batch_count(fbatch); + + for (i = 0; i < nr_folios; i++) + size += folio_size(fbatch->folios[i]); + nrblocks = size >> inode->i_blkbits; ret = gfs2_trans_begin(sdp, nrblocks, nrblocks); if (ret < 0) return ret; - for(i = 0; i < nr_pages; i++) { - struct page *page = pvec->pages[i]; + for (i = 0; i < nr_folios; i++) { + struct folio *folio = fbatch->folios[i]; - *done_index = page->index; + *done_index = folio->index; - lock_page(page); + folio_lock(folio); - if (unlikely(page->mapping != mapping)) { + if (unlikely(folio->mapping != mapping)) { continue_unlock: - unlock_page(page); + folio_unlock(folio); continue; } - if (!PageDirty(page)) { + if (!folio_test_dirty(folio)) { /* someone wrote it for us */ goto continue_unlock; } - if (PageWriteback(page)) { + if (folio_test_writeback(folio)) { if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); + folio_wait_writeback(folio); else goto continue_unlock; } - BUG_ON(PageWriteback(page)); - if (!clear_page_dirty_for_io(page)) + BUG_ON(folio_test_writeback(folio)); + if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; trace_wbc_writepage(wbc, inode_to_bdi(inode)); - ret = __gfs2_jdata_writepage(page, wbc); + ret = __gfs2_jdata_write_folio(folio, wbc); if (unlikely(ret)) { - if (ret == AOP_WRITEPAGE_ACTIVATE) { - unlock_page(page); - ret = 0; - } else { - - /* - * done_index is set past this page, - * so media 
errors will not choke - * background writeout for the entire - * file. This has consequences for - * range_cyclic semantics (ie. it may - * not be suitable for data integrity - * writeout). - */ - *done_index = page->index + 1; - ret = 1; - break; - } + /* + * done_index is set past this page, so media errors + * will not choke background writeout for the entire + * file. This has consequences for range_cyclic + * semantics (ie. it may not be suitable for data + * integrity writeout). + */ + *done_index = folio_next_index(folio); + ret = 1; + break; } /* @@ -305,8 +284,8 @@ static int gfs2_write_cache_jdata(struct address_space *mapping, { int ret = 0; int done = 0; - struct pagevec pvec; - int nr_pages; + struct folio_batch fbatch; + int nr_folios; pgoff_t writeback_index; pgoff_t index; pgoff_t end; @@ -315,7 +294,7 @@ static int gfs2_write_cache_jdata(struct address_space *mapping, int range_whole = 0; xa_mark_t tag; - pagevec_init(&pvec); + folio_batch_init(&fbatch); if (wbc->range_cyclic) { writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; @@ -331,27 +310,25 @@ static int gfs2_write_cache_jdata(struct address_space *mapping, range_whole = 1; cycled = 1; /* ignore range_cyclic tests */ } - if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) - tag = PAGECACHE_TAG_TOWRITE; - else - tag = PAGECACHE_TAG_DIRTY; + tag = wbc_to_tag(wbc); retry: if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag_pages_for_writeback(mapping, index, end); done_index = index; while (!done && (index <= end)) { - nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, - tag); - if (nr_pages == 0) + nr_folios = filemap_get_folios_tag(mapping, &index, end, + tag, &fbatch); + if (nr_folios == 0) break; - ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, &done_index); + ret = gfs2_write_jdata_batch(mapping, wbc, &fbatch, + &done_index); if (ret) done = 1; if (ret > 0) ret = 0; - pagevec_release(&pvec); + folio_batch_release(&fbatch); cond_resched(); } @@ -398,43 +375,39 @@ static int gfs2_jdata_writepages(struct address_space *mapping, } /** - * stuffed_readpage - Fill in a Linux page with stuffed file data + * stuffed_read_folio - Fill in a Linux folio with stuffed file data * @ip: the inode - * @page: the page + * @folio: the folio * * Returns: errno */ -static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) +static int stuffed_read_folio(struct gfs2_inode *ip, struct folio *folio) { - struct buffer_head *dibh; - u64 dsize = i_size_read(&ip->i_inode); - void *kaddr; - int error; + struct buffer_head *dibh = NULL; + size_t dsize = i_size_read(&ip->i_inode); + void *from = NULL; + int error = 0; /* * Due to the order of unstuffing files and ->fault(), we can be - * asked for a zero page in the case of a stuffed file being extended, + * asked for a zero folio in the case of a stuffed file being extended, * so we need to supply one here. It doesn't happen often. 
*/ - if (unlikely(page->index)) { - zero_user(page, 0, PAGE_SIZE); - SetPageUptodate(page); - return 0; + if (unlikely(folio->index)) { + dsize = 0; + } else { + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out; + from = dibh->b_data + sizeof(struct gfs2_dinode); } - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - return error; - - kaddr = kmap_atomic(page); - memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); - memset(kaddr + dsize, 0, PAGE_SIZE - dsize); - kunmap_atomic(kaddr); - flush_dcache_page(page); + folio_fill_tail(folio, 0, from, dsize); brelse(dibh); - SetPageUptodate(page); +out: + folio_end_read(folio, error == 0); - return 0; + return error; } /** @@ -447,19 +420,18 @@ static int gfs2_read_folio(struct file *file, struct folio *folio) struct inode *inode = folio->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - int error; + int error = 0; if (!gfs2_is_jdata(ip) || (i_blocksize(inode) == PAGE_SIZE && !folio_buffers(folio))) { - error = iomap_read_folio(folio, &gfs2_iomap_ops); + iomap_bio_read_folio(folio, &gfs2_iomap_ops); } else if (gfs2_is_stuffed(ip)) { - error = stuffed_readpage(ip, &folio->page); - folio_unlock(folio); + error = stuffed_read_folio(ip, folio); } else { error = mpage_read_folio(folio, gfs2_block_map); } - if (unlikely(gfs2_withdrawn(sdp))) + if (gfs2_withdrawn(sdp)) return -EIO; return error; @@ -474,31 +446,29 @@ static int gfs2_read_folio(struct file *file, struct folio *folio) * */ -int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, - unsigned size) +ssize_t gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, + size_t size) { struct address_space *mapping = ip->i_inode.i_mapping; unsigned long index = *pos >> PAGE_SHIFT; - unsigned offset = *pos & (PAGE_SIZE - 1); - unsigned copied = 0; - unsigned amt; - struct page *page; - void *p; + size_t copied = 0; do { - amt = size - copied; - if (offset + size > PAGE_SIZE) - amt = PAGE_SIZE - offset; - page = read_cache_page(mapping, index, gfs2_read_folio, NULL); - if (IS_ERR(page)) - return PTR_ERR(page); - p = kmap_atomic(page); - memcpy(buf + copied, p + offset, amt); - kunmap_atomic(p); - put_page(page); - copied += amt; - index++; - offset = 0; + size_t offset, chunk; + struct folio *folio; + + folio = read_cache_folio(mapping, index, gfs2_read_folio, NULL); + if (IS_ERR(folio)) { + if (PTR_ERR(folio) == -EINTR) + continue; + return PTR_ERR(folio); + } + offset = *pos + copied - folio_pos(folio); + chunk = min(size - copied, folio_size(folio) - offset); + memcpy_from_folio(buf + copied, folio, offset, chunk); + index = folio_next_index(folio); + folio_put(folio); + copied += chunk; } while(copied < size); (*pos) += size; return size; @@ -529,7 +499,7 @@ static void gfs2_readahead(struct readahead_control *rac) else if (gfs2_is_jdata(ip)) mpage_readahead(rac, gfs2_block_map); else - iomap_readahead(rac, &gfs2_iomap_ops); + iomap_bio_readahead(rac, &gfs2_iomap_ops); } /** @@ -572,7 +542,7 @@ out: gfs2_trans_end(sdp); } -static bool jdata_dirty_folio(struct address_space *mapping, +static bool gfs2_jdata_dirty_folio(struct address_space *mapping, struct folio *folio) { if (current->journal_info) @@ -741,27 +711,26 @@ static const struct address_space_operations gfs2_aops = { .writepages = gfs2_writepages, .read_folio = gfs2_read_folio, .readahead = gfs2_readahead, - .dirty_folio = filemap_dirty_folio, + .dirty_folio = iomap_dirty_folio, .release_folio = iomap_release_folio, 
.invalidate_folio = iomap_invalidate_folio, .bmap = gfs2_bmap, - .direct_IO = noop_direct_IO, .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, - .error_remove_page = generic_error_remove_page, + .error_remove_folio = generic_error_remove_folio, }; static const struct address_space_operations gfs2_jdata_aops = { - .writepage = gfs2_jdata_writepage, .writepages = gfs2_jdata_writepages, .read_folio = gfs2_read_folio, .readahead = gfs2_readahead, - .dirty_folio = jdata_dirty_folio, + .dirty_folio = gfs2_jdata_dirty_folio, .bmap = gfs2_bmap, + .migrate_folio = buffer_migrate_folio, .invalidate_folio = gfs2_invalidate_folio, .release_folio = gfs2_release_folio, .is_partially_uptodate = block_is_partially_uptodate, - .error_remove_page = generic_error_remove_page, + .error_remove_folio = generic_error_remove_folio, }; void gfs2_set_aops(struct inode *inode) diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h index ff9877a68780..bf002522a782 100644 --- a/fs/gfs2/aops.h +++ b/fs/gfs2/aops.h @@ -8,8 +8,7 @@ #include "incore.h" -extern void adjust_fs_space(struct inode *inode); -extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, - unsigned int from, unsigned int len); +void adjust_fs_space(struct inode *inode); +int gfs2_jdata_writeback(struct address_space *mapping, struct writeback_control *wbc); #endif /* __AOPS_DOT_H__ */ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index e7537fd305dd..131091520de6 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -43,53 +43,51 @@ struct metapath { static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length); /** - * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page + * gfs2_unstuffer_folio - unstuff a stuffed inode into a block cached by a folio * @ip: the inode * @dibh: the dinode buffer * @block: the block number that was allocated - * @page: The (optional) page. This is looked up if @page is NULL + * @folio: The folio. 
* * Returns: errno */ - -static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, - u64 block, struct page *page) +static int gfs2_unstuffer_folio(struct gfs2_inode *ip, struct buffer_head *dibh, + u64 block, struct folio *folio) { struct inode *inode = &ip->i_inode; - if (!PageUptodate(page)) { - void *kaddr = kmap(page); + if (!folio_test_uptodate(folio)) { + void *kaddr = kmap_local_folio(folio, 0); u64 dsize = i_size_read(inode); memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); - memset(kaddr + dsize, 0, PAGE_SIZE - dsize); - kunmap(page); + memset(kaddr + dsize, 0, folio_size(folio) - dsize); + kunmap_local(kaddr); - SetPageUptodate(page); + folio_mark_uptodate(folio); } if (gfs2_is_jdata(ip)) { - struct buffer_head *bh; + struct buffer_head *bh = folio_buffers(folio); - if (!page_has_buffers(page)) - create_empty_buffers(page, BIT(inode->i_blkbits), - BIT(BH_Uptodate)); + if (!bh) + bh = create_empty_buffers(folio, + BIT(inode->i_blkbits), BIT(BH_Uptodate)); - bh = page_buffers(page); if (!buffer_mapped(bh)) map_bh(bh, inode->i_sb, block); set_buffer_uptodate(bh); gfs2_trans_add_data(ip->i_gl, bh); } else { - set_page_dirty(page); + folio_mark_dirty(folio); gfs2_ordered_add_inode(ip); } return 0; } -static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct page *page) +static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct folio *folio) { struct buffer_head *bh, *dibh; struct gfs2_dinode *di; @@ -106,7 +104,7 @@ static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct page *page) and write it out to disk */ unsigned int n = 1; - error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL); + error = gfs2_alloc_blocks(ip, &block, &n, 0); if (error) goto out_brelse; if (isdir) { @@ -118,7 +116,7 @@ static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct page *page) dibh, sizeof(struct gfs2_dinode)); brelse(bh); } else { - error = gfs2_unstuffer_page(ip, dibh, block, page); + error = gfs2_unstuffer_folio(ip, dibh, block, folio); if (error) goto out_brelse; } @@ -157,17 +155,17 @@ out_brelse: int gfs2_unstuff_dinode(struct gfs2_inode *ip) { struct inode *inode = &ip->i_inode; - struct page *page; + struct folio *folio; int error; down_write(&ip->i_rw_mutex); - page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS); - error = -ENOMEM; - if (!page) + folio = filemap_grab_folio(inode->i_mapping, 0); + error = PTR_ERR(folio); + if (IS_ERR(folio)) goto out; - error = __gfs2_unstuff_inode(ip, page); - unlock_page(page); - put_page(page); + error = __gfs2_unstuff_inode(ip, folio); + folio_unlock(folio); + folio_put(folio); out: up_write(&ip->i_rw_mutex); return error; @@ -317,6 +315,12 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end) } } +static inline struct buffer_head * +metapath_dibh(struct metapath *mp) +{ + return mp->mp_bh[0]; +} + static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, unsigned int x, unsigned int h) { @@ -415,13 +419,12 @@ static void release_metapath(struct metapath *mp) * gfs2_extent_length - Returns length of an extent of blocks * @bh: The metadata block * @ptr: Current position in @bh - * @limit: Max extent length to return * @eob: Set to 1 if we hit "end of block" * * Returns: The length of the extent (minimum of one block) */ -static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob) +static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, int *eob) { const __be64 *end = (__be64 
*)(bh->b_data + bh->b_size); const __be64 *first = ptr; @@ -660,7 +663,7 @@ static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - struct buffer_head *dibh = mp->mp_bh[0]; + struct buffer_head *dibh = metapath_dibh(mp); u64 bn; unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; size_t dblks = iomap->length >> inode->i_blkbits; @@ -702,7 +705,7 @@ static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, i = mp->mp_aheight; do { n = blks - alloced; - ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); + ret = gfs2_alloc_blocks(ip, &bn, &n, 0); if (ret) goto out; alloced += n; @@ -913,7 +916,7 @@ unstuff: goto do_alloc; bh = mp->mp_bh[ip->i_height - 1]; - len = gfs2_extent_length(bh, ptr, len, &eob); + len = gfs2_extent_length(bh, ptr, &eob); iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits; iomap->length = len << inode->i_blkbits; @@ -956,36 +959,56 @@ hole_found: goto out; } -static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos, - unsigned len) +static struct folio * +gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len) { + struct inode *inode = iter->inode; + struct gfs2_inode *ip = GFS2_I(inode); unsigned int blockmask = i_blocksize(inode) - 1; struct gfs2_sbd *sdp = GFS2_SB(inode); unsigned int blocks; + struct folio *folio; + int status; + + if (!gfs2_is_jdata(ip) && !gfs2_is_stuffed(ip)) + return iomap_get_folio(iter, pos, len); blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits; - return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0); + status = gfs2_trans_begin(sdp, RES_DINODE + blocks, 0); + if (status) + return ERR_PTR(status); + + folio = iomap_get_folio(iter, pos, len); + if (IS_ERR(folio)) + gfs2_trans_end(sdp); + return folio; } -static void gfs2_iomap_page_done(struct inode *inode, loff_t pos, - unsigned copied, struct page *page) +static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos, + unsigned copied, struct folio *folio) { struct gfs2_trans *tr = current->journal_info; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - if (page && !gfs2_is_stuffed(ip)) - gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied); + if (gfs2_is_jdata(ip) && !gfs2_is_stuffed(ip)) + gfs2_trans_add_databufs(ip->i_gl, folio, + offset_in_folio(folio, pos), + copied); - if (tr->tr_num_buf_new) - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + folio_unlock(folio); + folio_put(folio); - gfs2_trans_end(sdp); + if (gfs2_is_jdata(ip) || gfs2_is_stuffed(ip)) { + if (tr->tr_num_buf_new) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + gfs2_trans_end(sdp); + } } -static const struct iomap_page_ops gfs2_iomap_page_ops = { - .page_prepare = gfs2_iomap_page_prepare, - .page_done = gfs2_iomap_page_done, +const struct iomap_write_ops gfs2_iomap_write_ops = { + .get_folio = gfs2_iomap_get_folio, + .put_folio = gfs2_iomap_put_folio, }; static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, @@ -1060,8 +1083,6 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, gfs2_trans_end(sdp); } - if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip)) - iomap->page_ops = &gfs2_iomap_page_ops; return 0; out_trans_end: @@ -1277,13 +1298,16 @@ int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock, /* * NOTE: Never call gfs2_block_zero_range with an open transaction because it * uses iomap write to perform its actions, which begin their own transactions - * (iomap_begin, page_prepare, 
etc.) + * (iomap_begin, get_folio, etc.) */ -static int gfs2_block_zero_range(struct inode *inode, loff_t from, - unsigned int length) +static int gfs2_block_zero_range(struct inode *inode, loff_t from, loff_t length) { BUG_ON(current->journal_info); - return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops); + if (from >= inode->i_size) + return 0; + length = min(length, inode->i_size - from); + return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops, + &gfs2_iomap_write_ops, NULL); } #define GFS2_JTRUNC_REVOKES 8192 @@ -1372,7 +1396,7 @@ static int trunc_start(struct inode *inode, u64 newsize) ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; i_size_write(inode, newsize); - ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); gfs2_dinode_out(ip, dibh->b_data); if (journaled) @@ -1569,8 +1593,7 @@ out_unlock: /* Every transaction boundary, we rewrite the dinode to keep its di_blocks current in case of failure. */ - ip->i_inode.i_mtime = ip->i_inode.i_ctime = - current_time(&ip->i_inode); + inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -1702,7 +1725,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) struct buffer_head *dibh, *bh; struct gfs2_holder rd_gh; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; - u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift; + unsigned int bsize = 1 << bsize_shift; + u64 lblock = (offset + bsize - 1) >> bsize_shift; __u16 start_list[GFS2_MAX_META_HEIGHT]; __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL; unsigned int start_aligned, end_aligned; @@ -1713,10 +1737,10 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) u64 prev_bnr = 0; __be64 *start, *end; - if (offset >= maxsize) { + if (offset + bsize - 1 >= maxsize) { /* - * The starting point lies beyond the allocated meta-data; - * there are no blocks do deallocate. + * The starting point lies beyond the allocated metadata; + * there are no blocks to deallocate. 
*/ return 0; } @@ -1810,7 +1834,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) gfs2_assert_withdraw(sdp, bh); if (gfs2_assert_withdraw(sdp, prev_bnr != bh->b_blocknr)) { - fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u," + fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u, " "s_h:%u, mp_h:%u\n", (unsigned long long)ip->i_no_addr, prev_bnr, ip->i_height, strip_h, mp_h); @@ -1936,7 +1960,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) gfs2_statfs_change(sdp, 0, +btotal, 0); gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, ip->i_inode.i_gid); - ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); up_write(&ip->i_rw_mutex); @@ -1979,7 +2003,7 @@ static int trunc_end(struct gfs2_inode *ip) gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); gfs2_ordered_del_inode(ip); } - ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_meta(ip->i_gl, dibh); @@ -2021,14 +2045,6 @@ static int do_shrink(struct inode *inode, u64 newsize) return error; } -void gfs2_trim_blocks(struct inode *inode) -{ - int ret; - - ret = do_shrink(inode, inode->i_size); - WARN_ON(ret != 0); -} - /** * do_grow - Touch and update inode size * @inode: The inode @@ -2088,7 +2104,7 @@ static int do_grow(struct inode *inode, u64 size) goto do_end_trans; truncate_setsize(inode, size); - ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -2456,23 +2472,26 @@ out: return error; } -static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode, - loff_t offset) +static ssize_t gfs2_writeback_range(struct iomap_writepage_ctx *wpc, + struct folio *folio, u64 offset, unsigned int len, u64 end_pos) { - int ret; - - if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode)))) + if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(wpc->inode)))) return -EIO; - if (offset >= wpc->iomap.offset && - offset < wpc->iomap.offset + wpc->iomap.length) - return 0; + if (offset < wpc->iomap.offset || + offset >= wpc->iomap.offset + wpc->iomap.length) { + int ret; - memset(&wpc->iomap, 0, sizeof(wpc->iomap)); - ret = gfs2_iomap_get(inode, offset, INT_MAX, &wpc->iomap); - return ret; + memset(&wpc->iomap, 0, sizeof(wpc->iomap)); + ret = gfs2_iomap_get(wpc->inode, offset, INT_MAX, &wpc->iomap); + if (ret) + return ret; + } + + return iomap_add_to_ioend(wpc, folio, offset, end_pos, len); } const struct iomap_writeback_ops gfs2_writeback_ops = { - .map_blocks = gfs2_map_blocks, + .writeback_range = gfs2_writeback_range, + .writeback_submit = iomap_ioend_writeback_submit, }; diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index 53cce6c08e81..6cdc72dd55a3 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h @@ -44,27 +44,27 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip, } extern const struct iomap_ops gfs2_iomap_ops; +extern const struct iomap_write_ops gfs2_iomap_write_ops; extern const struct iomap_writeback_ops gfs2_writeback_ops; -extern int gfs2_unstuff_dinode(struct gfs2_inode *ip); -extern int gfs2_block_map(struct inode *inode, sector_t lblock, - struct buffer_head *bh, int 
create); -extern int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, - struct iomap *iomap); -extern int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length, - struct iomap *iomap); -extern int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock, - unsigned int *extlen); -extern int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock, - unsigned *extlen, bool *new); -extern int gfs2_setattr_size(struct inode *inode, u64 size); -extern void gfs2_trim_blocks(struct inode *inode); -extern int gfs2_truncatei_resume(struct gfs2_inode *ip); -extern int gfs2_file_dealloc(struct gfs2_inode *ip); -extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, - unsigned int len); -extern int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd); -extern void gfs2_free_journal_extents(struct gfs2_jdesc *jd); -extern int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length); +int gfs2_unstuff_dinode(struct gfs2_inode *ip); +int gfs2_block_map(struct inode *inode, sector_t lblock, + struct buffer_head *bh, int create); +int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, + struct iomap *iomap); +int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length, + struct iomap *iomap); +int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock, + unsigned int *extlen); +int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock, + unsigned *extlen, bool *new); +int gfs2_setattr_size(struct inode *inode, u64 size); +int gfs2_truncatei_resume(struct gfs2_inode *ip); +int gfs2_file_dealloc(struct gfs2_inode *ip); +int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, + unsigned int len); +int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd); +void gfs2_free_journal_extents(struct gfs2_jdesc *jd); +int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length); #endif /* __BMAP_DOT_H__ */ diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 2e215e8c3c88..95050e719233 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -21,7 +21,9 @@ /** * gfs2_drevalidate - Check directory lookup consistency - * @dentry: the mapping to check + * @dir: expected parent directory inode + * @name: expexted name + * @dentry: dentry to check * @flags: lookup flags * * Check to make sure the lookup necessary to arrive at this inode from its @@ -30,50 +32,43 @@ * Returns: 1 if the dentry is ok, 0 if it isn't */ -static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags) +static int gfs2_drevalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { - struct dentry *parent; - struct gfs2_sbd *sdp; - struct gfs2_inode *dip; + struct gfs2_sbd *sdp = GFS2_SB(dir); + struct gfs2_inode *dip = GFS2_I(dir); struct inode *inode; struct gfs2_holder d_gh; struct gfs2_inode *ip = NULL; - int error, valid = 0; + int error, valid; int had_lock = 0; if (flags & LOOKUP_RCU) return -ECHILD; - parent = dget_parent(dentry); - sdp = GFS2_SB(d_inode(parent)); - dip = GFS2_I(d_inode(parent)); inode = d_inode(dentry); if (inode) { if (is_bad_inode(inode)) - goto out; + return 0; ip = GFS2_I(inode); } - if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) { - valid = 1; - goto out; - } + if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) + return 1; had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); if (!had_lock) { error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); if (error) - goto out; + return 0; } - error = 
gfs2_dir_check(d_inode(parent), &dentry->d_name, ip); + error = gfs2_dir_check(dir, name, ip); valid = inode ? !error : (error == -ENOENT); if (!had_lock) gfs2_glock_dq_uninit(&d_gh); -out: - dput(parent); return valid; } diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 54a6d17b8c25..509e2f0d97e7 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -60,6 +60,7 @@ #include <linux/crc32.h> #include <linux/vmalloc.h> #include <linux/bio.h> +#include <linux/log2.h> #include "gfs2.h" #include "incore.h" @@ -130,7 +131,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); if (ip->i_inode.i_size < offset + size) i_size_write(&ip->i_inode, offset + size); - ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -227,7 +228,7 @@ out: if (ip->i_inode.i_size < offset + copied) i_size_write(&ip->i_inode, offset + copied); - ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); @@ -562,15 +563,18 @@ static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf, int ret = 0; ret = gfs2_dirent_offset(GFS2_SB(inode), buf); - if (ret < 0) - goto consist_inode; - + if (ret < 0) { + gfs2_consist_inode(GFS2_I(inode)); + return ERR_PTR(-EIO); + } offset = ret; prev = NULL; dent = buf + offset; size = be16_to_cpu(dent->de_rec_len); - if (gfs2_check_dirent(GFS2_SB(inode), dent, offset, size, len, 1)) - goto consist_inode; + if (gfs2_check_dirent(GFS2_SB(inode), dent, offset, size, len, 1)) { + gfs2_consist_inode(GFS2_I(inode)); + return ERR_PTR(-EIO); + } do { ret = scan(dent, name, opaque); if (ret) @@ -582,8 +586,10 @@ static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf, dent = buf + offset; size = be16_to_cpu(dent->de_rec_len); if (gfs2_check_dirent(GFS2_SB(inode), dent, offset, size, - len, 0)) - goto consist_inode; + len, 0)) { + gfs2_consist_inode(GFS2_I(inode)); + return ERR_PTR(-EIO); + } } while(1); switch(ret) { @@ -597,10 +603,6 @@ static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf, BUG_ON(ret > 0); return ERR_PTR(ret); } - -consist_inode: - gfs2_consist_inode(GFS2_I(inode)); - return ERR_PTR(-EIO); } static int dirent_check_reclen(struct gfs2_inode *dip, @@ -609,14 +611,16 @@ static int dirent_check_reclen(struct gfs2_inode *dip, const void *ptr = d; u16 rec_len = be16_to_cpu(d->de_rec_len); - if (unlikely(rec_len < sizeof(struct gfs2_dirent))) - goto broken; + if (unlikely(rec_len < sizeof(struct gfs2_dirent))) { + gfs2_consist_inode(dip); + return -EIO; + } ptr += rec_len; if (ptr < end_p) return rec_len; if (ptr == end_p) return -ENOENT; -broken: + gfs2_consist_inode(dip); return -EIO; } @@ -868,7 +872,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, struct gfs2_dirent *dent; struct timespec64 tv = current_time(inode); - error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); + error = gfs2_alloc_blocks(ip, &bn, &n, 0); if (error) return NULL; bh = gfs2_meta_new(ip->i_gl, bn); @@ -909,7 +913,6 @@ static int dir_make_exhash(struct inode *inode) struct qstr args; struct buffer_head *bh, *dibh; struct gfs2_leaf *leaf; - int y; u32 x; __be64 *lp; u64 bn; @@ -976,9 +979,7 @@ static int dir_make_exhash(struct inode 
*inode) i_size_write(inode, sdp->sd_sb.sb_bsize / 2); gfs2_add_inode_blocks(&dip->i_inode, 1); dip->i_diskflags |= GFS2_DIF_EXHASH; - - for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; - dip->i_depth = y; + dip->i_depth = ilog2(sdp->sd_hash_ptrs); gfs2_dinode_out(dip, dibh->b_data); @@ -1814,7 +1815,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, gfs2_inum_out(nip, dent); dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip)); - tv = current_time(&ip->i_inode); + tv = inode_set_ctime_current(&ip->i_inode); if (ip->i_diskflags & GFS2_DIF_EXHASH) { leaf = (struct gfs2_leaf *)bh->b_data; be16_add_cpu(&leaf->lf_entries, 1); @@ -1825,7 +1826,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, da->bh = NULL; brelse(bh); ip->i_entries++; - ip->i_inode.i_mtime = ip->i_inode.i_ctime = tv; + inode_set_mtime_to_ts(&ip->i_inode, tv); if (S_ISDIR(nip->i_inode.i_mode)) inc_nlink(&ip->i_inode); mark_inode_dirty(inode); @@ -1876,7 +1877,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) const struct qstr *name = &dentry->d_name; struct gfs2_dirent *dent, *prev = NULL; struct buffer_head *bh; - struct timespec64 tv = current_time(&dip->i_inode); + struct timespec64 tv; /* Returns _either_ the entry (if its first in block) or the previous entry otherwise */ @@ -1896,6 +1897,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) } dirent_del(dip, bh, prev, dent); + tv = inode_set_ctime_current(&dip->i_inode); if (dip->i_diskflags & GFS2_DIF_EXHASH) { struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data; u16 entries = be16_to_cpu(leaf->lf_entries); @@ -1910,7 +1912,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) if (!dip->i_entries) gfs2_consist_inode(dip); dip->i_entries--; - dip->i_inode.i_mtime = dip->i_inode.i_ctime = tv; + inode_set_mtime_to_ts(&dip->i_inode, tv); if (d_is_dir(dentry)) drop_nlink(&dip->i_inode); mark_inode_dirty(&dip->i_inode); @@ -1951,7 +1953,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, dent->de_type = cpu_to_be16(new_type); brelse(bh); - dip->i_inode.i_mtime = dip->i_inode.i_ctime = current_time(&dip->i_inode); + inode_set_mtime_to_ts(&dip->i_inode, inode_set_ctime_current(&dip->i_inode)); mark_inode_dirty_sync(&dip->i_inode); return 0; } diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 5b76480c17c9..25a857c78b53 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -23,32 +23,32 @@ struct gfs2_diradd { int save_loc; }; -extern struct inode *gfs2_dir_search(struct inode *dir, - const struct qstr *filename, - bool fail_on_exist); -extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, - const struct gfs2_inode *ip); -extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, - const struct gfs2_inode *ip, struct gfs2_diradd *da); +struct inode *gfs2_dir_search(struct inode *dir, + const struct qstr *filename, + bool fail_on_exist); +int gfs2_dir_check(struct inode *dir, const struct qstr *filename, + const struct gfs2_inode *ip); +int gfs2_dir_add(struct inode *inode, const struct qstr *filename, + const struct gfs2_inode *ip, struct gfs2_diradd *da); static inline void gfs2_dir_no_add(struct gfs2_diradd *da) { brelse(da->bh); da->bh = NULL; } -extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); -extern int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, - struct file_ra_state *f_ra); -extern int gfs2_dir_mvino(struct 
gfs2_inode *dip, const struct qstr *filename, - const struct gfs2_inode *nip, unsigned int new_type); +int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); +int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, + struct file_ra_state *f_ra); +int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, + const struct gfs2_inode *nip, unsigned int new_type); -extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); +int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); -extern int gfs2_diradd_alloc_required(struct inode *dir, - const struct qstr *filename, - struct gfs2_diradd *da); -extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, - struct buffer_head **bhp); -extern void gfs2_dir_hash_inval(struct gfs2_inode *ip); +int gfs2_diradd_alloc_required(struct inode *dir, + const struct qstr *filename, + struct gfs2_diradd *da); +int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, + struct buffer_head **bhp); +void gfs2_dir_hash_inval(struct gfs2_inode *ip); static inline u32 gfs2_disk_hash(const char *data, int len) { diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index cf40895233f5..3334c394ce9c 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -138,8 +138,6 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, return ERR_PTR(-ESTALE); inode = gfs2_lookup_by_inum(sdp, inum->no_addr, inum->no_formal_ino, GFS2_BLKST_DINODE); - if (IS_ERR(inode)) - return ERR_CAST(inode); return d_obtain_alias(inode); } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index eea5be4fbf0e..b2d23c98c996 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -15,6 +15,7 @@ #include <linux/mm.h> #include <linux/mount.h> #include <linux/fs.h> +#include <linux/filelock.h> #include <linux/gfs2_ondisk.h> #include <linux/falloc.h> #include <linux/swap.h> @@ -154,7 +155,7 @@ static inline u32 gfs2_gfsflags_to_fsflags(struct inode *inode, u32 gfsflags) return fsflags; } -int gfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa) +int gfs2_fileattr_get(struct dentry *dentry, struct file_kattr *fa) { struct inode *inode = d_inode(dentry); struct gfs2_inode *ip = GFS2_I(inode); @@ -235,7 +236,7 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask) goto out; if (!IS_IMMUTABLE(inode)) { - error = gfs2_permission(&init_user_ns, inode, MAY_WRITE); + error = gfs2_permission(&nop_mnt_idmap, inode, MAY_WRITE); if (error) goto out; } @@ -250,6 +251,7 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask) error = filemap_fdatawait(inode->i_mapping); if (error) goto out; + truncate_inode_pages(inode->i_mapping, 0); if (new_flags & GFS2_DIF_JDATA) gfs2_ordered_del_inode(ip); } @@ -259,7 +261,7 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask) error = gfs2_meta_inode_buffer(ip, &bh); if (error) goto out_trans_end; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); gfs2_trans_add_meta(ip->i_gl, bh); ip->i_diskflags = new_flags; gfs2_dinode_out(ip, bh->b_data); @@ -273,8 +275,8 @@ out: return error; } -int gfs2_fileattr_set(struct user_namespace *mnt_userns, - struct dentry *dentry, struct fileattr *fa) +int gfs2_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, struct file_kattr *fa) { struct inode *inode = d_inode(dentry); u32 fsflags = fa->flags, gfsflags = 0; @@ -375,23 +377,23 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) } /** - * gfs2_allocate_page_backing - Allocate blocks for a write fault - * @page: The 
(locked) page to allocate backing for + * gfs2_allocate_folio_backing - Allocate blocks for a write fault + * @folio: The (locked) folio to allocate backing for * @length: Size of the allocation * - * We try to allocate all the blocks required for the page in one go. This + * We try to allocate all the blocks required for the folio in one go. This * might fail for various reasons, so we keep trying until all the blocks to - * back this page are allocated. If some of the blocks are already allocated, + * back this folio are allocated. If some of the blocks are already allocated, * that is ok too. */ -static int gfs2_allocate_page_backing(struct page *page, unsigned int length) +static int gfs2_allocate_folio_backing(struct folio *folio, size_t length) { - u64 pos = page_offset(page); + u64 pos = folio_pos(folio); do { struct iomap iomap = { }; - if (gfs2_iomap_alloc(page->mapping->host, pos, length, &iomap)) + if (gfs2_iomap_alloc(folio->mapping->host, pos, length, &iomap)) return -EIO; if (length < iomap.length) @@ -413,16 +415,16 @@ static int gfs2_allocate_page_backing(struct page *page, unsigned int length) static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vmf->vma->vm_file); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - struct gfs2_alloc_parms ap = { .aflags = 0, }; - u64 offset = page_offset(page); + struct gfs2_alloc_parms ap = {}; + u64 pos = folio_pos(folio); unsigned int data_blocks, ind_blocks, rblocks; vm_fault_t ret = VM_FAULT_LOCKED; struct gfs2_holder gh; - unsigned int length; + size_t length; loff_t size; int err; @@ -431,27 +433,27 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); err = gfs2_glock_nq(&gh); if (err) { - ret = block_page_mkwrite_return(err); + ret = vmf_fs_error(err); goto out_uninit; } - /* Check page index against inode size */ + /* Check folio index against inode size */ size = i_size_read(inode); - if (offset >= size) { + if (pos >= size) { ret = VM_FAULT_SIGBUS; goto out_unlock; } - /* Update file times before taking page lock */ + /* Update file times before taking folio lock */ file_update_time(vmf->vma->vm_file); - /* page is wholly or partially inside EOF */ - if (size - offset < PAGE_SIZE) - length = size - offset; + /* folio is wholly or partially inside EOF */ + if (size - pos < folio_size(folio)) + length = size - pos; else - length = PAGE_SIZE; + length = folio_size(folio); - gfs2_size_hint(vmf->vma->vm_file, offset, length); + gfs2_size_hint(vmf->vma->vm_file, pos, length); set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); @@ -462,18 +464,19 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) */ if (!gfs2_is_stuffed(ip) && - !gfs2_write_alloc_required(ip, offset, length)) { - lock_page(page); - if (!PageUptodate(page) || page->mapping != inode->i_mapping) { + !gfs2_write_alloc_required(ip, pos, length)) { + folio_lock(folio); + if (!folio_test_uptodate(folio) || + folio->mapping != inode->i_mapping) { ret = VM_FAULT_NOPAGE; - unlock_page(page); + folio_unlock(folio); } goto out_unlock; } err = gfs2_rindex_update(sdp); if (err) { - ret = block_page_mkwrite_return(err); + ret = vmf_fs_error(err); goto out_unlock; } @@ -481,12 +484,12 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) ap.target = data_blocks + ind_blocks; err = gfs2_quota_lock_check(ip, &ap); if (err) { - ret = 
block_page_mkwrite_return(err); + ret = vmf_fs_error(err); goto out_unlock; } err = gfs2_inplace_reserve(ip, &ap); if (err) { - ret = block_page_mkwrite_return(err); + ret = vmf_fs_error(err); goto out_quota_unlock; } @@ -499,35 +502,35 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) } err = gfs2_trans_begin(sdp, rblocks, 0); if (err) { - ret = block_page_mkwrite_return(err); + ret = vmf_fs_error(err); goto out_trans_fail; } - /* Unstuff, if required, and allocate backing blocks for page */ + /* Unstuff, if required, and allocate backing blocks for folio */ if (gfs2_is_stuffed(ip)) { err = gfs2_unstuff_dinode(ip); if (err) { - ret = block_page_mkwrite_return(err); + ret = vmf_fs_error(err); goto out_trans_end; } } - lock_page(page); + folio_lock(folio); /* If truncated, we must retry the operation, we may have raced * with the glock demotion code. */ - if (!PageUptodate(page) || page->mapping != inode->i_mapping) { + if (!folio_test_uptodate(folio) || folio->mapping != inode->i_mapping) { ret = VM_FAULT_NOPAGE; goto out_page_locked; } - err = gfs2_allocate_page_backing(page, length); + err = gfs2_allocate_folio_backing(folio, length); if (err) - ret = block_page_mkwrite_return(err); + ret = vmf_fs_error(err); out_page_locked: if (ret != VM_FAULT_LOCKED) - unlock_page(page); + folio_unlock(folio); out_trans_end: gfs2_trans_end(sdp); out_trans_fail: @@ -539,8 +542,8 @@ out_unlock: out_uninit: gfs2_holder_uninit(&gh); if (ret == VM_FAULT_LOCKED) { - set_page_dirty(page); - wait_for_stable_page(page); + folio_mark_dirty(folio); + folio_wait_stable(folio); } sb_end_pagefault(inode->i_sb); return ret; @@ -557,7 +560,7 @@ static vm_fault_t gfs2_fault(struct vm_fault *vmf) gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); err = gfs2_glock_nq(&gh); if (err) { - ret = block_page_mkwrite_return(err); + ret = vmf_fs_error(err); goto out_uninit; } ret = filemap_fault(vmf); @@ -629,6 +632,9 @@ int gfs2_open_common(struct inode *inode, struct file *file) ret = generic_file_open(inode, file); if (ret) return ret; + + if (!gfs2_is_jdata(GFS2_I(inode))) + file->f_mode |= FMODE_CAN_ODIRECT; } fp = kzalloc(sizeof(struct gfs2_file), GFP_NOFS); @@ -738,7 +744,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - int sync_state = inode->i_state & I_DIRTY; + int sync_state = inode_state_read_once(inode) & I_DIRTY; struct gfs2_inode *ip = GFS2_I(inode); int ret = 0, ret1 = 0; @@ -783,9 +789,13 @@ static inline bool should_fault_in_pages(struct iov_iter *i, if (!user_backed_iter(i)) return false; + /* + * Try to fault in multiple pages initially. When that doesn't result + * in any progress, fall back to a single page. + */ size = PAGE_SIZE; offs = offset_in_page(iocb->ki_pos); - if (*prev_count != count || !*window_size) { + if (*prev_count != count) { size_t nr_dirtied; nr_dirtied = max(current->nr_dirtied_pause - @@ -810,7 +820,7 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, /* * In this function, we disable page faults when we're holding the * inode glock while doing I/O. If a page fault occurs, we indicate - * that the inode glock may be dropped, fault in the pages manually, + * that the inode glock should be dropped, fault in the pages manually, * and retry. 
* * Unlike generic_file_read_iter, for reads, iomap_dio_rw can trigger @@ -869,12 +879,13 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, struct gfs2_inode *ip = GFS2_I(inode); size_t prev_count = 0, window_size = 0; size_t written = 0; + bool enough_retries; ssize_t ret; /* * In this function, we disable page faults when we're holding the * inode glock while doing I/O. If a page fault occurs, we indicate - * that the inode glock may be dropped, fault in the pages manually, + * that the inode glock should be dropped, fault in the pages manually, * and retry. * * For writes, iomap_dio_rw only triggers manual page faults, so we @@ -912,11 +923,17 @@ retry: if (ret > 0) written = ret; + enough_retries = prev_count == iov_iter_count(from) && + window_size <= PAGE_SIZE; if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { gfs2_glock_dq(gh); window_size -= fault_in_iov_iter_readable(from, window_size); - if (window_size) - goto retry; + if (window_size) { + if (!enough_retries) + goto retry; + /* fall back to buffered I/O */ + ret = 0; + } } out_unlock: if (gfs2_holder_queued(gh)) @@ -940,7 +957,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) /* * In this function, we disable page faults when we're holding the * inode glock while doing I/O. If a page fault occurs, we indicate - * that the inode glock may be dropped, fault in the pages manually, + * that the inode glock should be dropped, fault in the pages manually, * and retry. */ @@ -1007,7 +1024,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, /* * In this function, we disable page faults when we're holding the * inode glock while doing I/O. If a page fault occurs, we indicate - * that the inode glock may be dropped, fault in the pages manually, + * that the inode glock should be dropped, fault in the pages manually, * and retry. */ @@ -1018,8 +1035,8 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, } gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh); -retry: if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { +retry: window_size -= fault_in_iov_iter_readable(from, window_size); if (!window_size) { ret = -EFAULT; @@ -1040,15 +1057,12 @@ retry: goto out_unlock; } - current->backing_dev_info = inode_to_bdi(inode); pagefault_disable(); - ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops, + &gfs2_iomap_write_ops, NULL); pagefault_enable(); - current->backing_dev_info = NULL; - if (ret > 0) { - iocb->ki_pos += ret; + if (ret > 0) written += ret; - } if (inode == sdp->sd_rindex) gfs2_glock_dq_uninit(statfs_gh); @@ -1109,14 +1123,16 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret) goto out_unlock; - ret = file_update_time(file); - if (ret) - goto out_unlock; - if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = file->f_mapping; ssize_t buffered, ret2; + /* + * Note that under direct I/O, we don't allow and inode + * timestamp updates, so we're not calling file_update_time() + * here. 
+ */ + ret = gfs2_file_direct_write(iocb, from, &gh); if (ret < 0 || !iov_iter_count(from)) goto out_unlock; @@ -1143,6 +1159,10 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!ret || ret2 > 0) ret += ret2; } else { + ret = file_update_time(file); + if (ret) + goto out_unlock; + ret = gfs2_file_buffered_write(iocb, from, &gh); if (likely(ret > 0)) ret = generic_write_sync(iocb, ret); @@ -1234,7 +1254,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t struct inode *inode = file_inode(file); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_alloc_parms ap = { .aflags = 0, }; + struct gfs2_alloc_parms ap = {}; unsigned int data_blocks = 0, ind_blocks = 0, rblocks; loff_t bytes, max_bytes, max_blks; int error; @@ -1422,25 +1442,29 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); struct lm_lockstruct *ls = &sdp->sd_lockstruct; + int ret; - if (!(fl->fl_flags & FL_POSIX)) + if (!(fl->c.flc_flags & FL_POSIX)) return -ENOLCK; - if (cmd == F_CANCELLK) { - /* Hack: */ - cmd = F_SETLK; - fl->fl_type = F_UNLCK; - } - if (unlikely(gfs2_withdrawn(sdp))) { - if (fl->fl_type == F_UNLCK) + if (gfs2_withdrawn(sdp)) { + if (lock_is_unlock(fl)) locks_lock_file_wait(file, fl); return -EIO; } - if (IS_GETLK(cmd)) - return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); - else if (fl->fl_type == F_UNLCK) - return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl); - else - return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl); + down_read(&ls->ls_sem); + ret = -ENODEV; + if (likely(ls->ls_dlm != NULL)) { + if (cmd == F_CANCELLK) + ret = dlm_posix_cancel(ls->ls_dlm, ip->i_no_addr, file, fl); + else if (IS_GETLK(cmd)) + ret = dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); + else if (lock_is_unlock(fl)) + ret = dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl); + else + ret = dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl); + } + up_read(&ls->ls_sem); + return ret; } static void __flock_holder_uninit(struct file *file, struct gfs2_holder *fl_gh) @@ -1469,7 +1493,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) int error = 0; int sleeptime; - state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; + state = lock_is_write(fl) ? 
LM_ST_EXCLUSIVE : LM_ST_SHARED; flags = GL_EXACT | GL_NOPID; if (!IS_SETLKW(cmd)) flags |= LM_FLAG_TRY_1CB; @@ -1481,8 +1505,8 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) if (fl_gh->gh_state == state) goto out; locks_init_lock(&request); - request.fl_type = F_UNLCK; - request.fl_flags = FL_FLOCK; + request.c.flc_type = F_UNLCK; + request.c.flc_flags = FL_FLOCK; locks_lock_file_wait(file, &request); gfs2_glock_dq(fl_gh); gfs2_holder_reinit(state, flags, fl_gh); @@ -1543,10 +1567,10 @@ static void do_unflock(struct file *file, struct file_lock *fl) static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) { - if (!(fl->fl_flags & FL_FLOCK)) + if (!(fl->c.flc_flags & FL_FLOCK)) return -ENOLCK; - if (fl->fl_type == F_UNLCK) { + if (lock_is_unlock(fl)) { do_unflock(file, fl); return 0; } else { @@ -1567,10 +1591,11 @@ const struct file_operations gfs2_file_fops = { .fsync = gfs2_fsync, .lock = gfs2_lock, .flock = gfs2_flock, - .splice_read = generic_file_splice_read, + .splice_read = copy_splice_read, .splice_write = gfs2_file_splice_write, .setlease = simple_nosetlease, .fallocate = gfs2_fallocate, + .fop_flags = FOP_ASYNC_LOCK, }; const struct file_operations gfs2_dir_fops = { @@ -1583,6 +1608,7 @@ const struct file_operations gfs2_dir_fops = { .lock = gfs2_lock, .flock = gfs2_flock, .llseek = default_llseek, + .fop_flags = FOP_ASYNC_LOCK, }; #endif /* CONFIG_GFS2_FS_LOCKING_DLM */ @@ -1598,7 +1624,7 @@ const struct file_operations gfs2_file_fops_nolock = { .open = gfs2_open, .release = gfs2_release, .fsync = gfs2_fsync, - .splice_read = generic_file_splice_read, + .splice_read = copy_splice_read, .splice_write = gfs2_file_splice_write, .setlease = generic_setlease, .fallocate = gfs2_fallocate, diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 524f3c96b9a4..92e029104d8a 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -34,8 +34,8 @@ #include <linux/lockref.h> #include <linux/rhashtable.h> #include <linux/pid_namespace.h> -#include <linux/fdtable.h> #include <linux/file.h> +#include <linux/random.h> #include "gfs2.h" #include "incore.h" @@ -61,13 +61,10 @@ struct gfs2_glock_iter { typedef void (*glock_examiner) (struct gfs2_glock * gl); static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); -static void __gfs2_glock_dq(struct gfs2_holder *gh); -static void handle_callback(struct gfs2_glock *gl, unsigned int state, - unsigned long delay, bool remote); +static void request_demote(struct gfs2_glock *gl, unsigned int state, + unsigned long delay, bool remote); static struct dentry *gfs2_root; -static struct workqueue_struct *glock_workqueue; -struct workqueue_struct *gfs2_delete_workqueue; static LIST_HEAD(lru_list); static atomic_t lru_count = ATOMIC_INIT(0); static DEFINE_SPINLOCK(lru_lock); @@ -140,44 +137,43 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu) kmem_cache_free(gfs2_glock_cachep, gl); } -/** - * glock_blocked_by_withdraw - determine if we can still use a glock - * @gl: the glock - * - * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted - * when we're withdrawn. For example, to maintain metadata integrity, we should - * disallow the use of inode and rgrp glocks when withdrawn. Other glocks, like - * iopen or the transaction glocks may be safely used because none of their - * metadata goes through the journal. So in general, we should disallow all - * glocks that are journaled, and allow all the others. 
One exception is: - * we need to allow our active journal to be promoted and demoted so others - * may recover it and we can reacquire it when they're done. - */ -static bool glock_blocked_by_withdraw(struct gfs2_glock *gl) +static void __gfs2_glock_free(struct gfs2_glock *gl) { + rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); + smp_mb(); + wake_up_glock(gl); + call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); +} + +void gfs2_glock_free(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - if (likely(!gfs2_withdrawn(sdp))) - return false; - if (gl->gl_ops->go_flags & GLOF_NONDISK) - return false; - if (!sdp->sd_jdesc || - gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr) - return false; - return true; + __gfs2_glock_free(gl); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_kill_wait); } -void gfs2_glock_free(struct gfs2_glock *gl) -{ +void gfs2_glock_free_later(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - gfs2_glock_assert_withdraw(gl, atomic_read(&gl->gl_revokes) == 0); - rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); - smp_mb(); - wake_up_glock(gl); - call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); + spin_lock(&lru_lock); + list_add(&gl->gl_lru, &sdp->sd_dead_glocks); + spin_unlock(&lru_lock); if (atomic_dec_and_test(&sdp->sd_glock_disposal)) - wake_up(&sdp->sd_glock_wait); + wake_up(&sdp->sd_kill_wait); +} + +static void gfs2_free_dead_glocks(struct gfs2_sbd *sdp) +{ + struct list_head *list = &sdp->sd_dead_glocks; + + while(!list_empty(list)) { + struct gfs2_glock *gl; + + gl = list_first_entry(list, struct gfs2_glock, gl_lru); + list_del_init(&gl->gl_lru); + __gfs2_glock_free(gl); + } } /** @@ -193,34 +189,9 @@ struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl) return gl; } -/** - * demote_ok - Check to see if it's ok to unlock a glock - * @gl: the glock - * - * Returns: 1 if it's ok - */ - -static int demote_ok(const struct gfs2_glock *gl) +static void gfs2_glock_add_to_lru(struct gfs2_glock *gl) { - const struct gfs2_glock_operations *glops = gl->gl_ops; - - if (gl->gl_state == LM_ST_UNLOCKED) - return 0; - if (!list_empty(&gl->gl_holders)) - return 0; - if (glops->go_demote_ok) - return glops->go_demote_ok(gl); - return 1; -} - - -void gfs2_glock_add_to_lru(struct gfs2_glock *gl) -{ - if (!(gl->gl_ops->go_flags & GLOF_LRU)) - return; - spin_lock(&lru_lock); - list_move_tail(&gl->gl_lru, &lru_list); if (!test_bit(GLF_LRU, &gl->gl_flags)) { @@ -233,9 +204,6 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl) static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) { - if (!(gl->gl_ops->go_flags & GLOF_LRU)) - return; - spin_lock(&lru_lock); if (test_bit(GLF_LRU, &gl->gl_flags)) { list_del_init(&gl->gl_lru); @@ -249,8 +217,10 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) * Enqueue the glock on the work queue. Passes one glock reference on to the * work queue. */ -static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) { - if (!queue_delayed_work(glock_workqueue, &gl->gl_work, delay)) { +static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + if (!queue_delayed_work(sdp->sd_glock_wq, &gl->gl_work, delay)) { /* * We are holding the lockref spinlock, and the work was still * queued above. 
The queued work (glock_work_func) takes that @@ -262,21 +232,14 @@ static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) } } -static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) { - spin_lock(&gl->gl_lockref.lock); - __gfs2_glock_queue_work(gl, delay); - spin_unlock(&gl->gl_lockref.lock); -} - static void __gfs2_glock_put(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct address_space *mapping = gfs2_glock2aspace(gl); lockref_mark_dead(&gl->gl_lockref); - - gfs2_glock_remove_from_lru(gl); spin_unlock(&gl->gl_lockref.lock); + gfs2_glock_remove_from_lru(gl); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); if (mapping) { truncate_inode_pages_final(mapping); @@ -287,12 +250,18 @@ static void __gfs2_glock_put(struct gfs2_glock *gl) sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); } -/* - * Cause the glock to be put in work queue context. - */ -void gfs2_glock_queue_put(struct gfs2_glock *gl) +static bool __gfs2_glock_put_or_lock(struct gfs2_glock *gl) { - gfs2_glock_queue_work(gl, 0); + if (lockref_put_or_lock(&gl->gl_lockref)) + return true; + GLOCK_BUG_ON(gl, gl->gl_lockref.count != 1); + if (gl->gl_state != LM_ST_UNLOCKED) { + gl->gl_lockref.count--; + gfs2_glock_add_to_lru(gl); + spin_unlock(&gl->gl_lockref.lock); + return true; + } + return false; } /** @@ -303,12 +272,28 @@ void gfs2_glock_queue_put(struct gfs2_glock *gl) void gfs2_glock_put(struct gfs2_glock *gl) { - if (lockref_put_or_lock(&gl->gl_lockref)) + if (__gfs2_glock_put_or_lock(gl)) return; __gfs2_glock_put(gl); } +/* + * gfs2_glock_put_async - Decrement reference count without sleeping + * @gl: The glock to put + * + * Decrement the reference count on glock immediately unless it is the last + * reference. Defer putting the last reference to work queue context. + */ +void gfs2_glock_put_async(struct gfs2_glock *gl) +{ + if (__gfs2_glock_put_or_lock(gl)) + return; + + gfs2_glock_queue_work(gl, 0); + spin_unlock(&gl->gl_lockref.lock); +} + /** * may_grant - check if it's ok to grant a new lock * @gl: The glock @@ -469,14 +454,18 @@ done: /** * do_promote - promote as many requests as possible on the current queue * @gl: The glock - * - * Returns: 1 if there is a blocked holder at the head of the list */ -static int do_promote(struct gfs2_glock *gl) +static void do_promote(struct gfs2_glock *gl) { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_holder *gh, *current_gh; + if (gfs2_withdrawn(sdp)) { + do_error(gl, LM_OUT_ERROR); + return; + } + current_gh = find_first_holder(gl); list_for_each_entry(gh, &gl->gl_holders, gh_list) { if (test_bit(HIF_HOLDER, &gh->gh_iflags)) @@ -484,13 +473,10 @@ static int do_promote(struct gfs2_glock *gl) if (!may_grant(gl, current_gh, gh)) { /* * If we get here, it means we may not grant this - * holder for some reason. If this holder is at the - * head of the list, it means we have a blocked holder - * at the head, so return 1. + * holder for some reason. 
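The new __gfs2_glock_put_or_lock() helper above captures the reference-drop pattern that gfs2_glock_put() and gfs2_glock_put_async() now share: drop a reference via lockref_put_or_lock(), and only if that leaves us holding the spinlock on the last reference decide between parking a still-locked glock on the LRU and actually freeing it, directly or via the workqueue when the caller must not sleep. A condensed sketch merging the two entry points (glock_put_common() is a hypothetical name):

	static void glock_put_common(struct gfs2_glock *gl, bool may_sleep)
	{
		/* Fast path: this was not the last reference. */
		if (lockref_put_or_lock(&gl->gl_lockref))
			return;

		/* Last reference; gl->gl_lockref.lock is now held. */
		if (gl->gl_state != LM_ST_UNLOCKED) {
			/* Still locked: park on the LRU instead of freeing. */
			gl->gl_lockref.count--;
			gfs2_glock_add_to_lru(gl);
			spin_unlock(&gl->gl_lockref.lock);
			return;
		}
		if (may_sleep) {
			__gfs2_glock_put(gl);		/* drops the spinlock itself */
		} else {
			/* Hand the last reference to glock_work_func(). */
			gfs2_glock_queue_work(gl, 0);
			spin_unlock(&gl->gl_lockref.lock);
		}
	}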
*/ - if (list_is_first(&gh->gh_list, &gl->gl_holders)) - return 1; - do_error(gl, 0); + if (current_gh) + do_error(gl, 0); /* Fail queued try locks */ break; } set_bit(HIF_HOLDER, &gh->gh_iflags); @@ -499,7 +485,6 @@ static int do_promote(struct gfs2_glock *gl) if (!current_gh) current_gh = gh; } - return 0; } /** @@ -519,6 +504,23 @@ static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl) } /** + * find_last_waiter - find the last gh that's waiting for the glock + * @gl: the glock + * + * This also is a fast way of finding out if there are any waiters. + */ + +static inline struct gfs2_holder *find_last_waiter(const struct gfs2_glock *gl) +{ + struct gfs2_holder *gh; + + if (list_empty(&gl->gl_holders)) + return NULL; + gh = list_last_entry(&gl->gl_holders, struct gfs2_holder, gh_list); + return test_bit(HIF_HOLDER, &gh->gh_iflags) ? NULL : gh; +} + +/** * state_change - record that the glock is now in a different state * @gl: the glock * @new_state: the new state @@ -526,18 +528,6 @@ static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl) static void state_change(struct gfs2_glock *gl, unsigned int new_state) { - int held1, held2; - - held1 = (gl->gl_state != LM_ST_UNLOCKED); - held2 = (new_state != LM_ST_UNLOCKED); - - if (held1 != held2) { - GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); - if (held2) - gl->gl_lockref.count++; - else - gl->gl_lockref.count--; - } if (new_state != gl->gl_target) /* shorten our minimum hold time */ gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR, @@ -546,11 +536,11 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) gl->gl_tchange = jiffies; } -static void gfs2_set_demote(struct gfs2_glock *gl) +static void gfs2_set_demote(int nr, struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - set_bit(GLF_DEMOTE, &gl->gl_flags); + set_bit(nr, &gl->gl_flags); smp_mb(); wake_up(&sdp->sd_async_glock_wait); } @@ -573,31 +563,31 @@ static void gfs2_demote_wake(struct gfs2_glock *gl) static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) { const struct gfs2_glock_operations *glops = gl->gl_ops; - struct gfs2_holder *gh; - unsigned state = ret & LM_OUT_ST_MASK; - spin_lock(&gl->gl_lockref.lock); - trace_gfs2_glock_state_change(gl, state); - state_change(gl, state); - gh = find_first_waiter(gl); + if (!(ret & ~LM_OUT_ST_MASK)) { + unsigned state = ret & LM_OUT_ST_MASK; + + trace_gfs2_glock_state_change(gl, state); + state_change(gl, state); + } /* Demote to UN request arrived during demote to SH or DF */ if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && - state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED) + gl->gl_state != LM_ST_UNLOCKED && + gl->gl_demote_state == LM_ST_UNLOCKED) gl->gl_target = LM_ST_UNLOCKED; /* Check for state != intended state */ - if (unlikely(state != gl->gl_target)) { - if (gh && (ret & LM_OUT_CANCELED)) - gfs2_holder_wake(gh); + if (unlikely(gl->gl_state != gl->gl_target)) { + struct gfs2_holder *gh = find_first_waiter(gl); + if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { - /* move to back of queue and try next entry */ if (ret & LM_OUT_CANCELED) { - if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0) - list_move_tail(&gh->gh_list, &gl->gl_holders); - gh = find_first_waiter(gl); - gl->gl_target = gh->gh_state; - goto retry; + list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); + gfs2_holder_wake(gh); + gl->gl_target = gl->gl_state; + goto out; } /* Some error or failed "try lock" - report it 
*/ if ((ret & LM_OUT_ERROR) || @@ -607,10 +597,9 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) goto out; } } - switch(state) { + switch(gl->gl_state) { /* Unlocked due to conversion deadlock, try again */ case LM_ST_UNLOCKED: -retry: do_xmote(gl, gh, gl->gl_target); break; /* Conversion fails, unlock and try again */ @@ -619,18 +608,21 @@ retry: do_xmote(gl, gh, LM_ST_UNLOCKED); break; default: /* Everything else */ - fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n", - gl->gl_target, state); + fs_err(gl->gl_name.ln_sbd, + "glock %u:%llu requested=%u ret=%u\n", + gl->gl_name.ln_type, gl->gl_name.ln_number, + gl->gl_req, ret); GLOCK_BUG_ON(gl, 1); } - spin_unlock(&gl->gl_lockref.lock); return; } /* Fast path - we got what we asked for */ - if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) + if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { + clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); gfs2_demote_wake(gl); - if (state != LM_ST_UNLOCKED) { + } + if (gl->gl_state != LM_ST_UNLOCKED) { if (glops->go_xmote_bh) { int rv; @@ -645,18 +637,8 @@ retry: do_promote(gl); } out: - clear_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_lockref.lock); -} - -static bool is_system_glock(struct gfs2_glock *gl) -{ - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); - - if (gl == m_ip->i_gl) - return true; - return false; + if (!test_bit(GLF_CANCELING, &gl->gl_flags)) + clear_bit(GLF_LOCK, &gl->gl_flags); } /** @@ -674,136 +656,86 @@ __acquires(&gl->gl_lockref.lock) { const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0); + struct lm_lockstruct *ls = &sdp->sd_lockstruct; int ret; - if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) && - gh && !(gh->gh_flags & LM_FLAG_NOEXP)) - goto skip_inval; + /* + * When a filesystem is withdrawing, the remaining cluster nodes will + * take care of recovering the withdrawing node's journal. We only + * need to make sure that once we trigger remote recovery, we won't + * write to the shared block device anymore. This means that here, + * + * - no new writes to the filesystem must be triggered (->go_sync()). + * + * - any cached data should be discarded by calling ->go_inval(), dirty + * or not and journaled or unjournaled. + * + * - no more dlm locking operations should be issued (->lm_lock()). + */ - lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | - LM_FLAG_PRIORITY); GLOCK_BUG_ON(gl, gl->gl_state == target); GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target); - if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) && - glops->go_inval) { - /* - * If another process is already doing the invalidate, let that - * finish first. The glock state machine will get back to this - * holder again later. 
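The comment block added at the top of do_xmote() above pins down the withdraw semantics; the ordering it implies looks roughly like this (a sketch of the resulting flow, not the literal function body; error handling and locking elided):

	if (!gfs2_withdrawn(sdp))
		ret = glops->go_sync(gl);	/* no new writes once withdrawn */
	if (target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED)
		glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
	if (gfs2_withdrawn(sdp)) {
		if (target != LM_ST_UNLOCKED)
			target = LM_OUT_ERROR;
		finish_xmote(gl, target);	/* complete locally, no dlm call */
	} else {
		ret = ls->ls_ops->lm_lock(gl, target, gh ? gh->gh_flags : 0);
	}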
- */ - if (test_and_set_bit(GLF_INVALIDATE_IN_PROGRESS, - &gl->gl_flags)) - return; - do_error(gl, 0); /* Fail queued try locks */ - } - gl->gl_req = target; - set_bit(GLF_BLOCKING, &gl->gl_flags); - if ((gl->gl_req == LM_ST_UNLOCKED) || - (gl->gl_state == LM_ST_EXCLUSIVE) || - (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) - clear_bit(GLF_BLOCKING, &gl->gl_flags); + + if (!glops->go_inval || !glops->go_sync) + goto skip_inval; + spin_unlock(&gl->gl_lockref.lock); - if (glops->go_sync) { + if (!gfs2_withdrawn(sdp)) { ret = glops->go_sync(gl); - /* If we had a problem syncing (due to io errors or whatever, - * we should not invalidate the metadata or tell dlm to - * release the glock to other nodes. - */ if (ret) { if (cmpxchg(&sdp->sd_log_error, 0, ret)) { - fs_err(sdp, "Error %d syncing glock \n", ret); + fs_err(sdp, "Error %d syncing glock\n", ret); gfs2_dump_glock(NULL, gl, true); + gfs2_withdraw(sdp); } - goto skip_inval; } } - if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) { - /* - * The call to go_sync should have cleared out the ail list. - * If there are still items, we have a problem. We ought to - * withdraw, but we can't because the withdraw code also uses - * glocks. Warn about the error, dump the glock, then fall - * through and wait for logd to do the withdraw for us. - */ - if ((atomic_read(&gl->gl_ail_count) != 0) && - (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) { - gfs2_glock_assert_warn(gl, - !atomic_read(&gl->gl_ail_count)); - gfs2_dump_glock(NULL, gl, true); - } + + if (target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); - clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); - } + spin_lock(&gl->gl_lockref.lock); skip_inval: - gfs2_glock_hold(gl); - /* - * Check for an error encountered since we called go_sync and go_inval. - * If so, we can't withdraw from the glock code because the withdraw - * code itself uses glocks (see function signal_our_withdraw) to - * change the mount to read-only. Most importantly, we must not call - * dlm to unlock the glock until the journal is in a known good state - * (after journal replay) otherwise other nodes may use the object - * (rgrp or dinode) and then later, journal replay will corrupt the - * file system. The best we can do here is wait for the logd daemon - * to see sd_log_error and withdraw, and in the meantime, requeue the - * work for later. - * - * We make a special exception for some system glocks, such as the - * system statfs inode glock, which needs to be granted before the - * gfs2_quotad daemon can exit, and that exit needs to finish before - * we can unmount the withdrawn file system. - * - * However, if we're just unlocking the lock (say, for unmount, when - * gfs2_gl_hash_clear calls clear_glock) and recovery is complete - * then it's okay to tell dlm to unlock it. - */ - if (unlikely(sdp->sd_log_error && !gfs2_withdrawn(sdp))) - gfs2_withdraw_delayed(sdp); - if (glock_blocked_by_withdraw(gl) && - (target != LM_ST_UNLOCKED || - test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) { - if (!is_system_glock(gl)) { - handle_callback(gl, LM_ST_UNLOCKED, 0, false); /* sets demote */ - /* - * Ordinarily, we would call dlm and its callback would call - * finish_xmote, which would call state_change() to the new state. - * Since we withdrew, we won't call dlm, so call state_change - * manually, but to the UNLOCKED state we desire. 
- */ - state_change(gl, LM_ST_UNLOCKED); + if (gfs2_withdrawn(sdp)) { + if (target != LM_ST_UNLOCKED) + target = LM_OUT_ERROR; + goto out; + } + + if (ls->ls_ops->lm_lock) { + set_bit(GLF_PENDING_REPLY, &gl->gl_flags); + spin_unlock(&gl->gl_lockref.lock); + ret = ls->ls_ops->lm_lock(gl, target, gh ? gh->gh_flags : 0); + spin_lock(&gl->gl_lockref.lock); + + if (!ret) { + /* The operation will be completed asynchronously. */ + gl->gl_lockref.count++; + return; + } + clear_bit(GLF_PENDING_REPLY, &gl->gl_flags); + + if (ret == -ENODEV) { /* - * We skip telling dlm to do the locking, so we won't get a - * reply that would otherwise clear GLF_LOCK. So we clear it here. + * The lockspace has been released and the lock has + * been unlocked implicitly. */ - clear_bit(GLF_LOCK, &gl->gl_flags); - clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); - gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); - goto out; + if (target != LM_ST_UNLOCKED) { + target = LM_OUT_ERROR; + goto out; + } } else { - clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); - } - } - - if (sdp->sd_lockstruct.ls_ops->lm_lock) { - /* lock_dlm */ - ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); - if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED && - target == LM_ST_UNLOCKED && - test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) { - finish_xmote(gl, target); - gfs2_glock_queue_work(gl, 0); - } else if (ret) { fs_err(sdp, "lm_lock ret %d\n", ret); GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp)); + return; } - } else { /* lock_nolock */ - finish_xmote(gl, target); - gfs2_glock_queue_work(gl, 0); } + out: - spin_lock(&gl->gl_lockref.lock); + /* Complete the operation now. */ + finish_xmote(gl, target); + gl->gl_lockref.count++; + gfs2_glock_queue_work(gl, 0); } /** @@ -817,15 +749,26 @@ static void run_queue(struct gfs2_glock *gl, const int nonblock) __releases(&gl->gl_lockref.lock) __acquires(&gl->gl_lockref.lock) { - struct gfs2_holder *gh = NULL; + struct gfs2_holder *gh; - if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) + if (test_bit(GLF_LOCK, &gl->gl_flags)) return; + set_bit(GLF_LOCK, &gl->gl_flags); + /* + * The GLF_DEMOTE_IN_PROGRESS flag is only set intermittently during + * locking operations. We have just started a locking operation by + * setting the GLF_LOCK flag, so the GLF_DEMOTE_IN_PROGRESS flag must + * be cleared. 
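The reworked run_queue() above makes the ownership rule explicit: GLF_LOCK is the gate that serializes the glock state machine, and GLF_DEMOTE_IN_PROGRESS may only be set by whoever holds that gate. A minimal sketch of the gate (glock_state_machine_enter() is a hypothetical name; the caller holds gl->gl_lockref.lock, as run_queue() does):

	static bool glock_state_machine_enter(struct gfs2_glock *gl)
	{
		if (test_bit(GLF_LOCK, &gl->gl_flags))
			return false;	/* another context is driving this glock */
		set_bit(GLF_LOCK, &gl->gl_flags);
		/* Only the GLF_LOCK owner sets GLF_DEMOTE_IN_PROGRESS, so it
		 * must be clear at this point. */
		GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
		return true;
	}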
+ */ GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); - if (test_bit(GLF_DEMOTE, &gl->gl_flags) && - gl->gl_demote_state != gl->gl_state) { + if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { + if (gl->gl_demote_state == gl->gl_state) { + gfs2_demote_wake(gl); + goto promote; + } + if (find_first_holder(gl)) goto out_unlock; if (nonblock) @@ -833,30 +776,33 @@ __acquires(&gl->gl_lockref.lock) set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); gl->gl_target = gl->gl_demote_state; - } else { - if (test_bit(GLF_DEMOTE, &gl->gl_flags)) - gfs2_demote_wake(gl); - if (do_promote(gl) == 0) - goto out_unlock; - gh = find_first_waiter(gl); - gl->gl_target = gh->gh_state; - if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) - do_error(gl, 0); /* Fail queued try locks */ + do_xmote(gl, NULL, gl->gl_target); + return; } + +promote: + do_promote(gl); + if (find_first_holder(gl)) + goto out_unlock; + gh = find_first_waiter(gl); + if (!gh) + goto out_unlock; + if (nonblock) + goto out_sched; + gl->gl_target = gh->gh_state; + if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) + do_error(gl, 0); /* Fail queued try locks */ do_xmote(gl, gh, gl->gl_target); return; out_sched: clear_bit(GLF_LOCK, &gl->gl_flags); - smp_mb__after_atomic(); gl->gl_lockref.count++; - __gfs2_glock_queue_work(gl, 0); + gfs2_glock_queue_work(gl, 0); return; out_unlock: clear_bit(GLF_LOCK, &gl->gl_flags); - smp_mb__after_atomic(); - return; } /** @@ -872,17 +818,14 @@ void glock_set_object(struct gfs2_glock *gl, void *object) prev_object = gl->gl_object; gl->gl_object = object; spin_unlock(&gl->gl_lockref.lock); - if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == NULL)) { - pr_warn("glock=%u/%llx\n", - gl->gl_name.ln_type, - (unsigned long long)gl->gl_name.ln_number); + if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == NULL)) gfs2_dump_glock(NULL, gl, true); - } } /** * glock_clear_object - clear the gl_object field of a glock * @gl: the glock + * @object: object the glock currently points at */ void glock_clear_object(struct gfs2_glock *gl, void *object) { @@ -892,13 +835,8 @@ void glock_clear_object(struct gfs2_glock *gl, void *object) prev_object = gl->gl_object; gl->gl_object = NULL; spin_unlock(&gl->gl_lockref.lock); - if (gfs2_assert_warn(gl->gl_name.ln_sbd, - prev_object == object || prev_object == NULL)) { - pr_warn("glock=%u/%llx\n", - gl->gl_name.ln_type, - (unsigned long long)gl->gl_name.ln_number); + if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == object)) gfs2_dump_glock(NULL, gl, true); - } } void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation) @@ -933,48 +871,76 @@ static void gfs2_glock_poke(struct gfs2_glock *gl) gfs2_holder_uninit(&gh); } -static bool gfs2_try_evict(struct gfs2_glock *gl) +static struct gfs2_inode *gfs2_grab_existing_inode(struct gfs2_glock *gl) +{ + struct gfs2_inode *ip; + + spin_lock(&gl->gl_lockref.lock); + ip = gl->gl_object; + if (ip && !igrab(&ip->i_inode)) + ip = NULL; + spin_unlock(&gl->gl_lockref.lock); + if (ip) { + wait_on_new_inode(&ip->i_inode); + if (is_bad_inode(&ip->i_inode)) { + iput(&ip->i_inode); + ip = NULL; + } + } + return ip; +} + +static void gfs2_try_to_evict(struct gfs2_glock *gl) { struct gfs2_inode *ip; - bool evicted = false; /* * If there is contention on the iopen glock and we have an inode, try - * to grab and release the inode so that it can be evicted. 
This will - * allow the remote node to go ahead and delete the inode without us - * having to do it, which will avoid rgrp glock thrashing. + * to grab and release the inode so that it can be evicted. The + * GLF_DEFER_DELETE flag indicates to gfs2_evict_inode() that the inode + * should not be deleted locally. This will allow the remote node to + * go ahead and delete the inode without us having to do it, which will + * avoid rgrp glock thrashing. * * The remote node is likely still holding the corresponding inode * glock, so it will run before we get to verify that the delete has - * happened below. + * happened below. (Verification is triggered by the call to + * gfs2_queue_verify_delete() in gfs2_evict_inode().) */ - spin_lock(&gl->gl_lockref.lock); - ip = gl->gl_object; - if (ip && !igrab(&ip->i_inode)) - ip = NULL; - spin_unlock(&gl->gl_lockref.lock); + ip = gfs2_grab_existing_inode(gl); if (ip) { - gl->gl_no_formal_ino = ip->i_no_formal_ino; - set_bit(GIF_DEFERRED_DELETE, &ip->i_flags); + set_bit(GLF_DEFER_DELETE, &gl->gl_flags); d_prune_aliases(&ip->i_inode); iput(&ip->i_inode); + clear_bit(GLF_DEFER_DELETE, &gl->gl_flags); /* If the inode was evicted, gl->gl_object will now be NULL. */ - spin_lock(&gl->gl_lockref.lock); - ip = gl->gl_object; - if (ip) { - clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags); - if (!igrab(&ip->i_inode)) - ip = NULL; - } - spin_unlock(&gl->gl_lockref.lock); + ip = gfs2_grab_existing_inode(gl); if (ip) { gfs2_glock_poke(ip->i_gl); iput(&ip->i_inode); } - evicted = !ip; } - return evicted; +} + +bool gfs2_queue_try_to_evict(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + if (test_and_set_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) + return false; + return !mod_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, 0); +} + +bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later) +{ + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + unsigned long delay; + + if (test_and_set_bit(GLF_VERIFY_DELETE, &gl->gl_flags)) + return false; + delay = later ? HZ + get_random_long() % (HZ * 9) : 0; + return queue_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, delay); } static void delete_work_func(struct work_struct *work) @@ -982,49 +948,34 @@ static void delete_work_func(struct work_struct *work) struct delayed_work *dwork = to_delayed_work(work); struct gfs2_glock *gl = container_of(dwork, struct gfs2_glock, gl_delete); struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct inode *inode; - u64 no_addr = gl->gl_name.ln_number; + bool verify_delete = test_and_clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags); - spin_lock(&gl->gl_lockref.lock); - clear_bit(GLF_PENDING_DELETE, &gl->gl_flags); - spin_unlock(&gl->gl_lockref.lock); + /* + * Check for the GLF_VERIFY_DELETE above: this ensures that we won't + * immediately process GLF_VERIFY_DELETE work that the below call to + * gfs2_try_to_evict() queues. + */ - if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { - /* - * If we can evict the inode, give the remote node trying to - * delete the inode some time before verifying that the delete - * has happened. Otherwise, if we cause contention on the inode glock - * immediately, the remote node will think that we still have - * the inode in use, and so it will give up waiting. - * - * If we can't evict the inode, signal to the remote node that - * the inode is still in use. We'll later try to delete the - * inode locally in gfs2_evict_inode. 
- * - * FIXME: We only need to verify that the remote node has - * deleted the inode because nodes before this remote delete - * rework won't cooperate. At a later time, when we no longer - * care about compatibility with such nodes, we can skip this - * step entirely. - */ - if (gfs2_try_evict(gl)) { - if (gfs2_queue_delete_work(gl, 5 * HZ)) + if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) + gfs2_try_to_evict(gl); + + if (verify_delete) { + u64 no_addr = gl->gl_name.ln_number; + struct inode *inode; + + inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino, + GFS2_BLKST_UNLINKED); + if (IS_ERR(inode)) { + if (PTR_ERR(inode) == -EAGAIN && + !test_bit(SDF_KILL, &sdp->sd_flags) && + gfs2_queue_verify_delete(gl, true)) return; + } else { + d_prune_aliases(inode); + iput(inode); } - goto out; } - inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino, - GFS2_BLKST_UNLINKED); - if (IS_ERR(inode)) { - if (PTR_ERR(inode) == -EAGAIN && - (gfs2_queue_delete_work(gl, 5 * HZ))) - return; - } else { - d_prune_aliases(inode); - iput(inode); - } -out: gfs2_glock_put(gl); } @@ -1034,43 +985,44 @@ static void glock_work_func(struct work_struct *work) struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); unsigned int drop_refs = 1; - if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { + spin_lock(&gl->gl_lockref.lock); + if (test_bit(GLF_HAVE_REPLY, &gl->gl_flags)) { + clear_bit(GLF_HAVE_REPLY, &gl->gl_flags); finish_xmote(gl, gl->gl_reply); drop_refs++; } - spin_lock(&gl->gl_lockref.lock); if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && gl->gl_state != LM_ST_UNLOCKED && gl->gl_demote_state != LM_ST_EXCLUSIVE) { - unsigned long holdtime, now = jiffies; + if (gl->gl_name.ln_type == LM_TYPE_INODE) { + unsigned long holdtime, now = jiffies; - holdtime = gl->gl_tchange + gl->gl_hold_time; - if (time_before(now, holdtime)) - delay = holdtime - now; + holdtime = gl->gl_tchange + gl->gl_hold_time; + if (time_before(now, holdtime)) + delay = holdtime - now; + } if (!delay) { clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); - gfs2_set_demote(gl); + gfs2_set_demote(GLF_DEMOTE, gl); } } run_queue(gl, 0); if (delay) { /* Keep one glock reference for the work we requeue. */ drop_refs--; - if (gl->gl_name.ln_type != LM_TYPE_INODE) - delay = 0; - __gfs2_glock_queue_work(gl, delay); + gfs2_glock_queue_work(gl, delay); } - /* - * Drop the remaining glock references manually here. (Mind that - * __gfs2_glock_queue_work depends on the lockref spinlock begin held - * here as well.) - */ + /* Drop the remaining glock references manually. 
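The drop_refs bookkeeping in glock_work_func() above follows a simple invariant: the work item arrives owning one glock reference, processing a DLM reply consumes the extra reference taken when the reply was queued, and requeueing delayed work hands one reference back. In outline (have_reply and requeue_delay are placeholders for the flag tests in the real function):

	/* Inside glock_work_func(), with gl->gl_lockref.lock held. */
	unsigned int drop_refs = 1;	/* the reference the work item owns */

	if (have_reply)			/* GLF_HAVE_REPLY was set */
		drop_refs++;		/* queueing the reply took an extra ref */
	if (requeue_delay) {		/* pending demote not due yet */
		drop_refs--;		/* keep one ref for the requeued work */
		gfs2_glock_queue_work(gl, requeue_delay);
	}
	/* The remainder is dropped under the spinlock, as shown next. */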
*/ + GLOCK_BUG_ON(gl, gl->gl_lockref.count < drop_refs); gl->gl_lockref.count -= drop_refs; if (!gl->gl_lockref.count) { - __gfs2_glock_put(gl); - return; + if (gl->gl_state == LM_ST_UNLOCKED) { + __gfs2_glock_put(gl); + return; + } + gfs2_glock_add_to_lru(gl); } spin_unlock(&gl->gl_lockref.lock); } @@ -1106,6 +1058,8 @@ again: out: rcu_read_unlock(); finish_wait(wq, &wait.wait); + if (gl) + gfs2_glock_remove_from_lru(gl); return gl; } @@ -1126,19 +1080,15 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp) { - struct super_block *s = sdp->sd_vfs; struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type, .ln_sbd = sdp }; struct gfs2_glock *gl, *tmp; struct address_space *mapping; - int ret = 0; gl = find_insert_glock(&name, NULL); - if (gl) { - *glp = gl; - return 0; - } + if (gl) + goto found; if (!create) return -ENOENT; @@ -1166,10 +1116,12 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, atomic_inc(&sdp->sd_glock_disposal); gl->gl_node.next = NULL; - gl->gl_flags = glops->go_instantiate ? BIT(GLF_INSTANTIATE_NEEDED) : 0; + gl->gl_flags = BIT(GLF_INITIAL); + if (glops->go_instantiate) + gl->gl_flags |= BIT(GLF_INSTANTIATE_NEEDED); gl->gl_name = name; + lockref_init(&gl->gl_lockref); lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass); - gl->gl_lockref.count = 1; gl->gl_state = LM_ST_UNLOCKED; gl->gl_target = LM_ST_UNLOCKED; gl->gl_demote_state = LM_ST_EXCLUSIVE; @@ -1189,32 +1141,31 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, mapping = gfs2_glock2aspace(gl); if (mapping) { + gfp_t gfp_mask; + mapping->a_ops = &gfs2_meta_aops; - mapping->host = s->s_bdev->bd_inode; + mapping->host = sdp->sd_inode; mapping->flags = 0; - mapping_set_gfp_mask(mapping, GFP_NOFS); - mapping->private_data = NULL; + gfp_mask = mapping_gfp_mask(sdp->sd_inode->i_mapping); + mapping_set_gfp_mask(mapping, gfp_mask); + mapping->i_private_data = NULL; mapping->writeback_index = 0; } tmp = find_insert_glock(&name, gl); - if (!tmp) { - *glp = gl; - goto out; - } - if (IS_ERR(tmp)) { - ret = PTR_ERR(tmp); - goto out_free; - } - *glp = tmp; + if (tmp) { + gfs2_glock_dealloc(&gl->gl_rcu); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_kill_wait); -out_free: - gfs2_glock_dealloc(&gl->gl_rcu); - if (atomic_dec_and_test(&sdp->sd_glock_disposal)) - wake_up(&sdp->sd_glock_wait); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + gl = tmp; + } -out: - return ret; +found: + *glp = gl; + return 0; } /** @@ -1223,7 +1174,7 @@ out: * @state: the state we're requesting * @flags: the modifier flags * @gh: the holder structure - * + * @ip: caller's return address for debugging */ void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, @@ -1384,7 +1335,7 @@ out: } /** - * handle_callback - process a demote request + * request_demote - process a demote request * @gl: the glock * @state: the state the caller wants us to change to * @delay: zero to demote immediately; otherwise pending demote @@ -1394,13 +1345,10 @@ out: * practise: LM_ST_SHARED and LM_ST_UNLOCKED */ -static void handle_callback(struct gfs2_glock *gl, unsigned int state, - unsigned long delay, bool remote) +static void request_demote(struct gfs2_glock *gl, unsigned int state, + unsigned long delay, bool remote) { - if (delay) - set_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); - else - gfs2_set_demote(gl); + gfs2_set_demote(delay ? 
GLF_PENDING_DEMOTE : GLF_DEMOTE, gl); if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { gl->gl_demote_state = state; gl->gl_demote_time = jiffies; @@ -1432,13 +1380,29 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) va_end(args); } +static bool gfs2_should_queue_trylock(struct gfs2_glock *gl, + struct gfs2_holder *gh) +{ + struct gfs2_holder *current_gh, *gh2; + + current_gh = find_first_holder(gl); + if (current_gh && !may_grant(gl, current_gh, gh)) + return false; + + list_for_each_entry(gh2, &gl->gl_holders, gh_list) { + if (test_bit(HIF_HOLDER, &gh2->gh_iflags)) + continue; + if (!(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) + return false; + } + return true; +} + static inline bool pid_is_meaningful(const struct gfs2_holder *gh) { if (!(gh->gh_flags & GL_NOPID)) return true; - if (gh->gh_state == LM_ST_UNLOCKED) - return true; - return false; + return !test_bit(HIF_HOLDER, &gh->gh_iflags); } /** @@ -1452,28 +1416,20 @@ static inline bool pid_is_meaningful(const struct gfs2_holder *gh) */ static inline void add_to_queue(struct gfs2_holder *gh) -__releases(&gl->gl_lockref.lock) -__acquires(&gl->gl_lockref.lock) { struct gfs2_glock *gl = gh->gh_gl; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct list_head *insert_pt = NULL; struct gfs2_holder *gh2; - int try_futile = 0; GLOCK_BUG_ON(gl, gh->gh_owner_pid == NULL); if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) GLOCK_BUG_ON(gl, true); - if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { - if (test_bit(GLF_LOCK, &gl->gl_flags)) { - struct gfs2_holder *current_gh; - - current_gh = find_first_holder(gl); - try_futile = !may_grant(gl, current_gh, gh); - } - if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) - goto fail; + if ((gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) && + !gfs2_should_queue_trylock(gl, gh)) { + gh->gh_error = GLR_TRYFAILED; + gfs2_holder_wake(gh); + return; } list_for_each_entry(gh2, &gl->gl_holders, gh_list) { @@ -1485,37 +1441,10 @@ __acquires(&gl->gl_lockref.lock) continue; goto trap_recursive; } - list_for_each_entry(gh2, &gl->gl_holders, gh_list) { - if (try_futile && - !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { -fail: - gh->gh_error = GLR_TRYFAILED; - gfs2_holder_wake(gh); - return; - } - if (test_bit(HIF_HOLDER, &gh2->gh_iflags)) - continue; - if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) - insert_pt = &gh2->gh_list; - } trace_gfs2_glock_queue(gh, 1); gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT); gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT); - if (likely(insert_pt == NULL)) { - list_add_tail(&gh->gh_list, &gl->gl_holders); - if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) - goto do_cancel; - return; - } - list_add_tail(&gh->gh_list, insert_pt); -do_cancel: - gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list); - if (!(gh->gh_flags & LM_FLAG_PRIORITY)) { - spin_unlock(&gl->gl_lockref.lock); - if (sdp->sd_lockstruct.ls_ops->lm_cancel) - sdp->sd_lockstruct.ls_ops->lm_cancel(gl); - spin_lock(&gl->gl_lockref.lock); - } + list_add_tail(&gh->gh_list, &gl->gl_holders); return; trap_recursive: @@ -1543,26 +1472,44 @@ trap_recursive: int gfs2_glock_nq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - int error = 0; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + int error; - if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP)) + if (gfs2_withdrawn(sdp)) return -EIO; - if (test_bit(GLF_LRU, &gl->gl_flags)) - gfs2_glock_remove_from_lru(gl); + if (gh->gh_flags & GL_NOBLOCK) { + struct gfs2_holder *current_gh; + + error = -ECHILD; + 
spin_lock(&gl->gl_lockref.lock); + if (find_last_waiter(gl)) + goto unlock; + current_gh = find_first_holder(gl); + if (!may_grant(gl, current_gh, gh)) + goto unlock; + set_bit(HIF_HOLDER, &gh->gh_iflags); + list_add_tail(&gh->gh_list, &gl->gl_holders); + trace_gfs2_promote(gh); + error = 0; +unlock: + spin_unlock(&gl->gl_lockref.lock); + return error; + } gh->gh_error = 0; spin_lock(&gl->gl_lockref.lock); add_to_queue(gh); - if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && - test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) { - set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + if (unlikely((LM_FLAG_RECOVER & gh->gh_flags) && + test_and_clear_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags))) { + set_bit(GLF_HAVE_REPLY, &gl->gl_flags); gl->gl_lockref.count++; - __gfs2_glock_queue_work(gl, 0); + gfs2_glock_queue_work(gl, 0); } run_queue(gl, 1); spin_unlock(&gl->gl_lockref.lock); + error = 0; if (!(gh->gh_flags & GL_ASYNC)) error = gfs2_glock_wait(gh); @@ -1581,12 +1528,6 @@ int gfs2_glock_poll(struct gfs2_holder *gh) return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1; } -static inline bool needs_demote(struct gfs2_glock *gl) -{ - return (test_bit(GLF_DEMOTE, &gl->gl_flags) || - test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)); -} - static void __gfs2_glock_dq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; @@ -1595,11 +1536,11 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh) /* * This holder should not be cached, so mark it for demote. - * Note: this should be done before the check for needs_demote - * below. + * Note: this should be done before the glock_needs_demote + * check below. */ if (gh->gh_flags & GL_NOCACHE) - handle_callback(gl, LM_ST_UNLOCKED, 0, false); + request_demote(gl, LM_ST_UNLOCKED, 0, false); list_del_init(&gh->gh_list); clear_bit(HIF_HOLDER, &gh->gh_iflags); @@ -1609,21 +1550,18 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh) * If there hasn't been a demote request we are done. * (Let the remaining holders, if any, keep holding it.) */ - if (!needs_demote(gl)) { + if (!glock_needs_demote(gl)) { if (list_empty(&gl->gl_holders)) fast_path = 1; } - if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl)) - gfs2_glock_add_to_lru(gl); - if (unlikely(!fast_path)) { gl->gl_lockref.count++; if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && !test_bit(GLF_DEMOTE, &gl->gl_flags) && gl->gl_name.ln_type == LM_TYPE_INODE) delay = gl->gl_hold_time; - __gfs2_glock_queue_work(gl, delay); + gfs2_glock_queue_work(gl, delay); } } @@ -1635,7 +1573,6 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh) void gfs2_glock_dq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; spin_lock(&gl->gl_lockref.lock); if (!gfs2_holder_queued(gh)) { @@ -1647,29 +1584,19 @@ void gfs2_glock_dq(struct gfs2_holder *gh) } if (list_is_first(&gh->gh_list, &gl->gl_holders) && - !test_bit(HIF_HOLDER, &gh->gh_iflags)) { + !test_bit(HIF_HOLDER, &gh->gh_iflags) && + test_bit(GLF_LOCK, &gl->gl_flags) && + !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && + !test_bit(GLF_CANCELING, &gl->gl_flags)) { + set_bit(GLF_CANCELING, &gl->gl_flags); spin_unlock(&gl->gl_lockref.lock); gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl); wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE); spin_lock(&gl->gl_lockref.lock); - } - - /* - * If we're in the process of file system withdraw, we cannot just - * dequeue any glocks until our journal is recovered, lest we introduce - * file system corruption. 
We need two exceptions to this rule: We need - * to allow unlocking of nondisk glocks and the glock for our own - * journal that needs recovery. - */ - if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) && - glock_blocked_by_withdraw(gl) && - gh->gh_gl != sdp->sd_jinode_gl) { - sdp->sd_glock_dqs_held++; - spin_unlock(&gl->gl_lockref.lock); - might_sleep(); - wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY, - TASK_UNINTERRUPTIBLE); - spin_lock(&gl->gl_lockref.lock); + clear_bit(GLF_CANCELING, &gl->gl_flags); + clear_bit(GLF_LOCK, &gl->gl_flags); + if (!gfs2_holder_queued(gh)) + goto out; } __gfs2_glock_dq(gh); @@ -1833,21 +1760,23 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) { unsigned long delay = 0; - unsigned long holdtime; - unsigned long now = jiffies; gfs2_glock_hold(gl); spin_lock(&gl->gl_lockref.lock); - holdtime = gl->gl_tchange + gl->gl_hold_time; if (!list_empty(&gl->gl_holders) && gl->gl_name.ln_type == LM_TYPE_INODE) { + unsigned long now = jiffies; + unsigned long holdtime; + + holdtime = gl->gl_tchange + gl->gl_hold_time; + if (time_before(now, holdtime)) delay = holdtime - now; - if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) + if (test_bit(GLF_HAVE_REPLY, &gl->gl_flags)) delay = gl->gl_hold_time; } - handle_callback(gl, state, delay, true); - __gfs2_glock_queue_work(gl, delay); + request_demote(gl, state, delay, true); + gfs2_glock_queue_work(gl, delay); spin_unlock(&gl->gl_lockref.lock); } @@ -1857,7 +1786,7 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) * * Glocks are not frozen if (a) the result of the dlm operation is * an error, (b) the locking operation was an unlock operation or - * (c) if there is a "noexp" flagged request anywhere in the queue + * (c) if there is a "recover" flagged request anywhere in the queue * * Returns: 1 if freezing should occur, 0 otherwise */ @@ -1874,7 +1803,7 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl) list_for_each_entry(gh, &gl->gl_holders, gh_list) { if (test_bit(HIF_HOLDER, &gh->gh_iflags)) continue; - if (LM_FLAG_NOEXP & gh->gh_flags) + if (LM_FLAG_RECOVER & gh->gh_flags) return 0; } @@ -1895,19 +1824,20 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; spin_lock(&gl->gl_lockref.lock); + clear_bit(GLF_PENDING_REPLY, &gl->gl_flags); gl->gl_reply = ret; if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { if (gfs2_should_freeze(gl)) { - set_bit(GLF_FROZEN, &gl->gl_flags); + set_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags); spin_unlock(&gl->gl_lockref.lock); return; } } gl->gl_lockref.count++; - set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - __gfs2_glock_queue_work(gl, 0); + set_bit(GLF_HAVE_REPLY, &gl->gl_flags); + gfs2_glock_queue_work(gl, 0); spin_unlock(&gl->gl_lockref.lock); } @@ -1927,6 +1857,16 @@ static int glock_cmp(void *priv, const struct list_head *a, return 0; } +static bool can_free_glock(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + return !test_bit(GLF_LOCK, &gl->gl_flags) && + !gl->gl_lockref.count && + (!test_bit(GLF_LFLUSH, &gl->gl_flags) || + test_bit(SDF_KILL, &sdp->sd_flags)); +} + /** * gfs2_dispose_glock_lru - Demote a list of glocks * @list: The list to dispose of @@ -1941,37 +1881,38 @@ static int glock_cmp(void *priv, const struct list_head *a, * private) */ -static void gfs2_dispose_glock_lru(struct list_head *list) +static unsigned long gfs2_dispose_glock_lru(struct list_head 
*list) __releases(&lru_lock) __acquires(&lru_lock) { struct gfs2_glock *gl; + unsigned long freed = 0; list_sort(NULL, list, glock_cmp); while(!list_empty(list)) { gl = list_first_entry(list, struct gfs2_glock, gl_lru); - list_del_init(&gl->gl_lru); - clear_bit(GLF_LRU, &gl->gl_flags); if (!spin_trylock(&gl->gl_lockref.lock)) { add_back_to_lru: - list_add(&gl->gl_lru, &lru_list); - set_bit(GLF_LRU, &gl->gl_flags); - atomic_inc(&lru_count); + list_move(&gl->gl_lru, &lru_list); continue; } - if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + if (!can_free_glock(gl)) { spin_unlock(&gl->gl_lockref.lock); goto add_back_to_lru; } + list_del_init(&gl->gl_lru); + atomic_dec(&lru_count); + clear_bit(GLF_LRU, &gl->gl_flags); + freed++; gl->gl_lockref.count++; - if (demote_ok(gl)) - handle_callback(gl, LM_ST_UNLOCKED, 0, false); - WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags)); - __gfs2_glock_queue_work(gl, 0); + if (gl->gl_state != LM_ST_UNLOCKED) + request_demote(gl, LM_ST_UNLOCKED, 0, false); + gfs2_glock_queue_work(gl, 0); spin_unlock(&gl->gl_lockref.lock); cond_resched_lock(&lru_lock); } + return freed; } /** @@ -1983,30 +1924,21 @@ add_back_to_lru: * gfs2_dispose_glock_lru() above. */ -static long gfs2_scan_glock_lru(int nr) +static unsigned long gfs2_scan_glock_lru(unsigned long nr) { - struct gfs2_glock *gl; - LIST_HEAD(skipped); + struct gfs2_glock *gl, *next; LIST_HEAD(dispose); - long freed = 0; + unsigned long freed = 0; spin_lock(&lru_lock); - while ((nr-- >= 0) && !list_empty(&lru_list)) { - gl = list_first_entry(&lru_list, struct gfs2_glock, gl_lru); - - /* Test for being demotable */ - if (!test_bit(GLF_LOCK, &gl->gl_flags)) { + list_for_each_entry_safe(gl, next, &lru_list, gl_lru) { + if (!nr--) + break; + if (can_free_glock(gl)) list_move(&gl->gl_lru, &dispose); - atomic_dec(&lru_count); - freed++; - continue; - } - - list_move(&gl->gl_lru, &skipped); } - list_splice(&skipped, &lru_list); if (!list_empty(&dispose)) - gfs2_dispose_glock_lru(&dispose); + freed = gfs2_dispose_glock_lru(&dispose); spin_unlock(&lru_lock); return freed; @@ -2026,11 +1958,7 @@ static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink, return vfs_pressure_ratio(atomic_read(&lru_count)); } -static struct shrinker glock_shrinker = { - .seeks = DEFAULT_SEEKS, - .count_objects = gfs2_glock_shrink_count, - .scan_objects = gfs2_glock_shrink_scan, -}; +static struct shrinker *glock_shrinker; /** * glock_hash_walk - Call a function for glock in a hash bucket @@ -2063,37 +1991,21 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) rhashtable_walk_exit(&iter); } -bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay) -{ - bool queued; - - spin_lock(&gl->gl_lockref.lock); - queued = queue_delayed_work(gfs2_delete_workqueue, - &gl->gl_delete, delay); - if (queued) - set_bit(GLF_PENDING_DELETE, &gl->gl_flags); - spin_unlock(&gl->gl_lockref.lock); - return queued; -} - void gfs2_cancel_delete_work(struct gfs2_glock *gl) { - if (cancel_delayed_work(&gl->gl_delete)) { - clear_bit(GLF_PENDING_DELETE, &gl->gl_flags); + clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags); + clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags); + if (cancel_delayed_work(&gl->gl_delete)) gfs2_glock_put(gl); - } -} - -bool gfs2_delete_work_queued(const struct gfs2_glock *gl) -{ - return test_bit(GLF_PENDING_DELETE, &gl->gl_flags); } static void flush_delete_work(struct gfs2_glock *gl) { if (gl->gl_name.ln_type == LM_TYPE_IOPEN) { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + if 
(cancel_delayed_work(&gl->gl_delete)) { - queue_delayed_work(gfs2_delete_workqueue, + queue_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, 0); } } @@ -2102,7 +2014,7 @@ static void flush_delete_work(struct gfs2_glock *gl) void gfs2_flush_delete_work(struct gfs2_sbd *sdp) { glock_hash_walk(flush_delete_work, sdp); - flush_workqueue(gfs2_delete_workqueue); + flush_workqueue(sdp->sd_delete_wq); } /** @@ -2113,12 +2025,16 @@ void gfs2_flush_delete_work(struct gfs2_sbd *sdp) static void thaw_glock(struct gfs2_glock *gl) { - if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) + if (!test_and_clear_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags)) return; if (!lockref_get_not_dead(&gl->gl_lockref)) return; - set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + + gfs2_glock_remove_from_lru(gl); + spin_lock(&gl->gl_lockref.lock); + set_bit(GLF_HAVE_REPLY, &gl->gl_flags); gfs2_glock_queue_work(gl, 0); + spin_unlock(&gl->gl_lockref.lock); } /** @@ -2135,8 +2051,8 @@ static void clear_glock(struct gfs2_glock *gl) if (!__lockref_is_dead(&gl->gl_lockref)) { gl->gl_lockref.count++; if (gl->gl_state != LM_ST_UNLOCKED) - handle_callback(gl, LM_ST_UNLOCKED, 0, false); - __gfs2_glock_queue_work(gl, 0); + request_demote(gl, LM_ST_UNLOCKED, 0, false); + gfs2_glock_queue_work(gl, 0); } spin_unlock(&gl->gl_lockref.lock); } @@ -2164,18 +2080,26 @@ static void dump_glock_func(struct gfs2_glock *gl) dump_glock(NULL, gl, true); } -static void withdraw_dq(struct gfs2_glock *gl) +static void withdraw_glock(struct gfs2_glock *gl) { spin_lock(&gl->gl_lockref.lock); - if (!__lockref_is_dead(&gl->gl_lockref) && - glock_blocked_by_withdraw(gl)) + if (!__lockref_is_dead(&gl->gl_lockref)) { + /* + * We don't want to write back any more dirty data. Unlock the + * remaining inode and resource group glocks; this will cause + * their ->go_inval() hooks to toss out all the remaining + * cached data, dirty or not. + */ + if (gl->gl_ops->go_inval && gl->gl_state != LM_ST_UNLOCKED) + request_demote(gl, LM_ST_UNLOCKED, 0, false); do_error(gl, LM_OUT_ERROR); /* remove pending waiters */ + } spin_unlock(&gl->gl_lockref.lock); } -void gfs2_gl_dq_holders(struct gfs2_sbd *sdp) +void gfs2_withdraw_glocks(struct gfs2_sbd *sdp) { - glock_hash_walk(withdraw_dq, sdp); + glock_hash_walk(withdraw_glock, sdp); } /** @@ -2187,14 +2111,31 @@ void gfs2_gl_dq_holders(struct gfs2_sbd *sdp) void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) { + unsigned long start = jiffies; + bool timed_out = false; + set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); - flush_workqueue(glock_workqueue); + flush_workqueue(sdp->sd_glock_wq); glock_hash_walk(clear_glock, sdp); - flush_workqueue(glock_workqueue); - wait_event_timeout(sdp->sd_glock_wait, - atomic_read(&sdp->sd_glock_disposal) == 0, - HZ * 600); + flush_workqueue(sdp->sd_glock_wq); + + while (!timed_out) { + wait_event_timeout(sdp->sd_kill_wait, + !atomic_read(&sdp->sd_glock_disposal), + HZ * 60); + if (!atomic_read(&sdp->sd_glock_disposal)) + break; + timed_out = time_after(jiffies, start + (HZ * 600)); + fs_warn(sdp, "%u glocks left after %u seconds%s\n", + atomic_read(&sdp->sd_glock_disposal), + jiffies_to_msecs(jiffies - start) / 1000, + timed_out ? 
":" : "; still waiting"); + } + gfs2_lm_unmount(sdp); + gfs2_free_dead_glocks(sdp); glock_hash_walk(dump_glock_func, sdp); + destroy_workqueue(sdp->sd_glock_wq); + sdp->sd_glock_wq = NULL; } static const char *state2str(unsigned state) @@ -2219,12 +2160,10 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) *p++ = 't'; if (flags & LM_FLAG_TRY_1CB) *p++ = 'T'; - if (flags & LM_FLAG_NOEXP) + if (flags & LM_FLAG_RECOVER) *p++ = 'e'; if (flags & LM_FLAG_ANY) *p++ = 'A'; - if (flags & LM_FLAG_PRIORITY) - *p++ = 'p'; if (flags & LM_FLAG_NODE_SCOPE) *p++ = 'n'; if (flags & GL_ASYNC) @@ -2292,13 +2231,13 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'y'; if (test_bit(GLF_LFLUSH, gflags)) *p++ = 'f'; - if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags)) - *p++ = 'i'; - if (test_bit(GLF_REPLY_PENDING, gflags)) + if (test_bit(GLF_PENDING_REPLY, gflags)) + *p++ = 'R'; + if (test_bit(GLF_HAVE_REPLY, gflags)) *p++ = 'r'; if (test_bit(GLF_INITIAL, gflags)) - *p++ = 'I'; - if (test_bit(GLF_FROZEN, gflags)) + *p++ = 'a'; + if (test_bit(GLF_HAVE_FROZEN_REPLY, gflags)) *p++ = 'F'; if (!list_empty(&gl->gl_holders)) *p++ = 'q'; @@ -2308,14 +2247,18 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'o'; if (test_bit(GLF_BLOCKING, gflags)) *p++ = 'b'; - if (test_bit(GLF_PENDING_DELETE, gflags)) - *p++ = 'P'; - if (test_bit(GLF_FREEING, gflags)) - *p++ = 'x'; if (test_bit(GLF_INSTANTIATE_NEEDED, gflags)) *p++ = 'n'; if (test_bit(GLF_INSTANTIATE_IN_PROG, gflags)) *p++ = 'N'; + if (test_bit(GLF_TRY_TO_EVICT, gflags)) + *p++ = 'e'; + if (test_bit(GLF_VERIFY_DELETE, gflags)) + *p++ = 'E'; + if (test_bit(GLF_DEFER_DELETE, gflags)) + *p++ = 's'; + if (test_bit(GLF_CANCELING, gflags)) + *p++ = 'C'; *p = 0; return buf; } @@ -2459,28 +2402,16 @@ int __init gfs2_glock_init(void) if (ret < 0) return ret; - glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | - WQ_HIGHPRI | WQ_FREEZABLE, 0); - if (!glock_workqueue) { - rhashtable_destroy(&gl_hash_table); - return -ENOMEM; - } - gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", - WQ_MEM_RECLAIM | WQ_FREEZABLE, - 0); - if (!gfs2_delete_workqueue) { - destroy_workqueue(glock_workqueue); + glock_shrinker = shrinker_alloc(0, "gfs2-glock"); + if (!glock_shrinker) { rhashtable_destroy(&gl_hash_table); return -ENOMEM; } - ret = register_shrinker(&glock_shrinker, "gfs2-glock"); - if (ret) { - destroy_workqueue(gfs2_delete_workqueue); - destroy_workqueue(glock_workqueue); - rhashtable_destroy(&gl_hash_table); - return ret; - } + glock_shrinker->count_objects = gfs2_glock_shrink_count; + glock_shrinker->scan_objects = gfs2_glock_shrink_scan; + + shrinker_register(glock_shrinker); for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++) init_waitqueue_head(glock_wait_table + i); @@ -2490,10 +2421,8 @@ int __init gfs2_glock_init(void) void gfs2_glock_exit(void) { - unregister_shrinker(&glock_shrinker); + shrinker_free(glock_shrinker); rhashtable_destroy(&gl_hash_table); - destroy_workqueue(glock_workqueue); - destroy_workqueue(gfs2_delete_workqueue); } static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n) @@ -2503,8 +2432,7 @@ static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n) if (gl) { if (n == 0) return; - if (!lockref_put_not_zero(&gl->gl_lockref)) - gfs2_glock_queue_put(gl); + gfs2_glock_put_async(gl); } for (;;) { gl = rhashtable_walk_next(&gi->hti); @@ -2726,22 +2654,18 @@ static struct file *gfs2_glockfd_next_file(struct gfs2_glockfd_iter *i) 
i->file = NULL; } - rcu_read_lock(); for(;; i->fd++) { - struct inode *inode; - - i->file = task_lookup_next_fd_rcu(i->task, &i->fd); + i->file = fget_task_next(i->task, &i->fd); if (!i->file) { i->fd = 0; break; } - inode = file_inode(i->file); - if (inode->i_sb != i->sb) - continue; - if (get_file_rcu(i->file)) + + if (file_inode(i->file)->i_sb == i->sb) break; + + fput(i->file); } - rcu_read_unlock(); return i->file; } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index f37ac087e2c1..55d5985f32a0 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -58,23 +58,19 @@ enum { * LM_FLAG_TRY_1CB * Send one blocking callback if TRY is set and the lock is not granted. * - * LM_FLAG_NOEXP + * LM_FLAG_RECOVER * GFS sets this flag on lock requests it makes while doing journal recovery. - * These special requests should not be blocked due to the recovery like - * ordinary locks would be. + * While ordinary requests are blocked until the end of recovery, requests + * with this flag set do proceed. * * LM_FLAG_ANY * A SHARED request may also be granted in DEFERRED, or a DEFERRED request may * also be granted in SHARED. The preferred state is whichever is compatible * with other granted locks, or the specified state if no other locks exist. * - * LM_FLAG_PRIORITY - * Override fairness considerations. Suppose a lock is held in a shared state - * and there is a pending request for the deferred state. A shared lock - * request with the priority flag would be allowed to bypass the deferred - * request and directly join the other shared lock. A shared lock request - * without the priority flag might be forced to wait until the deferred - * requested had acquired and released the lock. + * In addition, when a lock is already held in EX mode locally, a SHARED or + * DEFERRED mode request with the LM_FLAG_ANY flag set will be granted. + * (The LM_FLAG_ANY flag is only used for SHARED mode requests currently.) * * LM_FLAG_NODE_SCOPE * This holder agrees to share the lock within this node. In other words, @@ -84,15 +80,15 @@ enum { #define LM_FLAG_TRY 0x0001 #define LM_FLAG_TRY_1CB 0x0002 -#define LM_FLAG_NOEXP 0x0004 +#define LM_FLAG_RECOVER 0x0004 #define LM_FLAG_ANY 0x0008 -#define LM_FLAG_PRIORITY 0x0010 #define LM_FLAG_NODE_SCOPE 0x0020 #define GL_ASYNC 0x0040 #define GL_EXACT 0x0080 #define GL_SKIP 0x0100 #define GL_NOPID 0x0200 #define GL_NOCACHE 0x0400 +#define GL_NOBLOCK 0x0800 /* * lm_async_cb return flags * * LM_OUT_ST_MASK * Masks the lower two bits of lock state in the returned value. * + * LM_OUT_TRY_AGAIN + * The trylock request failed. + * + * LM_OUT_DEADLOCK + * The lock request failed because it would deadlock. + * * LM_OUT_CANCELED * The lock request was canceled. * + * LM_OUT_ERROR + * The lock request timed out or failed.
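Decoding such a reply follows directly from the layout described here: the low two bits carry the resulting lock state, everything above them is a status flag. A sketch of the consumer side, compare finish_xmote() in glock.c earlier in this diff (decode_reply() is not a real helper):

	static void decode_reply(struct gfs2_glock *gl, unsigned int ret)
	{
		if (!(ret & ~LM_OUT_ST_MASK)) {
			/* Plain reply: the request was granted. */
			unsigned int state = ret & LM_OUT_ST_MASK;
			state_change(gl, state);
		} else if (ret & LM_OUT_CANCELED) {
			/* The request was canceled; wake the canceled waiter. */
		} else if (ret & (LM_OUT_ERROR | LM_OUT_TRY_AGAIN | LM_OUT_DEADLOCK)) {
			/* Failed or timed-out request: report it and fail
			 * queued try locks. */
		}
	}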
*/ #define LM_OUT_ST_MASK 0x00000003 +#define LM_OUT_TRY_AGAIN 0x00000020 +#define LM_OUT_DEADLOCK 0x00000010 #define LM_OUT_CANCELED 0x00000008 #define LM_OUT_ERROR 0x00000004 @@ -130,7 +136,7 @@ struct lm_lockops { void (*lm_first_done) (struct gfs2_sbd *sdp); void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid, unsigned int result); - void (*lm_unmount) (struct gfs2_sbd *sdp); + void (*lm_unmount) (struct gfs2_sbd *sdp, bool clean); void (*lm_withdraw) (struct gfs2_sbd *sdp); void (*lm_put_lock) (struct gfs2_glock *gl); int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, @@ -144,7 +150,6 @@ struct gfs2_glock_aspace { struct address_space mapping; }; -extern struct workqueue_struct *gfs2_delete_workqueue; static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) { struct gfs2_holder *gh; @@ -166,21 +171,6 @@ out: return gh; } -static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl) -{ - return gl->gl_state == LM_ST_EXCLUSIVE; -} - -static inline int gfs2_glock_is_held_dfrd(struct gfs2_glock *gl) -{ - return gl->gl_state == LM_ST_DEFERRED; -} - -static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl) -{ - return gl->gl_state == LM_ST_SHARED; -} - static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) { if (gl->gl_ops->go_flags & GLOF_ASPACE) { @@ -191,40 +181,40 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) return NULL; } -extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, - const struct gfs2_glock_operations *glops, - int create, struct gfs2_glock **glp); -extern struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl); -extern void gfs2_glock_put(struct gfs2_glock *gl); -extern void gfs2_glock_queue_put(struct gfs2_glock *gl); +int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, + const struct gfs2_glock_operations *glops, + int create, struct gfs2_glock **glp); +struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl); +void gfs2_glock_put(struct gfs2_glock *gl); +void gfs2_glock_put_async(struct gfs2_glock *gl); -extern void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, - u16 flags, struct gfs2_holder *gh, - unsigned long ip); +void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, + u16 flags, struct gfs2_holder *gh, + unsigned long ip); static inline void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, struct gfs2_holder *gh) { __gfs2_holder_init(gl, state, flags, gh, _RET_IP_); } -extern void gfs2_holder_reinit(unsigned int state, u16 flags, - struct gfs2_holder *gh); -extern void gfs2_holder_uninit(struct gfs2_holder *gh); -extern int gfs2_glock_nq(struct gfs2_holder *gh); -extern int gfs2_glock_poll(struct gfs2_holder *gh); -extern int gfs2_instantiate(struct gfs2_holder *gh); -extern int gfs2_glock_holder_ready(struct gfs2_holder *gh); -extern int gfs2_glock_wait(struct gfs2_holder *gh); -extern int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs); -extern void gfs2_glock_dq(struct gfs2_holder *gh); -extern void gfs2_glock_dq_wait(struct gfs2_holder *gh); -extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh); -extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, - const struct gfs2_glock_operations *glops, - unsigned int state, u16 flags, - struct gfs2_holder *gh); -extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); -extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); -extern void gfs2_dump_glock(struct seq_file 
*seq, struct gfs2_glock *gl, +void gfs2_holder_reinit(unsigned int state, u16 flags, + struct gfs2_holder *gh); +void gfs2_holder_uninit(struct gfs2_holder *gh); +int gfs2_glock_nq(struct gfs2_holder *gh); +int gfs2_glock_poll(struct gfs2_holder *gh); +int gfs2_instantiate(struct gfs2_holder *gh); +int gfs2_glock_holder_ready(struct gfs2_holder *gh); +int gfs2_glock_wait(struct gfs2_holder *gh); +int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs); +void gfs2_glock_dq(struct gfs2_holder *gh); +void gfs2_glock_dq_wait(struct gfs2_holder *gh); +void gfs2_glock_dq_uninit(struct gfs2_holder *gh); +int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, + const struct gfs2_glock_operations *glops, + unsigned int state, u16 flags, + struct gfs2_holder *gh); +int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); +void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); +void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid); #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { \ gfs2_dump_glock(NULL, gl, true); \ @@ -238,7 +228,7 @@ extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, gfs2_assert_withdraw((gl)->gl_name.ln_sbd, (x)); } } \ while (0) -extern __printf(2, 3) +__printf(2, 3) void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); /** @@ -266,28 +256,28 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl, return error; } -extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); -extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret); -extern bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay); -extern void gfs2_cancel_delete_work(struct gfs2_glock *gl); -extern bool gfs2_delete_work_queued(const struct gfs2_glock *gl); -extern void gfs2_flush_delete_work(struct gfs2_sbd *sdp); -extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); -extern void gfs2_gl_dq_holders(struct gfs2_sbd *sdp); -extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); -extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl); -extern void gfs2_glock_free(struct gfs2_glock *gl); - -extern int __init gfs2_glock_init(void); -extern void gfs2_glock_exit(void); - -extern void gfs2_create_debugfs_file(struct gfs2_sbd *sdp); -extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); -extern void gfs2_register_debugfs(void); -extern void gfs2_unregister_debugfs(void); - -extern void glock_set_object(struct gfs2_glock *gl, void *object); -extern void glock_clear_object(struct gfs2_glock *gl, void *object); +void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); +void gfs2_glock_complete(struct gfs2_glock *gl, int ret); +bool gfs2_queue_try_to_evict(struct gfs2_glock *gl); +bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later); +void gfs2_cancel_delete_work(struct gfs2_glock *gl); +void gfs2_flush_delete_work(struct gfs2_sbd *sdp); +void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); +void gfs2_withdraw_glocks(struct gfs2_sbd *sdp); +void gfs2_glock_thaw(struct gfs2_sbd *sdp); +void gfs2_glock_free(struct gfs2_glock *gl); +void gfs2_glock_free_later(struct gfs2_glock *gl); + +int __init gfs2_glock_init(void); +void gfs2_glock_exit(void); + +void gfs2_create_debugfs_file(struct gfs2_sbd *sdp); +void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); +void gfs2_register_debugfs(void); +void gfs2_unregister_debugfs(void); + +void glock_set_object(struct gfs2_glock *gl, void *object); +void glock_clear_object(struct gfs2_glock *gl, void *object); extern const struct lm_lockops 
gfs2_dlm_ops; @@ -306,7 +296,13 @@ static inline bool gfs2_holder_queued(struct gfs2_holder *gh) return !list_empty(&gh->gh_list); } -extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation); -extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation); +void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation); +bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation); + +static inline bool glock_needs_demote(struct gfs2_glock *gl) +{ + return (test_bit(GLF_DEMOTE, &gl->gl_flags) || + test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)); +} #endif /* __GLOCK_DOT_H__ */ diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index d78b61ecc1cd..2173ccf5034b 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -11,6 +11,7 @@ #include <linux/bio.h> #include <linux/posix_acl.h> #include <linux/security.h> +#include <linux/log2.h> #include "gfs2.h" #include "incore.h" @@ -29,8 +30,6 @@ struct workqueue_struct *gfs2_freeze_wq; -extern struct workqueue_struct *gfs2_control_wq; - static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -39,12 +38,12 @@ static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page " "state 0x%lx\n", bh, (unsigned long long)bh->b_blocknr, bh->b_state, - bh->b_page->mapping, bh->b_page->flags); + bh->b_folio->mapping, bh->b_folio->flags.f); fs_err(sdp, "AIL glock %u:%llu mapping %p\n", gl->gl_name.ln_type, gl->gl_name.ln_number, gfs2_glock2aspace(gl)); gfs2_lm(sdp, "AIL error\n"); - gfs2_withdraw_delayed(sdp); + gfs2_withdraw(sdp); } /** @@ -90,7 +89,7 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl) struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_trans tr; unsigned int revokes; - int ret; + int ret = 0; revokes = atomic_read(&gl->gl_ail_count); @@ -124,15 +123,18 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl) memset(&tr, 0, sizeof(tr)); set_bit(TR_ONSTACK, &tr.tr_flags); ret = __gfs2_trans_begin(&tr, sdp, 0, revokes, _RET_IP_); - if (ret) + if (ret) { + fs_err(sdp, "Transaction error %d: Unable to write revokes.", ret); goto flush; + } __gfs2_ail_flush(gl, 0, revokes); gfs2_trans_end(sdp); flush: - gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | - GFS2_LFC_AIL_EMPTY_GL); - return 0; + if (!ret) + gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | + GFS2_LFC_AIL_EMPTY_GL); + return ret; } void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) @@ -162,7 +164,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) static int gfs2_rgrp_metasync(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct address_space *metamapping = &sdp->sd_aspace; + struct address_space *metamapping = gfs2_aspace(sdp); struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); const unsigned bsize = sdp->sd_sb.sb_bsize; loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK; @@ -193,7 +195,7 @@ static int rgrp_go_sync(struct gfs2_glock *gl) struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); int error; - if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) + if (!rgd || !test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) return 0; GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); @@ -219,18 +221,22 @@ static int rgrp_go_sync(struct gfs2_glock *gl) static void rgrp_go_inval(struct gfs2_glock *gl, int flags) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct address_space *mapping = &sdp->sd_aspace; + struct address_space *mapping = gfs2_aspace(sdp); struct 
gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); const unsigned bsize = sdp->sd_sb.sb_bsize; - loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK; - loff_t end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1; + loff_t start, end; + if (!rgd) + return; + start = (rgd->rd_addr * bsize) & PAGE_MASK; + end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1; gfs2_rgrp_brelse(rgd); WARN_ON_ONCE(!(flags & DIO_METADATA)); + gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); truncate_inode_pages_range(mapping, start, end); } -static void gfs2_rgrp_go_dump(struct seq_file *seq, struct gfs2_glock *gl, +static void gfs2_rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl, const char *fs_id_buf) { struct gfs2_rgrpd *rgd = gl->gl_object; @@ -323,7 +329,9 @@ static int inode_go_sync(struct gfs2_glock *gl) ret = gfs2_inode_metasync(gl); if (!error) error = ret; - gfs2_ail_empty_gl(gl); + ret = gfs2_ail_empty_gl(gl); + if (!error) + error = ret; /* * Writeback of the data mapping may cause the dirty flag to be set * so we have to clear it again here. @@ -351,6 +359,8 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) { struct gfs2_inode *ip = gfs2_glock2inode(gl); + gfs2_assert_withdraw(gl->gl_name.ln_sbd, !atomic_read(&gl->gl_ail_count)); + if (flags & DIO_METADATA) { struct address_space *mapping = gfs2_glock2aspace(gl); truncate_inode_pages(mapping, 0); @@ -374,36 +384,24 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) gfs2_clear_glop_pending(ip); } -/** - * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock - * @gl: the glock - * - * Returns: 1 if it's ok - */ - -static int inode_go_demote_ok(const struct gfs2_glock *gl) -{ - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - - if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) - return 0; - - return 1; -} - static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) { + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); const struct gfs2_dinode *str = buf; - struct timespec64 atime; + struct timespec64 atime, iatime; u16 height, depth; umode_t mode = be32_to_cpu(str->di_mode); struct inode *inode = &ip->i_inode; - bool is_new = inode->i_state & I_NEW; + bool is_new = inode_state_read_once(inode) & I_NEW; - if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) - goto corrupt; - if (unlikely(!is_new && inode_wrong_type(inode, mode))) - goto corrupt; + if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) { + gfs2_consist_inode(ip); + return -EIO; + } + if (unlikely(!is_new && inode_wrong_type(inode, mode))) { + gfs2_consist_inode(ip); + return -EIO; + } ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); inode->i_mode = mode; if (is_new) { @@ -424,12 +422,13 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) gfs2_set_inode_blocks(inode, be64_to_cpu(str->di_blocks)); atime.tv_sec = be64_to_cpu(str->di_atime); atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); - if (timespec64_compare(&inode->i_atime, &atime) < 0) - inode->i_atime = atime; - inode->i_mtime.tv_sec = be64_to_cpu(str->di_mtime); - inode->i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); - inode->i_ctime.tv_sec = be64_to_cpu(str->di_ctime); - inode->i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); + iatime = inode_get_atime(inode); + if (timespec64_compare(&iatime, &atime) < 0) + inode_set_atime_to_ts(inode, atime); + inode_set_mtime(inode, be64_to_cpu(str->di_mtime), + be32_to_cpu(str->di_mtime_nsec)); + inode_set_ctime(inode, 
be64_to_cpu(str->di_ctime), + be32_to_cpu(str->di_ctime_nsec)); ip->i_goal = be64_to_cpu(str->di_goal_meta); ip->i_generation = be64_to_cpu(str->di_generation); @@ -439,26 +438,33 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */ gfs2_set_inode_flags(inode); height = be16_to_cpu(str->di_height); - if (unlikely(height > GFS2_MAX_META_HEIGHT)) - goto corrupt; + if (unlikely(height > sdp->sd_max_height)) { + gfs2_consist_inode(ip); + return -EIO; + } ip->i_height = (u8)height; depth = be16_to_cpu(str->di_depth); - if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) - goto corrupt; + if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) { + gfs2_consist_inode(ip); + return -EIO; + } + if ((ip->i_diskflags & GFS2_DIF_EXHASH) && + depth < ilog2(sdp->sd_hash_ptrs)) { + gfs2_consist_inode(ip); + return -EIO; + } ip->i_depth = (u8)depth; ip->i_entries = be32_to_cpu(str->di_entries); - if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip)) - goto corrupt; - + if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip)) { + gfs2_consist_inode(ip); + return -EIO; + } if (S_ISREG(inode->i_mode)) gfs2_set_aops(inode); return 0; -corrupt: - gfs2_consist_inode(ip); - return -EIO; } /** @@ -468,7 +474,7 @@ corrupt: * Returns: errno */ -int gfs2_inode_refresh(struct gfs2_inode *ip) +static int gfs2_inode_refresh(struct gfs2_inode *ip) { struct buffer_head *dibh; int error; @@ -484,7 +490,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) /** * inode_go_instantiate - read in an inode if necessary - * @gh: The glock holder + * @gl: The glock * * Returns: errno */ @@ -492,11 +498,18 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) static int inode_go_instantiate(struct gfs2_glock *gl) { struct gfs2_inode *ip = gl->gl_object; + struct gfs2_glock *io_gl; + int error; if (!ip) /* no inode to populate - read it in later */ return 0; - return gfs2_inode_refresh(ip); + error = gfs2_inode_refresh(ip); + if (error) + return error; + io_gl = ip->i_iopen_gh.gh_gl; + io_gl->gl_no_formal_ino = ip->i_no_formal_ino; + return 0; } static int inode_go_held(struct gfs2_holder *gh) @@ -527,71 +540,54 @@ static int inode_go_held(struct gfs2_holder *gh) * */ -static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl, +static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl, const char *fs_id_buf) { struct gfs2_inode *ip = gl->gl_object; - struct inode *inode = &ip->i_inode; - unsigned long nrpages; + const struct inode *inode = &ip->i_inode; if (ip == NULL) return; - xa_lock_irq(&inode->i_data.i_pages); - nrpages = inode->i_data.nrpages; - xa_unlock_irq(&inode->i_data.i_pages); - gfs2_print_dbg(seq, "%s I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu " "p:%lu\n", fs_id_buf, (unsigned long long)ip->i_no_formal_ino, (unsigned long long)ip->i_no_addr, - IF2DT(ip->i_inode.i_mode), ip->i_flags, + IF2DT(inode->i_mode), ip->i_flags, (unsigned int)ip->i_diskflags, - (unsigned long long)i_size_read(inode), nrpages); + (unsigned long long)i_size_read(inode), + inode->i_data.nrpages); } /** - * freeze_go_sync - promote/demote the freeze glock + * freeze_go_callback - A cluster node is requesting a freeze * @gl: the glock + * @remote: true if this came from a different cluster node */ -static int freeze_go_sync(struct gfs2_glock *gl) +static void freeze_go_callback(struct gfs2_glock *gl, bool remote) { - int error = 0; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + struct super_block *sb = sdp->sd_vfs; + + if (!remote || + 
(gl->gl_state != LM_ST_SHARED && + gl->gl_state != LM_ST_UNLOCKED) || + gl->gl_demote_state != LM_ST_UNLOCKED) + return; /* - * We need to check gl_state == LM_ST_SHARED here and not gl_req == - * LM_ST_EXCLUSIVE. That's because when any node does a freeze, - * all the nodes should have the freeze glock in SH mode and they all - * call do_xmote: One for EX and the others for UN. They ALL must - * freeze locally, and they ALL must queue freeze work. The freeze_work - * calls freeze_func, which tries to reacquire the freeze glock in SH, - * effectively waiting for the thaw on the node who holds it in EX. - * Once thawed, the work func acquires the freeze glock in - * SH and everybody goes back to thawed. + * Try to get an active super block reference to prevent racing with + * unmount (see super_trylock_shared()). But note that unmount isn't + * the only place where a write lock on s_umount is taken, and we can + * fail here because of things like remount as well. */ - if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) && - !test_bit(SDF_NORECOVERY, &sdp->sd_flags)) { - atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); - error = freeze_super(sdp->sd_vfs); - if (error) { - fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", - error); - if (gfs2_withdrawn(sdp)) { - atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); - return 0; - } - gfs2_assert_withdraw(sdp, 0); - } - queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work); - if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) - gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | - GFS2_LFC_FREEZE_GO_SYNC); - else /* read-only mounts */ - atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); + if (down_read_trylock(&sb->s_umount)) { + atomic_inc(&sb->s_active); + up_read(&sb->s_umount); + if (!queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work)) + deactivate_super(sb); } - return 0; } /** @@ -609,31 +605,18 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl) if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); - error = gfs2_find_jhead(sdp->sd_jdesc, &head, false); - if (gfs2_assert_withdraw_delayed(sdp, !error)) + error = gfs2_find_jhead(sdp->sd_jdesc, &head); + if (gfs2_assert_withdraw(sdp, !error)) return error; - if (gfs2_assert_withdraw_delayed(sdp, head.lh_flags & - GFS2_LOG_HEAD_UNMOUNT)) + if (gfs2_assert_withdraw(sdp, head.lh_flags & + GFS2_LOG_HEAD_UNMOUNT)) return -EIO; - sdp->sd_log_sequence = head.lh_sequence + 1; - gfs2_log_pointers_init(sdp, head.lh_blkno); + gfs2_log_pointers_init(sdp, &head); } return 0; } /** - * freeze_go_demote_ok - * @gl: the glock - * - * Always returns 0 - */ - -static int freeze_go_demote_ok(const struct gfs2_glock *gl) -{ - return 0; -} - -/** * iopen_go_callback - schedule the dcache entry for the inode to be deleted * @gl: the glock * @remote: true if this came from a different cluster node @@ -645,105 +628,29 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) struct gfs2_inode *ip = gl->gl_object; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - if (!remote || sb_rdonly(sdp->sd_vfs)) + if (!remote || test_bit(SDF_KILL, &sdp->sd_flags)) return; if (gl->gl_demote_state == LM_ST_UNLOCKED && gl->gl_state == LM_ST_SHARED && ip) { gl->gl_lockref.count++; - if (!queue_delayed_work(gfs2_delete_workqueue, - &gl->gl_delete, 0)) + if (!gfs2_queue_try_to_evict(gl)) gl->gl_lockref.count--; } } -static int iopen_go_demote_ok(const struct gfs2_glock *gl) -{ - return !gfs2_delete_work_queued(gl); -} - -/** - * inode_go_free - wake up anyone waiting for dlm's 
unlock ast to free it - * @gl: glock being freed - * - * For now, this is only used for the journal inode glock. In withdraw - * situations, we need to wait for the glock to be freed so that we know - * other nodes may proceed with recovery / journal replay. - */ -static void inode_go_free(struct gfs2_glock *gl) -{ - /* Note that we cannot reference gl_object because it's already set - * to NULL by this point in its lifecycle. */ - if (!test_bit(GLF_FREEING, &gl->gl_flags)) - return; - clear_bit_unlock(GLF_FREEING, &gl->gl_flags); - wake_up_bit(&gl->gl_flags, GLF_FREEING); -} - -/** - * nondisk_go_callback - used to signal when a node did a withdraw - * @gl: the nondisk glock - * @remote: true if this came from a different cluster node - * - */ -static void nondisk_go_callback(struct gfs2_glock *gl, bool remote) -{ - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - - /* Ignore the callback unless it's from another node, and it's the - live lock. */ - if (!remote || gl->gl_name.ln_number != GFS2_LIVE_LOCK) - return; - - /* First order of business is to cancel the demote request. We don't - * really want to demote a nondisk glock. At best it's just to inform - * us of another node's withdraw. We'll keep it in SH mode. */ - clear_bit(GLF_DEMOTE, &gl->gl_flags); - clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); - - /* Ignore the unlock if we're withdrawn, unmounting, or in recovery. */ - if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || - test_bit(SDF_WITHDRAWN, &sdp->sd_flags) || - test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) - return; - - /* We only care when a node wants us to unlock, because that means - * they want a journal recovered. */ - if (gl->gl_demote_state != LM_ST_UNLOCKED) - return; - - if (sdp->sd_args.ar_spectator) { - fs_warn(sdp, "Spectator node cannot recover journals.\n"); - return; - } - - fs_warn(sdp, "Some node has withdrawn; checking for recovery.\n"); - set_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags); - /* - * We can't call remote_withdraw directly here or gfs2_recover_journal - * because this is called from the glock unlock function and the - * remote_withdraw needs to enqueue and dequeue the same "live" glock - * we were called from. So we queue it to the control work queue in - * lock_dlm. 
- */ - queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0); -} - const struct gfs2_glock_operations gfs2_meta_glops = { .go_type = LM_TYPE_META, - .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_inode_glops = { .go_sync = inode_go_sync, .go_inval = inode_go_inval, - .go_demote_ok = inode_go_demote_ok, .go_instantiate = inode_go_instantiate, .go_held = inode_go_held, .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, - .go_flags = GLOF_ASPACE | GLOF_LRU | GLOF_LVB, - .go_free = inode_go_free, + .go_flags = GLOF_ASPACE | GLOF_LVB, }; const struct gfs2_glock_operations gfs2_rgrp_glops = { @@ -756,41 +663,33 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { }; const struct gfs2_glock_operations gfs2_freeze_glops = { - .go_sync = freeze_go_sync, .go_xmote_bh = freeze_go_xmote_bh, - .go_demote_ok = freeze_go_demote_ok, + .go_callback = freeze_go_callback, .go_type = LM_TYPE_NONDISK, - .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_iopen_glops = { .go_type = LM_TYPE_IOPEN, .go_callback = iopen_go_callback, .go_dump = inode_go_dump, - .go_demote_ok = iopen_go_demote_ok, - .go_flags = GLOF_LRU | GLOF_NONDISK, .go_subclass = 1, }; const struct gfs2_glock_operations gfs2_flock_glops = { .go_type = LM_TYPE_FLOCK, - .go_flags = GLOF_LRU | GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_nondisk_glops = { .go_type = LM_TYPE_NONDISK, - .go_flags = GLOF_NONDISK, - .go_callback = nondisk_go_callback, }; const struct gfs2_glock_operations gfs2_quota_glops = { .go_type = LM_TYPE_QUOTA, - .go_flags = GLOF_LVB | GLOF_LRU | GLOF_NONDISK, + .go_flags = GLOF_LVB, }; const struct gfs2_glock_operations gfs2_journal_glops = { .go_type = LM_TYPE_JOURNAL, - .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations *gfs2_glops_list[] = { diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h index 695898afcaf1..9341423798df 100644 --- a/fs/gfs2/glops.h +++ b/fs/gfs2/glops.h @@ -22,7 +22,7 @@ extern const struct gfs2_glock_operations gfs2_quota_glops; extern const struct gfs2_glock_operations gfs2_journal_glops; extern const struct gfs2_glock_operations *gfs2_glops_list[]; -extern int gfs2_inode_metasync(struct gfs2_glock *gl); -extern void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync); +int gfs2_inode_metasync(struct gfs2_glock *gl); +void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync); #endif /* __GLOPS_DOT_H__ */ diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index c26765080f28..d05d8fe4e456 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -218,20 +218,16 @@ struct gfs2_glock_operations { int (*go_sync) (struct gfs2_glock *gl); int (*go_xmote_bh)(struct gfs2_glock *gl); void (*go_inval) (struct gfs2_glock *gl, int flags); - int (*go_demote_ok) (const struct gfs2_glock *gl); int (*go_instantiate) (struct gfs2_glock *gl); int (*go_held)(struct gfs2_holder *gh); - void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl, + void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl, const char *fs_id_buf); void (*go_callback)(struct gfs2_glock *gl, bool remote); - void (*go_free)(struct gfs2_glock *gl); const int go_subclass; const int go_type; const unsigned long go_flags; #define GLOF_ASPACE 1 /* address space attached */ #define GLOF_LVB 2 /* Lock Value Block attached */ -#define GLOF_LRU 4 /* LRU managed */ -#define GLOF_NONDISK 8 /* not I/O related */ }; enum { @@ -321,16 +317,18 @@ enum { GLF_DEMOTE_IN_PROGRESS = 5, GLF_DIRTY = 6, GLF_LFLUSH = 7, - GLF_INVALIDATE_IN_PROGRESS = 8, - GLF_REPLY_PENDING = 9, + GLF_HAVE_REPLY = 
9, GLF_INITIAL = 10, - GLF_FROZEN = 11, + GLF_HAVE_FROZEN_REPLY = 11, GLF_INSTANTIATE_IN_PROG = 12, /* instantiate happening now */ GLF_LRU = 13, GLF_OBJECT = 14, /* Used only for tracing */ GLF_BLOCKING = 15, - GLF_PENDING_DELETE = 17, - GLF_FREEING = 18, /* Wait for glock to be freed */ + GLF_TRY_TO_EVICT = 17, /* iopen glocks only */ + GLF_VERIFY_DELETE = 18, /* iopen glocks only */ + GLF_PENDING_REPLY = 19, + GLF_DEFER_DELETE = 20, /* iopen glocks only */ + GLF_CANCELING = 21, }; struct gfs2_glock { @@ -373,11 +371,8 @@ struct gfs2_glock { enum { GIF_QD_LOCKED = 1, - GIF_ALLOC_FAILED = 2, GIF_SW_PAGED = 3, - GIF_FREE_VFS_INODE = 5, GIF_GLOP_PENDING = 6, - GIF_DEFERRED_DELETE = 7, }; struct gfs2_inode { @@ -451,7 +446,7 @@ struct gfs2_quota_data { s64 qd_change_sync; unsigned int qd_slot; - unsigned int qd_slot_count; + unsigned int qd_slot_ref; struct buffer_head *qd_bh; struct gfs2_quota_change *qd_bh_qc; @@ -522,8 +517,6 @@ struct gfs2_jdesc { struct list_head jd_revoke_list; unsigned int jd_replay_tail; - - u64 jd_no_addr; }; struct gfs2_statfs_change_host { @@ -536,6 +529,7 @@ struct gfs2_statfs_change_host { #define GFS2_QUOTA_OFF 0 #define GFS2_QUOTA_ACCOUNT 1 #define GFS2_QUOTA_ON 2 +#define GFS2_QUOTA_QUIET 3 /* on but not complaining */ #define GFS2_DATA_DEFAULT GFS2_DATA_ORDERED #define GFS2_DATA_WRITEBACK 1 @@ -543,8 +537,7 @@ struct gfs2_statfs_change_host { #define GFS2_ERRORS_DEFAULT GFS2_ERRORS_WITHDRAW #define GFS2_ERRORS_WITHDRAW 0 -#define GFS2_ERRORS_CONTINUE 1 /* place holder for future feature */ -#define GFS2_ERRORS_RO 2 /* place holder for future feature */ +#define GFS2_ERRORS_DEACTIVATE 1 #define GFS2_ERRORS_PANIC 3 struct gfs2_args { @@ -560,7 +553,7 @@ struct gfs2_args { unsigned int ar_data:2; /* ordered/writeback */ unsigned int ar_meta:1; /* mount metafs */ unsigned int ar_discard:1; /* discard requests */ - unsigned int ar_errors:2; /* errors=withdraw | panic */ + unsigned int ar_errors:2; /* errors=withdraw | deactivate | panic */ unsigned int ar_nobarrier:1; /* do not send barriers */ unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */ unsigned int ar_got_rgrplvb:1; /* Was the rgrplvb opt given? 
*/ @@ -586,6 +579,7 @@ struct gfs2_tune { unsigned int gt_complain_secs; unsigned int gt_statfs_quantum; unsigned int gt_statfs_slow; + unsigned int gt_withdraw_helper_timeout; }; enum { @@ -599,18 +593,10 @@ enum { SDF_RORECOVERY = 7, /* read only recovery */ SDF_SKIP_DLM_UNLOCK = 8, SDF_FORCE_AIL_FLUSH = 9, - SDF_FS_FROZEN = 10, - SDF_WITHDRAWING = 11, /* Will withdraw eventually */ - SDF_WITHDRAW_IN_PROG = 12, /* Withdraw is in progress */ - SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */ - SDF_WITHDRAW_RECOVERY = 14, /* Wait for journal recovery when we are - withdrawing */ -}; - -enum gfs2_freeze_state { - SFS_UNFROZEN = 0, - SFS_STARTING_FREEZE = 1, - SFS_FROZEN = 2, + SDF_FREEZE_INITIATOR = 10, + SDF_KILL = 15, + SDF_EVICTING = 16, + SDF_FROZEN = 17, }; #define GFS2_FSNAME_LEN 256 @@ -660,6 +646,8 @@ struct lm_lockstruct { struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */ char *ls_lvb_bits; + struct rw_semaphore ls_sem; + spinlock_t ls_recover_spin; /* protects following fields */ unsigned long ls_recover_flags; /* DFL_ */ uint32_t ls_recover_mount; /* gen in first recover_done cb */ @@ -718,11 +706,13 @@ struct gfs2_sbd { struct gfs2_glock *sd_rename_gl; struct gfs2_glock *sd_freeze_gl; struct work_struct sd_freeze_work; - wait_queue_head_t sd_glock_wait; + struct work_struct sd_withdraw_work; + wait_queue_head_t sd_kill_wait; wait_queue_head_t sd_async_glock_wait; atomic_t sd_glock_disposal; struct completion sd_locking_init; - struct completion sd_wdack; + struct completion sd_withdraw_helper; + int sd_withdraw_helper_status; struct delayed_work sd_control_work; /* Inode Stuff */ @@ -763,7 +753,6 @@ struct gfs2_sbd { struct gfs2_jdesc *sd_jdesc; struct gfs2_holder sd_journal_gh; struct gfs2_holder sd_jinode_gh; - struct gfs2_glock *sd_jinode_gl; struct gfs2_holder sd_sc_gh; struct buffer_head *sd_sc_bh; @@ -771,6 +760,11 @@ struct gfs2_sbd { struct completion sd_journal_ready; + /* Workqueue stuff */ + + struct workqueue_struct *sd_glock_wq; + struct workqueue_struct *sd_delete_wq; + /* Daemon stuff */ struct task_struct *sd_logd_process; @@ -780,7 +774,6 @@ struct gfs2_sbd { struct list_head sd_quota_list; atomic_t sd_quota_count; - struct mutex sd_quota_mutex; struct mutex sd_quota_sync_mutex; wait_queue_head_t sd_quota_wait; @@ -792,7 +785,7 @@ struct gfs2_sbd { /* Log stuff */ - struct address_space sd_aspace; + struct inode *sd_inode; spinlock_t sd_log_lock; @@ -821,7 +814,6 @@ struct gfs2_sbd { atomic_t sd_log_in_flight; wait_queue_head_t sd_log_flush_wait; int sd_log_error; /* First log error */ - wait_queue_head_t sd_withdraw_wait; unsigned int sd_log_tail; unsigned int sd_log_flush_tail; @@ -834,8 +826,8 @@ struct gfs2_sbd { /* For quiescing the filesystem */ struct gfs2_holder sd_freeze_gh; - atomic_t sd_freeze_state; struct mutex sd_freeze_mutex; + struct list_head sd_dead_glocks; char sd_fsname[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2]; char sd_table_name[GFS2_FSNAME_LEN]; @@ -845,9 +837,15 @@ struct gfs2_sbd { unsigned long sd_last_warning; struct dentry *debugfs_dir; /* debugfs directory */ - unsigned long sd_glock_dqs_held; }; +#define GFS2_BAD_INO 1 + +static inline struct address_space *gfs2_aspace(struct gfs2_sbd *sdp) +{ + return sdp->sd_inode->i_mapping; +} + static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which) { gl->gl_stats.stats[which]++; @@ -861,7 +859,7 @@ static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which) preempt_enable(); } -extern struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock 
*gl);
+struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl);
 
 static inline unsigned gfs2_max_stuffed_size(const struct gfs2_inode *ip)
 {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 614db3055c02..36618e353199 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -89,6 +89,19 @@ static int iget_set(struct inode *inode, void *opaque)
 	return 0;
 }
 
+void gfs2_setup_inode(struct inode *inode)
+{
+	gfp_t gfp_mask;
+
+	/*
+	 * Ensure all page cache allocations are done from GFP_NOFS context so
+	 * that direct reclaim cannot recurse back into the filesystem, blow
+	 * the stack, or deadlock.
+	 */
+	gfp_mask = mapping_gfp_mask(inode->i_mapping);
+	mapping_set_gfp_mask(inode->i_mapping, gfp_mask & ~__GFP_FS);
+}
+
 /**
 * gfs2_inode_lookup - Lookup an inode
 * @sb: The super block
@@ -127,11 +140,12 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 
 	ip = GFS2_I(inode);
 
-	if (inode->i_state & I_NEW) {
+	if (inode_state_read_once(inode) & I_NEW) {
 		struct gfs2_sbd *sdp = GFS2_SB(inode);
 		struct gfs2_glock *io_gl;
 		int extra_flags = 0;
 
+		gfs2_setup_inode(inode);
 		error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops,
				       CREATE, &ip->i_gl);
 		if (unlikely(error))
@@ -185,8 +199,9 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 			set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags);
 
 		/* Lowest possible timestamp; will be overwritten in gfs2_dinode_in. */
-		inode->i_atime.tv_sec = 1LL << (8 * sizeof(inode->i_atime.tv_sec) - 1);
-		inode->i_atime.tv_nsec = 0;
+		inode_set_atime(inode,
+				1LL << (8 * sizeof(inode_get_atime_sec(inode)) - 1),
+				0);
 
 		glock_set_object(ip->i_gl, ip);
 
@@ -225,6 +240,10 @@ fail:
 		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 	if (gfs2_holder_initialized(&i_gh))
 		gfs2_glock_dq_uninit(&i_gh);
+	if (ip->i_gl) {
+		gfs2_glock_put(ip->i_gl);
+		ip->i_gl = NULL;
+	}
 	iget_failed(inode);
 	return ERR_PTR(error);
 }
@@ -261,21 +280,28 @@ fail_iput:
 }
 
 
-struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
+/**
+ * gfs2_lookup_meta - Look up an inode in a metadata directory
+ * @dip: The directory
+ * @name: The name of the inode
+ */
+struct inode *gfs2_lookup_meta(struct inode *dip, const char *name)
 {
 	struct qstr qstr;
 	struct inode *inode;
+
 	gfs2_str2qstr(&qstr, name);
 	inode = gfs2_lookupi(dip, &qstr, 1);
-	/* gfs2_lookupi has inconsistent callers: vfs
-	 * related routines expect NULL for no entry found,
-	 * gfs2_lookup_simple callers expect ENOENT
-	 * and do not check for NULL.
+	if (IS_ERR_OR_NULL(inode))
+		return inode ? inode : ERR_PTR(-ENOENT);
+
+	/*
+	 * Must not call back into the filesystem when allocating
+	 * pages in the metadata inode's address space.
*/ - if (inode == NULL) - return ERR_PTR(-ENOENT); - else - return inode; + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + + return inode; } @@ -320,7 +346,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, } if (!is_root) { - error = gfs2_permission(&init_user_ns, dir, MAY_EXEC); + error = gfs2_permission(&nop_mnt_idmap, dir, MAY_EXEC); if (error) goto out; } @@ -350,7 +376,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, { int error; - error = gfs2_permission(&init_user_ns, &dip->i_inode, + error = gfs2_permission(&nop_mnt_idmap, &dip->i_inode, MAY_WRITE | MAY_EXEC); if (error) return error; @@ -407,7 +433,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) if (error) goto out_ipreserv; - error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1, &ip->i_generation); + error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1); if (error) goto out_trans_end; @@ -427,6 +453,72 @@ out: return error; } +static void gfs2_final_release_pages(struct gfs2_inode *ip) +{ + struct inode *inode = &ip->i_inode; + struct gfs2_glock *gl = ip->i_gl; + + /* This can only happen during incomplete inode creation. */ + if (unlikely(!gl)) + return; + + truncate_inode_pages(gfs2_glock2aspace(gl), 0); + truncate_inode_pages(&inode->i_data, 0); + + if (atomic_read(&gl->gl_revokes) == 0) { + clear_bit(GLF_LFLUSH, &gl->gl_flags); + clear_bit(GLF_DIRTY, &gl->gl_flags); + } +} + +int gfs2_dinode_dealloc(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct gfs2_rgrpd *rgd; + struct gfs2_holder gh; + int error; + + if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { + gfs2_consist_inode(ip); + return -EIO; + } + + gfs2_rindex_update(sdp); + + error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); + if (error) + return error; + + rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1); + if (!rgd) { + gfs2_consist_inode(ip); + error = -EIO; + goto out_qs; + } + + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &gh); + if (error) + goto out_qs; + + error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, + sdp->sd_jdesc->jd_blocks); + if (error) + goto out_rg_gunlock; + + gfs2_free_di(rgd, ip); + + gfs2_final_release_pages(ip); + + gfs2_trans_end(sdp); + +out_rg_gunlock: + gfs2_glock_dq_uninit(&gh); +out_qs: + gfs2_quota_unhold(ip); + return error; +} + static void gfs2_init_dir(struct buffer_head *dibh, const struct gfs2_inode *parent) { @@ -617,10 +709,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_glock *io_gl; - int error; + int error, dealloc_error; u32 aflags = 0; unsigned blocks = 1; struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, }; + bool xattr_initialized = false; if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; @@ -647,7 +740,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!IS_ERR(inode)) { if (S_ISDIR(inode->i_mode)) { iput(inode); - inode = ERR_PTR(-EISDIR); + inode = NULL; + error = -EISDIR; goto fail_gunlock; } d_instantiate(dentry, inode); @@ -672,6 +766,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = -ENOMEM; if (!inode) goto fail_gunlock; + gfs2_setup_inode(inode); ip = GFS2_I(inode); error = posix_acl_create(dir, &mode, &default_acl, &acl); @@ -686,7 +781,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, 
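gfs2_setup_inode(), added above and called for every inode that enters gfs2_inode_lookup() or gfs2_create_inode(), masks __GFP_FS out of the inode's mapping so that page cache allocations for it can never enter __GFP_FS direct reclaim and recurse back into GFS2. The guard itself is tiny; a self-contained sketch using only the standard mapping helpers from linux/pagemap.h:

	/* Make future page cache allocations for this mapping behave like GFP_NOFS. */
	static void mask_fs_reclaim(struct address_space *mapping)
	{
		gfp_t gfp = mapping_gfp_mask(mapping);

		mapping_set_gfp_mask(mapping, gfp & ~__GFP_FS);
	}

gfs2_lookup_meta() above is stricter still and pins the whole mask to GFP_NOFS for inodes looked up in the metadata directory.
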
set_nlink(inode, S_ISDIR(mode) ? 2 : 1); inode->i_rdev = dev; inode->i_size = size; - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + simple_inode_init_ts(inode); munge_mode_uid_gid(dip, inode); check_and_update_goal(dip); ip->i_goal = dip->i_goal; @@ -732,12 +827,13 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); if (error) - goto fail_free_inode; + goto fail_dealloc_inode; error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (error) - goto fail_free_inode; + goto fail_dealloc_inode; gfs2_cancel_delete_work(io_gl); + io_gl->gl_no_formal_ino = ip->i_no_formal_ino; retry: error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr); @@ -754,13 +850,16 @@ retry: error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh); if (error) goto fail_gunlock3; + clear_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags); error = gfs2_trans_begin(sdp, blocks, 0); if (error) goto fail_gunlock3; - if (blocks > 1) + if (blocks > 1) { gfs2_init_xattr(ip); + xattr_initialized = true; + } init_dinode(dip, ip, symname); gfs2_trans_end(sdp); @@ -815,7 +914,22 @@ fail_gunlock3: gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_gunlock2: gfs2_glock_put(io_gl); +fail_dealloc_inode: + dealloc_error = 0; + if (ip->i_eattr) + dealloc_error = gfs2_ea_dealloc(ip, xattr_initialized); + clear_nlink(inode); + mark_inode_dirty(inode); + if (!dealloc_error) + dealloc_error = gfs2_dinode_dealloc(ip); + if (dealloc_error) + fs_warn(sdp, "%s: %d\n", __func__, dealloc_error); + ip->i_no_addr = 0; fail_free_inode: + if (ip->i_gl) { + gfs2_glock_put(ip->i_gl); + ip->i_gl = NULL; + } gfs2_rs_deltree(&ip->i_res); gfs2_qa_put(ip); fail_free_acls: @@ -825,11 +939,7 @@ fail_gunlock: gfs2_dir_no_add(&da); gfs2_glock_dq_uninit(&d_gh); if (!IS_ERR_OR_NULL(inode)) { - set_bit(GIF_ALLOC_FAILED, &ip->i_flags); - clear_nlink(inode); - if (ip->i_no_addr) - mark_inode_dirty(inode); - if (inode->i_state & I_NEW) + if (inode_state_read_once(inode) & I_NEW) iget_failed(inode); else iput(inode); @@ -843,7 +953,7 @@ fail: /** * gfs2_create - Create a file - * @mnt_userns: User namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dir: The directory in which to create the file * @dentry: The dentry of the new file * @mode: The mode of the new file @@ -852,7 +962,7 @@ fail: * Returns: errno */ -static int gfs2_create(struct user_namespace *mnt_userns, struct inode *dir, +static int gfs2_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl); @@ -933,7 +1043,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, struct gfs2_sbd *sdp = GFS2_SB(dir); struct inode *inode = d_inode(old_dentry); struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder ghs[2]; + struct gfs2_holder d_gh, gh; struct buffer_head *dibh; struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, }; int error; @@ -945,14 +1055,14 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (error) return error; - gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); + gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &d_gh); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); - error = gfs2_glock_nq(ghs); /* parent */ + error = 
gfs2_glock_nq(&d_gh); if (error) goto out_parent; - error = gfs2_glock_nq(ghs + 1); /* child */ + error = gfs2_glock_nq(&gh); if (error) goto out_child; @@ -960,7 +1070,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == 0) goto out_gunlock; - error = gfs2_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC); + error = gfs2_permission(&nop_mnt_idmap, dir, MAY_WRITE | MAY_EXEC); if (error) goto out_gunlock; @@ -984,9 +1094,6 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out_gunlock; - error = -EINVAL; - if (!ip->i_inode.i_nlink) - goto out_gunlock; error = -EMLINK; if (ip->i_inode.i_nlink == (u32)-1) goto out_gunlock; @@ -1024,7 +1131,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, gfs2_trans_add_meta(ip->i_gl, dibh); inc_nlink(&ip->i_inode); - ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_ctime_current(&ip->i_inode); ihold(inode); d_instantiate(dentry, inode); mark_inode_dirty(inode); @@ -1041,13 +1148,13 @@ out_gunlock_q: gfs2_quota_unlock(dip); out_gunlock: gfs2_dir_no_add(&da); - gfs2_glock_dq(ghs + 1); + gfs2_glock_dq(&gh); out_child: - gfs2_glock_dq(ghs); + gfs2_glock_dq(&d_gh); out_parent: gfs2_qa_put(dip); - gfs2_holder_uninit(ghs); - gfs2_holder_uninit(ghs + 1); + gfs2_holder_uninit(&d_gh); + gfs2_holder_uninit(&gh); return error; } @@ -1078,7 +1185,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, if (IS_APPEND(&dip->i_inode)) return -EPERM; - error = gfs2_permission(&init_user_ns, &dip->i_inode, + error = gfs2_permission(&nop_mnt_idmap, &dip->i_inode, MAY_WRITE | MAY_EXEC); if (error) return error; @@ -1109,7 +1216,7 @@ static int gfs2_unlink_inode(struct gfs2_inode *dip, return error; ip->i_entries = 0; - inode->i_ctime = current_time(inode); + inode_set_ctime_current(inode); if (S_ISDIR(inode->i_mode)) clear_nlink(inode); else @@ -1138,7 +1245,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) struct gfs2_sbd *sdp = GFS2_SB(dir); struct inode *inode = d_inode(dentry); struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder ghs[3]; + struct gfs2_holder d_gh, r_gh, gh; struct gfs2_rgrpd *rgd; int error; @@ -1148,21 +1255,21 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) error = -EROFS; - gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); + gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &d_gh); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1); if (!rgd) goto out_inodes; - gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE, ghs + 2); + gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE, &r_gh); - error = gfs2_glock_nq(ghs); /* parent */ + error = gfs2_glock_nq(&d_gh); if (error) goto out_parent; - error = gfs2_glock_nq(ghs + 1); /* child */ + error = gfs2_glock_nq(&gh); if (error) goto out_child; @@ -1176,7 +1283,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) goto out_rgrp; } - error = gfs2_glock_nq(ghs + 2); /* rgrp */ + error = gfs2_glock_nq(&r_gh); /* rgrp */ if (error) goto out_rgrp; @@ -1192,22 +1299,22 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) gfs2_trans_end(sdp); out_gunlock: - gfs2_glock_dq(ghs + 2); + gfs2_glock_dq(&r_gh); out_rgrp: - gfs2_glock_dq(ghs + 1); + gfs2_glock_dq(&gh); out_child: - gfs2_glock_dq(ghs); + gfs2_glock_dq(&d_gh); 
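Replacing the gfs2_holder arrays (ghs[]) with named holders in gfs2_link() and gfs2_unlink() does not change the locking protocol: the directory glock is enqueued first, then the inode glock, then (for unlink) the resource group, and they are dequeued in reverse. A reduced two-lock sketch of that pattern, with illustrative variable names:

	struct gfs2_holder d_gh, gh;
	int error;

	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &d_gh);
	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);

	error = gfs2_glock_nq(&d_gh);		/* parent directory first */
	if (error)
		goto out;
	error = gfs2_glock_nq(&gh);		/* then the inode */
	if (error)
		goto out_parent;

	/* ... modify the directory entry ... */

	gfs2_glock_dq(&gh);			/* release in reverse order */
out_parent:
	gfs2_glock_dq(&d_gh);
out:
	gfs2_holder_uninit(&gh);
	gfs2_holder_uninit(&d_gh);
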
out_parent: - gfs2_holder_uninit(ghs + 2); + gfs2_holder_uninit(&r_gh); out_inodes: - gfs2_holder_uninit(ghs + 1); - gfs2_holder_uninit(ghs); + gfs2_holder_uninit(&gh); + gfs2_holder_uninit(&d_gh); return error; } /** * gfs2_symlink - Create a symlink - * @mnt_userns: User namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dir: The directory to create the symlink in * @dentry: The dentry to put the symlink in * @symname: The thing which the link points to @@ -1215,7 +1322,7 @@ out_inodes: * Returns: errno */ -static int gfs2_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int gfs2_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { unsigned int size; @@ -1229,24 +1336,25 @@ static int gfs2_symlink(struct user_namespace *mnt_userns, struct inode *dir, /** * gfs2_mkdir - Make a directory - * @mnt_userns: User namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dir: The parent directory of the new one * @dentry: The dentry of the new directory * @mode: The mode of the new directory * - * Returns: errno + * Returns: the dentry, or ERR_PTR(errno) */ -static int gfs2_mkdir(struct user_namespace *mnt_userns, struct inode *dir, - struct dentry *dentry, umode_t mode) +static struct dentry *gfs2_mkdir(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode) { unsigned dsize = gfs2_max_stuffed_size(GFS2_I(dir)); - return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0); + + return ERR_PTR(gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0)); } /** * gfs2_mknod - Make a special file - * @mnt_userns: User namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dir: The directory in which the special file will reside * @dentry: The dentry of the special file * @mode: The mode of the special file @@ -1254,7 +1362,7 @@ static int gfs2_mkdir(struct user_namespace *mnt_userns, struct inode *dir, * */ -static int gfs2_mknod(struct user_namespace *mnt_userns, struct inode *dir, +static int gfs2_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0); @@ -1275,27 +1383,19 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode) { - struct dentry *d; bool excl = !!(flags & O_EXCL); - if (!d_in_lookup(dentry)) - goto skip_lookup; - - d = __gfs2_lookup(dir, dentry, file); - if (IS_ERR(d)) - return PTR_ERR(d); - if (d != NULL) - dentry = d; - if (d_really_is_positive(dentry)) { - if (!(file->f_mode & FMODE_OPENED)) + if (d_in_lookup(dentry)) { + struct dentry *d = __gfs2_lookup(dir, dentry, file); + if (file->f_mode & FMODE_OPENED) { + if (IS_ERR(d)) + return PTR_ERR(d); + dput(d); + return excl && (flags & O_CREAT) ? -EEXIST : 0; + } + if (d || d_really_is_positive(dentry)) return finish_no_open(file, d); - dput(d); - return excl && (flags & O_CREAT) ? 
-EEXIST : 0; } - - BUG_ON(d != NULL); - -skip_lookup: if (!(flags & O_CREAT)) return -ENOENT; @@ -1366,7 +1466,7 @@ static int update_moved_ino(struct gfs2_inode *ip, struct gfs2_inode *ndip, if (dir_rename) return gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR); - ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_ctime_current(&ip->i_inode); mark_inode_dirty_sync(&ip->i_inode); return 0; } @@ -1504,7 +1604,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, } } } else { - error = gfs2_permission(&init_user_ns, ndir, + error = gfs2_permission(&nop_mnt_idmap, ndir, MAY_WRITE | MAY_EXEC); if (error) goto out_gunlock; @@ -1541,7 +1641,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, /* Check out the dir to be renamed */ if (dir_rename) { - error = gfs2_permission(&init_user_ns, d_inode(odentry), + error = gfs2_permission(&nop_mnt_idmap, d_inode(odentry), MAY_WRITE); if (error) goto out_gunlock; @@ -1705,13 +1805,13 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry, goto out_gunlock; if (S_ISDIR(old_mode)) { - error = gfs2_permission(&init_user_ns, odentry->d_inode, + error = gfs2_permission(&nop_mnt_idmap, odentry->d_inode, MAY_WRITE); if (error) goto out_gunlock; } if (S_ISDIR(new_mode)) { - error = gfs2_permission(&init_user_ns, ndentry->d_inode, + error = gfs2_permission(&nop_mnt_idmap, ndentry->d_inode, MAY_WRITE); if (error) goto out_gunlock; @@ -1766,7 +1866,7 @@ out: return error; } -static int gfs2_rename2(struct user_namespace *mnt_userns, struct inode *odir, +static int gfs2_rename2(struct mnt_idmap *idmap, struct inode *odir, struct dentry *odentry, struct inode *ndir, struct dentry *ndentry, unsigned int flags) { @@ -1841,7 +1941,7 @@ out: /** * gfs2_permission - * @mnt_userns: User namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @inode: The inode * @mask: The mask to be tested * @@ -1852,19 +1952,27 @@ out: * Returns: errno */ -int gfs2_permission(struct user_namespace *mnt_userns, struct inode *inode, +int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { + int may_not_block = mask & MAY_NOT_BLOCK; struct gfs2_inode *ip; struct gfs2_holder i_gh; + struct gfs2_glock *gl; int error; gfs2_holder_mark_uninitialized(&i_gh); ip = GFS2_I(inode); - if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - if (mask & MAY_NOT_BLOCK) + gl = rcu_dereference_check(ip->i_gl, !may_not_block); + if (unlikely(!gl)) { + /* inode is getting torn down, must be RCU mode */ + WARN_ON_ONCE(!may_not_block); + return -ECHILD; + } + if (gfs2_glock_is_locked_by_me(gl) == NULL) { + if (may_not_block) return -ECHILD; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; } @@ -1872,7 +1980,7 @@ int gfs2_permission(struct user_namespace *mnt_userns, struct inode *inode, if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) error = -EPERM; else - error = generic_permission(&init_user_ns, inode, mask); + error = generic_permission(&nop_mnt_idmap, inode, mask); if (gfs2_holder_initialized(&i_gh)) gfs2_glock_dq_uninit(&i_gh); @@ -1881,7 +1989,7 @@ int gfs2_permission(struct user_namespace *mnt_userns, struct inode *inode, static int __gfs2_setattr_simple(struct inode *inode, struct iattr *attr) { - setattr_copy(&init_user_ns, inode, attr); + setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } @@ -1909,7 +2017,7 @@ static int 
setattr_chown(struct inode *inode, struct iattr *attr) kuid_t ouid, nuid; kgid_t ogid, ngid; int error; - struct gfs2_alloc_parms ap; + struct gfs2_alloc_parms ap = {}; ouid = inode->i_uid; ogid = inode->i_gid; @@ -1966,7 +2074,7 @@ out: /** * gfs2_setattr - Change attributes on an inode - * @mnt_userns: User namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @dentry: The dentry which is changing * @attr: The structure describing the change * @@ -1976,7 +2084,7 @@ out: * Returns: errno */ -static int gfs2_setattr(struct user_namespace *mnt_userns, +static int gfs2_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); @@ -1992,11 +2100,11 @@ static int gfs2_setattr(struct user_namespace *mnt_userns, if (error) goto out; - error = may_setattr(&init_user_ns, inode, attr->ia_valid); + error = may_setattr(&nop_mnt_idmap, inode, attr->ia_valid); if (error) goto error; - error = setattr_prepare(&init_user_ns, dentry, attr); + error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) goto error; @@ -2007,7 +2115,7 @@ static int gfs2_setattr(struct user_namespace *mnt_userns, else { error = gfs2_setattr_simple(inode, attr); if (!error && attr->ia_valid & ATTR_MODE) - error = posix_acl_chmod(&init_user_ns, dentry, + error = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode); } @@ -2022,7 +2130,7 @@ out: /** * gfs2_getattr - Read out an inode's attributes - * @mnt_userns: user namespace of the mount the inode was found from + * @idmap: idmap of the mount the inode was found from * @path: Object to query * @stat: The inode's stats * @request_mask: Mask of STATX_xxx flags indicating the caller's interests @@ -2037,7 +2145,7 @@ out: * Returns: errno */ -static int gfs2_getattr(struct user_namespace *mnt_userns, +static int gfs2_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { @@ -2066,7 +2174,7 @@ static int gfs2_getattr(struct user_namespace *mnt_userns, STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); if (gfs2_holder_initialized(&gh)) gfs2_glock_dq_uninit(&gh); @@ -2134,8 +2242,7 @@ loff_t gfs2_seek_hole(struct file *file, loff_t offset) return vfs_setpos(file, ret, inode->i_sb->s_maxbytes); } -static int gfs2_update_time(struct inode *inode, struct timespec64 *time, - int flags) +static int gfs2_update_time(struct inode *inode, int flags) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_glock *gl = ip->i_gl; @@ -2143,14 +2250,15 @@ static int gfs2_update_time(struct inode *inode, struct timespec64 *time, int error; gh = gfs2_glock_is_locked_by_me(gl); - if (gh && !gfs2_glock_is_held_excl(gl)) { + if (gh && gl->gl_state != LM_ST_EXCLUSIVE) { gfs2_glock_dq(gh); gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, gh); error = gfs2_glock_nq(gh); if (error) return error; } - return generic_update_time(inode, time, flags); + generic_update_time(inode, flags); + return 0; } static const struct inode_operations gfs2_file_iops = { diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 0264d514dda7..2fcd96dd1361 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -13,9 +13,9 @@ #include "util.h" bool gfs2_release_folio(struct folio *folio, gfp_t gfp_mask); -extern int gfs2_internal_read(struct gfs2_inode *ip, - char *buf, loff_t *pos, unsigned size); -extern void gfs2_set_aops(struct inode *inode); +ssize_t 
gfs2_internal_read(struct gfs2_inode *ip, + char *buf, loff_t *pos, size_t size); +void gfs2_set_aops(struct inode *inode); static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) { @@ -44,19 +44,17 @@ static inline int gfs2_is_dir(const struct gfs2_inode *ip) static inline void gfs2_set_inode_blocks(struct inode *inode, u64 blocks) { - inode->i_blocks = blocks << - (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); + inode->i_blocks = blocks << (inode->i_blkbits - SECTOR_SHIFT); } static inline u64 gfs2_get_inode_blocks(const struct inode *inode) { - return inode->i_blocks >> - (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); + return inode->i_blocks >> (inode->i_blkbits - SECTOR_SHIFT); } static inline void gfs2_add_inode_blocks(struct inode *inode, s64 change) { - change <<= inode->i_blkbits - GFS2_BASIC_BLOCK_SHIFT; + change <<= inode->i_blkbits - SECTOR_SHIFT; gfs2_assert(GFS2_SB(inode), (change >= 0 || inode->i_blocks >= -change)); inode->i_blocks += change; } @@ -88,33 +86,33 @@ err: return -EIO; } -extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, - u64 no_addr, u64 no_formal_ino, - unsigned int blktype); -extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, - u64 no_formal_ino, - unsigned int blktype); - -extern int gfs2_inode_refresh(struct gfs2_inode *ip); - -extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, - int is_root); -extern int gfs2_permission(struct user_namespace *mnt_userns, - struct inode *inode, int mask); -extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); -extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); -extern int gfs2_open_common(struct inode *inode, struct file *file); -extern loff_t gfs2_seek_data(struct file *file, loff_t offset); -extern loff_t gfs2_seek_hole(struct file *file, loff_t offset); +void gfs2_setup_inode(struct inode *inode); +struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, + u64 no_addr, u64 no_formal_ino, + unsigned int blktype); +struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, + u64 no_formal_ino, + unsigned int blktype); +int gfs2_dinode_dealloc(struct gfs2_inode *ip); + +struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, + int is_root); +int gfs2_permission(struct mnt_idmap *idmap, + struct inode *inode, int mask); +struct inode *gfs2_lookup_meta(struct inode *dip, const char *name); +void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); +int gfs2_open_common(struct inode *inode, struct file *file); +loff_t gfs2_seek_data(struct file *file, loff_t offset); +loff_t gfs2_seek_hole(struct file *file, loff_t offset); extern const struct file_operations gfs2_file_fops_nolock; extern const struct file_operations gfs2_dir_fops_nolock; -extern int gfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa); -extern int gfs2_fileattr_set(struct user_namespace *mnt_userns, - struct dentry *dentry, struct fileattr *fa); -extern void gfs2_set_inode_flags(struct inode *inode); - +int gfs2_fileattr_get(struct dentry *dentry, struct file_kattr *fa); +int gfs2_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, struct file_kattr *fa); +void gfs2_set_inode_flags(struct inode *inode); + #ifdef CONFIG_GFS2_FS_LOCKING_DLM extern const struct file_operations gfs2_file_fops; extern const struct file_operations gfs2_dir_fops; diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 71911bf9ab34..b8d249925395 100644 
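The lock_dlm.c changes that follow also repurpose GLF_INITIAL: rather than testing gl->gl_lksb.sb_lkid, the flag now records that no DLM lock exists yet for this glock. gdlm_lock() keys off it when choosing between a new lock request and a conversion, and gdlm_ast() clears it on the first success. Roughly, under those assumptions (failed is a stand-in for the error cases):

	/* gdlm_lock(): convert the existing DLM lock rather than creating one. */
	if (!test_bit(GLF_INITIAL, &gl->gl_flags))
		lkf |= DLM_LKF_CONVERT;

	/* gdlm_ast(): only a successful initial request creates a DLM lock. */
	if (failed && test_bit(GLF_INITIAL, &gl->gl_flags))
		gl->gl_lksb.sb_lkid = 0;	/* nothing to convert later */
	else if (!failed)
		clear_bit(GLF_INITIAL, &gl->gl_flags);
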
--- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -15,9 +15,6 @@ #include <linux/sched/signal.h> #include "incore.h" -#include "glock.h" -#include "glops.h" -#include "recovery.h" #include "util.h" #include "sys.h" #include "trace_gfs2.h" @@ -58,6 +55,7 @@ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index, /** * gfs2_update_reply_times - Update locking statistics * @gl: The glock to update + * @blocking: The operation may have been blocking * * This assumes that gl->gl_dstamp has been set earlier. * @@ -72,12 +70,12 @@ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index, * TRY_1CB flags are set are classified as non-blocking. All * other DLM requests are counted as (potentially) blocking. */ -static inline void gfs2_update_reply_times(struct gfs2_glock *gl) +static inline void gfs2_update_reply_times(struct gfs2_glock *gl, + bool blocking) { struct gfs2_pcpu_lkstats *lks; const unsigned gltype = gl->gl_name.ln_type; - unsigned index = test_bit(GLF_BLOCKING, &gl->gl_flags) ? - GFS2_LKS_SRTTB : GFS2_LKS_SRTT; + unsigned index = blocking ? GFS2_LKS_SRTTB : GFS2_LKS_SRTT; s64 rtt; preempt_disable(); @@ -119,9 +117,18 @@ static inline void gfs2_update_request_times(struct gfs2_glock *gl) static void gdlm_ast(void *arg) { struct gfs2_glock *gl = arg; - unsigned ret = gl->gl_state; + bool blocking; + unsigned ret; + + blocking = test_bit(GLF_BLOCKING, &gl->gl_flags); + gfs2_update_reply_times(gl, blocking); + clear_bit(GLF_BLOCKING, &gl->gl_flags); + + /* If the glock is dead, we only react to a dlm_unlock() reply. */ + if (__lockref_is_dead(&gl->gl_lockref) && + gl->gl_lksb.sb_status != -DLM_EUNLOCK) + return; - gfs2_update_reply_times(gl); BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); if ((gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) && gl->gl_lksb.sb_lvbptr) @@ -129,18 +136,19 @@ static void gdlm_ast(void *arg) switch (gl->gl_lksb.sb_status) { case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ - if (gl->gl_ops->go_free) - gl->gl_ops->go_free(gl); gfs2_glock_free(gl); return; case -DLM_ECANCEL: /* Cancel while getting lock */ - ret |= LM_OUT_CANCELED; + ret = LM_OUT_CANCELED; goto out; case -EAGAIN: /* Try lock fails */ + ret = LM_OUT_TRY_AGAIN; + goto out; case -EDEADLK: /* Deadlock detected */ + ret = LM_OUT_DEADLOCK; goto out; case -ETIMEDOUT: /* Canceled due to timeout */ - ret |= LM_OUT_ERROR; + ret = LM_OUT_ERROR; goto out; case 0: /* Success */ break; @@ -149,20 +157,22 @@ static void gdlm_ast(void *arg) } ret = gl->gl_req; - if (gl->gl_lksb.sb_flags & DLM_SBF_ALTMODE) { - if (gl->gl_req == LM_ST_SHARED) - ret = LM_ST_DEFERRED; - else if (gl->gl_req == LM_ST_DEFERRED) - ret = LM_ST_SHARED; - else - BUG(); - } - set_bit(GLF_INITIAL, &gl->gl_flags); + /* + * The GLF_INITIAL flag is initially set for new glocks. Upon the + * first successful new (non-conversion) request, we clear this flag to + * indicate that a DLM lock exists and that gl->gl_lksb.sb_lkid is the + * identifier to use for identifying it. + * + * Any failed initial requests do not create a DLM lock, so we ignore + * the gl->gl_lksb.sb_lkid values that come with such requests. 
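The reworked switch in gdlm_ast() above now maps each DLM completion status to exactly one lock-manager result instead of OR-ing bits into the previous glock state. A condensed, userspace-compilable sketch of that mapping; the LM_OUT_* and DLM_ECANCEL values below are illustrative stand-ins, only the structure mirrors the hunk:

#include <errno.h>

enum lm_out { LM_OUT_CANCELED = 1, LM_OUT_TRY_AGAIN, LM_OUT_DEADLOCK, LM_OUT_ERROR };
#define DLM_ECANCEL 0x10001	/* illustrative value */

/* Returns the granted mode on success, or one LM_OUT_* result. */
static int map_completion(int sb_status, int requested_mode)
{
	switch (sb_status) {
	case -DLM_ECANCEL:	/* request cancelled while queued */
		return LM_OUT_CANCELED;
	case -EAGAIN:		/* TRY/TRY_1CB request could not be granted */
		return LM_OUT_TRY_AGAIN;
	case -EDEADLK:		/* conversion deadlock detected */
		return LM_OUT_DEADLOCK;
	case -ETIMEDOUT:	/* cancelled due to timeout */
		return LM_OUT_ERROR;
	case 0:			/* granted in the requested mode */
		return requested_mode;
	default:
		return LM_OUT_ERROR;
	}
}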
+ */ + + clear_bit(GLF_INITIAL, &gl->gl_flags); gfs2_glock_complete(gl, ret); return; out: - if (!test_bit(GLF_INITIAL, &gl->gl_flags)) + if (test_bit(GLF_INITIAL, &gl->gl_flags)) gl->gl_lksb.sb_lkid = 0; gfs2_glock_complete(gl, ret); } @@ -171,6 +181,9 @@ static void gdlm_bast(void *arg, int mode) { struct gfs2_glock *gl = arg; + if (__lockref_is_dead(&gl->gl_lockref)) + return; + switch (mode) { case DLM_LOCK_EX: gfs2_glock_cb(gl, LM_ST_UNLOCKED); @@ -206,8 +219,21 @@ static int make_mode(struct gfs2_sbd *sdp, const unsigned int lmstate) return -1; } +/* Taken from fs/dlm/lock.c. */ + +static bool middle_conversion(int cur, int req) +{ + return (cur == DLM_LOCK_PR && req == DLM_LOCK_CW) || + (cur == DLM_LOCK_CW && req == DLM_LOCK_PR); +} + +static bool down_conversion(int cur, int req) +{ + return !middle_conversion(cur, req) && req < cur; +} + static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, - const int req) + const int req, bool blocking) { u32 lkf = 0; @@ -222,23 +248,16 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, lkf |= DLM_LKF_NOQUEUEBAST; } - if (gfs_flags & LM_FLAG_PRIORITY) { - lkf |= DLM_LKF_NOORDER; - lkf |= DLM_LKF_HEADQUE; - } - - if (gfs_flags & LM_FLAG_ANY) { - if (req == DLM_LOCK_PR) - lkf |= DLM_LKF_ALTCW; - else if (req == DLM_LOCK_CW) - lkf |= DLM_LKF_ALTPR; - else - BUG(); - } - - if (gl->gl_lksb.sb_lkid != 0) { + if (!test_bit(GLF_INITIAL, &gl->gl_flags)) { lkf |= DLM_LKF_CONVERT; - if (test_bit(GLF_BLOCKING, &gl->gl_flags)) + + /* + * The DLM_LKF_QUECVT flag needs to be set for "first come, + * first served" semantics, but it must only be set for + * "upward" lock conversions or else DLM will reject the + * request as invalid. + */ + if (blocking) lkf |= DLM_LKF_QUECVT; } @@ -258,31 +277,43 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, unsigned int flags) { struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; - int req; + bool blocking; + int cur, req; u32 lkf; char strname[GDLM_STRNAME_BYTES] = ""; int error; + gl->gl_req = req_state; + cur = make_mode(gl->gl_name.ln_sbd, gl->gl_state); req = make_mode(gl->gl_name.ln_sbd, req_state); - lkf = make_flags(gl, flags, req); + blocking = !down_conversion(cur, req) && + !(flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)); + lkf = make_flags(gl, flags, req, blocking); + if (blocking) + set_bit(GLF_BLOCKING, &gl->gl_flags); gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); - if (gl->gl_lksb.sb_lkid) { - gfs2_update_request_times(gl); - } else { + if (test_bit(GLF_INITIAL, &gl->gl_flags)) { memset(strname, ' ', GDLM_STRNAME_BYTES - 1); strname[GDLM_STRNAME_BYTES - 1] = '\0'; gfs2_reverse_hex(strname + 7, gl->gl_name.ln_type); gfs2_reverse_hex(strname + 23, gl->gl_name.ln_number); gl->gl_dstamp = ktime_get_real(); + } else { + gfs2_update_request_times(gl); } /* * Submit the actual lock request. 
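The middle_conversion()/down_conversion() helpers added above decide whether a conversion can be granted without waiting, which in turn gates the DLM_LKF_QUECVT flag and the GLF_BLOCKING classification. A worked illustration with stand-in mode values in DLM's strength order; PR (protected read) and CW (concurrent write) are incomparable, which is what makes them "middle":

#include <stdbool.h>
#include <stdio.h>

enum { LK_NL, LK_CR, LK_CW, LK_PR, LK_PW, LK_EX };	/* stand-in modes */

static bool middle_conversion(int cur, int req)
{
	return (cur == LK_PR && req == LK_CW) ||
	       (cur == LK_CW && req == LK_PR);
}

static bool down_conversion(int cur, int req)
{
	return !middle_conversion(cur, req) && req < cur;
}

int main(void)
{
	/* EX -> PR releases rights only: down-conversion, never blocks. */
	printf("%d\n", down_conversion(LK_EX, LK_PR));	/* 1 */
	/* PR -> EX acquires rights: up-conversion, potentially blocking. */
	printf("%d\n", down_conversion(LK_PR, LK_EX));	/* 0 */
	/* PR <-> CW grants new rights either way: treated as blocking. */
	printf("%d\n", down_conversion(LK_PR, LK_CW));	/* 0 */
	return 0;
}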
*/ again: - error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, - GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); + down_read(&ls->ls_sem); + error = -ENODEV; + if (likely(ls->ls_dlm != NULL)) { + error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, + GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); + } + up_read(&ls->ls_sem); if (error == -EBUSY) { msleep(20); goto again; @@ -294,57 +325,75 @@ static void gdlm_put_lock(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct lm_lockstruct *ls = &sdp->sd_lockstruct; + uint32_t flags = 0; int error; - if (gl->gl_lksb.sb_lkid == 0) { + BUG_ON(!__lockref_is_dead(&gl->gl_lockref)); + + if (test_bit(GLF_INITIAL, &gl->gl_flags)) { gfs2_glock_free(gl); return; } - clear_bit(GLF_BLOCKING, &gl->gl_flags); gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_update_request_times(gl); - /* don't want to call dlm if we've unmounted the lock protocol */ - if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) { - gfs2_glock_free(gl); - return; - } - /* don't want to skip dlm_unlock writing the lvb when lock has one */ + /* + * When the lockspace is released, all remaining glocks will be + * unlocked automatically. This is more efficient than unlocking them + * individually, but when the lock is held in DLM_LOCK_EX or + * DLM_LOCK_PW mode, the lock value block (LVB) would be lost. + */ if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && - !gl->gl_lksb.sb_lvbptr) { - gfs2_glock_free(gl); + (!gl->gl_lksb.sb_lvbptr || gl->gl_state != LM_ST_EXCLUSIVE)) { + gfs2_glock_free_later(gl); return; } + if (gl->gl_lksb.sb_lvbptr) + flags |= DLM_LKF_VALBLK; + again: - error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, - NULL, gl); + down_read(&ls->ls_sem); + error = -ENODEV; + if (likely(ls->ls_dlm != NULL)) { + error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, flags, + NULL, gl); + } + up_read(&ls->ls_sem); if (error == -EBUSY) { msleep(20); goto again; } + if (error == -ENODEV) { + gfs2_glock_free(gl); + return; + } + if (error) { fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n", gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, error); - return; } } static void gdlm_cancel(struct gfs2_glock *gl) { struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; - dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl); + + down_read(&ls->ls_sem); + if (likely(ls->ls_dlm != NULL)) { + dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl); + } + up_read(&ls->ls_sem); } /* * dlm/gfs2 recovery coordination using dlm_recover callbacks * - * 0. gfs2 checks for another cluster node withdraw, needing journal replay * 1. dlm_controld sees lockspace members change * 2. dlm_controld blocks dlm-kernel locking activity * 3. 
dlm_controld within dlm-kernel notifies gfs2 (recover_prep) @@ -519,7 +568,11 @@ static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name) struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; - error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls); + down_read(&ls->ls_sem); + error = -ENODEV; + if (likely(ls->ls_dlm != NULL)) + error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls); + up_read(&ls->ls_sem); if (error) { fs_err(sdp, "%s lkid %x error %d\n", name, lksb->sb_lkid, error); @@ -546,9 +599,14 @@ static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags, memset(strname, 0, GDLM_STRNAME_BYTES); snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num); - error = dlm_lock(ls->ls_dlm, mode, lksb, flags, - strname, GDLM_STRNAME_BYTES - 1, - 0, sync_wait_cb, ls, NULL); + down_read(&ls->ls_sem); + error = -ENODEV; + if (likely(ls->ls_dlm != NULL)) { + error = dlm_lock(ls->ls_dlm, mode, lksb, flags, + strname, GDLM_STRNAME_BYTES - 1, + 0, sync_wait_cb, ls, NULL); + } + up_read(&ls->ls_sem); if (error) { fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n", name, lksb->sb_lkid, flags, mode, error); @@ -593,28 +651,6 @@ static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags) &ls->ls_control_lksb, "control_lock"); } -/** - * remote_withdraw - react to a node withdrawing from the file system - * @sdp: The superblock - */ -static void remote_withdraw(struct gfs2_sbd *sdp) -{ - struct gfs2_jdesc *jd; - int ret = 0, count = 0; - - list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { - if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) - continue; - ret = gfs2_recover_journal(jd, true); - if (ret) - break; - count++; - } - - /* Now drop the additional reference we acquired */ - fs_err(sdp, "Journals checked: %d, ret = %d.\n", count, ret); -} - static void gfs2_control_func(struct work_struct *work) { struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work); @@ -625,13 +661,6 @@ static void gfs2_control_func(struct work_struct *work) int recover_size; int i, error; - /* First check for other nodes that may have done a withdraw. */ - if (test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) { - remote_withdraw(sdp); - clear_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags); - return; - } - spin_lock(&ls->ls_recover_spin); /* * No MOUNT_DONE means we're still mounting; control_mount() @@ -955,14 +984,15 @@ locks_done: if (sdp->sd_args.ar_spectator) { fs_info(sdp, "Recovery is required. 
Waiting for a " "non-spectator to mount.\n"); + spin_unlock(&ls->ls_recover_spin); msleep_interruptible(1000); } else { fs_info(sdp, "control_mount wait1 block %u start %u " "mount %u lvb %u flags %lx\n", block_gen, start_gen, mount_gen, lvb_gen, ls->ls_recover_flags); + spin_unlock(&ls->ls_recover_spin); } - spin_unlock(&ls->ls_recover_spin); goto restart; } @@ -1274,6 +1304,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) */ INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func); + ls->ls_dlm = NULL; spin_lock_init(&ls->ls_recover_spin); ls->ls_recover_flags = 0; ls->ls_recover_mount = 0; @@ -1308,6 +1339,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) * create/join lockspace */ + init_rwsem(&ls->ls_sem); error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE, &gdlm_lockspace_ops, sdp, &ops_result, &ls->ls_dlm); @@ -1351,7 +1383,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) return 0; fail_release: - dlm_release_lockspace(ls->ls_dlm, 2); + dlm_release_lockspace(ls->ls_dlm, DLM_RELEASE_NORMAL); fail_free: free_recover_size(ls); fail: @@ -1371,7 +1403,15 @@ static void gdlm_first_done(struct gfs2_sbd *sdp) fs_err(sdp, "mount first_done error %d\n", error); } -static void gdlm_unmount(struct gfs2_sbd *sdp) +/* + * gdlm_unmount - release our lockspace + * @sdp: the superblock + * @clean: Indicates whether or not the remaining nodes in the cluster should + * perform recovery. Recovery is necessary when a node withdraws and + * its journal remains dirty. Recovery isn't necessary when a node + * cleanly unmounts a filesystem. + */ +static void gdlm_unmount(struct gfs2_sbd *sdp, bool clean) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; @@ -1387,10 +1427,14 @@ static void gdlm_unmount(struct gfs2_sbd *sdp) /* mounted_lock and control_lock will be purged in dlm recovery */ release: + down_write(&ls->ls_sem); if (ls->ls_dlm) { - dlm_release_lockspace(ls->ls_dlm, 2); + dlm_release_lockspace(ls->ls_dlm, + clean ? 
DLM_RELEASE_NORMAL : + DLM_RELEASE_RECOVER); ls->ls_dlm = NULL; } + up_write(&ls->ls_sem); free_recover_size(ls); } diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 61323deb80bc..8312cd2cdae4 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -31,6 +31,7 @@ #include "dir.h" #include "trace_gfs2.h" #include "trans.h" +#include "aops.h" static void gfs2_log_shutdown(struct gfs2_sbd *sdp); @@ -80,15 +81,6 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd) brelse(bd->bd_bh); } -static int __gfs2_writepage(struct page *page, struct writeback_control *wbc, - void *data) -{ - struct address_space *mapping = data; - int ret = mapping->a_ops->writepage(page, wbc); - mapping_set_error(mapping, ret); - return ret; -} - /** * gfs2_ail1_start_one - Start I/O on a transaction * @sdp: The superblock @@ -120,10 +112,8 @@ __acquires(&sdp->sd_ail_lock) &tr->tr_ail2_list); continue; } - if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) { + if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) gfs2_io_error_bh(sdp, bh); - gfs2_withdraw_delayed(sdp); - } } if (gfs2_withdrawn(sdp)) { @@ -136,11 +126,15 @@ __acquires(&sdp->sd_ail_lock) continue; gl = bd->bd_gl; list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list); - mapping = bh->b_page->mapping; + mapping = bh->b_folio->mapping; if (!mapping) continue; spin_unlock(&sdp->sd_ail_lock); - ret = write_cache_pages(mapping, wbc, __gfs2_writepage, mapping); + BUG_ON(GFS2_SB(mapping->host) != sdp); + if (gfs2_is_jdata(GFS2_I(mapping->host))) + ret = gfs2_jdata_writeback(mapping, wbc); + else + ret = mapping->a_ops->writepages(mapping, wbc); if (need_resched()) { blk_finish_plug(plug); cond_resched(); @@ -149,6 +143,7 @@ __acquires(&sdp->sd_ail_lock) spin_lock(&sdp->sd_ail_lock); if (ret == -ENODATA) /* if a jdata write into a new hole */ ret = 0; /* ignore it */ + mapping_set_error(mapping, ret); if (ret || wbc->nr_to_write <= 0) break; return -EBUSY; @@ -327,10 +322,8 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, continue; } if (!buffer_uptodate(bh) && - !cmpxchg(&sdp->sd_log_error, 0, -EIO)) { + !cmpxchg(&sdp->sd_log_error, 0, -EIO)) gfs2_io_error_bh(sdp, bh); - gfs2_withdraw_delayed(sdp); - } /* * If we have space for revokes and the bd is no longer on any * buf list, we can just add a revoke for it immediately and @@ -352,14 +345,15 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, * @sdp: The superblock * @max_revokes: If non-zero, add revokes where appropriate * - * Tries to empty the ail1 lists, starting with the oldest first + * Tries to empty the ail1 lists, starting with the oldest first. + * Returns %true if the ail1 list is now empty. 
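Several lock_dlm.c hunks above wrap dlm_lock()/dlm_unlock() in the same guard: take ls_sem for reading, re-check ls_dlm, and fall back to -ENODEV, because gdlm_unmount() clears the pointer under the write lock. A userspace analogue of that pattern using a pthread rwlock (types and names simplified; this is a sketch of the shape, not the kernel code):

#include <errno.h>
#include <pthread.h>

struct lockspace {
	pthread_rwlock_t ls_sem;
	void *ls_dlm;		/* NULL once the lockspace is released */
};

/* Call into DLM only while the lockspace pointer is pinned by ls_sem. */
static int guarded_dlm_call(struct lockspace *ls,
			    int (*op)(void *dlm, void *arg), void *arg)
{
	int error = -ENODEV;

	pthread_rwlock_rdlock(&ls->ls_sem);
	if (ls->ls_dlm != NULL)
		error = op(ls->ls_dlm, arg);
	pthread_rwlock_unlock(&ls->ls_sem);
	return error;
}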
*/ -static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes) +static bool gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes) { struct gfs2_trans *tr, *s; int oldest_tr = 1; - int ret; + bool empty; spin_lock(&sdp->sd_ail_lock); list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { @@ -369,15 +363,10 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes) oldest_tr = 0; } gfs2_log_update_flush_tail(sdp); - ret = list_empty(&sdp->sd_ail1_list); + empty = list_empty(&sdp->sd_ail1_list); spin_unlock(&sdp->sd_ail_lock); - if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags)) { - gfs2_lm(sdp, "fatal: I/O error(s)\n"); - gfs2_withdraw(sdp); - } - - return ret; + return empty; } static void gfs2_ail1_wait(struct gfs2_sbd *sdp) @@ -790,7 +779,7 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl) { if (atomic_dec_return(&gl->gl_revokes) == 0) { clear_bit(GLF_LFLUSH, &gl->gl_flags); - gfs2_glock_queue_put(gl); + gfs2_glock_put_async(gl); } } @@ -914,9 +903,9 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, static void log_write_header(struct gfs2_sbd *sdp, u32 flags) { blk_opf_t op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC; - enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); + struct super_block *sb = sdp->sd_vfs; - gfs2_assert_withdraw(sdp, (state != SFS_FROZEN)); + gfs2_assert_withdraw(sdp, sb->s_writers.frozen != SB_FREEZE_COMPLETE); if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) { gfs2_ordered_wait(sdp); @@ -975,8 +964,9 @@ void gfs2_ail_drain(struct gfs2_sbd *sdp) static void empty_ail1_list(struct gfs2_sbd *sdp) { unsigned long start = jiffies; + bool empty = false; - for (;;) { + while (!empty) { if (time_after(jiffies, start + (HZ * 600))) { fs_err(sdp, "Error: In %s for 10 minutes! t=%d\n", __func__, current->journal_info ? 
1 : 0); @@ -985,8 +975,10 @@ static void empty_ail1_list(struct gfs2_sbd *sdp) } gfs2_ail1_start(sdp); gfs2_ail1_wait(sdp); - if (gfs2_ail1_empty(sdp, 0)) - return; + empty = gfs2_ail1_empty(sdp, 0); + + if (gfs2_withdrawn(sdp)) + break; } } @@ -1036,7 +1028,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) { struct gfs2_trans *tr = NULL; unsigned int reserved_blocks = 0, used_blocks = 0; - enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); + bool frozen = test_bit(SDF_FROZEN, &sdp->sd_flags); unsigned int first_log_head; unsigned int reserved_revokes = 0; @@ -1048,7 +1040,8 @@ repeat: * Do this check while holding the log_flush_lock to prevent new * buffers from being added to the ail via gfs2_pin() */ - if (gfs2_withdrawn(sdp) || !test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + if (gfs2_withdrawn(sdp) || + !test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) goto out; /* Log might have been flushed while we waited for the flush lock */ @@ -1067,8 +1060,8 @@ repeat: if (tr) { sdp->sd_log_tr = NULL; tr->tr_first = first_log_head; - if (unlikely (state == SFS_FROZEN)) { - if (gfs2_assert_withdraw_delayed(sdp, + if (unlikely(frozen)) { + if (gfs2_assert_withdraw(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new)) goto out_withdraw; } @@ -1092,8 +1085,8 @@ repeat: if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN) clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - if (unlikely(state == SFS_FROZEN)) - if (gfs2_assert_withdraw_delayed(sdp, !reserved_revokes)) + if (unlikely(frozen)) + if (gfs2_assert_withdraw(sdp, !reserved_revokes)) goto out_withdraw; gfs2_ordered_write(sdp); @@ -1102,7 +1095,8 @@ repeat: lops_before_commit(sdp, tr); if (gfs2_withdrawn(sdp)) goto out_withdraw; - gfs2_log_submit_bio(&sdp->sd_jdesc->jd_log_bio, REQ_OP_WRITE); + if (sdp->sd_jdesc) + gfs2_log_submit_bio(&sdp->sd_jdesc->jd_log_bio, REQ_OP_WRITE); if (gfs2_withdrawn(sdp)) goto out_withdraw; @@ -1136,8 +1130,6 @@ repeat: if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN | GFS2_LOG_HEAD_FLUSH_FREEZE)) gfs2_log_shutdown(sdp); - if (flags & GFS2_LOG_HEAD_FLUSH_FREEZE) - atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); } out_end: @@ -1149,13 +1141,11 @@ out_end: reserved_blocks += (reserved_revokes - sdp->sd_ldptrs) / sdp->sd_inptrs; out: if (used_blocks != reserved_blocks) { - gfs2_assert_withdraw_delayed(sdp, used_blocks < reserved_blocks); + gfs2_assert_withdraw(sdp, used_blocks < reserved_blocks); gfs2_log_release(sdp, reserved_blocks - used_blocks); } up_write(&sdp->sd_log_flush_lock); gfs2_trans_free(sdp, tr); - if (gfs2_withdrawing(sdp)) - gfs2_withdraw(sdp); trace_gfs2_log_flush(sdp, 0, flags); return; @@ -1230,6 +1220,21 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) gfs2_log_unlock(sdp); } +static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) +{ + return atomic_read(&sdp->sd_log_pinned) + + atomic_read(&sdp->sd_log_blks_needed) >= + atomic_read(&sdp->sd_log_thresh1); +} + +static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp) +{ + return sdp->sd_jdesc->jd_blocks - + atomic_read(&sdp->sd_log_blks_free) + + atomic_read(&sdp->sd_log_blks_needed) >= + atomic_read(&sdp->sd_log_thresh2); +} + /** * gfs2_log_commit - Commit a transaction to the log * @sdp: the filesystem @@ -1249,9 +1254,7 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { log_refund(sdp, tr); - if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) || - ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) > - atomic_read(&sdp->sd_log_thresh2))) + 
if (gfs2_ail_flush_reqd(sdp) || gfs2_jrnl_flush_reqd(sdp)) wake_up(&sdp->sd_logd_waitq); } @@ -1274,24 +1277,6 @@ static void gfs2_log_shutdown(struct gfs2_sbd *sdp) gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); } -static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) -{ - return (atomic_read(&sdp->sd_log_pinned) + - atomic_read(&sdp->sd_log_blks_needed) >= - atomic_read(&sdp->sd_log_thresh1)); -} - -static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp) -{ - unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free); - - if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags)) - return 1; - - return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >= - atomic_read(&sdp->sd_log_thresh2); -} - /** * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks * @data: Pointer to GFS2 superblock @@ -1304,24 +1289,11 @@ int gfs2_logd(void *data) { struct gfs2_sbd *sdp = data; unsigned long t = 1; - DEFINE_WAIT(wait); + set_freezable(); while (!kthread_should_stop()) { - - if (gfs2_withdrawn(sdp)) { - msleep_interruptible(HZ); - continue; - } - /* Check for errors writing to the journal */ - if (sdp->sd_log_error) { - gfs2_lm(sdp, - "GFS2: fsid=%s: error %d: " - "withdrawing the file system to " - "prevent further damage.\n", - sdp->sd_fsname, sdp->sd_log_error); - gfs2_withdraw(sdp); - continue; - } + if (gfs2_withdrawn(sdp)) + break; if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { gfs2_ail1_empty(sdp, 0); @@ -1329,7 +1301,9 @@ int gfs2_logd(void *data) GFS2_LFC_LOGD_JFLUSH_REQD); } - if (gfs2_ail_flush_reqd(sdp)) { + if (test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) || + gfs2_ail_flush_reqd(sdp)) { + clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags); gfs2_ail1_start(sdp); gfs2_ail1_wait(sdp); gfs2_ail1_empty(sdp, 0); @@ -1339,19 +1313,13 @@ int gfs2_logd(void *data) t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; - try_to_freeze(); - - do { - prepare_to_wait(&sdp->sd_logd_waitq, &wait, - TASK_INTERRUPTIBLE); - if (!gfs2_ail_flush_reqd(sdp) && - !gfs2_jrnl_flush_reqd(sdp) && - !kthread_should_stop()) - t = schedule_timeout(t); - } while(t && !gfs2_ail_flush_reqd(sdp) && - !gfs2_jrnl_flush_reqd(sdp) && - !kthread_should_stop()); - finish_wait(&sdp->sd_logd_waitq, &wait); + t = wait_event_freezable_timeout(sdp->sd_logd_waitq, + test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) || + gfs2_ail_flush_reqd(sdp) || + gfs2_jrnl_flush_reqd(sdp) || + gfs2_withdrawn(sdp) || + kthread_should_stop(), + t); } return 0; diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 653cffcbf869..fc30ebdad83a 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -44,17 +44,6 @@ __releases(&sdp->sd_log_lock) spin_unlock(&sdp->sd_log_lock); } -static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp, - unsigned int value) -{ - if (++value == sdp->sd_jdesc->jd_blocks) { - value = 0; - } - sdp->sd_log_tail = value; - sdp->sd_log_flush_tail = value; - sdp->sd_log_head = value; -} - static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); @@ -70,29 +59,29 @@ static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip) } } -extern void gfs2_ordered_del_inode(struct gfs2_inode *ip); -extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct); -extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); -extern bool gfs2_log_is_empty(struct gfs2_sbd *sdp); -extern void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes); -extern void gfs2_log_release(struct gfs2_sbd *sdp, 
unsigned int blks); -extern bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, - unsigned int *extra_revokes); -extern void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, - unsigned int *extra_revokes); -extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, - u64 seq, u32 tail, u32 lblock, u32 flags, - blk_opf_t op_flags); -extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, - u32 type); -extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); -extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc); -extern void log_flush_wait(struct gfs2_sbd *sdp); +void gfs2_ordered_del_inode(struct gfs2_inode *ip); +unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct); +void gfs2_remove_from_ail(struct gfs2_bufdata *bd); +bool gfs2_log_is_empty(struct gfs2_sbd *sdp); +void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes); +void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); +bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes); +void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes); +void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, + u64 seq, u32 tail, u32 lblock, u32 flags, + blk_opf_t op_flags); +void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, + u32 type); +void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); +void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc); +void log_flush_wait(struct gfs2_sbd *sdp); -extern int gfs2_logd(void *data); -extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); -extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl); -extern void gfs2_flush_revokes(struct gfs2_sbd *sdp); -extern void gfs2_ail_drain(struct gfs2_sbd *sdp); +int gfs2_logd(void *data); +void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); +void gfs2_glock_remove_revoke(struct gfs2_glock *gl); +void gfs2_flush_revokes(struct gfs2_sbd *sdp); +void gfs2_ail_drain(struct gfs2_sbd *sdp); #endif /* __LOG_DOT_H__ */ diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 1902413d5d12..97ebe457c00a 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -49,7 +49,7 @@ void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) if (test_set_buffer_pinned(bh)) gfs2_assert_withdraw(sdp, 0); if (!buffer_uptodate(bh)) - gfs2_io_error_bh_wd(sdp, bh); + gfs2_io_error_bh(sdp, bh); bd = bh->b_private; /* If this buffer is in the AIL and it has already been written * to in-place disk block, remove it from the AIL. @@ -157,7 +157,9 @@ u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lblock) /** * gfs2_end_log_write_bh - end log write of pagecache data with buffers * @sdp: The superblock - * @bvec: The bio_vec + * @folio: The folio + * @offset: The first byte within the folio that completed + * @size: The number of bytes that completed * @error: The i/o status * * This finds the relevant buffers and unlocks them and sets the @@ -166,17 +168,13 @@ u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lblock) * that is pinned in the pagecache. 
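The log-write completion path described above has to translate a (folio, offset, size) range back into the buffer heads that cover it. A reduced model of that walk follows (hypothetical struct; in the kernel the b_this_page list is circular within the folio, which is what lets the skip loop and the size countdown terminate):

#include <stddef.h>

struct bh {
	struct bh *b_this_page;	/* circular list of buffers in the folio */
	size_t b_offset;	/* byte offset within the folio */
	size_t b_size;
};

static void end_write_range(struct bh *head, size_t offset, size_t size,
			    void (*end_one)(struct bh *))
{
	struct bh *bh = head;

	while (bh->b_offset < offset)	/* skip buffers before the range */
		bh = bh->b_this_page;
	do {
		struct bh *next = bh->b_this_page;

		end_one(bh);		/* unlock/release this buffer */
		size -= bh->b_size;
		bh = next;
	} while (bh && size);		/* stop after 'size' bytes */
}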
*/ -static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, - struct bio_vec *bvec, - blk_status_t error) +static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct folio *folio, + size_t offset, size_t size, blk_status_t error) { struct buffer_head *bh, *next; - struct page *page = bvec->bv_page; - unsigned size; - bh = page_buffers(page); - size = bvec->bv_len; - while (bh_offset(bh) < bvec->bv_offset) + bh = folio_buffers(folio); + while (bh_offset(bh) < offset) bh = bh->b_this_page; do { if (error) @@ -186,7 +184,7 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, size -= bh->b_size; brelse(bh); bh = next; - } while(bh && size); + } while (bh && size); } /** @@ -203,23 +201,24 @@ static void gfs2_end_log_write(struct bio *bio) { struct gfs2_sbd *sdp = bio->bi_private; struct bio_vec *bvec; - struct page *page; struct bvec_iter_all iter_all; if (bio->bi_status) { - if (!cmpxchg(&sdp->sd_log_error, 0, (int)bio->bi_status)) + int err = blk_status_to_errno(bio->bi_status); + + if (!cmpxchg(&sdp->sd_log_error, 0, err)) fs_err(sdp, "Error %d writing to journal, jid=%u\n", - bio->bi_status, sdp->sd_jdesc->jd_jid); - gfs2_withdraw_delayed(sdp); - /* prevent more writes to the journal */ - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - wake_up(&sdp->sd_logd_waitq); + err, sdp->sd_jdesc->jd_jid); + gfs2_withdraw(sdp); } bio_for_each_segment_all(bvec, bio, iter_all) { - page = bvec->bv_page; - if (page_has_buffers(page)) - gfs2_end_log_write_bh(sdp, bvec, bio->bi_status); + struct page *page = bvec->bv_page; + struct folio *folio = page_folio(page); + + if (folio && folio_buffers(folio)) + gfs2_end_log_write_bh(sdp, folio, bvec->bv_offset, + bvec->bv_len, bio->bi_status); else mempool_free(page, gfs2_page_pool); } @@ -359,8 +358,8 @@ static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh) dblock = gfs2_log_bmap(sdp->sd_jdesc, sdp->sd_log_flush_head); gfs2_log_incr_head(sdp); - gfs2_log_write(sdp, sdp->sd_jdesc, bh->b_page, bh->b_size, - bh_offset(bh), dblock); + gfs2_log_write(sdp, sdp->sd_jdesc, folio_page(bh->b_folio, 0), + bh->b_size, bh_offset(bh), dblock); } /** @@ -391,46 +390,40 @@ static void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page) * Simply unlock the pages in the bio. The main thread will wait on them and * process them in order as necessary. */ - static void gfs2_end_log_read(struct bio *bio) { - struct page *page; - struct bio_vec *bvec; - struct bvec_iter_all iter_all; + int error = blk_status_to_errno(bio->bi_status); + struct folio_iter fi; - bio_for_each_segment_all(bvec, bio, iter_all) { - page = bvec->bv_page; - if (bio->bi_status) { - int err = blk_status_to_errno(bio->bi_status); - - SetPageError(page); - mapping_set_error(page->mapping, err); - } - unlock_page(page); + bio_for_each_folio_all(fi, bio) { + /* We're abusing wb_err to get the error to gfs2_find_jhead */ + filemap_set_wb_err(fi.folio->mapping, error); + folio_end_read(fi.folio, !error); } bio_put(bio); } /** - * gfs2_jhead_pg_srch - Look for the journal head in a given page. + * gfs2_jhead_folio_search - Look for the journal head in a given page. * @jd: The journal descriptor * @head: The journal head to start from - * @page: The page to look in + * @folio: The folio to look in * * Returns: 1 if found, 0 otherwise. 
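The folio search shown below walks a folio in block-size steps looking for the most recent log header, and stops as soon as the sequence numbers stop increasing, since the head is the last header written before the log wraps. A simplified model of that loop; decode_header() is a hypothetical stand-in for __get_log_header():

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct log_header { uint64_t sequence; };

/* Hypothetical: returns 0 and fills *lh if buf holds a valid header. */
int decode_header(const void *buf, struct log_header *lh);

static bool find_head_in_folio(const char *kaddr, size_t folio_size,
			       size_t bsize, struct log_header *head)
{
	struct log_header lh;
	size_t offset;

	for (offset = 0; offset < folio_size; offset += bsize) {
		if (decode_header(kaddr + offset, &lh))
			continue;	/* not a log header: keep scanning */
		if (lh.sequence >= head->sequence)
			*head = lh;	/* newer header: remember it */
		else
			return true;	/* sequence dropped: head found */
	}
	return false;			/* keep searching the next folio */
}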
*/ - -static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd, - struct gfs2_log_header_host *head, - struct page *page) +static bool gfs2_jhead_folio_search(struct gfs2_jdesc *jd, + struct gfs2_log_header_host *head, + struct folio *folio) { struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct gfs2_log_header_host lh; - void *kaddr = kmap_atomic(page); + void *kaddr; unsigned int offset; bool ret = false; + VM_BUG_ON_FOLIO(folio_test_large(folio), folio); + kaddr = kmap_local_folio(folio, 0); for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) { if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) { if (lh.lh_sequence >= head->lh_sequence) @@ -441,7 +434,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd, } } } - kunmap_atomic(kaddr); + kunmap_local(kaddr); return ret; } @@ -455,7 +448,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd, * Find the folio with 'index' in the journal's mapping. Search the folio for * the journal head if requested (cleanup == false). Release refs on the * folio so the page cache can reclaim it. We grabbed a - * reference on this folio twice, first when we did a find_or_create_page() + * reference on this folio twice, first when we did a filemap_grab_folio() * to obtain the folio to add it to the bio and second when we do a * filemap_get_folio() here to get the folio to wait on while I/O on it is being * completed. @@ -474,13 +467,13 @@ static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index, folio = filemap_get_folio(jd->jd_inode->i_mapping, index); folio_wait_locked(folio); - if (folio_test_error(folio)) + if (!folio_test_uptodate(folio)) *done = true; if (!*done) - *done = gfs2_jhead_pg_srch(jd, head, &folio->page); + *done = gfs2_jhead_folio_search(jd, head, folio); - /* filemap_get_folio() and the earlier find_or_create_page() */ + /* filemap_get_folio() and the earlier filemap_grab_folio() */ folio_put_refs(folio, 2); } @@ -491,7 +484,7 @@ static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs) new = bio_alloc(prev->bi_bdev, nr_iovecs, prev->bi_opf, GFP_NOIO); bio_clone_blkg_association(new, prev); new->bi_iter.bi_sector = bio_end_sector(prev); - bio_chain(new, prev); + bio_chain(prev, new); submit_bio(prev); return new; } @@ -500,15 +493,13 @@ static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs) * gfs2_find_jhead - find the head of a log * @jd: The journal descriptor * @head: The log descriptor for the head of the log is returned here - * @keep_cache: If set inode pages will not be truncated * * Do a search of a journal by reading it in large chunks using bios and find * the valid log entry with the highest sequence number. (i.e. 
the log head) * * Returns: 0 on success, errno otherwise */ -int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, - bool keep_cache) +int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) { struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct address_space *mapping = jd->jd_inode->i_mapping; @@ -518,9 +509,9 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, unsigned int shift = PAGE_SHIFT - bsize_shift; unsigned int max_blocks = 2 * 1024 * 1024 >> bsize_shift; struct gfs2_journal_extent *je; - int sz, ret = 0; + int ret = 0; struct bio *bio = NULL; - struct page *page = NULL; + struct folio *folio = NULL; bool done = false; errseq_t since; @@ -533,11 +524,11 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, u64 dblock = je->dblock; for (; block < je->lblock + je->blocks; block++, dblock++) { - if (!page) { - page = find_or_create_page(mapping, - block >> shift, GFP_NOFS); - if (!page) { - ret = -ENOMEM; + if (!folio) { + folio = filemap_grab_folio(mapping, + block >> shift); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); done = true; goto out; } @@ -548,8 +539,7 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, sector_t sector = dblock << sdp->sd_fsb2bb_shift; if (bio_end_sector(bio) == sector) { - sz = bio_add_page(bio, page, bsize, off); - if (sz == bsize) + if (bio_add_folio(bio, folio, bsize, off)) goto block_added; } if (off) { @@ -569,12 +559,11 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, bio = gfs2_log_alloc_bio(sdp, dblock, gfs2_end_log_read); bio->bi_opf = REQ_OP_READ; add_block_to_new_bio: - sz = bio_add_page(bio, page, bsize, off); - BUG_ON(sz != bsize); + bio_add_folio_nofail(bio, folio, bsize, off); block_added: off += bsize; - if (off == PAGE_SIZE) - page = NULL; + if (off == folio_size(folio)) + folio = NULL; if (blocks_submitted <= blocks_read + max_blocks) { /* Keep at least one bio in flight */ continue; @@ -598,8 +587,7 @@ out: if (!ret) ret = filemap_check_wb_err(mapping, since); - if (!keep_cache) - truncate_inode_pages(mapping, 0); + truncate_inode_pages(mapping, 0); return ret; } @@ -622,15 +610,13 @@ static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type, static void gfs2_check_magic(struct buffer_head *bh) { - void *kaddr; __be32 *ptr; clear_buffer_escaped(bh); - kaddr = kmap_atomic(bh->b_page); - ptr = kaddr + bh_offset(bh); + ptr = kmap_local_folio(bh->b_folio, bh_offset(bh)); if (*ptr == cpu_to_be32(GFS2_MAGIC)) set_buffer_escaped(bh); - kunmap_atomic(kaddr); + kunmap_local(ptr); } static int blocknr_cmp(void *priv, const struct list_head *a, @@ -696,14 +682,12 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, lock_buffer(bd2->bd_bh); if (buffer_escaped(bd2->bd_bh)) { - void *kaddr; + void *p; + page = mempool_alloc(gfs2_page_pool, GFP_NOIO); - ptr = page_address(page); - kaddr = kmap_atomic(bd2->bd_bh->b_page); - memcpy(ptr, kaddr + bh_offset(bd2->bd_bh), - bd2->bd_bh->b_size); - kunmap_atomic(kaddr); - *(__be32 *)ptr = 0; + p = page_address(page); + memcpy_from_page(p, page, bh_offset(bd2->bd_bh), bd2->bd_bh->b_size); + *(__be32 *)p = 0; clear_buffer_escaped(bd2->bd_bh); unlock_buffer(bd2->bd_bh); brelse(bd2->bd_bh); diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 1412ffba1d44..be740bf33666 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -11,16 +11,18 @@ #include "incore.h" extern const struct gfs2_log_operations *gfs2_log_ops[]; 
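The escaping logic in gfs2_check_magic()/gfs2_before_commit() above protects journal replay: a data block that happens to begin with the GFS2 magic number could be mistaken for log metadata, so the copy written to the journal has its first 32-bit word zeroed and the buffer is marked escaped so replay can restore it. A userspace sketch of the escape step (GFS2_MAGIC as in the on-disk format; the helper itself is illustrative):

#include <arpa/inet.h>	/* htonl: on-disk fields are big-endian */
#include <stdint.h>
#include <string.h>

#define GFS2_MAGIC 0x01161970u

/* Copy a data block into its journal buffer, escaping it if needed.
 * Returns 1 if the block was escaped (replay must restore the magic). */
static int journal_copy_escaped(void *jbuf, const void *data, size_t bsize)
{
	uint32_t magic_be = htonl(GFS2_MAGIC);

	memcpy(jbuf, data, bsize);
	if (memcmp(jbuf, &magic_be, sizeof(magic_be)) == 0) {
		memset(jbuf, 0, sizeof(uint32_t));	/* zero first __be32 */
		return 1;
	}
	return 0;
}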
-extern void gfs2_log_incr_head(struct gfs2_sbd *sdp); -extern u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lbn); -extern void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, - struct page *page, unsigned size, unsigned offset, - u64 blkno); -extern void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf); -extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); -extern int gfs2_find_jhead(struct gfs2_jdesc *jd, - struct gfs2_log_header_host *head, bool keep_cache); -extern void gfs2_drain_revokes(struct gfs2_sbd *sdp); + +void gfs2_log_incr_head(struct gfs2_sbd *sdp); +u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lbn); +void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, + struct page *page, unsigned size, unsigned offset, + u64 blkno); +void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf); +void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); +int gfs2_find_jhead(struct gfs2_jdesc *jd, + struct gfs2_log_header_host *head); +void gfs2_drain_revokes(struct gfs2_sbd *sdp); + static inline unsigned int buf_limit(struct gfs2_sbd *sdp) { return sdp->sd_ldptrs; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index afcb32854f14..9d65719353fa 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -51,7 +51,6 @@ static void gfs2_init_glock_once(void *foo) { struct gfs2_glock *gl = foo; - spin_lock_init(&gl->gl_lockref.lock); INIT_LIST_HEAD(&gl->gl_holders); INIT_LIST_HEAD(&gl->gl_lru); INIT_LIST_HEAD(&gl->gl_ail_list); @@ -111,7 +110,6 @@ static int __init init_gfs2_fs(void) gfs2_inode_cachep = kmem_cache_create("gfs2_inode", sizeof(struct gfs2_inode), 0, SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD| SLAB_ACCOUNT, gfs2_init_inode_once); if (!gfs2_inode_cachep) @@ -147,14 +145,15 @@ static int __init init_gfs2_fs(void) if (!gfs2_trans_cachep) goto fail_cachep8; - error = register_shrinker(&gfs2_qd_shrinker, "gfs2-qd"); + error = gfs2_qd_shrinker_init(); if (error) goto fail_shrinker; error = -ENOMEM; - gfs_recovery_wq = alloc_workqueue("gfs_recovery", - WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); - if (!gfs_recovery_wq) + gfs2_recovery_wq = alloc_workqueue("gfs2_recovery", + WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU, + 0); + if (!gfs2_recovery_wq) goto fail_wq1; gfs2_control_wq = alloc_workqueue("gfs2_control", @@ -162,7 +161,7 @@ static int __init init_gfs2_fs(void) if (!gfs2_control_wq) goto fail_wq2; - gfs2_freeze_wq = alloc_workqueue("freeze_workqueue", 0, 0); + gfs2_freeze_wq = alloc_workqueue("gfs2_freeze", WQ_PERCPU, 0); if (!gfs2_freeze_wq) goto fail_wq3; @@ -194,9 +193,9 @@ fail_mempool: fail_wq3: destroy_workqueue(gfs2_control_wq); fail_wq2: - destroy_workqueue(gfs_recovery_wq); + destroy_workqueue(gfs2_recovery_wq); fail_wq1: - unregister_shrinker(&gfs2_qd_shrinker); + gfs2_qd_shrinker_exit(); fail_shrinker: kmem_cache_destroy(gfs2_trans_cachep); fail_cachep8: @@ -229,12 +228,12 @@ fail_lru: static void __exit exit_gfs2_fs(void) { - unregister_shrinker(&gfs2_qd_shrinker); + gfs2_qd_shrinker_exit(); gfs2_glock_exit(); gfs2_unregister_debugfs(); unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); - destroy_workqueue(gfs_recovery_wq); + destroy_workqueue(gfs2_recovery_wq); destroy_workqueue(gfs2_control_wq); destroy_workqueue(gfs2_freeze_wq); list_lru_destroy(&gfs2_qd_lru); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 3c41b864ee5b..e4356198d8d8 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -30,16 +30,16 @@ #include "util.h" #include "trace_gfs2.h" -static int gfs2_aspace_writepage(struct 
page *page, struct writeback_control *wbc) +static void gfs2_aspace_write_folio(struct folio *folio, + struct writeback_control *wbc) { struct buffer_head *bh, *head; int nr_underway = 0; blk_opf_t write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc); - BUG_ON(!PageLocked(page)); - BUG_ON(!page_has_buffers(page)); + BUG_ON(!folio_test_locked(folio)); - head = page_buffers(page); + head = folio_buffers(folio); bh = head; do { @@ -55,7 +55,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb if (wbc->sync_mode != WB_SYNC_NONE) { lock_buffer(bh); } else if (!trylock_buffer(bh)) { - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); continue; } if (test_clear_buffer_dirty(bh)) { @@ -66,11 +66,11 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb } while ((bh = bh->b_this_page) != head); /* - * The page and its buffers are protected by PageWriteback(), so we can - * drop the bh refcounts early. + * The folio and its buffers are protected from truncation by + * the writeback flag, so we can drop the bh refcounts early. */ - BUG_ON(PageWriteback(page)); - set_page_writeback(page); + BUG_ON(folio_test_writeback(folio)); + folio_start_writeback(folio); do { struct buffer_head *next = bh->b_this_page; @@ -80,26 +80,38 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb } bh = next; } while (bh != head); - unlock_page(page); + folio_unlock(folio); if (nr_underway == 0) - end_page_writeback(page); + folio_end_writeback(folio); +} - return 0; +static int gfs2_aspace_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct folio *folio = NULL; + int error; + + while ((folio = writeback_iter(mapping, wbc, folio, &error))) + gfs2_aspace_write_folio(folio, wbc); + + return error; } const struct address_space_operations gfs2_meta_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, - .writepage = gfs2_aspace_writepage, + .writepages = gfs2_aspace_writepages, .release_folio = gfs2_release_folio, + .migrate_folio = buffer_migrate_folio_norefs, }; const struct address_space_operations gfs2_rgrp_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, - .writepage = gfs2_aspace_writepage, + .writepages = gfs2_aspace_writepages, .release_folio = gfs2_release_folio, + .migrate_folio = buffer_migrate_folio_norefs, }; /** @@ -115,50 +127,45 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) { struct address_space *mapping = gfs2_glock2aspace(gl); struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct page *page; + struct folio *folio; struct buffer_head *bh; unsigned int shift; unsigned long index; unsigned int bufnum; if (mapping == NULL) - mapping = &sdp->sd_aspace; + mapping = gfs2_aspace(sdp); shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift; index = blkno >> shift; /* convert block to page */ bufnum = blkno - (index << shift); /* block buf index within page */ if (create) { - for (;;) { - page = grab_cache_page(mapping, index); - if (page) - break; - yield(); - } - if (!page_has_buffers(page)) - create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0); + folio = __filemap_get_folio(mapping, index, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, + mapping_gfp_mask(mapping) | __GFP_NOFAIL); + bh = folio_buffers(folio); + if (!bh) + bh = create_empty_buffers(folio, + sdp->sd_sb.sb_bsize, 0); } else { - page = find_get_page_flags(mapping, index, - FGP_LOCK|FGP_ACCESSED); - if 
(!page) + folio = __filemap_get_folio(mapping, index, + FGP_LOCK | FGP_ACCESSED, 0); + if (IS_ERR(folio)) return NULL; - if (!page_has_buffers(page)) { - bh = NULL; - goto out_unlock; - } + bh = folio_buffers(folio); } - /* Locate header for our buffer within our page */ - for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page) - /* Do nothing */; - get_bh(bh); + if (!bh) + goto out_unlock; + bh = get_nth_bh(bh, bufnum); if (!buffer_mapped(bh)) map_bh(bh, sdp->sd_vfs, blkno); out_unlock: - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return bh; } @@ -193,15 +200,14 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) static void gfs2_meta_read_endio(struct bio *bio) { - struct bio_vec *bvec; - struct bvec_iter_all iter_all; + struct folio_iter fi; - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *page = bvec->bv_page; - struct buffer_head *bh = page_buffers(page); - unsigned int len = bvec->bv_len; + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; + struct buffer_head *bh = folio_buffers(folio); + size_t len = fi.length; - while (bh_offset(bh) < bvec->bv_offset) + while (bh_offset(bh) < fi.offset) bh = bh->b_this_page; do { struct buffer_head *next = bh->b_this_page; @@ -224,10 +230,10 @@ static void gfs2_submit_bhs(blk_opf_t opf, struct buffer_head *bhs[], int num) struct bio *bio; bio = bio_alloc(bh->b_bdev, num, opf, GFP_NOIO); - bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> SECTOR_SHIFT); while (num > 0) { bh = *bhs; - if (!bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh))) { + if (!bio_add_folio(bio, bh->b_folio, bh->b_size, bh_offset(bh))) { BUG_ON(bio->bi_iter.bi_size == 0); break; } @@ -257,7 +263,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head *bh, *bhs[2]; int num = 0; - if (unlikely(gfs2_withdrawn(sdp)) && !gfs2_withdraw_in_prog(sdp)) { + if (gfs2_withdrawn(sdp)) { *bhp = NULL; return -EIO; } @@ -296,7 +302,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, if (unlikely(!buffer_uptodate(bh))) { struct gfs2_trans *tr = current->journal_info; if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) - gfs2_io_error_bh_wd(sdp, bh); + gfs2_io_error_bh(sdp, bh); brelse(bh); *bhp = NULL; return -EIO; @@ -315,7 +321,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) { - if (unlikely(gfs2_withdrawn(sdp)) && !gfs2_withdraw_in_prog(sdp)) + if (gfs2_withdrawn(sdp)) return -EIO; wait_on_buffer(bh); @@ -323,10 +329,10 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) if (!buffer_uptodate(bh)) { struct gfs2_trans *tr = current->journal_info; if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) - gfs2_io_error_bh_wd(sdp, bh); + gfs2_io_error_bh(sdp, bh); return -EIO; } - if (unlikely(gfs2_withdrawn(sdp)) && !gfs2_withdraw_in_prog(sdp)) + if (gfs2_withdrawn(sdp)) return -EIO; return 0; @@ -334,7 +340,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) void gfs2_remove_from_journal(struct buffer_head *bh, int meta) { - struct address_space *mapping = bh->b_page->mapping; + struct address_space *mapping = bh->b_folio->mapping; struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); struct gfs2_bufdata *bd = bh->b_private; struct gfs2_trans *tr = current->journal_info; @@ -405,26 +411,20 @@ static struct buffer_head *gfs2_getjdatabuf(struct gfs2_inode *ip, u64 blkno) { struct 
address_space *mapping = ip->i_inode.i_mapping; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct page *page; + struct folio *folio; struct buffer_head *bh; unsigned int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift; unsigned long index = blkno >> shift; /* convert block to page */ unsigned int bufnum = blkno - (index << shift); - page = find_get_page_flags(mapping, index, FGP_LOCK|FGP_ACCESSED); - if (!page) - return NULL; - if (!page_has_buffers(page)) { - unlock_page(page); - put_page(page); + folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED, 0); + if (IS_ERR(folio)) return NULL; - } - /* Locate header for our buffer within our page */ - for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page) - /* Do nothing */; - get_bh(bh); - unlock_page(page); - put_page(page); + bh = folio_buffers(folio); + if (bh) + bh = get_nth_bh(bh, bufnum); + folio_unlock(folio); + folio_put(folio); return bh; } @@ -442,11 +442,9 @@ void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) struct buffer_head *bh; int ty; - if (!ip->i_gl) { - /* This can only happen during incomplete inode creation. */ - BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + /* This can only happen during incomplete inode creation. */ + if (!ip->i_gl) return; - } gfs2_ail1_wipe(sdp, bstart, blen); while (blen) { diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index d0a58cdd433a..b7c8a6684d02 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -44,27 +44,25 @@ static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) struct gfs2_glock_aspace *gla = container_of(mapping, struct gfs2_glock_aspace, mapping); return gla->glock.gl_name.ln_sbd; - } else if (mapping->a_ops == &gfs2_rgrp_aops) - return container_of(mapping, struct gfs2_sbd, sd_aspace); - else + } else return inode->i_sb->s_fs_info; } -extern struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); -extern int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, - int rahead, struct buffer_head **bhp); -extern int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); -extern struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, - int create); +struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); +int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, + int rahead, struct buffer_head **bhp); +int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); +struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, + int create); enum { REMOVE_JDATA = 0, REMOVE_META = 1, }; -extern void gfs2_remove_from_journal(struct buffer_head *bh, int meta); -extern void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); -extern int gfs2_meta_buffer(struct gfs2_inode *ip, u32 mtype, u64 num, - struct buffer_head **bhp); +void gfs2_remove_from_journal(struct buffer_head *bh, int meta); +void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); +int gfs2_meta_buffer(struct gfs2_inode *ip, u32 mtype, u64 num, + struct buffer_head **bhp); static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip, struct buffer_head **bhp) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index c0cf1d2d0ef5..e7a88b717991 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -60,19 +60,21 @@ static void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_new_files_jdata = 0; gt->gt_max_readahead = BIT(18); gt->gt_complain_secs = 10; + gt->gt_withdraw_helper_timeout = 5; } void free_sbd(struct gfs2_sbd *sdp) { - if (sdp->sd_lkstats) 
- free_percpu(sdp->sd_lkstats); + struct super_block *sb = sdp->sd_vfs; + + free_percpu(sdp->sd_lkstats); + sb->s_fs_info = NULL; kfree(sdp); } static struct gfs2_sbd *init_sbd(struct super_block *sb) { struct gfs2_sbd *sdp; - struct address_space *mapping; sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL); if (!sdp) @@ -87,11 +89,11 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) set_bit(SDF_NOJOURNALID, &sdp->sd_flags); gfs2_tune_init(&sdp->sd_tune); - init_waitqueue_head(&sdp->sd_glock_wait); + init_waitqueue_head(&sdp->sd_kill_wait); init_waitqueue_head(&sdp->sd_async_glock_wait); atomic_set(&sdp->sd_glock_disposal, 0); init_completion(&sdp->sd_locking_init); - init_completion(&sdp->sd_wdack); + init_completion(&sdp->sd_withdraw_helper); spin_lock_init(&sdp->sd_statfs_spin); spin_lock_init(&sdp->sd_rindex_spin); @@ -103,23 +105,12 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_completion(&sdp->sd_journal_ready); INIT_LIST_HEAD(&sdp->sd_quota_list); - mutex_init(&sdp->sd_quota_mutex); mutex_init(&sdp->sd_quota_sync_mutex); init_waitqueue_head(&sdp->sd_quota_wait); spin_lock_init(&sdp->sd_bitmap_lock); INIT_LIST_HEAD(&sdp->sd_sc_inodes_list); - mapping = &sdp->sd_aspace; - - address_space_init_once(mapping); - mapping->a_ops = &gfs2_rgrp_aops; - mapping->host = sb->s_bdev->bd_inode; - mapping->flags = 0; - mapping_set_gfp_mask(mapping, GFP_NOFS); - mapping->private_data = NULL; - mapping->writeback_index = 0; - spin_lock_init(&sdp->sd_log_lock); atomic_set(&sdp->sd_log_pinned, 0); INIT_LIST_HEAD(&sdp->sd_log_revokes); @@ -135,8 +126,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_rwsem(&sdp->sd_log_flush_lock); atomic_set(&sdp->sd_log_in_flight, 0); init_waitqueue_head(&sdp->sd_log_flush_wait); - atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); mutex_init(&sdp->sd_freeze_mutex); + INIT_LIST_HEAD(&sdp->sd_dead_glocks); return sdp; @@ -173,7 +164,7 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) return -EINVAL; } - if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE || + if (sb->sb_bsize < SECTOR_SIZE || sb->sb_bsize > PAGE_SIZE || (sb->sb_bsize & (sb->sb_bsize - 1))) { pr_warn("Invalid block size\n"); return -EINVAL; @@ -185,22 +176,10 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) return 0; } -static void end_bio_io_page(struct bio *bio) -{ - struct page *page = bio->bi_private; - - if (!bio->bi_status) - SetPageUptodate(page); - else - pr_warn("error %d reading superblock\n", bio->bi_status); - unlock_page(page); -} - -static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf) +static void gfs2_sb_in(struct gfs2_sbd *sdp, const struct gfs2_sb *str) { struct gfs2_sb_host *sb = &sdp->sd_sb; struct super_block *s = sdp->sd_vfs; - const struct gfs2_sb *str = buf; sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); sb->sb_type = be32_to_cpu(str->sb_header.mh_type); @@ -215,7 +194,7 @@ static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf) memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); - memcpy(&s->s_uuid, str->sb_uuid, 16); + super_set_uuid(s, str->sb_uuid, 16); } /** @@ -239,36 +218,22 @@ static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf) static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) { - struct super_block *sb = sdp->sd_vfs; - struct gfs2_sb *p; - struct page *page; - struct bio *bio; + struct gfs2_sb *sb; + int err; - page = alloc_page(GFP_NOFS); - if (unlikely(!page)) + sb = 
kmalloc(PAGE_SIZE, GFP_KERNEL); + if (unlikely(!sb)) return -ENOMEM; - - ClearPageUptodate(page); - ClearPageDirty(page); - lock_page(page); - - bio = bio_alloc(sb->s_bdev, 1, REQ_OP_READ | REQ_META, GFP_NOFS); - bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9); - bio_add_page(bio, page, PAGE_SIZE, 0); - - bio->bi_end_io = end_bio_io_page; - bio->bi_private = page; - submit_bio(bio); - wait_on_page_locked(page); - bio_put(bio); - if (!PageUptodate(page)) { - __free_page(page); - return -EIO; - } - p = kmap(page); - gfs2_sb_in(sdp, p); - kunmap(page); - __free_page(page); + err = bdev_rw_virt(sdp->sd_vfs->s_bdev, + sector << (sdp->sd_vfs->s_blocksize_bits - SECTOR_SHIFT), + sb, PAGE_SIZE, REQ_OP_READ | REQ_META); + if (err) { + pr_warn("error %d reading superblock\n", err); + kfree(sb); + return err; + } + gfs2_sb_in(sdp, sb); + kfree(sb); return gfs2_check_sb(sdp, silent); } @@ -293,8 +258,7 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent) return error; } - sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - - GFS2_BASIC_BLOCK_SHIFT; + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - SECTOR_SHIFT; sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift); sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) / sizeof(u64); @@ -407,7 +371,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, error = gfs2_glock_nq_num(sdp, GFS2_MOUNT_LOCK, &gfs2_nondisk_glops, LM_ST_EXCLUSIVE, - LM_FLAG_NOEXP | GL_NOCACHE | GL_NOPID, + LM_FLAG_RECOVER | GL_NOCACHE | GL_NOPID, mount_gh); if (error) { fs_err(sdp, "can't acquire mount glock: %d\n", error); @@ -417,7 +381,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, error = gfs2_glock_nq_num(sdp, GFS2_LIVE_LOCK, &gfs2_nondisk_glops, LM_ST_SHARED, - LM_FLAG_NOEXP | GL_EXACT | GL_NOPID, + LM_FLAG_RECOVER | GL_EXACT | GL_NOPID, &sdp->sd_live_gh); if (error) { fs_err(sdp, "can't acquire live glock: %d\n", error); @@ -434,7 +398,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, error = gfs2_glock_get(sdp, GFS2_FREEZE_LOCK, &gfs2_freeze_glops, CREATE, &sdp->sd_freeze_gl); if (error) { - fs_err(sdp, "can't create transaction glock: %d\n", error); + fs_err(sdp, "can't create freeze glock: %d\n", error); goto fail_rename; } @@ -522,7 +486,9 @@ static int init_sb(struct gfs2_sbd *sdp, int silent) sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); goto out; } - sb_set_blocksize(sb, sdp->sd_sb.sb_bsize); + ret = -EINVAL; + if (!sb_set_blocksize(sb, sdp->sd_sb.sb_bsize)) + goto out; /* Get the root inode */ no_addr = sdp->sd_sb.sb_root_dir.no_addr; @@ -577,8 +543,6 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) mutex_lock(&sdp->sd_jindex_mutex); for (;;) { - struct gfs2_inode *jip; - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh); if (error) break; @@ -619,8 +583,6 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) d_mark_dontcache(jd->jd_inode); spin_lock(&sdp->sd_jindex_spin); jd->jd_jid = sdp->sd_journals++; - jip = GFS2_I(jd->jd_inode); - jd->jd_no_addr = jip->i_no_addr; list_add_tail(&jd->jd_list, &sdp->sd_jindex_list); spin_unlock(&sdp->sd_jindex_spin); } @@ -649,7 +611,7 @@ static int init_statfs(struct gfs2_sbd *sdp) struct gfs2_jdesc *jd; struct gfs2_inode *ip; - sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs"); + sdp->sd_statfs_inode = gfs2_lookup_meta(master, "statfs"); if (IS_ERR(sdp->sd_statfs_inode)) { error = PTR_ERR(sdp->sd_statfs_inode); fs_err(sdp, "can't read in 
statfs inode: %d\n", error); @@ -658,7 +620,7 @@ static int init_statfs(struct gfs2_sbd *sdp) if (sdp->sd_args.ar_spectator) goto out; - pn = gfs2_lookup_simple(master, "per_node"); + pn = gfs2_lookup_meta(master, "per_node"); if (IS_ERR(pn)) { error = PTR_ERR(pn); fs_err(sdp, "can't find per_node directory: %d\n", error); @@ -675,7 +637,7 @@ static int init_statfs(struct gfs2_sbd *sdp) goto free_local; } sprintf(buf, "statfs_change%u", jd->jd_jid); - lsi->si_sc_inode = gfs2_lookup_simple(pn, buf); + lsi->si_sc_inode = gfs2_lookup_meta(pn, buf); if (IS_ERR(lsi->si_sc_inode)) { error = PTR_ERR(lsi->si_sc_inode); fs_err(sdp, "can't find local \"sc\" file#%u: %d\n", @@ -734,15 +696,13 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) struct inode *master = d_inode(sdp->sd_master_dir); struct gfs2_holder ji_gh; struct gfs2_inode *ip; - int jindex = 1; int error = 0; - if (undo) { - jindex = 0; + gfs2_holder_mark_uninitialized(&ji_gh); + if (undo) goto fail_statfs; - } - sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); + sdp->sd_jindex = gfs2_lookup_meta(master, "jindex"); if (IS_ERR(sdp->sd_jindex)) { fs_err(sdp, "can't lookup journal index: %d\n", error); return PTR_ERR(sdp->sd_jindex); @@ -782,7 +742,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid, &gfs2_journal_glops, LM_ST_EXCLUSIVE, - LM_FLAG_NOEXP | GL_NOCACHE | GL_NOPID, + LM_FLAG_RECOVER | GL_NOPID, &sdp->sd_journal_gh); if (error) { fs_err(sdp, "can't acquire journal glock: %d\n", error); @@ -790,9 +750,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) } ip = GFS2_I(sdp->sd_jdesc->jd_inode); - sdp->sd_jinode_gl = ip->i_gl; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | GL_EXACT | + LM_FLAG_RECOVER | GL_EXACT | GL_NOCACHE | GL_NOPID, &sdp->sd_jinode_gh); if (error) { @@ -852,24 +811,20 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) sdp->sd_log_idle = 1; set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags); gfs2_glock_dq_uninit(&ji_gh); - jindex = 0; INIT_WORK(&sdp->sd_freeze_work, gfs2_freeze_func); return 0; fail_statfs: uninit_statfs(sdp); fail_jinode_gh: - /* A withdraw may have done dq/uninit so now we need to check it */ - if (!sdp->sd_args.ar_spectator && - gfs2_holder_initialized(&sdp->sd_jinode_gh)) + if (!sdp->sd_args.ar_spectator) gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); fail_journal_gh: - if (!sdp->sd_args.ar_spectator && - gfs2_holder_initialized(&sdp->sd_journal_gh)) + if (!sdp->sd_args.ar_spectator) gfs2_glock_dq_uninit(&sdp->sd_journal_gh); fail_jindex: gfs2_jindex_free(sdp); - if (jindex) + if (gfs2_holder_initialized(&ji_gh)) gfs2_glock_dq_uninit(&ji_gh); fail: iput(sdp->sd_jindex); @@ -892,7 +847,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) goto fail; /* Read in the resource index inode */ - sdp->sd_rindex = gfs2_lookup_simple(master, "rindex"); + sdp->sd_rindex = gfs2_lookup_meta(master, "rindex"); if (IS_ERR(sdp->sd_rindex)) { error = PTR_ERR(sdp->sd_rindex); fs_err(sdp, "can't get resource index inode: %d\n", error); @@ -901,7 +856,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) sdp->sd_rindex_uptodate = 0; /* Read in the quota inode */ - sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota"); + sdp->sd_quota_inode = gfs2_lookup_meta(master, "quota"); if (IS_ERR(sdp->sd_quota_inode)) { error = PTR_ERR(sdp->sd_quota_inode); fs_err(sdp, "can't get quota file inode: %d\n", error); @@ -945,7 +900,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) if (undo) 
goto fail_qc_gh; - pn = gfs2_lookup_simple(master, "per_node"); + pn = gfs2_lookup_meta(master, "per_node"); if (IS_ERR(pn)) { error = PTR_ERR(pn); fs_err(sdp, "can't find per_node directory: %d\n", error); @@ -953,7 +908,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) } sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid); - sdp->sd_qc_inode = gfs2_lookup_simple(pn, buf); + sdp->sd_qc_inode = gfs2_lookup_meta(pn, buf); if (IS_ERR(sdp->sd_qc_inode)) { error = PTR_ERR(sdp->sd_qc_inode); fs_err(sdp, "can't find local \"qc\" file: %d\n", error); @@ -1078,8 +1033,8 @@ hostdata_error: void gfs2_lm_unmount(struct gfs2_sbd *sdp) { const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops; - if (likely(!gfs2_withdrawn(sdp)) && lm->lm_unmount) - lm->lm_unmount(sdp); + if (!gfs2_withdrawn(sdp) && lm->lm_unmount) + lm->lm_unmount(sdp, true); } static int wait_on_journal(struct gfs2_sbd *sdp) @@ -1107,29 +1062,46 @@ static int init_threads(struct gfs2_sbd *sdp) struct task_struct *p; int error = 0; - p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); + p = kthread_create(gfs2_logd, sdp, "gfs2_logd/%s", sdp->sd_fsname); if (IS_ERR(p)) { error = PTR_ERR(p); - fs_err(sdp, "can't start logd thread: %d\n", error); + fs_err(sdp, "can't create logd thread: %d\n", error); return error; } + get_task_struct(p); sdp->sd_logd_process = p; - p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); + p = kthread_create(gfs2_quotad, sdp, "gfs2_quotad/%s", sdp->sd_fsname); if (IS_ERR(p)) { error = PTR_ERR(p); - fs_err(sdp, "can't start quotad thread: %d\n", error); + fs_err(sdp, "can't create quotad thread: %d\n", error); goto fail; } + get_task_struct(p); sdp->sd_quotad_process = p; + + wake_up_process(sdp->sd_logd_process); + wake_up_process(sdp->sd_quotad_process); return 0; fail: - kthread_stop(sdp->sd_logd_process); + kthread_stop_put(sdp->sd_logd_process); sdp->sd_logd_process = NULL; return error; } +void gfs2_destroy_threads(struct gfs2_sbd *sdp) +{ + if (sdp->sd_logd_process) { + kthread_stop_put(sdp->sd_logd_process); + sdp->sd_logd_process = NULL; + } + if (sdp->sd_quotad_process) { + kthread_stop_put(sdp->sd_quotad_process); + sdp->sd_quotad_process = NULL; + } +} + /** * gfs2_fill_super - Read in superblock * @sb: The VFS superblock @@ -1143,7 +1115,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) int silent = fc->sb_flags & SB_SILENT; struct gfs2_sbd *sdp; struct gfs2_holder mount_gh; - struct gfs2_holder freeze_gh; + struct address_space *mapping; int error; sdp = init_sbd(sb); @@ -1165,7 +1137,8 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_flags |= SB_NOSEC; sb->s_magic = GFS2_MAGIC; sb->s_op = &gfs2_super_ops; - sb->s_d_op = &gfs2_dops; + + set_default_d_op(sb, &gfs2_dops); sb->s_export_op = &gfs2_export_ops; sb->s_qcop = &gfs2_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; @@ -1175,10 +1148,12 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) /* Set up the buffer cache and fill in some fake block size values to allow us to read-in the on-disk superblock. 
*/ - sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK); + sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, SECTOR_SIZE); + error = -EINVAL; + if (!sdp->sd_sb.sb_bsize) + goto fail_free; sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits; - sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - - GFS2_BASIC_BLOCK_SHIFT; + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - SECTOR_SHIFT; sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift); sdp->sd_tune.gt_logd_secs = sdp->sd_args.ar_commit; @@ -1191,15 +1166,41 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) sdp->sd_tune.gt_statfs_quantum = 30; } + /* Set up an address space for metadata writes */ + sdp->sd_inode = new_inode(sb); + error = -ENOMEM; + if (!sdp->sd_inode) + goto fail_free; + sdp->sd_inode->i_ino = GFS2_BAD_INO; + sdp->sd_inode->i_size = OFFSET_MAX; + + mapping = gfs2_aspace(sdp); + mapping->a_ops = &gfs2_rgrp_aops; + gfs2_setup_inode(sdp->sd_inode); + error = init_names(sdp, silent); if (error) - goto fail_free; + goto fail_iput; snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name); + error = -ENOMEM; + sdp->sd_glock_wq = alloc_workqueue("gfs2-glock/%s", + WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE | WQ_PERCPU, + 0, + sdp->sd_fsname); + if (!sdp->sd_glock_wq) + goto fail_iput; + + sdp->sd_delete_wq = alloc_workqueue("gfs2-delete/%s", + WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU, 0, + sdp->sd_fsname); + if (!sdp->sd_delete_wq) + goto fail_glock_wq; + error = gfs2_sys_fs_add(sdp); if (error) - goto fail_free; + goto fail_delete_wq; gfs2_create_debugfs_file(sdp); @@ -1207,6 +1208,8 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) if (error) goto fail_debug; + INIT_WORK(&sdp->sd_withdraw_work, gfs2_withdraw_func); + error = init_locking(sdp, &mount_gh, DO); if (error) goto fail_lm; @@ -1260,27 +1263,20 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) if (!sb_rdonly(sb)) { error = init_threads(sdp); - if (error) { - gfs2_withdraw_delayed(sdp); + if (error) goto fail_per_node; - } } - error = gfs2_freeze_lock(sdp, &freeze_gh, 0); + error = gfs2_freeze_lock_shared(sdp); if (error) goto fail_per_node; if (!sb_rdonly(sb)) error = gfs2_make_fs_rw(sdp); - gfs2_freeze_unlock(&freeze_gh); if (error) { - if (sdp->sd_quotad_process) - kthread_stop(sdp->sd_quotad_process); - sdp->sd_quotad_process = NULL; - if (sdp->sd_logd_process) - kthread_stop(sdp->sd_logd_process); - sdp->sd_logd_process = NULL; + gfs2_freeze_unlock(sdp); + gfs2_destroy_threads(sdp); fs_err(sdp, "can't make FS RW: %d\n", error); goto fail_per_node; } @@ -1309,9 +1305,15 @@ fail_lm: fail_debug: gfs2_delete_debugfs_file(sdp); gfs2_sys_fs_del(sdp); +fail_delete_wq: + destroy_workqueue(sdp->sd_delete_wq); +fail_glock_wq: + if (sdp->sd_glock_wq) + destroy_workqueue(sdp->sd_glock_wq); +fail_iput: + iput(sdp->sd_inode); fail_free: free_sbd(sdp); - sb->s_fs_info = NULL; return error; } @@ -1378,6 +1380,7 @@ static const struct constant_table gfs2_param_quota[] = { {"off", GFS2_QUOTA_OFF}, {"account", GFS2_QUOTA_ACCOUNT}, {"on", GFS2_QUOTA_ON}, + {"quiet", GFS2_QUOTA_QUIET}, {} }; @@ -1393,12 +1396,14 @@ static const struct constant_table gfs2_param_data[] = { }; enum opt_errors { - Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW, - Opt_errors_panic = GFS2_ERRORS_PANIC, + Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW, + Opt_errors_deactivate = GFS2_ERRORS_DEACTIVATE, + Opt_errors_panic = GFS2_ERRORS_PANIC, }; static const struct constant_table gfs2_param_errors[] = { {"withdraw", 
Opt_errors_withdraw }, + {"deactivate", Opt_errors_deactivate }, {"panic", Opt_errors_panic }, {} }; @@ -1585,12 +1590,6 @@ static int gfs2_reconfigure(struct fs_context *fc) fc->sb_flags |= SB_RDONLY; if ((sb->s_flags ^ fc->sb_flags) & SB_RDONLY) { - struct gfs2_holder freeze_gh; - - error = gfs2_freeze_lock(sdp, &freeze_gh, 0); - if (error) - return -EINVAL; - if (fc->sb_flags & SB_RDONLY) { gfs2_make_fs_ro(sdp); } else { @@ -1598,7 +1597,6 @@ static int gfs2_reconfigure(struct fs_context *fc) if (error) errorfc(fc, "unable to remount read-write"); } - gfs2_freeze_unlock(&freeze_gh); } sdp->sd_args = *newargs; @@ -1720,6 +1718,55 @@ static int gfs2_meta_init_fs_context(struct fs_context *fc) return 0; } +/** + * gfs2_evict_inodes - evict inodes cooperatively + * @sb: the superblock + * + * When evicting an inode with a zero link count, we are trying to upgrade the + * inode's iopen glock from SH to EX mode in order to determine if we can + * delete the inode. The other nodes are supposed to evict the inode from + * their caches if they can, and to poke the inode's inode glock if they cannot + * do so. Either behavior allows gfs2_upgrade_iopen_glock() to proceed + * quickly, but if the other nodes are not cooperating, the lock upgrading + * attempt will time out. Since inodes are evicted sequentially, this can add + * up quickly. + * + * Function evict_inodes() tries to keep the s_inode_list_lock list locked over + * a long time, which prevents other inodes from being evicted concurrently. + * This precludes the cooperative behavior we are looking for. This special + * version of evict_inodes() avoids that. + * + * Modeled after drop_pagecache_sb(). + */ +static void gfs2_evict_inodes(struct super_block *sb) +{ + struct inode *inode, *toput_inode = NULL; + struct gfs2_sbd *sdp = sb->s_fs_info; + + set_bit(SDF_EVICTING, &sdp->sd_flags); + + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + spin_lock(&inode->i_lock); + if ((inode_state_read(inode) & (I_FREEING | I_WILL_FREE | I_NEW)) && + !need_resched()) { + spin_unlock(&inode->i_lock); + continue; + } + __iget(inode); + spin_unlock(&inode->i_lock); + spin_unlock(&sb->s_inode_list_lock); + + iput(toput_inode); + toput_inode = inode; + + cond_resched(); + spin_lock(&sb->s_inode_list_lock); + } + spin_unlock(&sb->s_inode_list_lock); + iput(toput_inode); +} + static void gfs2_kill_sb(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; @@ -1735,6 +1782,18 @@ static void gfs2_kill_sb(struct super_block *sb) sdp->sd_root_dir = NULL; sdp->sd_master_dir = NULL; shrink_dcache_sb(sb); + + gfs2_evict_inodes(sb); + + /* + * Flush and then drain the delete workqueue here (via + * destroy_workqueue()) to ensure that any delete work that + * may be running will also see the SDF_KILL flag. 
+ */ + set_bit(SDF_KILL, &sdp->sd_flags); + gfs2_flush_delete_work(sdp); + destroy_workqueue(sdp->sd_delete_wq); + kill_block_super(sb); } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 1ed17226d9ed..b1692f12a602 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -106,58 +106,68 @@ static inline void spin_unlock_bucket(unsigned int hash) static void gfs2_qd_dealloc(struct rcu_head *rcu) { struct gfs2_quota_data *qd = container_of(rcu, struct gfs2_quota_data, qd_rcu); + struct gfs2_sbd *sdp = qd->qd_sbd; + kmem_cache_free(gfs2_quotad_cachep, qd); + if (atomic_dec_and_test(&sdp->sd_quota_count)) + wake_up(&sdp->sd_kill_wait); } -static void gfs2_qd_dispose(struct list_head *list) +static void gfs2_qd_dispose(struct gfs2_quota_data *qd) { - struct gfs2_quota_data *qd; - struct gfs2_sbd *sdp; - - while (!list_empty(list)) { - qd = list_first_entry(list, struct gfs2_quota_data, qd_lru); - sdp = qd->qd_gl->gl_name.ln_sbd; - - list_del(&qd->qd_lru); + struct gfs2_sbd *sdp = qd->qd_sbd; - /* Free from the filesystem-specific list */ - spin_lock(&qd_lock); - list_del(&qd->qd_list); - spin_unlock(&qd_lock); + spin_lock(&qd_lock); + list_del(&qd->qd_list); + spin_unlock(&qd_lock); - spin_lock_bucket(qd->qd_hash); - hlist_bl_del_rcu(&qd->qd_hlist); - spin_unlock_bucket(qd->qd_hash); + spin_lock_bucket(qd->qd_hash); + hlist_bl_del_rcu(&qd->qd_hlist); + spin_unlock_bucket(qd->qd_hash); + if (!gfs2_withdrawn(sdp)) { gfs2_assert_warn(sdp, !qd->qd_change); - gfs2_assert_warn(sdp, !qd->qd_slot_count); + gfs2_assert_warn(sdp, !qd->qd_slot_ref); gfs2_assert_warn(sdp, !qd->qd_bh_count); + } - gfs2_glock_put(qd->qd_gl); - atomic_dec(&sdp->sd_quota_count); + gfs2_glock_put(qd->qd_gl); + call_rcu(&qd->qd_rcu, gfs2_qd_dealloc); +} + +static void gfs2_qd_list_dispose(struct list_head *list) +{ + struct gfs2_quota_data *qd; + + while (!list_empty(list)) { + qd = list_first_entry(list, struct gfs2_quota_data, qd_lru); + list_del(&qd->qd_lru); - /* Delete it from the common reclaim list */ - call_rcu(&qd->qd_rcu, gfs2_qd_dealloc); + gfs2_qd_dispose(qd); } } static enum lru_status gfs2_qd_isolate(struct list_head *item, - struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) + struct list_lru_one *lru, void *arg) { struct list_head *dispose = arg; - struct gfs2_quota_data *qd = list_entry(item, struct gfs2_quota_data, qd_lru); + struct gfs2_quota_data *qd = + list_entry(item, struct gfs2_quota_data, qd_lru); + enum lru_status status; if (!spin_trylock(&qd->qd_lockref.lock)) return LRU_SKIP; + status = LRU_SKIP; if (qd->qd_lockref.count == 0) { lockref_mark_dead(&qd->qd_lockref); list_lru_isolate_move(lru, &qd->qd_lru, dispose); + status = LRU_REMOVED; } spin_unlock(&qd->qd_lockref.lock); - return LRU_REMOVED; + return status; } static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, @@ -172,7 +182,7 @@ static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, freed = list_lru_shrink_walk(&gfs2_qd_lru, sc, gfs2_qd_isolate, &dispose); - gfs2_qd_dispose(&dispose); + gfs2_qd_list_dispose(&dispose); return freed; } @@ -183,13 +193,26 @@ static unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, return vfs_pressure_ratio(list_lru_shrink_count(&gfs2_qd_lru, sc)); } -struct shrinker gfs2_qd_shrinker = { - .count_objects = gfs2_qd_shrink_count, - .scan_objects = gfs2_qd_shrink_scan, - .seeks = DEFAULT_SEEKS, - .flags = SHRINKER_NUMA_AWARE, -}; +static struct shrinker *gfs2_qd_shrinker; + +int __init gfs2_qd_shrinker_init(void) +{ + gfs2_qd_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, 
"gfs2-qd"); + if (!gfs2_qd_shrinker) + return -ENOMEM; + + gfs2_qd_shrinker->count_objects = gfs2_qd_shrink_count; + gfs2_qd_shrinker->scan_objects = gfs2_qd_shrink_scan; + + shrinker_register(gfs2_qd_shrinker); + + return 0; +} +void gfs2_qd_shrinker_exit(void) +{ + shrinker_free(gfs2_qd_shrinker); +} static u64 qd2index(struct gfs2_quota_data *qd) { @@ -200,12 +223,7 @@ static u64 qd2index(struct gfs2_quota_data *qd) static u64 qd2offset(struct gfs2_quota_data *qd) { - u64 offset; - - offset = qd2index(qd); - offset *= sizeof(struct gfs2_quota); - - return offset; + return qd2index(qd) * sizeof(struct gfs2_quota); } static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, struct kqid qid) @@ -218,8 +236,7 @@ static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, str return NULL; qd->qd_sbd = sdp; - qd->qd_lockref.count = 1; - spin_lock_init(&qd->qd_lockref.lock); + lockref_init(&qd->qd_lockref); qd->qd_id = qid; qd->qd_slot = -1; INIT_LIST_HEAD(&qd->qd_lru); @@ -250,7 +267,7 @@ static struct gfs2_quota_data *gfs2_qd_search_bucket(unsigned int hash, if (qd->qd_sbd != sdp) continue; if (lockref_get_not_dead(&qd->qd_lockref)) { - list_lru_del(&gfs2_qd_lru, &qd->qd_lru); + list_lru_del_obj(&gfs2_qd_lru, &qd->qd_lru); return qd; } } @@ -297,22 +314,33 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, } -static void qd_hold(struct gfs2_quota_data *qd) +static void __qd_hold(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; - gfs2_assert(sdp, !__lockref_is_dead(&qd->qd_lockref)); - lockref_get(&qd->qd_lockref); + struct gfs2_sbd *sdp = qd->qd_sbd; + gfs2_assert(sdp, qd->qd_lockref.count > 0); + qd->qd_lockref.count++; } static void qd_put(struct gfs2_quota_data *qd) { + struct gfs2_sbd *sdp; + if (lockref_put_or_lock(&qd->qd_lockref)) return; + BUG_ON(__lockref_is_dead(&qd->qd_lockref)); + sdp = qd->qd_sbd; + if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { + lockref_mark_dead(&qd->qd_lockref); + spin_unlock(&qd->qd_lockref.lock); + + gfs2_qd_dispose(qd); + return; + } + qd->qd_lockref.count = 0; - list_lru_add(&gfs2_qd_lru, &qd->qd_lru); + list_lru_add_obj(&gfs2_qd_lru, &qd->qd_lru); spin_unlock(&qd->qd_lockref.lock); - } static int slot_get(struct gfs2_quota_data *qd) @@ -322,20 +350,19 @@ static int slot_get(struct gfs2_quota_data *qd) int error = 0; spin_lock(&sdp->sd_bitmap_lock); - if (qd->qd_slot_count != 0) - goto out; - - error = -ENOSPC; - bit = find_first_zero_bit(sdp->sd_quota_bitmap, sdp->sd_quota_slots); - if (bit < sdp->sd_quota_slots) { + if (qd->qd_slot_ref == 0) { + bit = find_first_zero_bit(sdp->sd_quota_bitmap, + sdp->sd_quota_slots); + if (bit >= sdp->sd_quota_slots) { + error = -ENOSPC; + goto out; + } set_bit(bit, sdp->sd_quota_bitmap); qd->qd_slot = bit; - error = 0; -out: - qd->qd_slot_count++; } + qd->qd_slot_ref++; +out: spin_unlock(&sdp->sd_bitmap_lock); - return error; } @@ -344,8 +371,8 @@ static void slot_hold(struct gfs2_quota_data *qd) struct gfs2_sbd *sdp = qd->qd_sbd; spin_lock(&sdp->sd_bitmap_lock); - gfs2_assert(sdp, qd->qd_slot_count); - qd->qd_slot_count++; + gfs2_assert(sdp, qd->qd_slot_ref); + qd->qd_slot_ref++; spin_unlock(&sdp->sd_bitmap_lock); } @@ -354,8 +381,8 @@ static void slot_put(struct gfs2_quota_data *qd) struct gfs2_sbd *sdp = qd->qd_sbd; spin_lock(&sdp->sd_bitmap_lock); - gfs2_assert(sdp, qd->qd_slot_count); - if (!--qd->qd_slot_count) { + gfs2_assert(sdp, qd->qd_slot_ref); + if (!--qd->qd_slot_ref) { BUG_ON(!test_and_clear_bit(qd->qd_slot, 
sdp->sd_quota_bitmap)); qd->qd_slot = -1; } @@ -364,20 +391,21 @@ static void slot_put(struct gfs2_quota_data *qd) static int bh_get(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; + struct gfs2_sbd *sdp = qd->qd_sbd; struct inode *inode = sdp->sd_qc_inode; struct gfs2_inode *ip = GFS2_I(inode); unsigned int block, offset; - struct buffer_head *bh; + struct buffer_head *bh = NULL; struct iomap iomap = { }; int error; - mutex_lock(&sdp->sd_quota_mutex); - - if (qd->qd_bh_count++) { - mutex_unlock(&sdp->sd_quota_mutex); + spin_lock(&qd->qd_lockref.lock); + if (qd->qd_bh_count) { + qd->qd_bh_count++; + spin_unlock(&qd->qd_lockref.lock); return 0; } + spin_unlock(&qd->qd_lockref.lock); block = qd->qd_slot / sdp->sd_qc_per_block; offset = qd->qd_slot % sdp->sd_qc_per_block; @@ -386,115 +414,101 @@ static int bh_get(struct gfs2_quota_data *qd) (loff_t)block << inode->i_blkbits, i_blocksize(inode), &iomap); if (error) - goto fail; + return error; error = -ENOENT; if (iomap.type != IOMAP_MAPPED) - goto fail; + return error; error = gfs2_meta_read(ip->i_gl, iomap.addr >> inode->i_blkbits, DIO_WAIT, 0, &bh); if (error) - goto fail; + return error; error = -EIO; if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) - goto fail_brelse; - - qd->qd_bh = bh; - qd->qd_bh_qc = (struct gfs2_quota_change *) - (bh->b_data + sizeof(struct gfs2_meta_header) + - offset * sizeof(struct gfs2_quota_change)); - - mutex_unlock(&sdp->sd_quota_mutex); + goto out; - return 0; + spin_lock(&qd->qd_lockref.lock); + if (qd->qd_bh == NULL) { + qd->qd_bh = bh; + qd->qd_bh_qc = (struct gfs2_quota_change *) + (bh->b_data + sizeof(struct gfs2_meta_header) + + offset * sizeof(struct gfs2_quota_change)); + bh = NULL; + } + qd->qd_bh_count++; + spin_unlock(&qd->qd_lockref.lock); + error = 0; -fail_brelse: +out: brelse(bh); -fail: - qd->qd_bh_count--; - mutex_unlock(&sdp->sd_quota_mutex); return error; } static void bh_put(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; + struct gfs2_sbd *sdp = qd->qd_sbd; + struct buffer_head *bh = NULL; - mutex_lock(&sdp->sd_quota_mutex); + spin_lock(&qd->qd_lockref.lock); gfs2_assert(sdp, qd->qd_bh_count); if (!--qd->qd_bh_count) { - brelse(qd->qd_bh); + bh = qd->qd_bh; qd->qd_bh = NULL; qd->qd_bh_qc = NULL; } - mutex_unlock(&sdp->sd_quota_mutex); + spin_unlock(&qd->qd_lockref.lock); + brelse(bh); } -static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, - u64 *sync_gen) +static bool qd_grab_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, + u64 sync_gen) { + bool ret = false; + + spin_lock(&qd->qd_lockref.lock); if (test_bit(QDF_LOCKED, &qd->qd_flags) || !test_bit(QDF_CHANGE, &qd->qd_flags) || - (sync_gen && (qd->qd_sync_gen >= *sync_gen))) - return 0; + qd->qd_sync_gen >= sync_gen) + goto out; - if (!lockref_get_not_dead(&qd->qd_lockref)) - return 0; + if (__lockref_is_dead(&qd->qd_lockref)) + goto out; + qd->qd_lockref.count++; list_move_tail(&qd->qd_list, &sdp->sd_quota_list); set_bit(QDF_LOCKED, &qd->qd_flags); qd->qd_change_sync = qd->qd_change; slot_hold(qd); - return 1; + ret = true; + +out: + spin_unlock(&qd->qd_lockref.lock); + return ret; } -static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) +static void qd_ungrab_sync(struct gfs2_quota_data *qd) { - struct gfs2_quota_data *qd = NULL, *iter; - int error; - - *qdp = NULL; - - if (sb_rdonly(sdp->sd_vfs)) - return 0; - - spin_lock(&qd_lock); - - list_for_each_entry(iter, &sdp->sd_quota_list, qd_list) { - if (qd_check_sync(sdp, 
iter, &sdp->sd_quota_sync_gen)) { - qd = iter; - break; - } - } - - spin_unlock(&qd_lock); - - if (qd) { - gfs2_assert_warn(sdp, qd->qd_change_sync); - error = bh_get(qd); - if (error) { - clear_bit(QDF_LOCKED, &qd->qd_flags); - slot_put(qd); - qd_put(qd); - return error; - } - } - - *qdp = qd; - - return 0; + clear_bit(QDF_LOCKED, &qd->qd_flags); + slot_put(qd); + qd_put(qd); } -static void qd_unlock(struct gfs2_quota_data *qd) +static void qdsb_put(struct gfs2_quota_data *qd) { - gfs2_assert_warn(qd->qd_gl->gl_name.ln_sbd, - test_bit(QDF_LOCKED, &qd->qd_flags)); - clear_bit(QDF_LOCKED, &qd->qd_flags); bh_put(qd); slot_put(qd); qd_put(qd); } +static void qd_unlock(struct gfs2_quota_data *qd) +{ + spin_lock(&qd->qd_lockref.lock); + gfs2_assert_warn(qd->qd_sbd, test_bit(QDF_LOCKED, &qd->qd_flags)); + clear_bit(QDF_LOCKED, &qd->qd_flags); + spin_unlock(&qd->qd_lockref.lock); + qdsb_put(qd); +} + static int qdsb_get(struct gfs2_sbd *sdp, struct kqid qid, struct gfs2_quota_data **qdp) { @@ -521,13 +535,6 @@ fail: return error; } -static void qdsb_put(struct gfs2_quota_data *qd) -{ - bh_put(qd); - slot_put(qd); - qd_put(qd); -} - /** * gfs2_qa_get - make sure we have a quota allocations data structure, * if necessary @@ -591,6 +598,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) if (gfs2_assert_warn(sdp, !ip->i_qadata->qa_qd_num) || gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) { error = -EIO; + gfs2_qa_put(ip); goto out; } @@ -663,72 +671,81 @@ static int sort_qd(const void *a, const void *b) static void do_qc(struct gfs2_quota_data *qd, s64 change) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; + struct gfs2_sbd *sdp = qd->qd_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); struct gfs2_quota_change *qc = qd->qd_bh_qc; + bool needs_put = false; s64 x; - mutex_lock(&sdp->sd_quota_mutex); gfs2_trans_add_meta(ip->i_gl, qd->qd_bh); - if (!test_bit(QDF_CHANGE, &qd->qd_flags)) { - qc->qc_change = 0; + /* + * The QDF_CHANGE flag indicates that the slot in the quota change file + * is used. Here, we use the value of qc->qc_change when the slot is + * used, and we assume a value of 0 otherwise. + */ + + spin_lock(&qd->qd_lockref.lock); + + x = 0; + if (test_bit(QDF_CHANGE, &qd->qd_flags)) + x = be64_to_cpu(qc->qc_change); + x += change; + qd->qd_change += change; + + if (!x && test_bit(QDF_CHANGE, &qd->qd_flags)) { + /* The slot in the quota change file becomes unused. */ + clear_bit(QDF_CHANGE, &qd->qd_flags); + qc->qc_flags = 0; + qc->qc_id = 0; + needs_put = true; + } else if (x && !test_bit(QDF_CHANGE, &qd->qd_flags)) { + /* The slot in the quota change file becomes used. 
*/ + set_bit(QDF_CHANGE, &qd->qd_flags); + __qd_hold(qd); + slot_hold(qd); + qc->qc_flags = 0; if (qd->qd_id.type == USRQUOTA) qc->qc_flags = cpu_to_be32(GFS2_QCF_USER); qc->qc_id = cpu_to_be32(from_kqid(&init_user_ns, qd->qd_id)); } - - x = be64_to_cpu(qc->qc_change) + change; qc->qc_change = cpu_to_be64(x); - spin_lock(&qd_lock); - qd->qd_change = x; - spin_unlock(&qd_lock); + spin_unlock(&qd->qd_lockref.lock); - if (!x) { - gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags)); - clear_bit(QDF_CHANGE, &qd->qd_flags); - qc->qc_flags = 0; - qc->qc_id = 0; + if (needs_put) { slot_put(qd); qd_put(qd); - } else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) { - qd_hold(qd); - slot_hold(qd); } - if (change < 0) /* Reset quiet flag if we freed some blocks */ clear_bit(QDF_QMSG_QUIET, &qd->qd_flags); - mutex_unlock(&sdp->sd_quota_mutex); } -static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index, +static int gfs2_write_buf_to_page(struct gfs2_sbd *sdp, unsigned long index, unsigned off, void *buf, unsigned bytes) { + struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); struct inode *inode = &ip->i_inode; - struct gfs2_sbd *sdp = GFS2_SB(inode); struct address_space *mapping = inode->i_mapping; - struct page *page; + struct folio *folio; struct buffer_head *bh; - void *kaddr; u64 blk; unsigned bsize = sdp->sd_sb.sb_bsize, bnum = 0, boff = 0; unsigned to_write = bytes, pg_off = off; - int done = 0; blk = index << (PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift); boff = off % bsize; - page = find_or_create_page(mapping, index, GFP_NOFS); - if (!page) - return -ENOMEM; - if (!page_has_buffers(page)) - create_empty_buffers(page, bsize, 0); + folio = filemap_grab_folio(mapping, index); + if (IS_ERR(folio)) + return PTR_ERR(folio); + bh = folio_buffers(folio); + if (!bh) + bh = create_empty_buffers(folio, bsize, 0); - bh = page_buffers(page); - while (!done) { - /* Find the beginning block within the page */ + for (;;) { + /* Find the beginning block within the folio */ if (pg_off >= ((bnum * bsize) + bsize)) { bh = bh->b_this_page; bnum++; @@ -741,16 +758,14 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index, goto unlock_out; /* If it's a newly allocated disk block, zero it */ if (buffer_new(bh)) - zero_user(page, bnum * bsize, bh->b_size); + folio_zero_range(folio, bnum * bsize, + bh->b_size); } - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) set_buffer_uptodate(bh); if (bh_read(bh, REQ_META | REQ_PRIO) < 0) goto unlock_out; - if (gfs2_is_jdata(ip)) - gfs2_trans_add_data(ip->i_gl, bh); - else - gfs2_ordered_add_inode(ip); + gfs2_trans_add_data(ip->i_gl, bh); /* If we need to write to the next block as well */ if (to_write > (bsize - boff)) { @@ -759,31 +774,29 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index, boff = pg_off % bsize; continue; } - done = 1; + break; } - /* Write to the page, now that we have setup the buffer(s) */ - kaddr = kmap_atomic(page); - memcpy(kaddr + off, buf, bytes); - flush_dcache_page(page); - kunmap_atomic(kaddr); - unlock_page(page); - put_page(page); + /* Write to the folio, now that we have setup the buffer(s) */ + memcpy_to_folio(folio, off, buf, bytes); + flush_dcache_folio(folio); + folio_unlock(folio); + folio_put(folio); return 0; unlock_out: - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return -EIO; } -static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp, +static int gfs2_write_disk_quota(struct gfs2_sbd *sdp, 
struct gfs2_quota *qp, loff_t loc) { unsigned long pg_beg; unsigned pg_off, nbytes, overflow = 0; - int pg_oflow = 0, error; + int error; void *ptr; nbytes = sizeof(struct gfs2_quota); @@ -792,17 +805,15 @@ static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp, pg_off = offset_in_page(loc); /* If the quota straddles a page boundary, split the write in two */ - if ((pg_off + nbytes) > PAGE_SIZE) { - pg_oflow = 1; + if ((pg_off + nbytes) > PAGE_SIZE) overflow = (pg_off + nbytes) - PAGE_SIZE; - } ptr = qp; - error = gfs2_write_buf_to_page(ip, pg_beg, pg_off, ptr, + error = gfs2_write_buf_to_page(sdp, pg_beg, pg_off, ptr, nbytes - overflow); /* If there's an overflow, write the remaining bytes to the next page */ - if (!error && pg_oflow) - error = gfs2_write_buf_to_page(ip, pg_beg + 1, 0, + if (!error && overflow) + error = gfs2_write_buf_to_page(sdp, pg_beg + 1, 0, ptr + nbytes - overflow, overflow); return error; @@ -810,7 +821,7 @@ static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp, /** * gfs2_adjust_quota - adjust record of current block usage - * @ip: The quota inode + * @sdp: The superblock * @loc: Offset of the entry in the quota file * @change: The amount of usage change to record * @qd: The quota data @@ -822,12 +833,12 @@ static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp, * Returns: 0 or -ve on error */ -static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, +static int gfs2_adjust_quota(struct gfs2_sbd *sdp, loff_t loc, s64 change, struct gfs2_quota_data *qd, struct qc_dqblk *fdq) { + struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); struct inode *inode = &ip->i_inode; - struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_quota q; int err; u64 size; @@ -844,10 +855,10 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, return err; loc -= sizeof(q); /* gfs2_internal_read would've advanced the loc ptr */ - err = -EIO; be64_add_cpu(&q.qu_value, change); if (((s64)be64_to_cpu(q.qu_value)) < 0) q.qu_value = 0; /* Never go negative on quota usage */ + spin_lock(&qd->qd_lockref.lock); qd->qd_qb.qb_value = q.qu_value; if (fdq) { if (fdq->d_fieldmask & QC_SPC_SOFT) { @@ -863,13 +874,14 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, qd->qd_qb.qb_value = q.qu_value; } } + spin_unlock(&qd->qd_lockref.lock); - err = gfs2_write_disk_quota(ip, &q, loc); + err = gfs2_write_disk_quota(sdp, &q, loc); if (!err) { size = loc + sizeof(struct gfs2_quota); if (size > inode->i_size) i_size_write(inode, size); - inode->i_mtime = inode->i_atime = current_time(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); mark_inode_dirty(inode); set_bit(QDF_REFRESH, &qd->qd_flags); } @@ -877,11 +889,12 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, return err; } -static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) +static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda, + u64 sync_gen) { - struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_name.ln_sbd; + struct gfs2_sbd *sdp = (*qda)->qd_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); - struct gfs2_alloc_parms ap = { .aflags = 0, }; + struct gfs2_alloc_parms ap = {}; unsigned int data_blocks, ind_blocks; struct gfs2_holder *ghs, i_gh; unsigned int qx, x; @@ -891,18 +904,12 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) unsigned int nalloc = 0, blocks; int error; - error = gfs2_qa_get(ip); - if (error) - return error; - gfs2_write_calc_reserv(ip, 
sizeof(struct gfs2_quota), &data_blocks, &ind_blocks); ghs = kmalloc_array(num_qd, sizeof(struct gfs2_holder), GFP_NOFS); - if (!ghs) { - error = -ENOMEM; - goto out; - } + if (!ghs) + return -ENOMEM; sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); inode_lock(&ip->i_inode); @@ -951,7 +958,8 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) for (x = 0; x < num_qd; x++) { qd = qda[x]; offset = qd2offset(qd); - error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, qd, NULL); + error = gfs2_adjust_quota(sdp, offset, qd->qd_change_sync, qd, + NULL); if (error) goto out_end_trans; @@ -959,8 +967,6 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) set_bit(QDF_REFRESH, &qd->qd_flags); } - error = 0; - out_end_trans: gfs2_trans_end(sdp); out_ipres: @@ -974,8 +980,15 @@ out_dq: kfree(ghs); gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_DO_SYNC); -out: - gfs2_qa_put(ip); + if (!error) { + for (x = 0; x < num_qd; x++) { + qd = qda[x]; + spin_lock(&qd->qd_lockref.lock); + if (qd->qd_sync_gen < sync_gen) + qd->qd_sync_gen = sync_gen; + spin_unlock(&qd->qd_lockref.lock); + } + } return error; } @@ -999,7 +1012,9 @@ static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd) qlvb->qb_limit = q.qu_limit; qlvb->qb_warn = q.qu_warn; qlvb->qb_value = q.qu_value; + spin_lock(&qd->qd_lockref.lock); qd->qd_qb = *qlvb; + spin_unlock(&qd->qd_lockref.lock); return 0; } @@ -1007,11 +1022,12 @@ static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd) static int do_glock(struct gfs2_quota_data *qd, int force_refresh, struct gfs2_holder *q_gh) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; + struct gfs2_sbd *sdp = qd->qd_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); struct gfs2_holder i_gh; int error; + gfs2_assert_warn(sdp, sdp == qd->qd_gl->gl_name.ln_sbd); restart: error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh); if (error) @@ -1020,7 +1036,9 @@ restart: if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) force_refresh = FORCE; + spin_lock(&qd->qd_lockref.lock); qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; + spin_unlock(&qd->qd_lockref.lock); if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { gfs2_glock_dq_uninit(q_gh); @@ -1057,9 +1075,9 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_quota_data *qd; u32 x; - int error = 0; + int error; - if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) + if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; error = gfs2_quota_hold(ip, uid, gid); @@ -1087,56 +1105,56 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) return error; } -static int need_sync(struct gfs2_quota_data *qd) +static bool need_sync(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; + struct gfs2_sbd *sdp = qd->qd_sbd; struct gfs2_tune *gt = &sdp->sd_tune; - s64 value; + s64 value, change, limit; unsigned int num, den; - int do_sync = 1; + int ret = false; + spin_lock(&qd->qd_lockref.lock); if (!qd->qd_qb.qb_limit) - return 0; + goto out; - spin_lock(&qd_lock); - value = qd->qd_change; - spin_unlock(&qd_lock); + change = qd->qd_change; + if (change <= 0) + goto out; + value = (s64)be64_to_cpu(qd->qd_qb.qb_value); + limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit); + if (value >= limit) + goto out; spin_lock(&gt->gt_spin); num = gt->gt_quota_scale_num; den = gt->gt_quota_scale_den;
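The need_sync() heuristic being rewritten in this hunk only requests a quota sync when the locally buffered change, projected across every journal and scaled by the gt_quota_scale_num/gt_quota_scale_den tunables, would reach the limit. A minimal standalone sketch of that arithmetic (all names and numbers here are invented for illustration, not taken from the patch):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Project the local change across all journals, scaled by num/den,
	 * and report whether the projected usage would reach the limit. */
	static bool need_sync_sketch(int64_t value, int64_t change, int64_t limit,
				     unsigned int journals,
				     unsigned int num, unsigned int den)
	{
		if (!limit || change <= 0 || value >= limit)
			return false;
		change = change * journals * num / den;
		return value + change >= limit;
	}

	int main(void)
	{
		/* 900 blocks charged, 50 changed locally, limit 1000, 3 journals,
		 * 1/1 scale: 900 + 50 * 3 = 1050 >= 1000, so a sync is needed. */
		printf("%d\n", need_sync_sketch(900, 50, 1000, 3, 1, 1));
		return 0;
	}

With the default 1/1 scale this simply asks whether the limit would be breached if every journal buffered a similar change.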
spin_unlock(&gt->gt_spin); - if (value < 0) - do_sync = 0; - else if ((s64)be64_to_cpu(qd->qd_qb.qb_value) >= - (s64)be64_to_cpu(qd->qd_qb.qb_limit)) - do_sync = 0; - else { - value *= gfs2_jindex_size(sdp) * num; - value = div_s64(value, den); - value += (s64)be64_to_cpu(qd->qd_qb.qb_value); - if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit)) - do_sync = 0; - } + change *= gfs2_jindex_size(sdp) * num; + change = div_s64(change, den); + if (value + change < limit) + goto out; - return do_sync; + ret = true; +out: + spin_unlock(&qd->qd_lockref.lock); + return ret; } void gfs2_quota_unlock(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_quota_data *qda[4]; + struct gfs2_quota_data *qda[2 * GFS2_MAXQUOTAS]; unsigned int count = 0; u32 x; - int found; if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) return; for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { struct gfs2_quota_data *qd; - int sync; + bool sync; + int error; qd = ip->i_qadata->qa_qd[x]; sync = need_sync(qd); @@ -1146,17 +1164,16 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) continue; spin_lock(&qd_lock); - found = qd_check_sync(sdp, qd, NULL); + sync = qd_grab_sync(sdp, qd, U64_MAX); spin_unlock(&qd_lock); - if (!found) + if (!sync) continue; gfs2_assert_warn(sdp, qd->qd_change_sync); - if (bh_get(qd)) { - clear_bit(QDF_LOCKED, &qd->qd_flags); - slot_put(qd); - qd_put(qd); + error = bh_get(qd); + if (error) { + qd_ungrab_sync(qd); continue; } @@ -1164,7 +1181,9 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) } if (count) { - do_sync(count, qda); + u64 sync_gen = READ_ONCE(sdp->sd_quota_sync_gen); + + do_sync(count, qda, sync_gen); for (x = 0; x < count; x++) qd_unlock(qda[x]); } @@ -1174,16 +1193,16 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) #define MAX_LINE 256 -static int print_message(struct gfs2_quota_data *qd, char *type) +static void print_message(struct gfs2_quota_data *qd, char *type) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; - - fs_info(sdp, "quota %s for %s %u\n", - type, - (qd->qd_id.type == USRQUOTA) ? "user" : "group", - from_kqid(&init_user_ns, qd->qd_id)); + struct gfs2_sbd *sdp = qd->qd_sbd; - return 0; + if (sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) { + fs_info(sdp, "quota %s for %s %u\n", + type, + (qd->qd_id.type == USRQUOTA) ? 
"user" : "group", + from_kqid(&init_user_ns, qd->qd_id)); + } } /** @@ -1223,12 +1242,12 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, qid_eq(qd->qd_id, make_kqid_gid(gid)))) continue; + spin_lock(&qd->qd_lockref.lock); warn = (s64)be64_to_cpu(qd->qd_qb.qb_warn); limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit); value = (s64)be64_to_cpu(qd->qd_qb.qb_value); - spin_lock(&qd_lock); value += qd->qd_change; - spin_unlock(&qd_lock); + spin_unlock(&qd->qd_lockref.lock); if (limit > 0 && (limit - value) < ap->allowed) ap->allowed = limit - value; @@ -1253,7 +1272,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, * HZ)) { quota_send_warning(qd->qd_id, sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); - error = print_message(qd, "warning"); + print_message(qd, "warning"); + error = 0; qd->qd_last_warn = jiffies; } } @@ -1267,7 +1287,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, u32 x; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON || + if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF || gfs2_assert_warn(sdp, change)) return; if (ip->i_diskflags & GFS2_DIF_SYSTEM) @@ -1291,40 +1311,55 @@ int gfs2_quota_sync(struct super_block *sb, int type) struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_data **qda; unsigned int max_qd = PAGE_SIZE / sizeof(struct gfs2_holder); - unsigned int num_qd; - unsigned int x; + u64 sync_gen; int error = 0; + if (sb_rdonly(sdp->sd_vfs)) + return 0; + qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL); if (!qda) return -ENOMEM; mutex_lock(&sdp->sd_quota_sync_mutex); - sdp->sd_quota_sync_gen++; + sync_gen = sdp->sd_quota_sync_gen + 1; do { - num_qd = 0; + struct gfs2_quota_data *iter; + unsigned int num_qd = 0; + unsigned int x; - for (;;) { - error = qd_fish(sdp, qda + num_qd); - if (error || !qda[num_qd]) - break; - if (++num_qd == max_qd) - break; + spin_lock(&qd_lock); + list_for_each_entry(iter, &sdp->sd_quota_list, qd_list) { + if (qd_grab_sync(sdp, iter, sync_gen)) { + qda[num_qd++] = iter; + if (num_qd == max_qd) + break; + } } + spin_unlock(&qd_lock); - if (num_qd) { - if (!error) - error = do_sync(num_qd, qda); + if (!num_qd) + break; + + for (x = 0; x < num_qd; x++) { + error = bh_get(qda[x]); if (!error) - for (x = 0; x < num_qd; x++) - qda[x]->qd_sync_gen = - sdp->sd_quota_sync_gen; + continue; + + while (x < num_qd) + qd_ungrab_sync(qda[--num_qd]); + break; + } - for (x = 0; x < num_qd; x++) - qd_unlock(qda[x]); + if (!error) { + WRITE_ONCE(sdp->sd_quota_sync_gen, sync_gen); + error = do_sync(num_qd, qda, sync_gen); } - } while (!error && num_qd == max_qd); + + for (x = 0; x < num_qd; x++) + qd_unlock(qda[x]); + } while (!error); mutex_unlock(&sdp->sd_quota_sync_mutex); kfree(qda); @@ -1359,6 +1394,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) unsigned int found = 0; unsigned int hash; unsigned int bm_size; + struct buffer_head *bh; u64 dblock; u32 extlen = 0; int error; @@ -1378,8 +1414,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) return error; for (x = 0; x < blocks; x++) { - struct buffer_head *bh; - const struct gfs2_quota_change *qc; + struct gfs2_quota_change *qc; unsigned int y; if (!extlen) { @@ -1392,15 +1427,13 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) bh = gfs2_meta_ra(ip->i_gl, dblock, extlen); if (!bh) goto fail; - if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) { - brelse(bh); - goto fail; - } + if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) + goto fail_brelse; - qc = (const struct gfs2_quota_change *)(bh->b_data + 
sizeof(struct gfs2_meta_header)); + qc = (struct gfs2_quota_change *)(bh->b_data + sizeof(struct gfs2_meta_header)); for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots; y++, slot++) { - struct gfs2_quota_data *qd; + struct gfs2_quota_data *old_qd, *qd; s64 qc_change = be64_to_cpu(qc->qc_change); u32 qc_flags = be32_to_cpu(qc->qc_flags); enum quota_type qtype = (qc_flags & GFS2_QCF_USER) ? @@ -1413,29 +1446,51 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) hash = gfs2_qd_hash(sdp, qc_id); qd = qd_alloc(hash, sdp, qc_id); - if (qd == NULL) { - brelse(bh); - goto fail; - } + if (qd == NULL) + goto fail_brelse; + qd->qd_lockref.count = 0; set_bit(QDF_CHANGE, &qd->qd_flags); qd->qd_change = qc_change; qd->qd_slot = slot; - qd->qd_slot_count = 1; + qd->qd_slot_ref = 1; spin_lock(&qd_lock); + spin_lock_bucket(hash); + old_qd = gfs2_qd_search_bucket(hash, sdp, qc_id); + if (old_qd) { + fs_err(sdp, "Corruption found in quota_change%u " + "file: duplicate identifier in " + "slot %u\n", + sdp->sd_jdesc->jd_jid, slot); + + spin_unlock_bucket(hash); + spin_unlock(&qd_lock); + qd_put(old_qd); + + gfs2_glock_put(qd->qd_gl); + kmem_cache_free(gfs2_quotad_cachep, qd); + + /* zero out the duplicate slot */ + lock_buffer(bh); + memset(qc, 0, sizeof(*qc)); + mark_buffer_dirty(bh); + unlock_buffer(bh); + + continue; + } BUG_ON(test_and_set_bit(slot, sdp->sd_quota_bitmap)); list_add(&qd->qd_list, &sdp->sd_quota_list); atomic_inc(&sdp->sd_quota_count); - spin_unlock(&qd_lock); - - spin_lock_bucket(hash); hlist_bl_add_head_rcu(&qd->qd_hlist, &qd_hash_table[hash]); spin_unlock_bucket(hash); + spin_unlock(&qd_lock); found++; } + if (buffer_dirty(bh)) + sync_dirty_buffer(bh); brelse(bh); dblock++; extlen--; @@ -1446,6 +1501,10 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) return 0; +fail_brelse: + if (buffer_dirty(bh)) + sync_dirty_buffer(bh); + brelse(bh); fail: gfs2_quota_cleanup(sdp); return error; @@ -1453,36 +1512,36 @@ fail: void gfs2_quota_cleanup(struct gfs2_sbd *sdp) { - struct list_head *head = &sdp->sd_quota_list; struct gfs2_quota_data *qd; + LIST_HEAD(dispose); + int count; - spin_lock(&qd_lock); - while (!list_empty(head)) { - qd = list_last_entry(head, struct gfs2_quota_data, qd_list); - - list_del(&qd->qd_list); - - /* Also remove if this qd exists in the reclaim list */ - list_lru_del(&gfs2_qd_lru, &qd->qd_lru); - atomic_dec(&sdp->sd_quota_count); - spin_unlock(&qd_lock); - - spin_lock_bucket(qd->qd_hash); - hlist_bl_del_rcu(&qd->qd_hlist); - spin_unlock_bucket(qd->qd_hash); - - gfs2_assert_warn(sdp, !qd->qd_change); - gfs2_assert_warn(sdp, !qd->qd_slot_count); - gfs2_assert_warn(sdp, !qd->qd_bh_count); + BUG_ON(!test_bit(SDF_NORECOVERY, &sdp->sd_flags) && + test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); - gfs2_glock_put(qd->qd_gl); - call_rcu(&qd->qd_rcu, gfs2_qd_dealloc); + spin_lock(&qd_lock); + list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { + spin_lock(&qd->qd_lockref.lock); + if (qd->qd_lockref.count != 0) { + spin_unlock(&qd->qd_lockref.lock); + continue; + } + lockref_mark_dead(&qd->qd_lockref); + spin_unlock(&qd->qd_lockref.lock); - spin_lock(&qd_lock); + list_lru_del_obj(&gfs2_qd_lru, &qd->qd_lru); + list_add(&qd->qd_lru, &dispose); } spin_unlock(&qd_lock); - gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count)); + gfs2_qd_list_dispose(&dispose); + + wait_event_timeout(sdp->sd_kill_wait, + (count = atomic_read(&sdp->sd_quota_count)) == 0, + HZ * 60); + + if (count != 0) + fs_err(sdp, "%d left-over quota data objects\n", count); kvfree(sdp->sd_quota_bitmap); 
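The gfs2_quota_cleanup() rewrite above follows a common dispose-list pattern: unreferenced entries are marked dead and unlinked onto a private list while the lock is held, and the actual freeing happens only after the lock is dropped. A self-contained userspace sketch of the same pattern (a pthread mutex and a singly linked list stand in for the kernel primitives; all names are invented):

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct entry {
		struct entry *next;
		int refcount;	/* 0 means unreferenced and safe to dispose */
		int id;
	};

	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct entry *entries;

	static void cleanup(void)
	{
		struct entry *dispose = NULL, *e, **p;

		/* Unlink unreferenced entries onto a private list under the lock. */
		pthread_mutex_lock(&list_lock);
		for (p = &entries; (e = *p) != NULL; ) {
			if (e->refcount == 0) {
				*p = e->next;
				e->next = dispose;
				dispose = e;
			} else {
				p = &e->next;
			}
		}
		pthread_mutex_unlock(&list_lock);

		/* Free them without holding the lock. */
		while ((e = dispose) != NULL) {
			dispose = e->next;
			printf("disposing entry %d\n", e->id);
			free(e);
		}
	}

	int main(void)
	{
		for (int i = 0; i < 3; i++) {
			struct entry *e = calloc(1, sizeof(*e));
			e->id = i;
			e->refcount = (i == 1);	/* keep entry 1 referenced */
			e->next = entries;
			entries = e;
		}
		cleanup();	/* disposes entries 0 and 2, keeps entry 1 */
		return 0;
	}

Disposing outside the lock matters in the patch because gfs2_qd_dispose() itself takes qd_lock to unlink each entry from sd_quota_list.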
sdp->sd_quota_bitmap = NULL; @@ -1499,20 +1558,6 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) } } -static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg, - int (*fxn)(struct super_block *sb, int type), - unsigned long t, unsigned long *timeo, - unsigned int *new_timeo) -{ - if (t >= *timeo) { - int error = fxn(sdp->sd_vfs, 0); - quotad_error(sdp, msg, error); - *timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ; - } else { - *timeo -= t; - } -} - void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) { if (!sdp->sd_statfs_force_sync) { sdp->sd_statfs_force_sync = 1; @@ -1530,42 +1575,51 @@ void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) { int gfs2_quotad(void *data) { struct gfs2_sbd *sdp = data; - struct gfs2_tune *tune = &sdp->sd_tune; - unsigned long statfs_timeo = 0; - unsigned long quotad_timeo = 0; - unsigned long t = 0; - DEFINE_WAIT(wait); + unsigned long now = jiffies; + unsigned long statfs_deadline = now; + unsigned long quotad_deadline = now; + set_freezable(); while (!kthread_should_stop()) { unsigned long t; if (gfs2_withdrawn(sdp)) - goto bypass; - /* Update the master statfs file */ - if (sdp->sd_statfs_force_sync) { - int error = gfs2_statfs_sync(sdp->sd_vfs, 0); + break; + + now = jiffies; + if (sdp->sd_statfs_force_sync || + time_after(now, statfs_deadline)) { + unsigned int quantum; + int error; + + /* Update the master statfs file */ + error = gfs2_statfs_sync(sdp->sd_vfs, 0); quotad_error(sdp, "statfs", error); - statfs_timeo = gfs2_tune_get(sdp, gt_statfs_quantum) * HZ; + + quantum = gfs2_tune_get(sdp, gt_statfs_quantum); + statfs_deadline = now + quantum * HZ; } - else - quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t, - &statfs_timeo, - &tune->gt_statfs_quantum); + if (time_after(now, quotad_deadline)) { + unsigned int quantum; + int error; - /* Update quota file */ - quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, - &quotad_timeo, &tune->gt_quota_quantum); + /* Update the quota file */ + error = gfs2_quota_sync(sdp->sd_vfs, 0); + quotad_error(sdp, "sync", error); - try_to_freeze(); + quantum = gfs2_tune_get(sdp, gt_quota_quantum); + quotad_deadline = now + quantum * HZ; + } -bypass: - t = min(quotad_timeo, statfs_timeo); + t = min(statfs_deadline - now, quotad_deadline - now); + wait_event_freezable_timeout(sdp->sd_quota_wait, + sdp->sd_statfs_force_sync || + gfs2_withdrawn(sdp) || + kthread_should_stop(), + t); - prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE); - if (!sdp->sd_statfs_force_sync) - t -= schedule_timeout(t); - else + if (sdp->sd_statfs_force_sync) t = 0; - finish_wait(&sdp->sd_quota_wait, &wait); } return 0; @@ -1578,6 +1632,8 @@ static int gfs2_quota_get_state(struct super_block *sb, struct qc_state *state) memset(state, 0, sizeof(*state)); switch (sdp->sd_args.ar_quota) { + case GFS2_QUOTA_QUIET: + fallthrough; case GFS2_QUOTA_ON: state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED; state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED; @@ -1706,7 +1762,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, if (gfs2_is_stuffed(ip)) alloc_required = 1; if (alloc_required) { - struct gfs2_alloc_parms ap = { .aflags = 0, }; + struct gfs2_alloc_parms ap = {}; gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), &data_blocks, &ind_blocks); blocks = 1 + data_blocks + ind_blocks; @@ -1724,7 +1780,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, goto out_release; /* Apply changes */ - error = gfs2_adjust_quota(ip, offset, 0, qd, fdq); + error = 
gfs2_adjust_quota(sdp, offset, 0, qd, fdq); if (!error) clear_bit(QDF_QMSG_QUIET, &qd->qd_flags); diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 21ada332d555..988f38dc5b2c 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -15,27 +15,27 @@ struct gfs2_sbd; #define NO_UID_QUOTA_CHANGE INVALID_UID #define NO_GID_QUOTA_CHANGE INVALID_GID -extern int gfs2_qa_get(struct gfs2_inode *ip); -extern void gfs2_qa_put(struct gfs2_inode *ip); -extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); -extern void gfs2_quota_unhold(struct gfs2_inode *ip); +int gfs2_qa_get(struct gfs2_inode *ip); +void gfs2_qa_put(struct gfs2_inode *ip); +int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); +void gfs2_quota_unhold(struct gfs2_inode *ip); -extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); -extern void gfs2_quota_unlock(struct gfs2_inode *ip); +int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); +void gfs2_quota_unlock(struct gfs2_inode *ip); -extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, - struct gfs2_alloc_parms *ap); -extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, - kuid_t uid, kgid_t gid); +int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, + struct gfs2_alloc_parms *ap); +void gfs2_quota_change(struct gfs2_inode *ip, s64 change, + kuid_t uid, kgid_t gid); -extern int gfs2_quota_sync(struct super_block *sb, int type); -extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid); +int gfs2_quota_sync(struct super_block *sb, int type); +int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid); -extern int gfs2_quota_init(struct gfs2_sbd *sdp); -extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp); -extern int gfs2_quotad(void *data); +int gfs2_quota_init(struct gfs2_sbd *sdp); +void gfs2_quota_cleanup(struct gfs2_sbd *sdp); +int gfs2_quotad(void *data); -extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); +void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) @@ -44,13 +44,13 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, int ret; ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */ - if (capable(CAP_SYS_RESOURCE) || - sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) + if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF || + capable(CAP_SYS_RESOURCE)) return 0; ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (ret) return ret; - if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) + if (sdp->sd_args.ar_quota == GFS2_QUOTA_ACCOUNT) return 0; ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap); if (ret) @@ -59,8 +59,10 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, } extern const struct quotactl_ops gfs2_quotactl_ops; -extern struct shrinker gfs2_qd_shrinker; +int __init gfs2_qd_shrinker_init(void); +void gfs2_qd_shrinker_exit(void); extern struct list_lru gfs2_qd_lru; -extern void __init gfs2_quota_hash_init(void); + +void __init gfs2_quota_hash_init(void); #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 2bb085a72e8e..8c8202c68b64 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -27,7 +27,7 @@ #include "util.h" #include "dir.h" -struct workqueue_struct *gfs_recovery_wq; +struct workqueue_struct *gfs2_recovery_wq; int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, struct buffer_head **bh) @@ -118,6 +118,7 @@ void 
gfs2_revoke_clean(struct gfs2_jdesc *jd) int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh, unsigned int blkno, struct gfs2_log_header_host *head) { + const u32 zero = 0; u32 hash, crc; if (lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) || @@ -126,7 +127,7 @@ int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh, return 1; hash = crc32(~0, lh, LH_V1_SIZE - 4); - hash = ~crc32_le_shift(hash, 4); /* assume lh_hash is zero */ + hash = ~crc32(hash, &zero, 4); /* assume lh_hash is zero */ if (be32_to_cpu(lh->lh_hash) != hash) return 1; @@ -263,16 +264,12 @@ static void clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) { struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - u32 lblock = head->lh_blkno; - gfs2_replay_incr_blk(jd, &lblock); - gfs2_write_log_header(sdp, jd, head->lh_sequence + 1, 0, lblock, + gfs2_replay_incr_blk(jd, &head->lh_blkno); + head->lh_sequence++; + gfs2_write_log_header(sdp, jd, head->lh_sequence, 0, head->lh_blkno, GFS2_LOG_HEAD_UNMOUNT | GFS2_LOG_HEAD_RECOVERY, REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC); - if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) { - sdp->sd_log_flush_head = lblock; - gfs2_log_incr_head(sdp); - } } @@ -404,7 +401,7 @@ void gfs2_recover_func(struct work_struct *work) struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct gfs2_log_header_host head; - struct gfs2_holder j_gh, ji_gh, thaw_gh; + struct gfs2_holder j_gh, ji_gh; ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep; int ro = 0; unsigned int pass; @@ -420,14 +417,15 @@ void gfs2_recover_func(struct work_struct *work) if (sdp->sd_args.ar_spectator) goto fail; if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { - fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", + fs_info(sdp, "jid=%u: Trying to acquire journal glock...\n", jd->jd_jid); jlocked = 1; - /* Acquire the journal lock so we can do recovery */ + /* Acquire the journal glock so we can do recovery */ error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, LM_ST_EXCLUSIVE, - LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE, + LM_FLAG_RECOVER | LM_FLAG_TRY | + GL_NOCACHE, &j_gh); switch (error) { case 0: @@ -443,7 +441,8 @@ void gfs2_recover_func(struct work_struct *work) } error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh); + LM_FLAG_RECOVER | GL_NOCACHE, + &ji_gh); if (error) goto fail_gunlock_j; } else { @@ -457,7 +456,7 @@ void gfs2_recover_func(struct work_struct *work) if (error) goto fail_gunlock_ji; - error = gfs2_find_jhead(jd, &head, true); + error = gfs2_find_jhead(jd, &head); if (error) goto fail_gunlock_ji; t_jhd = ktime_get(); @@ -465,14 +464,14 @@ void gfs2_recover_func(struct work_struct *work) ktime_ms_delta(t_jhd, t_jlck)); if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { - fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n", - jd->jd_jid); + mutex_lock(&sdp->sd_freeze_mutex); - /* Acquire a shared hold on the freeze lock */ - - error = gfs2_freeze_lock(sdp, &thaw_gh, LM_FLAG_PRIORITY); - if (error) + if (test_bit(SDF_FROZEN, &sdp->sd_flags)) { + mutex_unlock(&sdp->sd_freeze_mutex); + fs_warn(sdp, "jid=%u: Can't replay: filesystem " + "is frozen\n", jd->jd_jid); goto fail_gunlock_ji; + } if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) { ro = 1; @@ -496,7 +495,7 @@ void gfs2_recover_func(struct work_struct *work) fs_warn(sdp, "jid=%u: Can't replay: read-only block " "device\n", jd->jd_jid); error = -EROFS; - goto fail_gunlock_thaw; + goto fail_gunlock_nofreeze; } 
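One note on the __get_log_header() change earlier in these recovery.c hunks: the log header's checksum covers the whole header with the lh_hash field treated as zero, so after CRCing the bytes that precede the field, the old code shifted the CRC with crc32_le_shift(hash, 4) and the new code feeds four literal zero bytes instead; both extend the running CRC as if four zero bytes followed. A self-contained sketch of the idea, using a local bitwise CRC32 routine written for illustration (it mimics the kernel's seed-and-invert convention but is not the kernel implementation):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* Minimal reflected CRC32 (poly 0xEDB88320) that can be continued
	 * across calls, like the kernel's crc32_le(). */
	static uint32_t crc32_update(uint32_t crc, const uint8_t *p, size_t len)
	{
		while (len--) {
			crc ^= *p++;
			for (int i = 0; i < 8; i++)
				crc = (crc >> 1) ^ (crc & 1 ? 0xEDB88320u : 0);
		}
		return crc;
	}

	int main(void)
	{
		/* A pretend 16-byte header whose last 4 bytes hold the hash
		 * field; the checksum treats that field as zero. */
		uint8_t hdr[16] = "log header demo";
		uint8_t zeroed[16];
		uint32_t zero = 0, a, b;

		/* One-shot CRC over a copy with the hash field zeroed out... */
		memcpy(zeroed, hdr, sizeof(zeroed));
		memset(zeroed + 12, 0, 4);
		a = ~crc32_update(~0u, zeroed, 16);

		/* ...equals CRC over the first 12 bytes, continued with four
		 * zero bytes, which is what the patched code does. */
		b = crc32_update(~0u, hdr, 12);
		b = ~crc32_update(b, (const uint8_t *)&zero, 4);

		printf("%08x %08x -> %s\n", (unsigned)a, (unsigned)b,
		       a == b ? "match" : "mismatch");
		return 0;
	}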
t_tlck = ktime_get(); @@ -514,7 +513,7 @@ void gfs2_recover_func(struct work_struct *work) lops_after_scan(jd, error, pass); if (error) { up_read(&sdp->sd_log_flush_lock); - goto fail_gunlock_thaw; + goto fail_gunlock_nofreeze; } } @@ -522,7 +521,7 @@ void gfs2_recover_func(struct work_struct *work) clean_journal(jd, &head); up_read(&sdp->sd_log_flush_lock); - gfs2_freeze_unlock(&thaw_gh); + mutex_unlock(&sdp->sd_freeze_mutex); t_rep = ktime_get(); fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, " "jhead:%lldms, tlck:%lldms, replay:%lldms]\n", @@ -533,6 +532,9 @@ void gfs2_recover_func(struct work_struct *work) ktime_ms_delta(t_rep, t_tlck)); } + if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) + gfs2_log_pointers_init(sdp, &head); + gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS); if (jlocked) { @@ -543,8 +545,8 @@ void gfs2_recover_func(struct work_struct *work) fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); goto done; -fail_gunlock_thaw: - gfs2_freeze_unlock(&thaw_gh); +fail_gunlock_nofreeze: + mutex_unlock(&sdp->sd_freeze_mutex); fail_gunlock_ji: if (jlocked) { gfs2_glock_dq_uninit(&ji_gh); @@ -570,7 +572,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait) return -EBUSY; /* we have JDF_RECOVERY, queue should always succeed */ - rv = queue_work(gfs_recovery_wq, &jd->jd_work); + rv = queue_work(gfs2_recovery_wq, &jd->jd_work); BUG_ON(!rv); if (wait) @@ -580,3 +582,13 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait) return wait ? jd->jd_recover_error : 0; } +void gfs2_log_pointers_init(struct gfs2_sbd *sdp, + struct gfs2_log_header_host *head) +{ + sdp->sd_log_sequence = head->lh_sequence + 1; + gfs2_replay_incr_blk(sdp->sd_jdesc, &head->lh_blkno); + sdp->sd_log_tail = head->lh_blkno; + sdp->sd_log_flush_head = head->lh_blkno; + sdp->sd_log_flush_tail = head->lh_blkno; + sdp->sd_log_head = head->lh_blkno; +} diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 0d30f8e804f4..5a5ba72ecd75 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -9,7 +9,7 @@ #include "incore.h" -extern struct workqueue_struct *gfs_recovery_wq; +extern struct workqueue_struct *gfs2_recovery_wq; static inline void gfs2_replay_incr_blk(struct gfs2_jdesc *jd, u32 *blk) { @@ -17,17 +17,19 @@ static inline void gfs2_replay_incr_blk(struct gfs2_jdesc *jd, u32 *blk) *blk = 0; } -extern int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, +int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, struct buffer_head **bh); -extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); -extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); -extern void gfs2_revoke_clean(struct gfs2_jdesc *jd); +int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); +int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); +void gfs2_revoke_clean(struct gfs2_jdesc *jd); -extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait); -extern void gfs2_recover_func(struct work_struct *work); -extern int __get_log_header(struct gfs2_sbd *sdp, - const struct gfs2_log_header *lh, unsigned int blkno, +int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait); +void gfs2_recover_func(struct work_struct *work); +int __get_log_header(struct gfs2_sbd *sdp, + const struct gfs2_log_header *lh, unsigned int blkno, + struct gfs2_log_header_host *head); +void gfs2_log_pointers_init(struct gfs2_sbd *sdp, struct gfs2_log_header_host *head); #endif /* __RECOVERY_DOT_H__ */ diff 
--git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f602fb844951..b14e54b38ee8 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -159,13 +159,13 @@ static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone) } /** - * gfs2_bit_search + * gfs2_bit_search - search bitmap for a state * @ptr: Pointer to bitmap data * @mask: Mask to use (normally 0x55555.... but adjusted for search start) * @state: The state we are searching for * - * We xor the bitmap data with a patter which is the bitwise opposite - * of what we are looking for, this gives rise to a pattern of ones + * We xor the bitmap data with a pattern which is the bitwise opposite + * of what we are looking for. This gives rise to a pattern of ones * wherever there is a match. Since we have two bits per entry, we * take this pattern, shift it down by one place and then and it with * the original. All the even bit positions (0,2,4, etc) then represent @@ -814,11 +814,11 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) bi = rgd->rd_bits + (length - 1); if ((bi->bi_start + bi->bi_bytes) * GFS2_NBBY != rgd->rd_data) { gfs2_lm(sdp, - "ri_addr = %llu\n" - "ri_length = %u\n" - "ri_data0 = %llu\n" - "ri_data = %u\n" - "ri_bitbytes = %u\n" + "ri_addr=%llu " + "ri_length=%u " + "ri_data0=%llu " + "ri_data=%u " + "ri_bitbytes=%u " "start=%u len=%u offset=%u\n", (unsigned long long)rgd->rd_addr, rgd->rd_length, @@ -1188,7 +1188,7 @@ static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd) /** * gfs2_rgrp_go_instantiate - Read in a RG's header and bitmaps - * @gh: the glock holder representing the rgrpd to read in + * @gl: the glock representing the rgrpd to read in * * Read in all of a Resource Group's header and bitmap blocks. * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps. 
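The reworded gfs2_bit_search() comment above is easiest to follow next to the bit trick it describes. A compact sketch under the same two-bits-per-block layout — the helper name is hypothetical, and the caller-supplied mask handling is simplified to a fixed constant here:

#include <linux/types.h>

/*
 * Illustrative only: XOR with the bitwise complement of the wanted
 * state turns every matching two-bit entry into 0b11; "tmp & (tmp >> 1)"
 * then leaves a 1 in the low bit of each fully matching pair.  A
 * non-zero result means a match; __ffs(result) / 2 is its index.
 */
static u64 two_bit_match(u64 bits, u8 state)
{
	static const u64 search[] = {
		[0] = 0xffffffffffffffffULL,	/* ~(0b00 repeated) */
		[1] = 0xaaaaaaaaaaaaaaaaULL,	/* ~(0b01 repeated) */
		[2] = 0x5555555555555555ULL,	/* ~(0b10 repeated) */
		[3] = 0x0000000000000000ULL,	/* ~(0b11 repeated) */
	};
	u64 tmp = bits ^ search[state];	/* matches become 0b11 pairs */

	tmp &= (tmp >> 1);		/* low bit set iff both bits set */
	return tmp & 0x5555555555555555ULL; /* one bit per matching entry */
}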
@@ -1879,7 +1879,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip */ ip = gl->gl_object; - if (ip || !gfs2_queue_delete_work(gl, 0)) + if (ip || !gfs2_queue_verify_delete(gl, false)) gfs2_glock_put(gl); else found++; @@ -1967,7 +1967,7 @@ static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) } /** - * gfs2_rgrp_used_recently + * gfs2_rgrp_used_recently - test if an rgrp has been used recently * @rs: The block reservation with the rgrp to test * @msecs: The time limit in milliseconds * @@ -1987,10 +1987,8 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) { const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - u32 skip; - get_random_bytes(&skip, sizeof(skip)); - return skip % sdp->sd_rgrps; + return get_random_u32() % sdp->sd_rgrps; } static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) @@ -2306,7 +2304,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, (unsigned long long)rgd->rd_addr, rgd->rd_flags, rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, rgd->rd_requested, rgd->rd_reserved, rgd->rd_extfail_pt); - if (rgd->rd_sbd->sd_args.ar_rgrplvb) { + if (rgd->rd_sbd->sd_args.ar_rgrplvb && rgd->rd_rgl) { struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; gfs2_print_dbg(seq, "%s L: f:%02x b:%u i:%u\n", fs_id_buf, @@ -2411,13 +2409,12 @@ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm, * @bn: Used to return the starting block number * @nblocks: requested number of blocks/extent length (value/result) * @dinode: 1 if we're allocating a dinode block, else 0 - * @generation: the generation number of the inode * * Returns: 0 or error */ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, - bool dinode, u64 *generation) + bool dinode) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; @@ -2477,10 +2474,13 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, rbm.rgd->rd_free -= *nblocks; spin_unlock(&rbm.rgd->rd_rsspin); if (dinode) { + u64 generation; + rbm.rgd->rd_dinodes++; - *generation = rbm.rgd->rd_igeneration++; - if (*generation == 0) - *generation = rbm.rgd->rd_igeneration++; + generation = rbm.rgd->rd_igeneration++; + if (generation == 0) + generation = rbm.rgd->rd_igeneration++; + ip->i_generation = generation; } gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh); @@ -2584,8 +2584,8 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - rgrp_unlock_local(rgd); be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1); + rgrp_unlock_local(rgd); gfs2_statfs_change(sdp, 0, +1, -1); trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 00b30cf893af..8d20e99385db 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -22,38 +22,38 @@ struct gfs2_rgrpd; struct gfs2_sbd; struct gfs2_holder; -extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); +void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); -extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact); -extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); -extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); +struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact); +struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); +struct 
gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); -extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); -extern int gfs2_rindex_update(struct gfs2_sbd *sdp); -extern void gfs2_free_clones(struct gfs2_rgrpd *rgd); -extern int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl); -extern void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd); +void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); +int gfs2_rindex_update(struct gfs2_sbd *sdp); +void gfs2_free_clones(struct gfs2_rgrpd *rgd); +int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl); +void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd); -extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); +struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); #define GFS2_AF_ORLOV 1 -extern int gfs2_inplace_reserve(struct gfs2_inode *ip, - struct gfs2_alloc_parms *ap); -extern void gfs2_inplace_release(struct gfs2_inode *ip); - -extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, - bool dinode, u64 *generation); - -extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rs_delete(struct gfs2_inode *ip); -extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, - u64 bstart, u32 blen, int meta); -extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, - u64 bstart, u32 blen); -extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); -extern void gfs2_unlink_di(struct inode *inode); -extern int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, - unsigned int type); +int gfs2_inplace_reserve(struct gfs2_inode *ip, + struct gfs2_alloc_parms *ap); +void gfs2_inplace_release(struct gfs2_inode *ip); + +int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, + bool dinode); + +void gfs2_rs_deltree(struct gfs2_blkreserv *rs); +void gfs2_rs_delete(struct gfs2_inode *ip); +void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, + u64 bstart, u32 blen, int meta); +void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, + u64 bstart, u32 blen); +void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); +void gfs2_unlink_di(struct inode *inode); +int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, + unsigned int type); struct gfs2_rgrp_list { unsigned int rl_rgrps; @@ -62,18 +62,19 @@ struct gfs2_rgrp_list { struct gfs2_holder *rl_ghs; }; -extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, - u64 block); -extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, - unsigned int state, u16 flags); -extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); -extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); -extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, - const char *fs_id_buf); -extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, - struct buffer_head *bh, - const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); -extern int gfs2_fitrim(struct file *filp, void __user *argp); +void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, + u64 block); +void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, + unsigned int state, u16 flags); +void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); +u64 gfs2_ri_total(struct gfs2_sbd *sdp); +void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, + const char *fs_id_buf); +int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, + struct buffer_head *bh, + const struct gfs2_bitmap *bi, unsigned minlen, + u64 *ptrimmed); +int gfs2_fitrim(struct file *filp, void __user *argp); 
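One detail of the gfs2_alloc_blocks() hunk in rgrp.c above is worth calling out: dinode generation numbers are now assigned inside the allocator rather than returned to the caller, and a generation of 0 is never handed out because it is reserved to mean "no generation". A hedged sketch of that counter pattern — the helper is hypothetical:

#include <linux/types.h>

/*
 * Illustrative only: mirror the skip-zero generation assignment from
 * the gfs2_alloc_blocks() change above.
 */
static u64 next_dinode_generation(u64 *igeneration)
{
	u64 generation = (*igeneration)++;

	if (generation == 0)	/* 0 is reserved; bump past it on wrap */
		generation = (*igeneration)++;
	return generation;
}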
/* This is how to tell if a reservation is in the rgrp tree: */ static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs) @@ -88,9 +89,9 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) return first <= block && block < last; } -extern void check_and_update_goal(struct gfs2_inode *ip); +void check_and_update_goal(struct gfs2_inode *ip); -extern void rgrp_lock_local(struct gfs2_rgrpd *rgd); -extern void rgrp_unlock_local(struct gfs2_rgrpd *rgd); +void rgrp_lock_local(struct gfs2_rgrpd *rgd); +void rgrp_unlock_local(struct gfs2_rgrpd *rgd); #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 999cc146d708..f6cd907b3ec6 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -44,10 +44,10 @@ #include "xattr.h" #include "lops.h" -enum dinode_demise { - SHOULD_DELETE_DINODE, - SHOULD_NOT_DELETE_DINODE, - SHOULD_DEFER_EVICTION, +enum evict_behavior { + EVICT_SHOULD_DELETE, + EVICT_SHOULD_SKIP_DELETE, + EVICT_SHOULD_DEFER_DELETE, }; /** @@ -67,9 +67,13 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp) sdp->sd_journals = 0; spin_unlock(&sdp->sd_jindex_spin); + down_write(&sdp->sd_log_flush_lock); sdp->sd_jdesc = NULL; + up_write(&sdp->sd_log_flush_lock); + while (!list_empty(&list)) { jd = list_first_entry(&list, struct gfs2_jdesc, jd_list); + BUG_ON(jd->jd_log_bio); gfs2_free_journal_extents(jd); list_del(&jd->jd_list); iput(jd->jd_inode); @@ -130,28 +134,22 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); struct gfs2_glock *j_gl = ip->i_gl; - struct gfs2_log_header_host head; int error; j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); if (gfs2_withdrawn(sdp)) return -EIO; - error = gfs2_find_jhead(sdp->sd_jdesc, &head, false); - if (error || gfs2_withdrawn(sdp)) - return error; - - if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { - gfs2_consist(sdp); + if (sdp->sd_log_sequence == 0) { + fs_err(sdp, "unknown status of our own journal jid %d", + sdp->sd_lockstruct.ls_jid); return -EIO; } - /* Initialize some head of the log stuff */ - sdp->sd_log_sequence = head.lh_sequence + 1; - gfs2_log_pointers_init(sdp, head.lh_blkno); - error = gfs2_quota_init(sdp); - if (!error && !gfs2_withdrawn(sdp)) + if (!error && gfs2_withdrawn(sdp)) + error = -EIO; + if (!error) set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); return error; } @@ -328,7 +326,12 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) struct lfcc *lfcc; LIST_HEAD(list); struct gfs2_log_header_host lh; - int error; + int error, error2; + + /* + * Grab all the journal glocks in SH mode. We are *probably* doing + * that to prevent recovery. 
+ */ list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL); @@ -345,17 +348,19 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) list_add(&lfcc->list, &list); } + gfs2_freeze_unlock(sdp); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE, - LM_FLAG_NOEXP | GL_NOPID, + LM_FLAG_RECOVER | GL_NOPID, &sdp->sd_freeze_gh); if (error) - goto out; + goto relock_shared; list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { error = gfs2_jdesc_check(jd); if (error) break; - error = gfs2_find_jhead(jd, &lh, false); + error = gfs2_find_jhead(jd, &lh); if (error) break; if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { @@ -364,8 +369,14 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) } } - if (error) - gfs2_freeze_unlock(&sdp->sd_freeze_gh); + if (!error) + goto out; /* success */ + + gfs2_freeze_unlock(sdp); + +relock_shared: + error2 = gfs2_freeze_lock_shared(sdp); + gfs2_assert_withdraw(sdp, !error2); out: while (!list_empty(&list)) { @@ -393,9 +404,9 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_nlink = cpu_to_be32(inode->i_nlink); str->di_size = cpu_to_be64(i_size_read(inode)); str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode)); - str->di_atime = cpu_to_be64(inode->i_atime.tv_sec); - str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec); - str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec); + str->di_atime = cpu_to_be64(inode_get_atime_sec(inode)); + str->di_mtime = cpu_to_be64(inode_get_mtime_sec(inode)); + str->di_ctime = cpu_to_be64(inode_get_ctime_sec(inode)); str->di_goal_meta = cpu_to_be64(ip->i_goal); str->di_goal_data = cpu_to_be64(ip->i_goal); @@ -410,9 +421,9 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_entries = cpu_to_be32(ip->i_entries); str->di_eattr = cpu_to_be64(ip->i_eattr); - str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec); - str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec); - str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec); + str->di_atime_nsec = cpu_to_be32(inode_get_atime_nsec(inode)); + str->di_mtime_nsec = cpu_to_be32(inode_get_mtime_nsec(inode)); + str->di_ctime_nsec = cpu_to_be32(inode_get_ctime_nsec(inode)); } /** @@ -459,7 +470,7 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) * @flags: The type of dirty * * Unfortunately it can be called under any combination of inode - * glock and transaction lock, so we have to check carefully. + * glock and freeze glock, so we have to check carefully. * * At the moment this deals only with atime - it should be possible * to expand that role in future, once a review of the locking has @@ -476,13 +487,11 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) int need_endtrans = 0; int ret; - if (unlikely(!ip->i_gl)) { - /* This can only happen during incomplete inode creation. */ - BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + /* This can only happen during incomplete inode creation. 
*/ + if (unlikely(!ip->i_gl)) return; - } - if (unlikely(gfs2_withdrawn(sdp))) + if (gfs2_withdrawn(sdp)) return; if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); @@ -529,38 +538,32 @@ void gfs2_make_fs_ro(struct gfs2_sbd *sdp) { int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - gfs2_flush_delete_work(sdp); - if (!log_write_allowed && current == sdp->sd_quotad_process) - fs_warn(sdp, "The quotad daemon is withdrawing.\n"); - else if (sdp->sd_quotad_process) - kthread_stop(sdp->sd_quotad_process); - sdp->sd_quotad_process = NULL; + if (!test_bit(SDF_KILL, &sdp->sd_flags)) + gfs2_flush_delete_work(sdp); - if (!log_write_allowed && current == sdp->sd_logd_process) - fs_warn(sdp, "The logd daemon is withdrawing.\n"); - else if (sdp->sd_logd_process) - kthread_stop(sdp->sd_logd_process); - sdp->sd_logd_process = NULL; + gfs2_destroy_threads(sdp); if (log_write_allowed) { gfs2_quota_sync(sdp->sd_vfs, 0); gfs2_statfs_sync(sdp->sd_vfs, 0); + /* We do two log flushes here. The first one commits dirty inodes + * and rgrps to the journal, but queues up revokes to the ail list. + * The second flush writes out and removes the revokes. + * + * The first must be done before the FLUSH_SHUTDOWN code + * clears the LIVE flag, otherwise it will not be able to start + * a transaction to write its revokes, and the error will cause + * a withdraw of the file system. */ + gfs2_log_flush(sdp, NULL, GFS2_LFC_MAKE_FS_RO); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN | GFS2_LFC_MAKE_FS_RO); wait_event_timeout(sdp->sd_log_waitq, gfs2_log_is_empty(sdp), HZ * 5); gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp)); - } else { - wait_event_timeout(sdp->sd_log_waitq, - gfs2_log_is_empty(sdp), - HZ * 5); } gfs2_quota_cleanup(sdp); - - if (!log_write_allowed) - sdp->sd_vfs->s_flags |= SB_RDONLY; } /** @@ -591,15 +594,23 @@ restart: } spin_unlock(&sdp->sd_jindex_spin); - if (!sb_rdonly(sb)) { + if (!sb_rdonly(sb)) gfs2_make_fs_ro(sdp); + else { + if (gfs2_withdrawn(sdp)) + gfs2_destroy_threads(sdp); + + gfs2_quota_cleanup(sdp); } - WARN_ON(gfs2_withdrawing(sdp)); + + flush_work(&sdp->sd_withdraw_work); /* At this point, we're through modifying the disk */ /* Release stuff */ + gfs2_freeze_unlock(sdp); + iput(sdp->sd_jindex); iput(sdp->sd_statfs_inode); iput(sdp->sd_rindex); @@ -625,12 +636,9 @@ restart: gfs2_jindex_free(sdp); /* Take apart glock structures and buffer lists */ gfs2_gl_hash_clear(sdp); - truncate_inode_pages_final(&sdp->sd_aspace); + iput(sdp->sd_inode); gfs2_delete_debugfs_file(sdp); - /* Unmount the locking protocol */ - gfs2_lm_unmount(sdp); - /* At this point, we're through participating in the lockspace */ gfs2_sys_fs_del(sdp); free_sbd(sdp); } @@ -654,59 +662,94 @@ static int gfs2_sync_fs(struct super_block *sb, int wait) return sdp->sd_log_error; } -void gfs2_freeze_func(struct work_struct *work) +static int gfs2_do_thaw(struct gfs2_sbd *sdp, enum freeze_holder who, const void *freeze_owner) { + struct super_block *sb = sdp->sd_vfs; int error; - struct gfs2_holder freeze_gh; + + error = gfs2_freeze_lock_shared(sdp); + if (error) + goto fail; + error = thaw_super(sb, who, freeze_owner); + if (!error) + return 0; + +fail: + fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n", error); + gfs2_assert_withdraw(sdp, 0); + return error; +} + +void gfs2_freeze_func(struct work_struct *work) +{ struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work); struct super_block *sb = sdp->sd_vfs; + int error; - 
atomic_inc(&sb->s_active); - error = gfs2_freeze_lock(sdp, &freeze_gh, 0); - if (error) { - gfs2_assert_withdraw(sdp, 0); - } else { - atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); - error = thaw_super(sb); - if (error) { - fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n", - error); - gfs2_assert_withdraw(sdp, 0); - } - gfs2_freeze_unlock(&freeze_gh); - } + mutex_lock(&sdp->sd_freeze_mutex); + error = -EBUSY; + if (test_bit(SDF_FROZEN, &sdp->sd_flags)) + goto freeze_failed; + + error = freeze_super(sb, FREEZE_HOLDER_USERSPACE, NULL); + if (error) + goto freeze_failed; + + gfs2_freeze_unlock(sdp); + set_bit(SDF_FROZEN, &sdp->sd_flags); + + error = gfs2_do_thaw(sdp, FREEZE_HOLDER_USERSPACE, NULL); + if (error) + goto out; + + clear_bit(SDF_FROZEN, &sdp->sd_flags); + goto out; + +freeze_failed: + fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", error); + +out: + mutex_unlock(&sdp->sd_freeze_mutex); deactivate_super(sb); - clear_bit_unlock(SDF_FS_FROZEN, &sdp->sd_flags); - wake_up_bit(&sdp->sd_flags, SDF_FS_FROZEN); - return; } /** - * gfs2_freeze - prevent further writes to the filesystem + * gfs2_freeze_super - prevent further writes to the filesystem * @sb: the VFS structure for the filesystem + * @who: freeze flags + * @freeze_owner: owner of the freeze * */ -static int gfs2_freeze(struct super_block *sb) +static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who, + const void *freeze_owner) { struct gfs2_sbd *sdp = sb->s_fs_info; int error; - mutex_lock(&sdp->sd_freeze_mutex); - if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) { - error = -EBUSY; - goto out; + if (!mutex_trylock(&sdp->sd_freeze_mutex)) + return -EBUSY; + if (test_bit(SDF_FROZEN, &sdp->sd_flags)) { + mutex_unlock(&sdp->sd_freeze_mutex); + return -EBUSY; } for (;;) { - if (gfs2_withdrawn(sdp)) { - error = -EINVAL; + error = freeze_super(sb, who, freeze_owner); + if (error) { + fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", + error); goto out; } error = gfs2_lock_fs_check_clean(sdp); - if (!error) + if (!error) { + set_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags); + set_bit(SDF_FROZEN, &sdp->sd_flags); break; + } + + (void)gfs2_do_thaw(sdp, who, freeze_owner); if (error == -EBUSY) fs_err(sdp, "waiting for recovery before freeze\n"); @@ -720,32 +763,58 @@ static int gfs2_freeze(struct super_block *sb) fs_err(sdp, "retrying...\n"); msleep(1000); } - set_bit(SDF_FS_FROZEN, &sdp->sd_flags); + out: mutex_unlock(&sdp->sd_freeze_mutex); return error; } +static int gfs2_freeze_fs(struct super_block *sb) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + + if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | + GFS2_LFC_FREEZE_GO_SYNC); + if (gfs2_withdrawn(sdp)) + return -EIO; + } + return 0; +} + /** - * gfs2_unfreeze - reallow writes to the filesystem + * gfs2_thaw_super - reallow writes to the filesystem * @sb: the VFS structure for the filesystem + * @who: freeze flags + * @freeze_owner: owner of the freeze * */ -static int gfs2_unfreeze(struct super_block *sb) +static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who, + const void *freeze_owner) { struct gfs2_sbd *sdp = sb->s_fs_info; + int error; - mutex_lock(&sdp->sd_freeze_mutex); - if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN || - !gfs2_holder_initialized(&sdp->sd_freeze_gh)) { + if (!mutex_trylock(&sdp->sd_freeze_mutex)) + return -EBUSY; + if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) { mutex_unlock(&sdp->sd_freeze_mutex); return -EINVAL; } - 
gfs2_freeze_unlock(&sdp->sd_freeze_gh); + atomic_inc(&sb->s_active); + gfs2_freeze_unlock(sdp); + + error = gfs2_do_thaw(sdp, who, freeze_owner); + + if (!error) { + clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags); + clear_bit(SDF_FROZEN, &sdp->sd_flags); + } mutex_unlock(&sdp->sd_freeze_mutex); - return wait_on_bit(&sdp->sd_flags, SDF_FS_FROZEN, TASK_INTERRUPTIBLE); + deactivate_super(sb); + return error; } /** @@ -911,6 +980,7 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = sc.sc_dinodes + sc.sc_free; buf->f_ffree = sc.sc_free; buf->f_namelen = GFS2_FNAMESIZE; + buf->f_fsid = uuid_to_fsid(sb->s_uuid.b); return 0; } @@ -933,11 +1003,12 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) static int gfs2_drop_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); if (inode->i_nlink && gfs2_holder_initialized(&ip->i_iopen_gh)) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; - if (test_bit(GLF_DEMOTE, &gl->gl_flags)) + if (glock_needs_demote(gl)) clear_nlink(inode); } @@ -952,22 +1023,18 @@ static int gfs2_drop_inode(struct inode *inode) struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; gfs2_glock_hold(gl); - if (!gfs2_queue_delete_work(gl, 0)) - gfs2_glock_queue_put(gl); + if (!gfs2_queue_verify_delete(gl, true)) + gfs2_glock_put_async(gl); return 0; } - return generic_drop_inode(inode); -} + /* + * No longer cache inodes when trying to evict them all. + */ + if (test_bit(SDF_EVICTING, &sdp->sd_flags)) + return 1; -static int is_ancestor(const struct dentry *d1, const struct dentry *d2) -{ - do { - if (d1 == d2) - return 1; - d1 = d1->d_parent; - } while (!IS_ROOT(d1)); - return 0; + return inode_generic_drop(inode); } /** @@ -982,9 +1049,16 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) { struct gfs2_sbd *sdp = root->d_sb->s_fs_info; struct gfs2_args *args = &sdp->sd_args; - int val; + unsigned int logd_secs, statfs_slow, statfs_quantum, quota_quantum; - if (is_ancestor(root, sdp->sd_master_dir)) + spin_lock(&sdp->sd_tune.gt_spin); + logd_secs = sdp->sd_tune.gt_logd_secs; + quota_quantum = sdp->sd_tune.gt_quota_quantum; + statfs_quantum = sdp->sd_tune.gt_statfs_quantum; + statfs_slow = sdp->sd_tune.gt_statfs_slow; + spin_unlock(&sdp->sd_tune.gt_spin); + + if (is_subdir(root, sdp->sd_master_dir)) seq_puts(s, ",meta"); if (args->ar_lockproto[0]) seq_show_option(s, "lockproto", args->ar_lockproto); @@ -1012,6 +1086,9 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) case GFS2_QUOTA_ON: state = "on"; break; + case GFS2_QUOTA_QUIET: + state = "quiet"; + break; default: state = "unknown"; break; @@ -1037,17 +1114,14 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) } if (args->ar_discard) seq_puts(s, ",discard"); - val = sdp->sd_tune.gt_logd_secs; - if (val != 30) - seq_printf(s, ",commit=%d", val); - val = sdp->sd_tune.gt_statfs_quantum; - if (val != 30) - seq_printf(s, ",statfs_quantum=%d", val); - else if (sdp->sd_tune.gt_statfs_slow) + if (logd_secs != 30) + seq_printf(s, ",commit=%d", logd_secs); + if (statfs_quantum != 30) + seq_printf(s, ",statfs_quantum=%d", statfs_quantum); + else if (statfs_slow) seq_puts(s, ",statfs_quantum=0"); - val = sdp->sd_tune.gt_quota_quantum; - if (val != 60) - seq_printf(s, ",quota_quantum=%d", val); + if (quota_quantum != 60) + seq_printf(s, ",quota_quantum=%d", quota_quantum); if (args->ar_statfs_percent) seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent); if 
(args->ar_errors != GFS2_ERRORS_DEFAULT) { @@ -1057,6 +1131,9 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) case GFS2_ERRORS_WITHDRAW: state = "withdraw"; break; + case GFS2_ERRORS_DEACTIVATE: + state = "deactivate"; + break; case GFS2_ERRORS_PANIC: state = "panic"; break; @@ -1077,76 +1154,6 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) return 0; } -static void gfs2_final_release_pages(struct gfs2_inode *ip) -{ - struct inode *inode = &ip->i_inode; - struct gfs2_glock *gl = ip->i_gl; - - if (unlikely(!gl)) { - /* This can only happen during incomplete inode creation. */ - BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); - return; - } - - truncate_inode_pages(gfs2_glock2aspace(gl), 0); - truncate_inode_pages(&inode->i_data, 0); - - if (atomic_read(&gl->gl_revokes) == 0) { - clear_bit(GLF_LFLUSH, &gl->gl_flags); - clear_bit(GLF_DIRTY, &gl->gl_flags); - } -} - -static int gfs2_dinode_dealloc(struct gfs2_inode *ip) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_rgrpd *rgd; - struct gfs2_holder gh; - int error; - - if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { - gfs2_consist_inode(ip); - return -EIO; - } - - error = gfs2_rindex_update(sdp); - if (error) - return error; - - error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); - if (error) - return error; - - rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1); - if (!rgd) { - gfs2_consist_inode(ip); - error = -EIO; - goto out_qs; - } - - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, - LM_FLAG_NODE_SCOPE, &gh); - if (error) - goto out_qs; - - error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, - sdp->sd_jdesc->jd_blocks); - if (error) - goto out_rg_gunlock; - - gfs2_free_di(rgd, ip); - - gfs2_final_release_pages(ip); - - gfs2_trans_end(sdp); - -out_rg_gunlock: - gfs2_glock_dq_uninit(&gh); -out_qs: - gfs2_quota_unhold(ip); - return error; -} - /** * gfs2_glock_put_eventually * @gl: The glock to put @@ -1158,58 +1165,60 @@ out_qs: static void gfs2_glock_put_eventually(struct gfs2_glock *gl) { if (current->flags & PF_MEMALLOC) - gfs2_glock_queue_put(gl); + gfs2_glock_put_async(gl); else gfs2_glock_put(gl); } -static bool gfs2_upgrade_iopen_glock(struct inode *inode) +static enum evict_behavior gfs2_upgrade_iopen_glock(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_holder *gh = &ip->i_iopen_gh; - long timeout = 5 * HZ; int error; gh->gh_flags |= GL_NOCACHE; gfs2_glock_dq_wait(gh); /* - * If there are no other lock holders, we'll get the lock immediately. + * If there are no other lock holders, we will immediately get + * exclusive access to the iopen glock here. + * * Otherwise, the other nodes holding the lock will be notified about - * our locking request. If they don't have the inode open, they'll - * evict the cached inode and release the lock. Otherwise, if they - * poke the inode glock, we'll take this as an indication that they - * still need the iopen glock and that they'll take care of deleting - * the inode when they're done. As a last resort, if another node - * keeps holding the iopen glock without showing any activity on the - * inode glock, we'll eventually time out. + * our locking request (see iopen_go_callback()). If they do not have + * the inode open, they are expected to evict the cached inode and + * release the lock, allowing us to proceed. 
+ * + * Otherwise, if they cannot evict the inode, they are expected to poke + * the inode glock (note: not the iopen glock). We will notice that + * and stop waiting for the iopen glock immediately. The other node(s) + * are then expected to take care of deleting the inode when they no + * longer use it. * - * Note that we're passing the LM_FLAG_TRY_1CB flag to the first - * locking request as an optimization to notify lock holders as soon as - * possible. Without that flag, they'd be notified implicitly by the - * second locking request. + * As a last resort, if another node keeps holding the iopen glock + * without showing any activity on the inode glock, we will eventually + * time out and fail the iopen glock upgrade. */ - gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, gh); - error = gfs2_glock_nq(gh); - if (error != GLR_TRYFAILED) - return !error; - gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh); error = gfs2_glock_nq(gh); if (error) - return false; + return EVICT_SHOULD_SKIP_DELETE; - timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait, + wait_event_interruptible_timeout(sdp->sd_async_glock_wait, !test_bit(HIF_WAIT, &gh->gh_iflags) || - test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags), - timeout); + glock_needs_demote(ip->i_gl), + 5 * HZ); if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) { gfs2_glock_dq(gh); - return false; + if (glock_needs_demote(ip->i_gl)) + return EVICT_SHOULD_SKIP_DELETE; + return EVICT_SHOULD_DEFER_DELETE; } - return gfs2_glock_holder_ready(gh) == 0; + error = gfs2_glock_holder_ready(gh); + if (error) + return EVICT_SHOULD_SKIP_DELETE; + return EVICT_SHOULD_DELETE; } /** @@ -1222,58 +1231,47 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) * * Returns: the fate of the dinode */ -static enum dinode_demise evict_should_delete(struct inode *inode, - struct gfs2_holder *gh) +static enum evict_behavior evict_should_delete(struct inode *inode, + struct gfs2_holder *gh) { struct gfs2_inode *ip = GFS2_I(inode); struct super_block *sb = inode->i_sb; struct gfs2_sbd *sdp = sb->s_fs_info; int ret; - if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) - goto should_delete; - - if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags)) - return SHOULD_DEFER_EVICTION; + if (gfs2_holder_initialized(&ip->i_iopen_gh) && + test_bit(GLF_DEFER_DELETE, &ip->i_iopen_gh.gh_gl->gl_flags)) + return EVICT_SHOULD_DEFER_DELETE; /* Deletes should never happen under memory pressure anymore. */ if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) - return SHOULD_DEFER_EVICTION; + return EVICT_SHOULD_DEFER_DELETE; /* Must not read inode block until block type has been verified */ ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, gh); - if (unlikely(ret)) { - glock_clear_object(ip->i_iopen_gh.gh_gl, ip); - ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_uninit(&ip->i_iopen_gh); - return SHOULD_DEFER_EVICTION; - } + if (unlikely(ret)) + return EVICT_SHOULD_SKIP_DELETE; if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino)) - return SHOULD_NOT_DELETE_DINODE; + return EVICT_SHOULD_SKIP_DELETE; ret = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED); if (ret) - return SHOULD_NOT_DELETE_DINODE; + return EVICT_SHOULD_SKIP_DELETE; ret = gfs2_instantiate(gh); if (ret) - return SHOULD_NOT_DELETE_DINODE; + return EVICT_SHOULD_SKIP_DELETE; /* * The inode may have been recreated in the meantime. 
*/ if (inode->i_nlink) - return SHOULD_NOT_DELETE_DINODE; + return EVICT_SHOULD_SKIP_DELETE; -should_delete: if (gfs2_holder_initialized(&ip->i_iopen_gh) && - test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { - if (!gfs2_upgrade_iopen_glock(inode)) { - gfs2_holder_uninit(&ip->i_iopen_gh); - return SHOULD_NOT_DELETE_DINODE; - } - } - return SHOULD_DELETE_DINODE; + test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) + return gfs2_upgrade_iopen_glock(inode); + return EVICT_SHOULD_DELETE; } /** @@ -1293,7 +1291,7 @@ static int evict_unlinked_inode(struct inode *inode) } if (ip->i_eattr) { - ret = gfs2_ea_dealloc(ip); + ret = gfs2_ea_dealloc(ip, true); if (ret) goto out; } @@ -1304,9 +1302,6 @@ static int evict_unlinked_inode(struct inode *inode) goto out; } - if (ip->i_gl) - gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino); - /* * As soon as we clear the bitmap for the dinode, gfs2_create_inode() * can get called to recreate it, or even gfs2_inode_lookup() if the @@ -1320,6 +1315,9 @@ static int evict_unlinked_inode(struct inode *inode) */ ret = gfs2_dinode_dealloc(ip); + if (!ret && ip->i_gl) + gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino); + out: return ret; } @@ -1384,16 +1382,35 @@ static void gfs2_evict_inode(struct inode *inode) struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; + enum evict_behavior behavior; int ret; + gfs2_holder_mark_uninitialized(&gh); if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr) goto out; - gfs2_holder_mark_uninitialized(&gh); - ret = evict_should_delete(inode, &gh); - if (ret == SHOULD_DEFER_EVICTION) + /* + * In case of an incomplete mount, gfs2_evict_inode() may be called for + * system files without having an active journal to write to. In that + * case, skip the filesystem evict. 
+ */ + if (!sdp->sd_jdesc) goto out; - if (ret == SHOULD_DELETE_DINODE) + + behavior = evict_should_delete(inode, &gh); + if (behavior == EVICT_SHOULD_DEFER_DELETE && + !test_bit(SDF_KILL, &sdp->sd_flags)) { + struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl; + + if (io_gl) { + gfs2_glock_hold(io_gl); + if (!gfs2_queue_verify_delete(io_gl, true)) + gfs2_glock_put(io_gl); + goto out; + } + behavior = EVICT_SHOULD_SKIP_DELETE; + } + if (behavior == EVICT_SHOULD_DELETE) ret = evict_unlinked_inode(inode); else ret = evict_linked_inode(inode); @@ -1401,13 +1418,11 @@ static void gfs2_evict_inode(struct inode *inode) if (gfs2_rs_active(&ip->i_res)) gfs2_rs_deltree(&ip->i_res); - if (gfs2_holder_initialized(&gh)) { - glock_clear_object(ip->i_gl, ip); - gfs2_glock_dq_uninit(&gh); - } if (ret && ret != GLR_TRYFAILED && ret != -EROFS) fs_warn(sdp, "gfs2_evict_inode: %d\n", ret); out: + if (gfs2_holder_initialized(&gh)) + gfs2_glock_dq_uninit(&gh); truncate_inode_pages_final(&inode->i_data); if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); @@ -1427,9 +1442,8 @@ out: if (ip->i_gl) { glock_clear_object(ip->i_gl, ip); wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE); - gfs2_glock_add_to_lru(ip->i_gl); gfs2_glock_put_eventually(ip->i_gl); - ip->i_gl = NULL; + rcu_assign_pointer(ip->i_gl, NULL); } } @@ -1441,11 +1455,13 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) if (!ip) return NULL; ip->i_no_addr = 0; + ip->i_no_formal_ino = 0; ip->i_flags = 0; ip->i_gl = NULL; gfs2_holder_mark_uninitialized(&ip->i_iopen_gh); memset(&ip->i_res, 0, sizeof(ip->i_res)); RB_CLEAR_NODE(&ip->i_res.rs_node); + ip->i_diskflags = 0; ip->i_rahead = 0; return &ip->i_inode; } @@ -1455,7 +1471,7 @@ static void gfs2_free_inode(struct inode *inode) kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode)); } -extern void free_local_statfs_inodes(struct gfs2_sbd *sdp) +void free_local_statfs_inodes(struct gfs2_sbd *sdp) { struct local_statfs_inode *lsi, *safe; @@ -1470,8 +1486,8 @@ extern void free_local_statfs_inodes(struct gfs2_sbd *sdp) } } -extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, - unsigned int index) +struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, + unsigned int index) { struct local_statfs_inode *lsi; @@ -1492,8 +1508,9 @@ const struct super_operations gfs2_super_ops = { .evict_inode = gfs2_evict_inode, .put_super = gfs2_put_super, .sync_fs = gfs2_sync_fs, - .freeze_super = gfs2_freeze, - .thaw_super = gfs2_unfreeze, + .freeze_super = gfs2_freeze_super, + .freeze_fs = gfs2_freeze_fs, + .thaw_super = gfs2_thaw_super, .statfs = gfs2_statfs, .drop_inode = gfs2_drop_inode, .show_options = gfs2_show_options, diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 58d13fd77aed..173f1e74c2a9 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -15,7 +15,7 @@ #define GFS2_FS_FORMAT_MIN (1801) #define GFS2_FS_FORMAT_MAX (1802) -extern void gfs2_lm_unmount(struct gfs2_sbd *sdp); +void gfs2_lm_unmount(struct gfs2_sbd *sdp); static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) { @@ -26,31 +26,32 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) return x; } -extern void gfs2_jindex_free(struct gfs2_sbd *sdp); +void gfs2_jindex_free(struct gfs2_sbd *sdp); -extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); -extern int gfs2_jdesc_check(struct gfs2_jdesc *jd); -extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, - struct gfs2_inode **ipp); +struct gfs2_jdesc 
*gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); +int gfs2_jdesc_check(struct gfs2_jdesc *jd); +int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, + struct gfs2_inode **ipp); -extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp); -extern void gfs2_make_fs_ro(struct gfs2_sbd *sdp); -extern void gfs2_online_uevent(struct gfs2_sbd *sdp); -extern int gfs2_statfs_init(struct gfs2_sbd *sdp); -extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, - s64 dinodes); -extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, - const void *buf); -extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, - void *buf); -extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh); -extern int gfs2_statfs_sync(struct super_block *sb, int type); -extern void gfs2_freeze_func(struct work_struct *work); +int gfs2_make_fs_rw(struct gfs2_sbd *sdp); +void gfs2_make_fs_ro(struct gfs2_sbd *sdp); +void gfs2_online_uevent(struct gfs2_sbd *sdp); +void gfs2_destroy_threads(struct gfs2_sbd *sdp); +int gfs2_statfs_init(struct gfs2_sbd *sdp); +void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, + s64 dinodes); +void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, + const void *buf); +void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, + void *buf); +void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh); +int gfs2_statfs_sync(struct super_block *sb, int type); +void gfs2_freeze_func(struct work_struct *work); -extern void free_local_statfs_inodes(struct gfs2_sbd *sdp); -extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, - unsigned int index); -extern void free_sbd(struct gfs2_sbd *sdp); +void free_local_statfs_inodes(struct gfs2_sbd *sdp); +struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, + unsigned int index); +void free_sbd(struct gfs2_sbd *sdp); extern struct file_system_type gfs2_fs_type; extern struct file_system_type gfs2meta_fs_type; @@ -58,8 +59,8 @@ extern const struct export_operations gfs2_export_ops; extern const struct super_operations gfs2_super_ops; extern const struct dentry_operations gfs2_dops; -extern const struct xattr_handler *gfs2_xattr_handlers_max[]; -extern const struct xattr_handler **gfs2_xattr_handlers_min; +extern const struct xattr_handler * const gfs2_xattr_handlers_max[]; +extern const struct xattr_handler * const *gfs2_xattr_handlers_min; #endif /* __SUPER_DOT_H__ */ diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index d87ea98cf535..7051db9dbea0 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -59,7 +59,7 @@ static struct kset *gfs2_kset; static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u:%u\n", + return sysfs_emit(buf, "%u:%u\n", MAJOR(sdp->sd_vfs->s_dev), MINOR(sdp->sd_vfs->s_dev)); } @@ -68,7 +68,7 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) unsigned long f = sdp->sd_flags; ssize_t s; - s = snprintf(buf, PAGE_SIZE, + s = sysfs_emit(buf, "Journal Checked: %d\n" "Journal Live: %d\n" "Journal ID: %d\n" @@ -82,11 +82,9 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) "RO Recovery: %d\n" "Skip DLM Unlock: %d\n" "Force AIL Flush: %d\n" + "FS Freeze Initiator: %d\n" "FS Frozen: %d\n" - "Withdrawing: %d\n" - "Withdraw In Prog: %d\n" - "Remote Withdraw: %d\n" - "Withdraw Recovery: %d\n" + "Killing: %d\n" "sd_log_error: %d\n" "sd_log_flush_lock: %d\n" "sd_log_num_revoke: %u\n" @@ -96,7 +94,10 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) 
"sd_log_flush_head: %d\n" "sd_log_flush_tail: %d\n" "sd_log_blks_reserved: %d\n" - "sd_log_revokes_available: %d\n", + "sd_log_revokes_available: %d\n" + "sd_log_pinned: %d\n" + "sd_log_thresh1: %d\n" + "sd_log_thresh2: %d\n", test_bit(SDF_JOURNAL_CHECKED, &f), test_bit(SDF_JOURNAL_LIVE, &f), (sdp->sd_jdesc ? sdp->sd_jdesc->jd_jid : 0), @@ -110,11 +111,9 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) test_bit(SDF_RORECOVERY, &f), test_bit(SDF_SKIP_DLM_UNLOCK, &f), test_bit(SDF_FORCE_AIL_FLUSH, &f), - test_bit(SDF_FS_FROZEN, &f), - test_bit(SDF_WITHDRAWING, &f), - test_bit(SDF_WITHDRAW_IN_PROG, &f), - test_bit(SDF_REMOTE_WITHDRAW, &f), - test_bit(SDF_WITHDRAW_RECOVERY, &f), + test_bit(SDF_FREEZE_INITIATOR, &f), + test_bit(SDF_FROZEN, &f), + test_bit(SDF_KILL, &f), sdp->sd_log_error, rwsem_is_locked(&sdp->sd_log_flush_lock), sdp->sd_log_num_revoke, @@ -124,13 +123,16 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) sdp->sd_log_flush_head, sdp->sd_log_flush_tail, sdp->sd_log_blks_reserved, - atomic_read(&sdp->sd_log_revokes_available)); + atomic_read(&sdp->sd_log_revokes_available), + atomic_read(&sdp->sd_log_pinned), + atomic_read(&sdp->sd_log_thresh1), + atomic_read(&sdp->sd_log_thresh2)); return s; } static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf) { - return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname); + return sysfs_emit(buf, "%s\n", sdp->sd_fsname); } static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf) @@ -140,7 +142,7 @@ static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf) buf[0] = '\0'; if (uuid_is_null(&s->s_uuid)) return 0; - return snprintf(buf, PAGE_SIZE, "%pUB\n", &s->s_uuid); + return sysfs_emit(buf, "%pUB\n", &s->s_uuid); } static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) @@ -148,7 +150,7 @@ static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) struct super_block *sb = sdp->sd_vfs; int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 
0 : 1; - return snprintf(buf, PAGE_SIZE, "%d\n", frozen); + return sysfs_emit(buf, "%d\n", frozen); } static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) @@ -164,10 +166,10 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) switch (n) { case 0: - error = thaw_super(sdp->sd_vfs); + error = thaw_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE, NULL); break; case 1: - error = freeze_super(sdp->sd_vfs); + error = freeze_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE, NULL); break; default: return -EINVAL; @@ -184,7 +186,7 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf) { unsigned int b = gfs2_withdrawn(sdp); - return snprintf(buf, PAGE_SIZE, "%u\n", b); + return sysfs_emit(buf, "%u\n", b); } static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len) @@ -326,7 +328,7 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len return -EINVAL; if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags)) fs_info(sdp, "demote interface used\n"); - rv = gfs2_glock_get(sdp, glnum, glops, 0, &gl); + rv = gfs2_glock_get(sdp, glnum, glops, NO_CREATE, &gl); if (rv) return rv; gfs2_glock_cb(gl, glmode); @@ -387,7 +389,7 @@ static struct kobj_type gfs2_ktype = { static ssize_t proto_name_show(struct gfs2_sbd *sdp, char *buf) { const struct lm_lockops *ops = sdp->sd_lockstruct.ls_ops; - return sprintf(buf, "%s\n", ops->lm_proto_name); + return sysfs_emit(buf, "%s\n", ops->lm_proto_name); } static ssize_t block_show(struct gfs2_sbd *sdp, char *buf) @@ -398,7 +400,7 @@ static ssize_t block_show(struct gfs2_sbd *sdp, char *buf) if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags)) val = 1; - ret = sprintf(buf, "%d\n", val); + ret = sysfs_emit(buf, "%d\n", val); return ret; } @@ -423,33 +425,27 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len) return len; } -static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf) -{ - int val = completion_done(&sdp->sd_wdack) ? 
1 : 0; - - return sprintf(buf, "%d\n", val); -} - -static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len) +static ssize_t withdraw_helper_status_store(struct gfs2_sbd *sdp, + const char *buf, + size_t len) { int ret, val; ret = kstrtoint(buf, 0, &val); if (ret) return ret; - - if ((val == 1) && - !strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) - complete(&sdp->sd_wdack); - else + if (val < 0 || val > 1) return -EINVAL; + + sdp->sd_withdraw_helper_status = val; + complete(&sdp->sd_withdraw_helper); return len; } static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - return sprintf(buf, "%d\n", ls->ls_first); + return sysfs_emit(buf, "%d\n", ls->ls_first); } static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len) @@ -482,7 +478,7 @@ out: static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags)); + return sysfs_emit(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags)); } int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid) @@ -540,18 +536,18 @@ out: static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - return sprintf(buf, "%d\n", ls->ls_recover_jid_done); + return sysfs_emit(buf, "%d\n", ls->ls_recover_jid_done); } static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - return sprintf(buf, "%d\n", ls->ls_recover_jid_status); + return sysfs_emit(buf, "%d\n", ls->ls_recover_jid_status); } static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) { - return sprintf(buf, "%d\n", sdp->sd_lockstruct.ls_jid); + return sysfs_emit(buf, "%d\n", sdp->sd_lockstruct.ls_jid); } static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) @@ -589,7 +585,7 @@ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); GDLM_ATTR(block, 0644, block_show, block_store); -GDLM_ATTR(withdraw, 0644, wdack_show, wdack_store); +GDLM_ATTR(withdraw, 0200, NULL, withdraw_helper_status_store); GDLM_ATTR(jid, 0644, jid_show, jid_store); GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store); GDLM_ATTR(first_done, 0444, first_done_show, NULL); @@ -616,7 +612,7 @@ static struct attribute *lock_module_attrs[] = { static ssize_t quota_scale_show(struct gfs2_sbd *sdp, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u %u\n", + return sysfs_emit(buf, "%u %u\n", sdp->sd_tune.gt_quota_scale_num, sdp->sd_tune.gt_quota_scale_den); } @@ -669,7 +665,7 @@ static struct gfs2_attr tune_attr_##name = __ATTR(name, 0644, show, store) #define TUNE_ATTR_2(name, store) \ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ { \ - return snprintf(buf, PAGE_SIZE, "%u\n", sdp->sd_tune.gt_##name); \ + return sysfs_emit(buf, "%u\n", sdp->sd_tune.gt_##name); \ } \ TUNE_ATTR_3(name, name##_show, store) @@ -688,6 +684,7 @@ TUNE_ATTR(statfs_slow, 0); TUNE_ATTR(new_files_jdata, 0); TUNE_ATTR(statfs_quantum, 1); TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); +TUNE_ATTR(withdraw_helper_timeout, 1); static struct attribute *tune_attrs[] = { &tune_attr_quota_warn_period.attr, @@ -698,6 +695,7 @@ static struct attribute *tune_attrs[] = { &tune_attr_statfs_quantum.attr, &tune_attr_quota_scale.attr, &tune_attr_new_files_jdata.attr, + 
&tune_attr_withdraw_helper_timeout.attr, NULL, }; @@ -754,7 +752,6 @@ fail_reg: fs_err(sdp, "error %d adding sysfs files\n", error); kobject_put(&sdp->sd_kobj); wait_for_completion(&sdp->sd_kobj_unregister); - sb->s_fs_info = NULL; return error; } @@ -767,10 +764,10 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp) wait_for_completion(&sdp->sd_kobj_unregister); } -static int gfs2_uevent(struct kobject *kobj, struct kobj_uevent_env *env) +static int gfs2_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { - struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); - struct super_block *s = sdp->sd_vfs; + const struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); + const struct super_block *s = sdp->sd_vfs; add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index a5deb9f86831..fcfbf68ec725 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -52,13 +52,19 @@ {(1UL << GLF_DEMOTE_IN_PROGRESS), "p" }, \ {(1UL << GLF_DIRTY), "y" }, \ {(1UL << GLF_LFLUSH), "f" }, \ - {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ - {(1UL << GLF_REPLY_PENDING), "r" }, \ - {(1UL << GLF_INITIAL), "I" }, \ - {(1UL << GLF_FROZEN), "F" }, \ + {(1UL << GLF_PENDING_REPLY), "R" }, \ + {(1UL << GLF_HAVE_REPLY), "r" }, \ + {(1UL << GLF_INITIAL), "a" }, \ + {(1UL << GLF_HAVE_FROZEN_REPLY), "F" }, \ {(1UL << GLF_LRU), "L" }, \ {(1UL << GLF_OBJECT), "o" }, \ - {(1UL << GLF_BLOCKING), "b" }) + {(1UL << GLF_BLOCKING), "b" }, \ + {(1UL << GLF_INSTANTIATE_NEEDED), "n" }, \ + {(1UL << GLF_INSTANTIATE_IN_PROG), "N" }, \ + {(1UL << GLF_TRY_TO_EVICT), "e" }, \ + {(1UL << GLF_VERIFY_DELETE), "E" }, \ + {(1UL << GLF_DEFER_DELETE), "s" }, \ + {(1UL << GLF_CANCELING), "C" }) #ifndef NUMPTY #define NUMPTY diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 63fec11ef2ce..6df65540e13d 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -49,7 +49,7 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, } BUG_ON(blocks == 0 && revokes == 0); - if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + if (gfs2_withdrawn(sdp)) return -EROFS; tr->tr_ip = ip; @@ -85,25 +85,30 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, */ down_read(&sdp->sd_log_flush_lock); + if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) + goto out_not_live; if (gfs2_log_try_reserve(sdp, tr, &extra_revokes)) goto reserved; + up_read(&sdp->sd_log_flush_lock); gfs2_log_reserve(sdp, tr, &extra_revokes); down_read(&sdp->sd_log_flush_lock); - -reserved: - gfs2_log_release_revokes(sdp, extra_revokes); if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { - gfs2_log_release_revokes(sdp, tr->tr_revokes); - up_read(&sdp->sd_log_flush_lock); + revokes = tr->tr_revokes + extra_revokes; + gfs2_log_release_revokes(sdp, revokes); gfs2_log_release(sdp, tr->tr_reserved); - sb_end_intwrite(sdp->sd_vfs); - return -EROFS; + goto out_not_live; } +reserved: + gfs2_log_release_revokes(sdp, extra_revokes); current->journal_info = tr; - return 0; + +out_not_live: + up_read(&sdp->sd_log_flush_lock); + sb_end_intwrite(sdp->sd_vfs); + return -EROFS; } int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, @@ -226,14 +231,35 @@ out: unlock_buffer(bh); } +void gfs2_trans_add_databufs(struct gfs2_glock *gl, struct folio *folio, + size_t from, size_t len) +{ + struct buffer_head *head = folio_buffers(folio); + unsigned int bsize = head->b_size; + struct buffer_head *bh; + size_t 
to = from + len; + size_t start, end; + + for (bh = head, start = 0; bh != head || !start; + bh = bh->b_this_page, start = end) { + end = start + bsize; + if (end <= from) + continue; + if (start >= to) + break; + set_buffer_uptodate(bh); + gfs2_trans_add_data(gl, bh); + } +} + void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + struct super_block *sb = sdp->sd_vfs; struct gfs2_bufdata *bd; struct gfs2_meta_header *mh; struct gfs2_trans *tr = current->journal_info; - enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); lock_buffer(bh); if (buffer_pinned(bh)) { @@ -245,12 +271,12 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) if (bd == NULL) { gfs2_log_unlock(sdp); unlock_buffer(bh); - lock_page(bh->b_page); + folio_lock(bh->b_folio); if (bh->b_private == NULL) bd = gfs2_alloc_bufdata(gl, bh); else bd = bh->b_private; - unlock_page(bh->b_page); + folio_unlock(bh->b_folio); lock_buffer(bh); gfs2_log_lock(sdp); } @@ -267,13 +293,15 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) (unsigned long long)bd->bd_bh->b_blocknr); BUG(); } - if (unlikely(state == SFS_FROZEN)) { - fs_info(sdp, "GFS2:adding buf while frozen\n"); - gfs2_assert_withdraw(sdp, 0); - } - if (unlikely(gfs2_withdrawn(sdp))) { + if (gfs2_withdrawn(sdp)) { fs_info(sdp, "GFS2:adding buf while withdrawn! 0x%llx\n", (unsigned long long)bd->bd_bh->b_blocknr); + goto out_unlock; + } + if (unlikely(sb->s_writers.frozen == SB_FREEZE_COMPLETE)) { + fs_info(sdp, "GFS2:adding buf while frozen\n"); + gfs2_withdraw(sdp); + goto out_unlock; } gfs2_pin(sdp, bd->bd_bh); mh->__pad0 = cpu_to_be64(0); diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index c76ad9a4c75a..790c55f59e61 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -34,17 +34,19 @@ static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned return rgd->rd_length; } -extern int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, - unsigned int blocks, unsigned int revokes, - unsigned long ip); -extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, - unsigned int revokes); - -extern void gfs2_trans_end(struct gfs2_sbd *sdp); -extern void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh); -extern void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh); -extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); -extern void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); -extern void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr); +int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, + unsigned int blocks, unsigned int revokes, + unsigned long ip); +int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, + unsigned int revokes); + +void gfs2_trans_end(struct gfs2_sbd *sdp); +void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh); +void gfs2_trans_add_databufs(struct gfs2_glock *gl, struct folio *folio, + size_t from, size_t len); +void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh); +void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); +void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); +void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr); #endif /* __TRANS_DOT_H__ */ diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 7a6aeffcdf5c..02603200846d 100644 --- a/fs/gfs2/util.c +++ 
b/fs/gfs2/util.c @@ -9,6 +9,7 @@ #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> +#include <linux/kthread.h> #include <linux/crc32.h> #include <linux/gfs2_ondisk.h> #include <linux/delay.h> @@ -57,7 +58,7 @@ int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, struct gfs2_inode *ip; ip = GFS2_I(jd->jd_inode); - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_RECOVER | GL_EXACT | GL_NOCACHE, &j_gh); if (error) { if (verbose) @@ -72,7 +73,7 @@ int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, "mount.\n"); goto out_unlock; } - error = gfs2_find_jhead(jd, &head, false); + error = gfs2_find_jhead(jd, &head); if (error) { if (verbose) fs_err(sdp, "Error parsing journal for spectator " @@ -93,222 +94,53 @@ out_unlock: } /** - * gfs2_freeze_lock - hold the freeze glock + * gfs2_freeze_lock_shared - hold the freeze glock * @sdp: the superblock - * @freeze_gh: pointer to the requested holder - * @caller_flags: any additional flags needed by the caller */ -int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh, - int caller_flags) +int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp) { - int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags; + int flags = LM_FLAG_RECOVER | GL_EXACT; int error; error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags, - freeze_gh); + &sdp->sd_freeze_gh); if (error && error != GLR_TRYFAILED) - fs_err(sdp, "can't lock the freeze lock: %d\n", error); + fs_err(sdp, "can't lock the freeze glock: %d\n", error); return error; } -void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh) +void gfs2_freeze_unlock(struct gfs2_sbd *sdp) { - if (gfs2_holder_initialized(freeze_gh)) - gfs2_glock_dq_uninit(freeze_gh); + if (gfs2_holder_initialized(&sdp->sd_freeze_gh)) + gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); } -static void signal_our_withdraw(struct gfs2_sbd *sdp) +static void do_withdraw(struct gfs2_sbd *sdp) { - struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl; - struct inode *inode; - struct gfs2_inode *ip; - struct gfs2_glock *i_gl; - u64 no_formal_ino; - int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - int ret = 0; - int tries; - - if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc) + down_write(&sdp->sd_log_flush_lock); + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + up_write(&sdp->sd_log_flush_lock); return; - - gfs2_ail_drain(sdp); /* frees all transactions */ - inode = sdp->sd_jdesc->jd_inode; - ip = GFS2_I(inode); - i_gl = ip->i_gl; - no_formal_ino = ip->i_no_formal_ino; - - /* Prevent any glock dq until withdraw recovery is complete */ - set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); - /* - * Don't tell dlm we're bailing until we have no more buffers in the - * wind. If journal had an IO error, the log code should just purge - * the outstanding buffers rather than submitting new IO. Making the - * file system read-only will flush the journal, etc. - * - * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown - * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write - * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and - * therefore we need to clear SDF_JOURNAL_LIVE manually. 
- */ - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - if (!sb_rdonly(sdp->sd_vfs)) { - struct gfs2_holder freeze_gh; - - gfs2_holder_mark_uninitialized(&freeze_gh); - if (sdp->sd_freeze_gl && - !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) { - ret = gfs2_freeze_lock(sdp, &freeze_gh, - log_write_allowed ? 0 : LM_FLAG_TRY); - if (ret == GLR_TRYFAILED) - ret = 0; - } - if (!ret) - gfs2_make_fs_ro(sdp); - /* - * Dequeue any pending non-system glock holders that can no - * longer be granted because the file system is withdrawn. - */ - gfs2_gl_dq_holders(sdp); - gfs2_freeze_unlock(&freeze_gh); - } - - if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ - if (!ret) - ret = -EIO; - clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); - goto skip_recovery; - } - /* - * Drop the glock for our journal so another node can recover it. - */ - if (gfs2_holder_initialized(&sdp->sd_journal_gh)) { - gfs2_glock_dq_wait(&sdp->sd_journal_gh); - gfs2_holder_uninit(&sdp->sd_journal_gh); - } - sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq(&sdp->sd_jinode_gh); - if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) { - /* Make sure gfs2_unfreeze works if partially-frozen */ - flush_work(&sdp->sd_freeze_work); - atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); - thaw_super(sdp->sd_vfs); - } else { - wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE, - TASK_UNINTERRUPTIBLE); - } - - /* - * holder_uninit to force glock_put, to force dlm to let go - */ - gfs2_holder_uninit(&sdp->sd_jinode_gh); - - /* - * Note: We need to be careful here: - * Our iput of jd_inode will evict it. The evict will dequeue its - * glock, but the glock dq will wait for the withdraw unless we have - * exception code in glock_dq. - */ - iput(inode); - sdp->sd_jdesc->jd_inode = NULL; - /* - * Wait until the journal inode's glock is freed. This allows try locks - * on other nodes to be successful, otherwise we remain the owner of - * the glock as far as dlm is concerned. - */ - if (i_gl->gl_ops->go_free) { - set_bit(GLF_FREEING, &i_gl->gl_flags); - wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE); } + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + up_write(&sdp->sd_log_flush_lock); - /* - * Dequeue the "live" glock, but keep a reference so it's never freed. - */ - gfs2_glock_hold(live_gl); - gfs2_glock_dq_wait(&sdp->sd_live_gh); - /* - * We enqueue the "live" glock in EX so that all other nodes - * get a demote request and act on it. We don't really want the - * lock in EX, so we send a "try" lock with 1CB to produce a callback. - */ - fs_warn(sdp, "Requesting recovery of jid %d.\n", - sdp->sd_lockstruct.ls_jid); - gfs2_holder_reinit(LM_ST_EXCLUSIVE, - LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | GL_NOPID, - &sdp->sd_live_gh); - msleep(GL_GLOCK_MAX_HOLD); - /* - * This will likely fail in a cluster, but succeed standalone: - */ - ret = gfs2_glock_nq(&sdp->sd_live_gh); + gfs2_ail_drain(sdp); /* frees all transactions */ - /* - * If we actually got the "live" lock in EX mode, there are no other - * nodes available to replay our journal. So we try to replay it - * ourselves. We hold the "live" glock to prevent other mounters - * during recovery, then just dequeue it and reacquire it in our - * normal SH mode. Just in case the problem that caused us to - * withdraw prevents us from recovering our journal (e.g. io errors - * and such) we still check if the journal is clean before proceeding - * but we may wait forever until another mounter does the recovery. - */ - if (ret == 0) { - fs_warn(sdp, "No other mounters found. 
Trying to recover our " - "own journal jid %d.\n", sdp->sd_lockstruct.ls_jid); - if (gfs2_recover_journal(sdp->sd_jdesc, 1)) - fs_warn(sdp, "Unable to recover our journal jid %d.\n", - sdp->sd_lockstruct.ls_jid); - gfs2_glock_dq_wait(&sdp->sd_live_gh); - gfs2_holder_reinit(LM_ST_SHARED, - LM_FLAG_NOEXP | GL_EXACT | GL_NOPID, - &sdp->sd_live_gh); - gfs2_glock_nq(&sdp->sd_live_gh); - } + wake_up(&sdp->sd_logd_waitq); + wake_up(&sdp->sd_quota_wait); - gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */ - clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); + wait_event_timeout(sdp->sd_log_waitq, + gfs2_log_is_empty(sdp), + HZ * 5); - /* - * At this point our journal is evicted, so we need to get a new inode - * for it. Once done, we need to call gfs2_find_jhead which - * calls gfs2_map_journal_extents to map it for us again. - * - * Note that we don't really want it to look up a FREE block. The - * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup - * which would otherwise fail because it requires grabbing an rgrp - * glock, which would fail with -EIO because we're withdrawing. - */ - inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN, - sdp->sd_jdesc->jd_no_addr, no_formal_ino, - GFS2_BLKST_FREE); - if (IS_ERR(inode)) { - fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n", - sdp->sd_lockstruct.ls_jid, PTR_ERR(inode)); - goto skip_recovery; - } - sdp->sd_jdesc->jd_inode = inode; - d_mark_dontcache(inode); + sdp->sd_vfs->s_flags |= SB_RDONLY; /* - * Now wait until recovery is complete. + * Dequeue any pending non-system glock holders that can no + * longer be granted because the file system is withdrawn. */ - for (tries = 0; tries < 10; tries++) { - ret = check_journal_clean(sdp, sdp->sd_jdesc, false); - if (!ret) - break; - msleep(HZ); - fs_warn(sdp, "Waiting for journal recovery jid %d.\n", - sdp->sd_lockstruct.ls_jid); - } -skip_recovery: - if (!ret) - fs_warn(sdp, "Journal recovery complete for jid %d.\n", - sdp->sd_lockstruct.ls_jid); - else - fs_warn(sdp, "Journal recovery skipped for jid %d until next " - "mount.\n", sdp->sd_lockstruct.ls_jid); - fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held); - sdp->sd_glock_dqs_held = 0; - wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY); + gfs2_withdraw_glocks(sdp); } void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) @@ -327,50 +159,108 @@ void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) va_end(args); } -int gfs2_withdraw(struct gfs2_sbd *sdp) +/** + * gfs2_offline_uevent - run gfs2_withdraw_helper + * @sdp: The GFS2 superblock + */ +static bool gfs2_offline_uevent(struct gfs2_sbd *sdp) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - const struct lm_lockops *lm = ls->ls_ops; + long timeout; - if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && - test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) { - if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags)) - return -1; + /* Skip protocol "lock_nolock" which doesn't require shared storage. */ + if (!ls->ls_ops->lm_lock) + return false; + + /* + * The gfs2_withdraw_helper replies by writing one of the following + * status codes to "/sys$DEVPATH/lock_module/withdraw": + * + * 0 - The shared block device has been marked inactive. Future write + * operations will fail. + * + * 1 - The shared block device may still be active and carry out + * write operations. + * + * If the "offline" uevent isn't reacted upon in time, the event + * handler is assumed to have failed. 
+ */ - wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG, - TASK_UNINTERRUPTIBLE); - return -1; + sdp->sd_withdraw_helper_status = -1; + kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); + timeout = gfs2_tune_get(sdp, gt_withdraw_helper_timeout) * HZ; + wait_for_completion_timeout(&sdp->sd_withdraw_helper, timeout); + if (sdp->sd_withdraw_helper_status == -1) { + fs_err(sdp, "%s timed out\n", "gfs2_withdraw_helper"); + } else { + fs_err(sdp, "%s %s with status %d\n", + "gfs2_withdraw_helper", + sdp->sd_withdraw_helper_status == 0 ? + "succeeded" : "failed", + sdp->sd_withdraw_helper_status); } + return sdp->sd_withdraw_helper_status == 0; +} + +void gfs2_withdraw_func(struct work_struct *work) +{ + struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_withdraw_work); + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + const struct lm_lockops *lm = ls->ls_ops; + bool device_inactive; - set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); + if (test_bit(SDF_KILL, &sdp->sd_flags)) + return; - if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { - fs_err(sdp, "about to withdraw this file system\n"); - BUG_ON(sdp->sd_args.ar_debug); + BUG_ON(sdp->sd_args.ar_debug); - signal_our_withdraw(sdp); + /* + * Try to deactivate the shared block device so that no more I/O will + * go through. If successful, we can immediately trigger remote + * recovery. Otherwise, we must first empty out all our local caches. + */ - kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); + device_inactive = gfs2_offline_uevent(sdp); - if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) - wait_for_completion(&sdp->sd_wdack); + if (sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE && !device_inactive) + panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname); - if (lm->lm_unmount) { - fs_err(sdp, "telling LM to unmount\n"); - lm->lm_unmount(sdp); + if (lm->lm_unmount) { + if (device_inactive) { + lm->lm_unmount(sdp, false); + do_withdraw(sdp); + } else { + do_withdraw(sdp); + lm->lm_unmount(sdp, false); } - set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); - fs_err(sdp, "File system withdrawn\n"); + } else { + do_withdraw(sdp); + } + + fs_err(sdp, "file system withdrawn\n"); +} + +void gfs2_withdraw(struct gfs2_sbd *sdp) +{ + if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW || + sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE) { + if (test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) + return; + dump_stack(); - clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); - smp_mb__after_atomic(); - wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG); + /* + * There is no need to withdraw when the superblock hasn't been + * fully initialized, yet. + */ + if (!(sdp->sd_vfs->s_flags & SB_BORN)) + return; + fs_err(sdp, "about to withdraw this file system\n"); + schedule_work(&sdp->sd_withdraw_work); + return; } if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname); - - return -1; } /* @@ -378,28 +268,17 @@ int gfs2_withdraw(struct gfs2_sbd *sdp) */ void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, - const char *function, char *file, unsigned int line, - bool delayed) + const char *function, char *file, unsigned int line) { if (gfs2_withdrawn(sdp)) return; fs_err(sdp, - "fatal: assertion \"%s\" failed\n" - " function = %s, file = %s, line = %u\n", + "fatal: assertion \"%s\" failed - " + "function = %s, file = %s, line = %u\n", assertion, function, file, line); - /* - * If errors=panic was specified on mount, it won't help to delay the - * withdraw. 
- */ - if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) - delayed = false; - - if (delayed) - gfs2_withdraw_delayed(sdp); - else - gfs2_withdraw(sdp); + gfs2_withdraw(sdp); dump_stack(); } @@ -416,7 +295,8 @@ void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, return; if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) - fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n", + fs_warn(sdp, "warning: assertion \"%s\" failed - " + "function = %s, file = %s, line = %u\n", assertion, function, file, line); if (sdp->sd_args.ar_debug) @@ -425,10 +305,10 @@ void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, dump_stack(); if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) - panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", + panic("GFS2: fsid=%s: warning: assertion \"%s\" failed - " + "function = %s, file = %s, line = %u\n", sdp->sd_fsname, assertion, - sdp->sd_fsname, function, file, line); + function, file, line); sdp->sd_last_warning = jiffies; } @@ -441,7 +321,8 @@ void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function, char *file, unsigned int line) { gfs2_lm(sdp, - "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n", + "fatal: filesystem consistency error - " + "function = %s, file = %s, line = %u\n", function, file, line); gfs2_withdraw(sdp); } @@ -456,9 +337,9 @@ void gfs2_consist_inode_i(struct gfs2_inode *ip, struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); gfs2_lm(sdp, - "fatal: filesystem consistency error\n" - " inode = %llu %llu\n" - " function = %s, file = %s, line = %u\n", + "fatal: filesystem consistency error - " + "inode = %llu %llu, " + "function = %s, file = %s, line = %u\n", (unsigned long long)ip->i_no_formal_ino, (unsigned long long)ip->i_no_addr, function, file, line); @@ -479,9 +360,9 @@ void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); gfs2_rgrp_dump(NULL, rgd, fs_id_buf); gfs2_lm(sdp, - "fatal: filesystem consistency error\n" - " RG = %llu\n" - " function = %s, file = %s, line = %u\n", + "fatal: filesystem consistency error - " + "RG = %llu, " + "function = %s, file = %s, line = %u\n", (unsigned long long)rgd->rd_addr, function, file, line); gfs2_dump_glock(NULL, rgd->rd_gl, 1); @@ -490,46 +371,36 @@ void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, /* * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw - * Returns: -1 if this call withdrew the machine, - * -2 if it was already withdrawn */ -int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, - const char *type, const char *function, char *file, - unsigned int line) +void gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, + const char *function, char *file, + unsigned int line) { - int me; - gfs2_lm(sdp, - "fatal: invalid metadata block\n" - " bh = %llu (%s)\n" - " function = %s, file = %s, line = %u\n", - (unsigned long long)bh->b_blocknr, type, + "fatal: invalid metadata block - " + "bh = %llu (bad magic number), " + "function = %s, file = %s, line = %u\n", + (unsigned long long)bh->b_blocknr, function, file, line); - me = gfs2_withdraw(sdp); - return (me) ? 
-1 : -2; + gfs2_withdraw(sdp); } /* * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw - * Returns: -1 if this call withdrew the machine, - * -2 if it was already withdrawn */ -int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, - u16 type, u16 t, const char *function, - char *file, unsigned int line) +void gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, + u16 type, u16 t, const char *function, + char *file, unsigned int line) { - int me; - gfs2_lm(sdp, - "fatal: invalid metadata block\n" - " bh = %llu (type: exp=%u, found=%u)\n" - " function = %s, file = %s, line = %u\n", + "fatal: invalid metadata block - " + "bh = %llu (type: exp=%u, found=%u), " + "function = %s, file = %s, line = %u\n", (unsigned long long)bh->b_blocknr, type, t, function, file, line); - me = gfs2_withdraw(sdp); - return (me) ? -1 : -2; + gfs2_withdraw(sdp); } /* @@ -538,33 +409,29 @@ int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, * 0 if it was already withdrawn */ -int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, - unsigned int line) +void gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, + unsigned int line) { gfs2_lm(sdp, - "fatal: I/O error\n" - " function = %s, file = %s, line = %u\n", + "fatal: I/O error - " + "function = %s, file = %s, line = %u\n", function, file, line); - return gfs2_withdraw(sdp); + gfs2_withdraw(sdp); } /* - * gfs2_io_error_bh_i - Flag a buffer I/O error - * @withdraw: withdraw the filesystem + * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw */ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, - const char *function, char *file, unsigned int line, - bool withdraw) + const char *function, char *file, unsigned int line) { if (gfs2_withdrawn(sdp)) return; - fs_err(sdp, "fatal: I/O error\n" - " block = %llu\n" - " function = %s, file = %s, line = %u\n", + fs_err(sdp, "fatal: I/O error - " + "block = %llu, " + "function = %s, file = %s, line = %u\n", (unsigned long long)bh->b_blocknr, function, file, line); - if (withdraw) - gfs2_withdraw(sdp); + gfs2_withdraw(sdp); } - diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 78ec190f4155..ffcc47d6b0b4 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -37,24 +37,14 @@ do { \ void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, - const char *function, char *file, unsigned int line, - bool delayed); + const char *function, char *file, unsigned int line); #define gfs2_assert_withdraw(sdp, assertion) \ ({ \ bool _bool = (assertion); \ if (unlikely(!_bool)) \ gfs2_assert_withdraw_i((sdp), #assertion, \ - __func__, __FILE__, __LINE__, false); \ - !_bool; \ - }) - -#define gfs2_assert_withdraw_delayed(sdp, assertion) \ - ({ \ - bool _bool = (assertion); \ - if (unlikely(!_bool)) \ - gfs2_assert_withdraw_i((sdp), #assertion, \ - __func__, __FILE__, __LINE__, true); \ + __func__, __FILE__, __LINE__); \ !_bool; \ }) @@ -91,9 +81,9 @@ void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, gfs2_consist_rgrpd_i((rgd), __func__, __FILE__, __LINE__) -int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, - const char *type, const char *function, - char *file, unsigned int line); +void gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, + const char *function, + char *file, unsigned int line); static inline int gfs2_meta_check(struct gfs2_sbd *sdp, struct buffer_head *bh) @@ -108,10 +98,10 @@ static inline int gfs2_meta_check(struct gfs2_sbd *sdp, return 
0; } -int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, - u16 type, u16 t, - const char *function, - char *file, unsigned int line); +void gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, + u16 type, u16 t, + const char *function, + char *file, unsigned int line); static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp, struct buffer_head *bh, @@ -122,12 +112,16 @@ static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp, struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; u32 magic = be32_to_cpu(mh->mh_magic); u16 t = be32_to_cpu(mh->mh_type); - if (unlikely(magic != GFS2_MAGIC)) - return gfs2_meta_check_ii(sdp, bh, "magic number", function, - file, line); - if (unlikely(t != type)) - return gfs2_metatype_check_ii(sdp, bh, type, t, function, - file, line); + if (unlikely(magic != GFS2_MAGIC)) { + gfs2_meta_check_ii(sdp, bh, function, + file, line); + return -EIO; + } + if (unlikely(t != type)) { + gfs2_metatype_check_ii(sdp, bh, type, t, function, + file, line); + return -EIO; + } return 0; } @@ -144,28 +138,23 @@ static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type, } -int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, - char *file, unsigned int line); +void gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, + char *file, unsigned int line); -extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, - bool verbose); -extern int gfs2_freeze_lock(struct gfs2_sbd *sdp, - struct gfs2_holder *freeze_gh, int caller_flags); -extern void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh); +int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, + bool verbose); +int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp); +void gfs2_freeze_unlock(struct gfs2_sbd *sdp); #define gfs2_io_error(sdp) \ gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__) void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, - const char *function, char *file, unsigned int line, - bool withdraw); - -#define gfs2_io_error_bh_wd(sdp, bh) \ -gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true) + const char *function, char *file, unsigned int line); #define gfs2_io_error_bh(sdp, bh) \ -gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false) +gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__) extern struct kmem_cache *gfs2_glock_cachep; @@ -190,37 +179,12 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, } /** - * gfs2_withdraw_delayed - withdraw as soon as possible without deadlocks - * @sdp: the superblock - */ -static inline void gfs2_withdraw_delayed(struct gfs2_sbd *sdp) -{ - set_bit(SDF_WITHDRAWING, &sdp->sd_flags); -} - -/** - * gfs2_withdrawn - test whether the file system is withdrawing or withdrawn + * gfs2_withdrawn - test whether the file system is withdrawn * @sdp: the superblock */ static inline bool gfs2_withdrawn(struct gfs2_sbd *sdp) { - return test_bit(SDF_WITHDRAWN, &sdp->sd_flags) || - test_bit(SDF_WITHDRAWING, &sdp->sd_flags); -} - -/** - * gfs2_withdrawing - check if a withdraw is pending - * @sdp: the superblock - */ -static inline bool gfs2_withdrawing(struct gfs2_sbd *sdp) -{ - return test_bit(SDF_WITHDRAWING, &sdp->sd_flags) && - !test_bit(SDF_WITHDRAWN, &sdp->sd_flags); -} - -static inline bool gfs2_withdraw_in_prog(struct gfs2_sbd *sdp) -{ - return test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); + return unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)); } #define gfs2_tune_get(sdp, field) \ @@ 
-228,6 +192,8 @@ gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) __printf(2, 3) void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...); -int gfs2_withdraw(struct gfs2_sbd *sdp); + +void gfs2_withdraw_func(struct work_struct *work); +void gfs2_withdraw(struct gfs2_sbd *sdp); #endif /* __UTIL_DOT_H__ */ diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 518c0677e12a..df9c93de94c7 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -96,30 +96,34 @@ static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh, return -EIO; for (ea = GFS2_EA_BH2FIRST(bh);; prev = ea, ea = GFS2_EA2NEXT(ea)) { - if (!GFS2_EA_REC_LEN(ea)) - goto fail; + if (!GFS2_EA_REC_LEN(ea)) { + gfs2_consist_inode(ip); + return -EIO; + } if (!(bh->b_data <= (char *)ea && (char *)GFS2_EA2NEXT(ea) <= - bh->b_data + bh->b_size)) - goto fail; - if (!gfs2_eatype_valid(sdp, ea->ea_type)) - goto fail; + bh->b_data + bh->b_size)) { + gfs2_consist_inode(ip); + return -EIO; + } + if (!gfs2_eatype_valid(sdp, ea->ea_type)) { + gfs2_consist_inode(ip); + return -EIO; + } error = ea_call(ip, bh, ea, prev, data); if (error) return error; if (GFS2_EA_IS_LAST(ea)) { if ((char *)GFS2_EA2NEXT(ea) != - bh->b_data + bh->b_size) - goto fail; + bh->b_data + bh->b_size) { + gfs2_consist_inode(ip); + return -EIO; + } break; } } return error; - -fail: - gfs2_consist_inode(ip); - return -EIO; } static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data) @@ -311,7 +315,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, ea->ea_num_ptrs = 0; } - ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_ctime_current(&ip->i_inode); __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC); gfs2_trans_end(sdp); @@ -639,7 +643,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) u64 block; int error; - error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL); + error = gfs2_alloc_blocks(ip, &block, &n, 0); if (error) return error; gfs2_trans_remove_revoke(sdp, block, 1); @@ -701,7 +705,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, int mh_size = sizeof(struct gfs2_meta_header); unsigned int n = 1; - error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL); + error = gfs2_alloc_blocks(ip, &block, &n, 0); if (error) return error; gfs2_trans_remove_revoke(sdp, block, 1); @@ -763,7 +767,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, if (error) goto out_end_trans; - ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_ctime_current(&ip->i_inode); __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC); out_end_trans: @@ -888,7 +892,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, if (es->es_el) ea_set_remove_stuffed(ip, es->es_el); - ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_ctime_current(&ip->i_inode); __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC); gfs2_trans_end(GFS2_SB(&ip->i_inode)); @@ -1002,7 +1006,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, } else { u64 blk; unsigned int n = 1; - error = gfs2_alloc_blocks(ip, &blk, &n, 0, NULL); + error = gfs2_alloc_blocks(ip, &blk, &n, 0); if (error) return error; gfs2_trans_remove_revoke(sdp, blk, 1); @@ -1106,7 +1110,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) ea->ea_type = GFS2_EATYPE_UNUSED; } - ip->i_inode.i_ctime = current_time(&ip->i_inode); + inode_set_ctime_current(&ip->i_inode); __mark_inode_dirty(&ip->i_inode, 
I_DIRTY_DATASYNC); gfs2_trans_end(GFS2_SB(&ip->i_inode)); @@ -1225,7 +1229,7 @@ int __gfs2_xattr_set(struct inode *inode, const char *name, } static int gfs2_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) @@ -1379,7 +1383,7 @@ out: return error; } -static int ea_dealloc_block(struct gfs2_inode *ip) +static int ea_dealloc_block(struct gfs2_inode *ip, bool initialized) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd; @@ -1412,7 +1416,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip) ip->i_eattr = 0; gfs2_add_inode_blocks(&ip->i_inode, -1); - if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) { + if (initialized) { error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { gfs2_trans_add_meta(ip->i_gl, dibh); @@ -1431,11 +1435,12 @@ out_gunlock: /** * gfs2_ea_dealloc - deallocate the extended attribute fork * @ip: the inode + * @initialized: xattrs have been initialized * * Returns: errno */ -int gfs2_ea_dealloc(struct gfs2_inode *ip) +int gfs2_ea_dealloc(struct gfs2_inode *ip, bool initialized) { int error; @@ -1447,7 +1452,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) if (error) return error; - if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) { + if (initialized) { error = ea_foreach(ip, ea_dealloc_unstuffed, NULL); if (error) goto out_quota; @@ -1459,7 +1464,7 @@ } } - error = ea_dealloc_block(ip); + error = ea_dealloc_block(ip, initialized); out_quota: gfs2_quota_unhold(ip); @@ -1494,16 +1499,14 @@ static const struct xattr_handler gfs2_xattr_trusted_handler = { .set = gfs2_xattr_set, }; -const struct xattr_handler *gfs2_xattr_handlers_max[] = { +const struct xattr_handler * const gfs2_xattr_handlers_max[] = { /* GFS2_FS_FORMAT_MAX */ &gfs2_xattr_trusted_handler, /* GFS2_FS_FORMAT_MIN */ &gfs2_xattr_user_handler, &gfs2_xattr_security_handler, - &posix_acl_access_xattr_handler, - &posix_acl_default_xattr_handler, NULL, }; -const struct xattr_handler **gfs2_xattr_handlers_min = gfs2_xattr_handlers_max + 1; +const struct xattr_handler * const *gfs2_xattr_handlers_min = gfs2_xattr_handlers_max + 1; diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h index 2aed9d7d483d..3c9788e0e137 100644 --- a/fs/gfs2/xattr.h +++ b/fs/gfs2/xattr.h @@ -50,14 +50,14 @@ struct gfs2_ea_location { struct gfs2_ea_header *el_prev; }; -extern int __gfs2_xattr_set(struct inode *inode, const char *name, - const void *value, size_t size, - int flags, int type); -extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); -extern int gfs2_ea_dealloc(struct gfs2_inode *ip); +int __gfs2_xattr_set(struct inode *inode, const char *name, + const void *value, size_t size, + int flags, int type); +ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); +int gfs2_ea_dealloc(struct gfs2_inode *ip, bool initialized); /* Exported to acl.c */ -extern int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data); +int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data); #endif /* __EATTR_DOT_H__ */
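
The comment in gfs2_offline_uevent() in the fs/gfs2/util.c hunk above describes a small userspace protocol: on the "offline" uevent, a helper tries to deactivate the shared block device and reports the outcome by writing a status code to /sys$DEVPATH/lock_module/withdraw (0 meaning the device is inactive, 1 meaning it may still carry out writes). A minimal sketch of such a helper might look as follows; deactivate_device() is a hypothetical placeholder, since how a real helper fences the device (a dm error target, multipath, power fencing) depends entirely on the cluster configuration.

#include <stdio.h>
#include <stdlib.h>

/* Placeholder: return 0 if the device was successfully deactivated. */
static int deactivate_device(const char *devpath)
{
	(void)devpath;
	return -1;	/* this sketch assumes deactivation failed */
}

int main(void)
{
	const char *devpath = getenv("DEVPATH");  /* set by the uevent */
	char path[4096];
	FILE *f;
	int status;

	if (!devpath)
		return EXIT_FAILURE;

	/* 0: device marked inactive; 1: device may still be active. */
	status = deactivate_device(devpath) == 0 ? 0 : 1;

	snprintf(path, sizeof(path), "/sys%s/lock_module/withdraw", devpath);
	f = fopen(path, "w");
	if (!f)
		return EXIT_FAILURE;
	fprintf(f, "%d\n", status);
	fclose(f);
	return EXIT_SUCCESS;
}

The helper must reply within gt_withdraw_helper_timeout seconds; if it never writes a status, the kernel side times out and proceeds as if deactivation had failed.
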

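For reference, the range walk in the new gfs2_trans_add_databufs() in the fs/gfs2/trans.c hunk above visits only the buffer heads of a folio that overlap the byte range [from, from + len). The standalone sketch below reproduces the same overlap arithmetic with a plain loop standing in for the circular buffer-head list; all names are illustrative.

#include <stdio.h>
#include <stddef.h>

static void walk_databufs(size_t folio_size, size_t bsize,
			  size_t from, size_t len)
{
	size_t to = from + len;
	size_t start, end;

	for (start = 0; start < folio_size; start = end) {
		end = start + bsize;
		if (end <= from)	/* block ends before the range */
			continue;
		if (start >= to)	/* block starts after the range */
			break;
		printf("block [%zu, %zu) overlaps [%zu, %zu)\n",
		       start, end, from, to);
	}
}

int main(void)
{
	/* 4 KiB folio, 1 KiB blocks, a 100-byte write at offset 1500. */
	walk_databufs(4096, 1024, 1500, 100);
	return 0;
}

With these numbers only the block [1024, 2048) is visited, which mirrors how the kernel function marks just the overlapping buffers uptodate and adds them to the transaction.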