Diffstat (limited to 'fs/gfs2')
-rw-r--r--  fs/gfs2/Kconfig          2
-rw-r--r--  fs/gfs2/acl.c            6
-rw-r--r--  fs/gfs2/acl.h            8
-rw-r--r--  fs/gfs2/aops.c         323
-rw-r--r--  fs/gfs2/aops.h           5
-rw-r--r--  fs/gfs2/bmap.c         181
-rw-r--r--  fs/gfs2/bmap.h          40
-rw-r--r--  fs/gfs2/dentry.c        31
-rw-r--r--  fs/gfs2/dir.c           54
-rw-r--r--  fs/gfs2/dir.h           38
-rw-r--r--  fs/gfs2/export.c         2
-rw-r--r--  fs/gfs2/file.c         194
-rw-r--r--  fs/gfs2/glock.c       1116
-rw-r--r--  fs/gfs2/glock.h        160
-rw-r--r--  fs/gfs2/glops.c        311
-rw-r--r--  fs/gfs2/glops.h          4
-rw-r--r--  fs/gfs2/incore.h        78
-rw-r--r--  fs/gfs2/inode.c        326
-rw-r--r--  fs/gfs2/inode.h         60
-rw-r--r--  fs/gfs2/lock_dlm.c     240
-rw-r--r--  fs/gfs2/log.c          160
-rw-r--r--  fs/gfs2/log.h           57
-rw-r--r--  fs/gfs2/lops.c         140
-rw-r--r--  fs/gfs2/lops.h          22
-rw-r--r--  fs/gfs2/main.c          21
-rw-r--r--  fs/gfs2/meta_io.c      138
-rw-r--r--  fs/gfs2/meta_io.h       24
-rw-r--r--  fs/gfs2/ops_fstype.c   313
-rw-r--r--  fs/gfs2/quota.c        778
-rw-r--r--  fs/gfs2/quota.h         44
-rw-r--r--  fs/gfs2/recovery.c      66
-rw-r--r--  fs/gfs2/recovery.h      20
-rw-r--r--  fs/gfs2/rgrp.c          42
-rw-r--r--  fs/gfs2/rgrp.h          85
-rw-r--r--  fs/gfs2/super.c        537
-rw-r--r--  fs/gfs2/super.h         51
-rw-r--r--  fs/gfs2/sys.c           91
-rw-r--r--  fs/gfs2/trace_gfs2.h    16
-rw-r--r--  fs/gfs2/trans.c         62
-rw-r--r--  fs/gfs2/trans.h         26
-rw-r--r--  fs/gfs2/util.c         443
-rw-r--r--  fs/gfs2/util.h          98
-rw-r--r--  fs/gfs2/xattr.c         61
-rw-r--r--  fs/gfs2/xattr.h         12
44 files changed, 3209 insertions(+), 3277 deletions(-)
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 03c966840422..7bd231d16d4a 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -1,9 +1,9 @@
# SPDX-License-Identifier: GPL-2.0-only
config GFS2_FS
tristate "GFS2 file system support"
+ select BUFFER_HEAD
select FS_POSIX_ACL
select CRC32
- select LIBCRC32C
select QUOTACTL
select FS_IOMAP
help
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 3dcde4912413..443640e6fb9c 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -109,7 +109,7 @@ out:
return error;
}
-int gfs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
struct inode *inode = d_inode(dentry);
@@ -135,14 +135,14 @@ int gfs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
mode = inode->i_mode;
if (type == ACL_TYPE_ACCESS && acl) {
- ret = posix_acl_update_mode(&init_user_ns, inode, &mode, &acl);
+ ret = posix_acl_update_mode(&nop_mnt_idmap, inode, &mode, &acl);
if (ret)
goto unlock;
}
ret = __gfs2_set_acl(inode, acl, type);
if (!ret && mode != inode->i_mode) {
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode->i_mode = mode;
mark_inode_dirty(inode);
}
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index b8de8c148f5c..82f5b09c04e6 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -11,9 +11,9 @@
#define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12)
-extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu);
-extern int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-extern int gfs2_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
- struct posix_acl *acl, int type);
+struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu);
+int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct posix_acl *acl, int type);
#endif /* __ACL_DOT_H__ */
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index e782b4f1d104..e79ad087512a 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -37,27 +37,6 @@
#include "aops.h"
-void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
- unsigned int from, unsigned int len)
-{
- struct buffer_head *head = page_buffers(page);
- unsigned int bsize = head->b_size;
- struct buffer_head *bh;
- unsigned int to = from + len;
- unsigned int start, end;
-
- for (bh = head, start = 0; bh != head || !start;
- bh = bh->b_this_page, start = end) {
- end = start + bsize;
- if (end <= from)
- continue;
- if (start >= to)
- break;
- set_buffer_uptodate(bh);
- gfs2_trans_add_data(ip->i_gl, bh);
- }
-}
-
/**
* gfs2_get_block_noalloc - Fills in a buffer head with details about a block
* @inode: The inode
@@ -82,90 +61,90 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
}
/**
- * gfs2_write_jdata_page - gfs2 jdata-specific version of block_write_full_page
- * @page: The page to write
+ * gfs2_write_jdata_folio - gfs2 jdata-specific version of block_write_full_folio
+ * @folio: The folio to write
* @wbc: The writeback control
*
- * This is the same as calling block_write_full_page, but it also
+ * This is the same as calling block_write_full_folio, but it also
* writes pages outside of i_size
*/
-static int gfs2_write_jdata_page(struct page *page,
+static int gfs2_write_jdata_folio(struct folio *folio,
struct writeback_control *wbc)
{
- struct inode * const inode = page->mapping->host;
+ struct inode * const inode = folio->mapping->host;
loff_t i_size = i_size_read(inode);
- const pgoff_t end_index = i_size >> PAGE_SHIFT;
- unsigned offset;
/*
- * The page straddles i_size. It must be zeroed out on each and every
+ * The folio straddles i_size. It must be zeroed out on each and every
* writepage invocation because it may be mmapped. "A file is mapped
* in multiples of the page size. For a file that is not a multiple of
- * the page size, the remaining memory is zeroed when mapped, and
+ * the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
- offset = i_size & (PAGE_SIZE - 1);
- if (page->index == end_index && offset)
- zero_user_segment(page, offset, PAGE_SIZE);
+ if (folio_pos(folio) < i_size && i_size < folio_next_pos(folio))
+ folio_zero_segment(folio, offset_in_folio(folio, i_size),
+ folio_size(folio));
- return __block_write_full_page(inode, page, gfs2_get_block_noalloc, wbc,
- end_buffer_async_write);
+ return __block_write_full_folio(inode, folio, gfs2_get_block_noalloc,
+ wbc);
}
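
The straddle test above expresses the old end_index/offset page arithmetic directly in folio terms. A minimal userspace sketch of the same rule, with plain byte offsets standing in for folio_pos()/folio_size() (the helper name here is illustrative, not kernel API):

#include <stdio.h>
#include <string.h>

/* Zero the tail of a folio-sized buffer when i_size falls strictly
 * inside the folio's byte range, as gfs2_write_jdata_folio() does. */
static void zero_eof_tail(char *buf, long long folio_pos,
			  size_t folio_size, long long i_size)
{
	if (folio_pos < i_size && i_size < folio_pos + (long long)folio_size)
		memset(buf + (i_size - folio_pos), 0,
		       folio_size - (i_size - folio_pos));
}

int main(void)
{
	char buf[4096];

	memset(buf, 'x', sizeof(buf));
	/* i_size = 10000 lands inside the folio covering [8192, 12288) */
	zero_eof_tail(buf, 8192, sizeof(buf), 10000);
	printf("byte 1807: '%c', byte 1808: %d\n", buf[1807], buf[1808]);
	return 0;
}
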
/**
- * __gfs2_jdata_writepage - The core of jdata writepage
- * @page: The page to write
+ * __gfs2_jdata_write_folio - The core of jdata writepage
+ * @folio: The folio to write
* @wbc: The writeback control
*
- * This is shared between writepage and writepages and implements the
- * core of the writepage operation. If a transaction is required then
- * PageChecked will have been set and the transaction will have
+ * Implements the core of write back. If a transaction is required then
+ * the checked flag will have been set and the transaction will have
* already been started before this is called.
*/
-
-static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
+static int __gfs2_jdata_write_folio(struct folio *folio,
+ struct writeback_control *wbc)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_sbd *sdp = GFS2_SB(inode);
- if (PageChecked(page)) {
- ClearPageChecked(page);
- if (!page_has_buffers(page)) {
- create_empty_buffers(page, inode->i_sb->s_blocksize,
- BIT(BH_Dirty)|BIT(BH_Uptodate));
+ if (folio_test_checked(folio)) {
+ folio_clear_checked(folio);
+ if (!folio_buffers(folio)) {
+ create_empty_buffers(folio,
+ inode->i_sb->s_blocksize,
+ BIT(BH_Dirty)|BIT(BH_Uptodate));
}
- gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize);
+ gfs2_trans_add_databufs(ip->i_gl, folio, 0, folio_size(folio));
}
- return gfs2_write_jdata_page(page, wbc);
+ return gfs2_write_jdata_folio(folio, wbc);
}
/**
- * gfs2_jdata_writepage - Write complete page
- * @page: Page to write
+ * gfs2_jdata_writeback - Write jdata folios to the log
+ * @mapping: The mapping to write
* @wbc: The writeback control
*
* Returns: errno
- *
*/
-
-static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
+int gfs2_jdata_writeback(struct address_space *mapping, struct writeback_control *wbc)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_sbd *sdp = GFS2_SB(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
+ struct folio *folio = NULL;
+ int error;
- if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
- goto out;
- if (PageChecked(page) || current->journal_info)
- goto out_ignore;
- return __gfs2_jdata_writepage(page, wbc);
+ BUG_ON(current->journal_info);
+ if (gfs2_assert_withdraw(sdp, ip->i_gl->gl_state == LM_ST_EXCLUSIVE))
+ return 0;
-out_ignore:
- redirty_page_for_writepage(wbc, page);
-out:
- unlock_page(page);
- return 0;
+ while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
+ if (folio_test_checked(folio)) {
+ folio_redirty_for_writepage(wbc, folio);
+ folio_unlock(folio);
+ continue;
+ }
+ error = __gfs2_jdata_write_folio(folio, wbc);
+ }
+
+ return error;
}
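
gfs2_jdata_writeback() above adopts the writeback_iter() pattern: the iterator hands back each dirty folio locked, and the status the loop body stores in error is consumed by the next writeback_iter() call, which also terminates the iteration and returns the final status through the same variable. A schematic of the loop shape (kernel-style sketch; should_skip() and write_one_folio() are stand-ins for the filesystem-specific steps):

	struct folio *folio = NULL;
	int error = 0;

	while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
		if (should_skip(folio)) {
			/* keep the folio dirty and move on; it stays
			 * locked until we unlock it here */
			folio_redirty_for_writepage(wbc, folio);
			folio_unlock(folio);
			continue;
		}
		/* the write path is responsible for unlocking the folio */
		error = write_one_folio(folio, wbc);
	}
	return error;
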
/**
@@ -179,99 +158,99 @@ static int gfs2_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
- struct iomap_writepage_ctx wpc = { };
+ struct iomap_writepage_ctx wpc = {
+ .inode = mapping->host,
+ .wbc = wbc,
+ .ops = &gfs2_writeback_ops,
+ };
int ret;
/*
- * Even if we didn't write any pages here, we might still be holding
+ * Even if we didn't write enough pages here, we might still be holding
* dirty pages in the ail. We forcibly flush the ail because we don't
* want balance_dirty_pages() to loop indefinitely trying to write out
* pages held in the ail that it can't find.
*/
- ret = iomap_writepages(mapping, wbc, &wpc, &gfs2_writeback_ops);
- if (ret == 0)
+ ret = iomap_writepages(&wpc);
+ if (ret == 0 && wbc->nr_to_write > 0)
set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
return ret;
}
/**
- * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages
+ * gfs2_write_jdata_batch - Write back a folio batch's worth of folios
* @mapping: The mapping
* @wbc: The writeback control
- * @pvec: The vector of pages
- * @nr_pages: The number of pages to write
+ * @fbatch: The batch of folios
* @done_index: Page index
*
* Returns: non-zero if loop should terminate, zero otherwise
*/
-static int gfs2_write_jdata_pagevec(struct address_space *mapping,
+static int gfs2_write_jdata_batch(struct address_space *mapping,
struct writeback_control *wbc,
- struct pagevec *pvec,
- int nr_pages,
+ struct folio_batch *fbatch,
pgoff_t *done_index)
{
struct inode *inode = mapping->host;
struct gfs2_sbd *sdp = GFS2_SB(inode);
- unsigned nrblocks = nr_pages * (PAGE_SIZE >> inode->i_blkbits);
+ unsigned nrblocks;
int i;
int ret;
+ size_t size = 0;
+ int nr_folios = folio_batch_count(fbatch);
+
+ for (i = 0; i < nr_folios; i++)
+ size += folio_size(fbatch->folios[i]);
+ nrblocks = size >> inode->i_blkbits;
ret = gfs2_trans_begin(sdp, nrblocks, nrblocks);
if (ret < 0)
return ret;
- for(i = 0; i < nr_pages; i++) {
- struct page *page = pvec->pages[i];
+ for (i = 0; i < nr_folios; i++) {
+ struct folio *folio = fbatch->folios[i];
- *done_index = page->index;
+ *done_index = folio->index;
- lock_page(page);
+ folio_lock(folio);
- if (unlikely(page->mapping != mapping)) {
+ if (unlikely(folio->mapping != mapping)) {
continue_unlock:
- unlock_page(page);
+ folio_unlock(folio);
continue;
}
- if (!PageDirty(page)) {
+ if (!folio_test_dirty(folio)) {
/* someone wrote it for us */
goto continue_unlock;
}
- if (PageWriteback(page)) {
+ if (folio_test_writeback(folio)) {
if (wbc->sync_mode != WB_SYNC_NONE)
- wait_on_page_writeback(page);
+ folio_wait_writeback(folio);
else
goto continue_unlock;
}
- BUG_ON(PageWriteback(page));
- if (!clear_page_dirty_for_io(page))
+ BUG_ON(folio_test_writeback(folio));
+ if (!folio_clear_dirty_for_io(folio))
goto continue_unlock;
trace_wbc_writepage(wbc, inode_to_bdi(inode));
- ret = __gfs2_jdata_writepage(page, wbc);
+ ret = __gfs2_jdata_write_folio(folio, wbc);
if (unlikely(ret)) {
- if (ret == AOP_WRITEPAGE_ACTIVATE) {
- unlock_page(page);
- ret = 0;
- } else {
-
- /*
- * done_index is set past this page,
- * so media errors will not choke
- * background writeout for the entire
- * file. This has consequences for
- * range_cyclic semantics (ie. it may
- * not be suitable for data integrity
- * writeout).
- */
- *done_index = page->index + 1;
- ret = 1;
- break;
- }
+ /*
+ * done_index is set past this page, so media errors
+ * will not choke background writeout for the entire
+ * file. This has consequences for range_cyclic
+ * semantics (ie. it may not be suitable for data
+ * integrity writeout).
+ */
+ *done_index = folio_next_index(folio);
+ ret = 1;
+ break;
}
/*
@@ -305,8 +284,8 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
{
int ret = 0;
int done = 0;
- struct pagevec pvec;
- int nr_pages;
+ struct folio_batch fbatch;
+ int nr_folios;
pgoff_t writeback_index;
pgoff_t index;
pgoff_t end;
@@ -315,7 +294,7 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
int range_whole = 0;
xa_mark_t tag;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
if (wbc->range_cyclic) {
writeback_index = mapping->writeback_index; /* prev offset */
index = writeback_index;
@@ -331,27 +310,25 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
range_whole = 1;
cycled = 1; /* ignore range_cyclic tests */
}
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
- tag = PAGECACHE_TAG_TOWRITE;
- else
- tag = PAGECACHE_TAG_DIRTY;
+ tag = wbc_to_tag(wbc);
retry:
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag_pages_for_writeback(mapping, index, end);
done_index = index;
while (!done && (index <= end)) {
- nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
- tag);
- if (nr_pages == 0)
+ nr_folios = filemap_get_folios_tag(mapping, &index, end,
+ tag, &fbatch);
+ if (nr_folios == 0)
break;
- ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, &done_index);
+ ret = gfs2_write_jdata_batch(mapping, wbc, &fbatch,
+ &done_index);
if (ret)
done = 1;
if (ret > 0)
ret = 0;
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
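
The reservation in gfs2_write_jdata_batch() above now sums the actual folio sizes instead of assuming one page per batch entry, since a folio batch can mix folio orders. The arithmetic, as a runnable sketch (the sizes and block shift are illustrative):

#include <stdio.h>

/* Reserve journal blocks for a batch of folios: sum their byte sizes,
 * then convert to filesystem blocks (blkbits = 12 means 4 KiB blocks). */
int main(void)
{
	size_t folio_sizes[] = { 4096, 16384, 4096 };	/* mixed-order batch */
	unsigned int blkbits = 12;
	size_t size = 0;
	unsigned int nrblocks;

	for (unsigned int i = 0; i < 3; i++)
		size += folio_sizes[i];
	nrblocks = size >> blkbits;
	printf("%u blocks to reserve\n", nrblocks);	/* 6 */
	return 0;
}
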
@@ -398,43 +375,39 @@ static int gfs2_jdata_writepages(struct address_space *mapping,
}
/**
- * stuffed_readpage - Fill in a Linux page with stuffed file data
+ * stuffed_read_folio - Fill in a Linux folio with stuffed file data
* @ip: the inode
- * @page: the page
+ * @folio: the folio
*
* Returns: errno
*/
-static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
+static int stuffed_read_folio(struct gfs2_inode *ip, struct folio *folio)
{
- struct buffer_head *dibh;
- u64 dsize = i_size_read(&ip->i_inode);
- void *kaddr;
- int error;
+ struct buffer_head *dibh = NULL;
+ size_t dsize = i_size_read(&ip->i_inode);
+ void *from = NULL;
+ int error = 0;
/*
* Due to the order of unstuffing files and ->fault(), we can be
- * asked for a zero page in the case of a stuffed file being extended,
+ * asked for a zero folio in the case of a stuffed file being extended,
* so we need to supply one here. It doesn't happen often.
*/
- if (unlikely(page->index)) {
- zero_user(page, 0, PAGE_SIZE);
- SetPageUptodate(page);
- return 0;
+ if (unlikely(folio->index)) {
+ dsize = 0;
+ } else {
+ error = gfs2_meta_inode_buffer(ip, &dibh);
+ if (error)
+ goto out;
+ from = dibh->b_data + sizeof(struct gfs2_dinode);
}
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- return error;
-
- kaddr = kmap_atomic(page);
- memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
- memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
- kunmap_atomic(kaddr);
- flush_dcache_page(page);
+ folio_fill_tail(folio, 0, from, dsize);
brelse(dibh);
- SetPageUptodate(page);
+out:
+ folio_end_read(folio, error == 0);
- return 0;
+ return error;
}
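
folio_fill_tail() replaces the open-coded kmap/memcpy/memset sequence removed above: it copies dsize bytes into the start of the folio and zero-fills the remainder (with dsize forced to 0 for the beyond-EOF case, the whole folio is zeroed). A userspace sketch of that contract, under the assumption of a flat buffer in place of a folio:

#include <stdio.h>
#include <string.h>

/* fill_tail(): copy 'src_len' bytes to the start of 'dst' and zero the
 * rest -- the same contract the removed kmap/memcpy/memset implemented. */
static void fill_tail(char *dst, size_t dst_len,
		      const char *src, size_t src_len)
{
	memcpy(dst, src, src_len);
	memset(dst + src_len, 0, dst_len - src_len);
}

int main(void)
{
	char folio[16];

	fill_tail(folio, sizeof(folio), "stuffed", 7);
	printf("'%s' then %d zero bytes\n", folio, (int)(sizeof(folio) - 7));
	return 0;
}
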
/**
@@ -447,19 +420,18 @@ static int gfs2_read_folio(struct file *file, struct folio *folio)
struct inode *inode = folio->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- int error;
+ int error = 0;
if (!gfs2_is_jdata(ip) ||
(i_blocksize(inode) == PAGE_SIZE && !folio_buffers(folio))) {
- error = iomap_read_folio(folio, &gfs2_iomap_ops);
+ iomap_bio_read_folio(folio, &gfs2_iomap_ops);
} else if (gfs2_is_stuffed(ip)) {
- error = stuffed_readpage(ip, &folio->page);
- folio_unlock(folio);
+ error = stuffed_read_folio(ip, folio);
} else {
error = mpage_read_folio(folio, gfs2_block_map);
}
- if (unlikely(gfs2_withdrawn(sdp)))
+ if (gfs2_withdrawn(sdp))
return -EIO;
return error;
@@ -474,31 +446,29 @@ static int gfs2_read_folio(struct file *file, struct folio *folio)
*
*/
-int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
- unsigned size)
+ssize_t gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
+ size_t size)
{
struct address_space *mapping = ip->i_inode.i_mapping;
unsigned long index = *pos >> PAGE_SHIFT;
- unsigned offset = *pos & (PAGE_SIZE - 1);
- unsigned copied = 0;
- unsigned amt;
- struct page *page;
- void *p;
+ size_t copied = 0;
do {
- amt = size - copied;
- if (offset + size > PAGE_SIZE)
- amt = PAGE_SIZE - offset;
- page = read_cache_page(mapping, index, gfs2_read_folio, NULL);
- if (IS_ERR(page))
- return PTR_ERR(page);
- p = kmap_atomic(page);
- memcpy(buf + copied, p + offset, amt);
- kunmap_atomic(p);
- put_page(page);
- copied += amt;
- index++;
- offset = 0;
+ size_t offset, chunk;
+ struct folio *folio;
+
+ folio = read_cache_folio(mapping, index, gfs2_read_folio, NULL);
+ if (IS_ERR(folio)) {
+ if (PTR_ERR(folio) == -EINTR)
+ continue;
+ return PTR_ERR(folio);
+ }
+ offset = *pos + copied - folio_pos(folio);
+ chunk = min(size - copied, folio_size(folio) - offset);
+ memcpy_from_folio(buf + copied, folio, offset, chunk);
+ index = folio_next_index(folio);
+ folio_put(folio);
+ copied += chunk;
} while(copied < size);
(*pos) += size;
return size;
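
The per-iteration arithmetic in gfs2_internal_read() above computes where the read cursor lands inside the current folio and how much of the request that folio can satisfy. A runnable sketch of the chunking, assuming 4 KiB single-page folios (the real code walks folios of arbitrary size via folio_next_index()):

#include <stdio.h>

#define FOLIO_SIZE 4096

int main(void)
{
	long long pos = 5000;		/* starting file offset */
	size_t size = 6000, copied = 0;

	while (copied < size) {
		long long folio_pos = ((pos + copied) / FOLIO_SIZE) * FOLIO_SIZE;
		size_t offset = pos + copied - folio_pos;
		size_t chunk = size - copied;

		if (chunk > FOLIO_SIZE - offset)
			chunk = FOLIO_SIZE - offset;
		printf("folio@%lld: copy %zu bytes from offset %zu\n",
		       folio_pos, chunk, offset);
		copied += chunk;
	}
	return 0;
}
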
@@ -529,7 +499,7 @@ static void gfs2_readahead(struct readahead_control *rac)
else if (gfs2_is_jdata(ip))
mpage_readahead(rac, gfs2_block_map);
else
- iomap_readahead(rac, &gfs2_iomap_ops);
+ iomap_bio_readahead(rac, &gfs2_iomap_ops);
}
/**
@@ -572,7 +542,7 @@ out:
gfs2_trans_end(sdp);
}
-static bool jdata_dirty_folio(struct address_space *mapping,
+static bool gfs2_jdata_dirty_folio(struct address_space *mapping,
struct folio *folio)
{
if (current->journal_info)
@@ -741,27 +711,26 @@ static const struct address_space_operations gfs2_aops = {
.writepages = gfs2_writepages,
.read_folio = gfs2_read_folio,
.readahead = gfs2_readahead,
- .dirty_folio = filemap_dirty_folio,
+ .dirty_folio = iomap_dirty_folio,
.release_folio = iomap_release_folio,
.invalidate_folio = iomap_invalidate_folio,
.bmap = gfs2_bmap,
- .direct_IO = noop_direct_IO,
.migrate_folio = filemap_migrate_folio,
.is_partially_uptodate = iomap_is_partially_uptodate,
- .error_remove_page = generic_error_remove_page,
+ .error_remove_folio = generic_error_remove_folio,
};
static const struct address_space_operations gfs2_jdata_aops = {
- .writepage = gfs2_jdata_writepage,
.writepages = gfs2_jdata_writepages,
.read_folio = gfs2_read_folio,
.readahead = gfs2_readahead,
- .dirty_folio = jdata_dirty_folio,
+ .dirty_folio = gfs2_jdata_dirty_folio,
.bmap = gfs2_bmap,
+ .migrate_folio = buffer_migrate_folio,
.invalidate_folio = gfs2_invalidate_folio,
.release_folio = gfs2_release_folio,
.is_partially_uptodate = block_is_partially_uptodate,
- .error_remove_page = generic_error_remove_page,
+ .error_remove_folio = generic_error_remove_folio,
};
void gfs2_set_aops(struct inode *inode)
diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h
index ff9877a68780..bf002522a782 100644
--- a/fs/gfs2/aops.h
+++ b/fs/gfs2/aops.h
@@ -8,8 +8,7 @@
#include "incore.h"
-extern void adjust_fs_space(struct inode *inode);
-extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
- unsigned int from, unsigned int len);
+void adjust_fs_space(struct inode *inode);
+int gfs2_jdata_writeback(struct address_space *mapping, struct writeback_control *wbc);
#endif /* __AOPS_DOT_H__ */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index e7537fd305dd..131091520de6 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -43,53 +43,51 @@ struct metapath {
static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);
/**
- * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
+ * gfs2_unstuffer_folio - unstuff a stuffed inode into a block cached by a folio
* @ip: the inode
* @dibh: the dinode buffer
* @block: the block number that was allocated
- * @page: The (optional) page. This is looked up if @page is NULL
+ * @folio: The folio.
*
* Returns: errno
*/
-
-static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
- u64 block, struct page *page)
+static int gfs2_unstuffer_folio(struct gfs2_inode *ip, struct buffer_head *dibh,
+ u64 block, struct folio *folio)
{
struct inode *inode = &ip->i_inode;
- if (!PageUptodate(page)) {
- void *kaddr = kmap(page);
+ if (!folio_test_uptodate(folio)) {
+ void *kaddr = kmap_local_folio(folio, 0);
u64 dsize = i_size_read(inode);
memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
- memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
- kunmap(page);
+ memset(kaddr + dsize, 0, folio_size(folio) - dsize);
+ kunmap_local(kaddr);
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
}
if (gfs2_is_jdata(ip)) {
- struct buffer_head *bh;
+ struct buffer_head *bh = folio_buffers(folio);
- if (!page_has_buffers(page))
- create_empty_buffers(page, BIT(inode->i_blkbits),
- BIT(BH_Uptodate));
+ if (!bh)
+ bh = create_empty_buffers(folio,
+ BIT(inode->i_blkbits), BIT(BH_Uptodate));
- bh = page_buffers(page);
if (!buffer_mapped(bh))
map_bh(bh, inode->i_sb, block);
set_buffer_uptodate(bh);
gfs2_trans_add_data(ip->i_gl, bh);
} else {
- set_page_dirty(page);
+ folio_mark_dirty(folio);
gfs2_ordered_add_inode(ip);
}
return 0;
}
-static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct page *page)
+static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct folio *folio)
{
struct buffer_head *bh, *dibh;
struct gfs2_dinode *di;
@@ -106,7 +104,7 @@ static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct page *page)
and write it out to disk */
unsigned int n = 1;
- error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
+ error = gfs2_alloc_blocks(ip, &block, &n, 0);
if (error)
goto out_brelse;
if (isdir) {
@@ -118,7 +116,7 @@ static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct page *page)
dibh, sizeof(struct gfs2_dinode));
brelse(bh);
} else {
- error = gfs2_unstuffer_page(ip, dibh, block, page);
+ error = gfs2_unstuffer_folio(ip, dibh, block, folio);
if (error)
goto out_brelse;
}
@@ -157,17 +155,17 @@ out_brelse:
int gfs2_unstuff_dinode(struct gfs2_inode *ip)
{
struct inode *inode = &ip->i_inode;
- struct page *page;
+ struct folio *folio;
int error;
down_write(&ip->i_rw_mutex);
- page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
- error = -ENOMEM;
- if (!page)
+ folio = filemap_grab_folio(inode->i_mapping, 0);
+ error = PTR_ERR(folio);
+ if (IS_ERR(folio))
goto out;
- error = __gfs2_unstuff_inode(ip, page);
- unlock_page(page);
- put_page(page);
+ error = __gfs2_unstuff_inode(ip, folio);
+ folio_unlock(folio);
+ folio_put(folio);
out:
up_write(&ip->i_rw_mutex);
return error;
@@ -317,6 +315,12 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
}
}
+static inline struct buffer_head *
+metapath_dibh(struct metapath *mp)
+{
+ return mp->mp_bh[0];
+}
+
static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
unsigned int x, unsigned int h)
{
@@ -415,13 +419,12 @@ static void release_metapath(struct metapath *mp)
* gfs2_extent_length - Returns length of an extent of blocks
* @bh: The metadata block
* @ptr: Current position in @bh
- * @limit: Max extent length to return
* @eob: Set to 1 if we hit "end of block"
*
* Returns: The length of the extent (minimum of one block)
*/
-static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob)
+static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, int *eob)
{
const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
const __be64 *first = ptr;
@@ -660,7 +663,7 @@ static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct buffer_head *dibh = mp->mp_bh[0];
+ struct buffer_head *dibh = metapath_dibh(mp);
u64 bn;
unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
size_t dblks = iomap->length >> inode->i_blkbits;
@@ -702,7 +705,7 @@ static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
i = mp->mp_aheight;
do {
n = blks - alloced;
- ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
+ ret = gfs2_alloc_blocks(ip, &bn, &n, 0);
if (ret)
goto out;
alloced += n;
@@ -913,7 +916,7 @@ unstuff:
goto do_alloc;
bh = mp->mp_bh[ip->i_height - 1];
- len = gfs2_extent_length(bh, ptr, len, &eob);
+ len = gfs2_extent_length(bh, ptr, &eob);
iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
iomap->length = len << inode->i_blkbits;
@@ -956,36 +959,56 @@ hole_found:
goto out;
}
-static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
- unsigned len)
+static struct folio *
+gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len)
{
+ struct inode *inode = iter->inode;
+ struct gfs2_inode *ip = GFS2_I(inode);
unsigned int blockmask = i_blocksize(inode) - 1;
struct gfs2_sbd *sdp = GFS2_SB(inode);
unsigned int blocks;
+ struct folio *folio;
+ int status;
+
+ if (!gfs2_is_jdata(ip) && !gfs2_is_stuffed(ip))
+ return iomap_get_folio(iter, pos, len);
blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
- return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
+ status = gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
+ if (status)
+ return ERR_PTR(status);
+
+ folio = iomap_get_folio(iter, pos, len);
+ if (IS_ERR(folio))
+ gfs2_trans_end(sdp);
+ return folio;
}
-static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
- unsigned copied, struct page *page)
+static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
+ unsigned copied, struct folio *folio)
{
struct gfs2_trans *tr = current->journal_info;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- if (page && !gfs2_is_stuffed(ip))
- gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
+ if (gfs2_is_jdata(ip) && !gfs2_is_stuffed(ip))
+ gfs2_trans_add_databufs(ip->i_gl, folio,
+ offset_in_folio(folio, pos),
+ copied);
- if (tr->tr_num_buf_new)
- __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+ folio_unlock(folio);
+ folio_put(folio);
- gfs2_trans_end(sdp);
+ if (gfs2_is_jdata(ip) || gfs2_is_stuffed(ip)) {
+ if (tr->tr_num_buf_new)
+ __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+ gfs2_trans_end(sdp);
+ }
}
-static const struct iomap_page_ops gfs2_iomap_page_ops = {
- .page_prepare = gfs2_iomap_page_prepare,
- .page_done = gfs2_iomap_page_done,
+const struct iomap_write_ops gfs2_iomap_write_ops = {
+ .get_folio = gfs2_iomap_get_folio,
+ .put_folio = gfs2_iomap_put_folio,
};
static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
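
The get_folio/put_folio pair above moves the jdata/stuffed transaction into the folio hooks: ->get_folio opens the transaction before taking the folio and must close it again if the folio lookup fails, while ->put_folio closes it on the success path after the folio is released. A schematic of that pairing rule (kernel-style sketch; begin_transaction()/end_transaction() are placeholders for gfs2_trans_begin()/gfs2_trans_end(), with the jdata/stuffed checks omitted):

static struct folio *get_folio_hook(struct iomap_iter *iter, loff_t pos,
				    unsigned len)
{
	struct folio *folio;
	int status = begin_transaction();	/* placeholder */

	if (status)
		return ERR_PTR(status);
	folio = iomap_get_folio(iter, pos, len);
	if (IS_ERR(folio))
		end_transaction();		/* undo on the failure path */
	return folio;
}

static void put_folio_hook(struct inode *inode, loff_t pos, unsigned copied,
			   struct folio *folio)
{
	folio_unlock(folio);
	folio_put(folio);
	end_transaction();			/* close on the success path */
}
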
@@ -1060,8 +1083,6 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
gfs2_trans_end(sdp);
}
- if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
- iomap->page_ops = &gfs2_iomap_page_ops;
return 0;
out_trans_end:
@@ -1277,13 +1298,16 @@ int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock,
/*
* NOTE: Never call gfs2_block_zero_range with an open transaction because it
* uses iomap write to perform its actions, which begin their own transactions
- * (iomap_begin, page_prepare, etc.)
+ * (iomap_begin, get_folio, etc.)
*/
-static int gfs2_block_zero_range(struct inode *inode, loff_t from,
- unsigned int length)
+static int gfs2_block_zero_range(struct inode *inode, loff_t from, loff_t length)
{
BUG_ON(current->journal_info);
- return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops);
+ if (from >= inode->i_size)
+ return 0;
+ length = min(length, inode->i_size - from);
+ return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops,
+ &gfs2_iomap_write_ops, NULL);
}
#define GFS2_JTRUNC_REVOKES 8192
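
gfs2_block_zero_range() above now clamps the range to i_size, so callers can pass a generous length without asking iomap_zero_range() to touch pages beyond EOF. The clamping rule as a runnable sketch:

#include <stdio.h>

/* Clamp a zero range to EOF: a range starting at or past i_size is a
 * no-op, and one that crosses i_size is trimmed to end there. */
static long long clamp_zero_range(long long from, long long length,
				  long long i_size)
{
	if (from >= i_size)
		return 0;
	if (length > i_size - from)
		length = i_size - from;
	return length;
}

int main(void)
{
	printf("%lld\n", clamp_zero_range(1000, 8192, 4096));	/* 3096 */
	printf("%lld\n", clamp_zero_range(5000, 100, 4096));	/* 0 */
	return 0;
}
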
@@ -1372,7 +1396,7 @@ static int trunc_start(struct inode *inode, u64 newsize)
ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
i_size_write(inode, newsize);
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode));
gfs2_dinode_out(ip, dibh->b_data);
if (journaled)
@@ -1569,8 +1593,7 @@ out_unlock:
/* Every transaction boundary, we rewrite the dinode
to keep its di_blocks current in case of failure. */
- ip->i_inode.i_mtime = ip->i_inode.i_ctime =
- current_time(&ip->i_inode);
+ inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode));
gfs2_trans_add_meta(ip->i_gl, dibh);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -1702,7 +1725,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
struct buffer_head *dibh, *bh;
struct gfs2_holder rd_gh;
unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
- u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
+ unsigned int bsize = 1 << bsize_shift;
+ u64 lblock = (offset + bsize - 1) >> bsize_shift;
__u16 start_list[GFS2_MAX_META_HEIGHT];
__u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
unsigned int start_aligned, end_aligned;
@@ -1713,10 +1737,10 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
u64 prev_bnr = 0;
__be64 *start, *end;
- if (offset >= maxsize) {
+ if (offset + bsize - 1 >= maxsize) {
/*
- * The starting point lies beyond the allocated meta-data;
- * there are no blocks do deallocate.
+ * The starting point lies beyond the allocated metadata;
+ * there are no blocks to deallocate.
*/
return 0;
}
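
The early-exit test in punch_hole() above now mirrors the round-up used to compute lblock: if rounding the start offset up to the next block boundary lands at or beyond the allocated size, no whole block starts inside the range and there is nothing to deallocate. Illustrated with concrete numbers:

#include <stdio.h>

int main(void)
{
	unsigned int bsize_shift = 12;		/* 4 KiB blocks */
	unsigned long long bsize = 1ULL << bsize_shift;
	unsigned long long maxsize = 10000;	/* allocated size */
	unsigned long long offset = 9000;	/* inside the last, partial block */
	unsigned long long lblock = (offset + bsize - 1) >> bsize_shift;

	if (offset + bsize - 1 >= maxsize)
		printf("nothing to deallocate\n");
	else
		printf("start at logical block %llu\n", lblock);
	return 0;
}
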
@@ -1810,7 +1834,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
gfs2_assert_withdraw(sdp, bh);
if (gfs2_assert_withdraw(sdp,
prev_bnr != bh->b_blocknr)) {
- fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u,"
+ fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u, "
"s_h:%u, mp_h:%u\n",
(unsigned long long)ip->i_no_addr,
prev_bnr, ip->i_height, strip_h, mp_h);
@@ -1936,7 +1960,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
gfs2_statfs_change(sdp, 0, +btotal, 0);
gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
ip->i_inode.i_gid);
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode));
gfs2_trans_add_meta(ip->i_gl, dibh);
gfs2_dinode_out(ip, dibh->b_data);
up_write(&ip->i_rw_mutex);
@@ -1979,7 +2003,7 @@ static int trunc_end(struct gfs2_inode *ip)
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
gfs2_ordered_del_inode(ip);
}
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode));
ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
gfs2_trans_add_meta(ip->i_gl, dibh);
@@ -2021,14 +2045,6 @@ static int do_shrink(struct inode *inode, u64 newsize)
return error;
}
-void gfs2_trim_blocks(struct inode *inode)
-{
- int ret;
-
- ret = do_shrink(inode, inode->i_size);
- WARN_ON(ret != 0);
-}
-
/**
* do_grow - Touch and update inode size
* @inode: The inode
@@ -2088,7 +2104,7 @@ static int do_grow(struct inode *inode, u64 size)
goto do_end_trans;
truncate_setsize(inode, size);
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode));
gfs2_trans_add_meta(ip->i_gl, dibh);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -2456,23 +2472,26 @@ out:
return error;
}
-static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode,
- loff_t offset)
+static ssize_t gfs2_writeback_range(struct iomap_writepage_ctx *wpc,
+ struct folio *folio, u64 offset, unsigned int len, u64 end_pos)
{
- int ret;
-
- if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode))))
+ if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(wpc->inode))))
return -EIO;
- if (offset >= wpc->iomap.offset &&
- offset < wpc->iomap.offset + wpc->iomap.length)
- return 0;
+ if (offset < wpc->iomap.offset ||
+ offset >= wpc->iomap.offset + wpc->iomap.length) {
+ int ret;
- memset(&wpc->iomap, 0, sizeof(wpc->iomap));
- ret = gfs2_iomap_get(inode, offset, INT_MAX, &wpc->iomap);
- return ret;
+ memset(&wpc->iomap, 0, sizeof(wpc->iomap));
+ ret = gfs2_iomap_get(wpc->inode, offset, INT_MAX, &wpc->iomap);
+ if (ret)
+ return ret;
+ }
+
+ return iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
}
const struct iomap_writeback_ops gfs2_writeback_ops = {
- .map_blocks = gfs2_map_blocks,
+ .writeback_range = gfs2_writeback_range,
+ .writeback_submit = iomap_ioend_writeback_submit,
};
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index 53cce6c08e81..6cdc72dd55a3 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -44,27 +44,27 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
}
extern const struct iomap_ops gfs2_iomap_ops;
+extern const struct iomap_write_ops gfs2_iomap_write_ops;
extern const struct iomap_writeback_ops gfs2_writeback_ops;
-extern int gfs2_unstuff_dinode(struct gfs2_inode *ip);
-extern int gfs2_block_map(struct inode *inode, sector_t lblock,
- struct buffer_head *bh, int create);
-extern int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
- struct iomap *iomap);
-extern int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length,
- struct iomap *iomap);
-extern int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock,
- unsigned int *extlen);
-extern int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock,
- unsigned *extlen, bool *new);
-extern int gfs2_setattr_size(struct inode *inode, u64 size);
-extern void gfs2_trim_blocks(struct inode *inode);
-extern int gfs2_truncatei_resume(struct gfs2_inode *ip);
-extern int gfs2_file_dealloc(struct gfs2_inode *ip);
-extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
- unsigned int len);
-extern int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd);
-extern void gfs2_free_journal_extents(struct gfs2_jdesc *jd);
-extern int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length);
+int gfs2_unstuff_dinode(struct gfs2_inode *ip);
+int gfs2_block_map(struct inode *inode, sector_t lblock,
+ struct buffer_head *bh, int create);
+int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
+ struct iomap *iomap);
+int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length,
+ struct iomap *iomap);
+int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock,
+ unsigned int *extlen);
+int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock,
+ unsigned *extlen, bool *new);
+int gfs2_setattr_size(struct inode *inode, u64 size);
+int gfs2_truncatei_resume(struct gfs2_inode *ip);
+int gfs2_file_dealloc(struct gfs2_inode *ip);
+int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
+ unsigned int len);
+int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd);
+void gfs2_free_journal_extents(struct gfs2_jdesc *jd);
+int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length);
#endif /* __BMAP_DOT_H__ */
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 2e215e8c3c88..95050e719233 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -21,7 +21,9 @@
/**
* gfs2_drevalidate - Check directory lookup consistency
- * @dentry: the mapping to check
+ * @dir: expected parent directory inode
+ * @name: expected name
+ * @dentry: dentry to check
* @flags: lookup flags
*
* Check to make sure the lookup necessary to arrive at this inode from its
@@ -30,50 +32,43 @@
* Returns: 1 if the dentry is ok, 0 if it isn't
*/
-static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
+static int gfs2_drevalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
- struct dentry *parent;
- struct gfs2_sbd *sdp;
- struct gfs2_inode *dip;
+ struct gfs2_sbd *sdp = GFS2_SB(dir);
+ struct gfs2_inode *dip = GFS2_I(dir);
struct inode *inode;
struct gfs2_holder d_gh;
struct gfs2_inode *ip = NULL;
- int error, valid = 0;
+ int error, valid;
int had_lock = 0;
if (flags & LOOKUP_RCU)
return -ECHILD;
- parent = dget_parent(dentry);
- sdp = GFS2_SB(d_inode(parent));
- dip = GFS2_I(d_inode(parent));
inode = d_inode(dentry);
if (inode) {
if (is_bad_inode(inode))
- goto out;
+ return 0;
ip = GFS2_I(inode);
}
- if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) {
- valid = 1;
- goto out;
- }
+ if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
+ return 1;
had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL);
if (!had_lock) {
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
if (error)
- goto out;
+ return 0;
}
- error = gfs2_dir_check(d_inode(parent), &dentry->d_name, ip);
+ error = gfs2_dir_check(dir, name, ip);
valid = inode ? !error : (error == -ENOENT);
if (!had_lock)
gfs2_glock_dq_uninit(&d_gh);
-out:
- dput(parent);
return valid;
}
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 54a6d17b8c25..509e2f0d97e7 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -60,6 +60,7 @@
#include <linux/crc32.h>
#include <linux/vmalloc.h>
#include <linux/bio.h>
+#include <linux/log2.h>
#include "gfs2.h"
#include "incore.h"
@@ -130,7 +131,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
if (ip->i_inode.i_size < offset + size)
i_size_write(&ip->i_inode, offset + size);
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode));
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -227,7 +228,7 @@ out:
if (ip->i_inode.i_size < offset + copied)
i_size_write(&ip->i_inode, offset + copied);
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode));
gfs2_trans_add_meta(ip->i_gl, dibh);
gfs2_dinode_out(ip, dibh->b_data);
@@ -562,15 +563,18 @@ static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf,
int ret = 0;
ret = gfs2_dirent_offset(GFS2_SB(inode), buf);
- if (ret < 0)
- goto consist_inode;
-
+ if (ret < 0) {
+ gfs2_consist_inode(GFS2_I(inode));
+ return ERR_PTR(-EIO);
+ }
offset = ret;
prev = NULL;
dent = buf + offset;
size = be16_to_cpu(dent->de_rec_len);
- if (gfs2_check_dirent(GFS2_SB(inode), dent, offset, size, len, 1))
- goto consist_inode;
+ if (gfs2_check_dirent(GFS2_SB(inode), dent, offset, size, len, 1)) {
+ gfs2_consist_inode(GFS2_I(inode));
+ return ERR_PTR(-EIO);
+ }
do {
ret = scan(dent, name, opaque);
if (ret)
@@ -582,8 +586,10 @@ static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf,
dent = buf + offset;
size = be16_to_cpu(dent->de_rec_len);
if (gfs2_check_dirent(GFS2_SB(inode), dent, offset, size,
- len, 0))
- goto consist_inode;
+ len, 0)) {
+ gfs2_consist_inode(GFS2_I(inode));
+ return ERR_PTR(-EIO);
+ }
} while(1);
switch(ret) {
@@ -597,10 +603,6 @@ static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf,
BUG_ON(ret > 0);
return ERR_PTR(ret);
}
-
-consist_inode:
- gfs2_consist_inode(GFS2_I(inode));
- return ERR_PTR(-EIO);
}
static int dirent_check_reclen(struct gfs2_inode *dip,
@@ -609,14 +611,16 @@ static int dirent_check_reclen(struct gfs2_inode *dip,
const void *ptr = d;
u16 rec_len = be16_to_cpu(d->de_rec_len);
- if (unlikely(rec_len < sizeof(struct gfs2_dirent)))
- goto broken;
+ if (unlikely(rec_len < sizeof(struct gfs2_dirent))) {
+ gfs2_consist_inode(dip);
+ return -EIO;
+ }
ptr += rec_len;
if (ptr < end_p)
return rec_len;
if (ptr == end_p)
return -ENOENT;
-broken:
+
gfs2_consist_inode(dip);
return -EIO;
}
@@ -868,7 +872,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
struct gfs2_dirent *dent;
struct timespec64 tv = current_time(inode);
- error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
+ error = gfs2_alloc_blocks(ip, &bn, &n, 0);
if (error)
return NULL;
bh = gfs2_meta_new(ip->i_gl, bn);
@@ -909,7 +913,6 @@ static int dir_make_exhash(struct inode *inode)
struct qstr args;
struct buffer_head *bh, *dibh;
struct gfs2_leaf *leaf;
- int y;
u32 x;
__be64 *lp;
u64 bn;
@@ -976,9 +979,7 @@ static int dir_make_exhash(struct inode *inode)
i_size_write(inode, sdp->sd_sb.sb_bsize / 2);
gfs2_add_inode_blocks(&dip->i_inode, 1);
dip->i_diskflags |= GFS2_DIF_EXHASH;
-
- for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
- dip->i_depth = y;
+ dip->i_depth = ilog2(sdp->sd_hash_ptrs);
gfs2_dinode_out(dip, dibh->b_data);
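
The counting loop that dir_make_exhash() used for the directory depth is just a base-2 logarithm of sd_hash_ptrs, which is always a power of two, so ilog2() computes the same value directly. A quick userspace check of the equivalence (using __builtin_clz() in place of the kernel's ilog2()):

#include <stdio.h>

int main(void)
{
	unsigned int ptrs = 512;	/* example sd_hash_ptrs value */
	unsigned int x;
	int y, depth;

	for (x = ptrs, y = -1; x; x >>= 1, y++)
		;
	depth = 31 - __builtin_clz(ptrs);	/* what ilog2() computes */
	printf("loop: %d, ilog2 equivalent: %d\n", y, depth);	/* both 9 */
	return 0;
}
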
@@ -1814,7 +1815,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
gfs2_inum_out(nip, dent);
dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip));
- tv = current_time(&ip->i_inode);
+ tv = inode_set_ctime_current(&ip->i_inode);
if (ip->i_diskflags & GFS2_DIF_EXHASH) {
leaf = (struct gfs2_leaf *)bh->b_data;
be16_add_cpu(&leaf->lf_entries, 1);
@@ -1825,7 +1826,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
da->bh = NULL;
brelse(bh);
ip->i_entries++;
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = tv;
+ inode_set_mtime_to_ts(&ip->i_inode, tv);
if (S_ISDIR(nip->i_inode.i_mode))
inc_nlink(&ip->i_inode);
mark_inode_dirty(inode);
@@ -1876,7 +1877,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
const struct qstr *name = &dentry->d_name;
struct gfs2_dirent *dent, *prev = NULL;
struct buffer_head *bh;
- struct timespec64 tv = current_time(&dip->i_inode);
+ struct timespec64 tv;
/* Returns _either_ the entry (if its first in block) or the
previous entry otherwise */
@@ -1896,6 +1897,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
}
dirent_del(dip, bh, prev, dent);
+ tv = inode_set_ctime_current(&dip->i_inode);
if (dip->i_diskflags & GFS2_DIF_EXHASH) {
struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
u16 entries = be16_to_cpu(leaf->lf_entries);
@@ -1910,7 +1912,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
if (!dip->i_entries)
gfs2_consist_inode(dip);
dip->i_entries--;
- dip->i_inode.i_mtime = dip->i_inode.i_ctime = tv;
+ inode_set_mtime_to_ts(&dip->i_inode, tv);
if (d_is_dir(dentry))
drop_nlink(&dip->i_inode);
mark_inode_dirty(&dip->i_inode);
@@ -1951,7 +1953,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
dent->de_type = cpu_to_be16(new_type);
brelse(bh);
- dip->i_inode.i_mtime = dip->i_inode.i_ctime = current_time(&dip->i_inode);
+ inode_set_mtime_to_ts(&dip->i_inode, inode_set_ctime_current(&dip->i_inode));
mark_inode_dirty_sync(&dip->i_inode);
return 0;
}
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index 5b76480c17c9..25a857c78b53 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -23,32 +23,32 @@ struct gfs2_diradd {
int save_loc;
};
-extern struct inode *gfs2_dir_search(struct inode *dir,
- const struct qstr *filename,
- bool fail_on_exist);
-extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
- const struct gfs2_inode *ip);
-extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
- const struct gfs2_inode *ip, struct gfs2_diradd *da);
+struct inode *gfs2_dir_search(struct inode *dir,
+ const struct qstr *filename,
+ bool fail_on_exist);
+int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
+ const struct gfs2_inode *ip);
+int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
+ const struct gfs2_inode *ip, struct gfs2_diradd *da);
static inline void gfs2_dir_no_add(struct gfs2_diradd *da)
{
brelse(da->bh);
da->bh = NULL;
}
-extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
-extern int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
- struct file_ra_state *f_ra);
-extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
- const struct gfs2_inode *nip, unsigned int new_type);
+int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
+int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
+ struct file_ra_state *f_ra);
+int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
+ const struct gfs2_inode *nip, unsigned int new_type);
-extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
+int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
-extern int gfs2_diradd_alloc_required(struct inode *dir,
- const struct qstr *filename,
- struct gfs2_diradd *da);
-extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
- struct buffer_head **bhp);
-extern void gfs2_dir_hash_inval(struct gfs2_inode *ip);
+int gfs2_diradd_alloc_required(struct inode *dir,
+ const struct qstr *filename,
+ struct gfs2_diradd *da);
+int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
+ struct buffer_head **bhp);
+void gfs2_dir_hash_inval(struct gfs2_inode *ip);
static inline u32 gfs2_disk_hash(const char *data, int len)
{
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index cf40895233f5..3334c394ce9c 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -138,8 +138,6 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
return ERR_PTR(-ESTALE);
inode = gfs2_lookup_by_inum(sdp, inum->no_addr, inum->no_formal_ino,
GFS2_BLKST_DINODE);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
return d_obtain_alias(inode);
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index eea5be4fbf0e..b2d23c98c996 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -15,6 +15,7 @@
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
#include <linux/gfs2_ondisk.h>
#include <linux/falloc.h>
#include <linux/swap.h>
@@ -154,7 +155,7 @@ static inline u32 gfs2_gfsflags_to_fsflags(struct inode *inode, u32 gfsflags)
return fsflags;
}
-int gfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa)
+int gfs2_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
{
struct inode *inode = d_inode(dentry);
struct gfs2_inode *ip = GFS2_I(inode);
@@ -235,7 +236,7 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask)
goto out;
if (!IS_IMMUTABLE(inode)) {
- error = gfs2_permission(&init_user_ns, inode, MAY_WRITE);
+ error = gfs2_permission(&nop_mnt_idmap, inode, MAY_WRITE);
if (error)
goto out;
}
@@ -250,6 +251,7 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask)
error = filemap_fdatawait(inode->i_mapping);
if (error)
goto out;
+ truncate_inode_pages(inode->i_mapping, 0);
if (new_flags & GFS2_DIF_JDATA)
gfs2_ordered_del_inode(ip);
}
@@ -259,7 +261,7 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask)
error = gfs2_meta_inode_buffer(ip, &bh);
if (error)
goto out_trans_end;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
gfs2_trans_add_meta(ip->i_gl, bh);
ip->i_diskflags = new_flags;
gfs2_dinode_out(ip, bh->b_data);
@@ -273,8 +275,8 @@ out:
return error;
}
-int gfs2_fileattr_set(struct user_namespace *mnt_userns,
- struct dentry *dentry, struct fileattr *fa)
+int gfs2_fileattr_set(struct mnt_idmap *idmap,
+ struct dentry *dentry, struct file_kattr *fa)
{
struct inode *inode = d_inode(dentry);
u32 fsflags = fa->flags, gfsflags = 0;
@@ -375,23 +377,23 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
}
/**
- * gfs2_allocate_page_backing - Allocate blocks for a write fault
- * @page: The (locked) page to allocate backing for
+ * gfs2_allocate_folio_backing - Allocate blocks for a write fault
+ * @folio: The (locked) folio to allocate backing for
* @length: Size of the allocation
*
- * We try to allocate all the blocks required for the page in one go. This
+ * We try to allocate all the blocks required for the folio in one go. This
* might fail for various reasons, so we keep trying until all the blocks to
- * back this page are allocated. If some of the blocks are already allocated,
+ * back this folio are allocated. If some of the blocks are already allocated,
* that is ok too.
*/
-static int gfs2_allocate_page_backing(struct page *page, unsigned int length)
+static int gfs2_allocate_folio_backing(struct folio *folio, size_t length)
{
- u64 pos = page_offset(page);
+ u64 pos = folio_pos(folio);
do {
struct iomap iomap = { };
- if (gfs2_iomap_alloc(page->mapping->host, pos, length, &iomap))
+ if (gfs2_iomap_alloc(folio->mapping->host, pos, length, &iomap))
return -EIO;
if (length < iomap.length)
@@ -413,16 +415,16 @@ static int gfs2_allocate_page_backing(struct page *page, unsigned int length)
static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
{
- struct page *page = vmf->page;
+ struct folio *folio = page_folio(vmf->page);
struct inode *inode = file_inode(vmf->vma->vm_file);
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct gfs2_alloc_parms ap = { .aflags = 0, };
- u64 offset = page_offset(page);
+ struct gfs2_alloc_parms ap = {};
+ u64 pos = folio_pos(folio);
unsigned int data_blocks, ind_blocks, rblocks;
vm_fault_t ret = VM_FAULT_LOCKED;
struct gfs2_holder gh;
- unsigned int length;
+ size_t length;
loff_t size;
int err;
@@ -431,27 +433,27 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
err = gfs2_glock_nq(&gh);
if (err) {
- ret = block_page_mkwrite_return(err);
+ ret = vmf_fs_error(err);
goto out_uninit;
}
- /* Check page index against inode size */
+ /* Check folio index against inode size */
size = i_size_read(inode);
- if (offset >= size) {
+ if (pos >= size) {
ret = VM_FAULT_SIGBUS;
goto out_unlock;
}
- /* Update file times before taking page lock */
+ /* Update file times before taking folio lock */
file_update_time(vmf->vma->vm_file);
- /* page is wholly or partially inside EOF */
- if (size - offset < PAGE_SIZE)
- length = size - offset;
+ /* folio is wholly or partially inside EOF */
+ if (size - pos < folio_size(folio))
+ length = size - pos;
else
- length = PAGE_SIZE;
+ length = folio_size(folio);
- gfs2_size_hint(vmf->vma->vm_file, offset, length);
+ gfs2_size_hint(vmf->vma->vm_file, pos, length);
set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
@@ -462,18 +464,19 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
*/
if (!gfs2_is_stuffed(ip) &&
- !gfs2_write_alloc_required(ip, offset, length)) {
- lock_page(page);
- if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
+ !gfs2_write_alloc_required(ip, pos, length)) {
+ folio_lock(folio);
+ if (!folio_test_uptodate(folio) ||
+ folio->mapping != inode->i_mapping) {
ret = VM_FAULT_NOPAGE;
- unlock_page(page);
+ folio_unlock(folio);
}
goto out_unlock;
}
err = gfs2_rindex_update(sdp);
if (err) {
- ret = block_page_mkwrite_return(err);
+ ret = vmf_fs_error(err);
goto out_unlock;
}
@@ -481,12 +484,12 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
ap.target = data_blocks + ind_blocks;
err = gfs2_quota_lock_check(ip, &ap);
if (err) {
- ret = block_page_mkwrite_return(err);
+ ret = vmf_fs_error(err);
goto out_unlock;
}
err = gfs2_inplace_reserve(ip, &ap);
if (err) {
- ret = block_page_mkwrite_return(err);
+ ret = vmf_fs_error(err);
goto out_quota_unlock;
}
@@ -499,35 +502,35 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
}
err = gfs2_trans_begin(sdp, rblocks, 0);
if (err) {
- ret = block_page_mkwrite_return(err);
+ ret = vmf_fs_error(err);
goto out_trans_fail;
}
- /* Unstuff, if required, and allocate backing blocks for page */
+ /* Unstuff, if required, and allocate backing blocks for folio */
if (gfs2_is_stuffed(ip)) {
err = gfs2_unstuff_dinode(ip);
if (err) {
- ret = block_page_mkwrite_return(err);
+ ret = vmf_fs_error(err);
goto out_trans_end;
}
}
- lock_page(page);
+ folio_lock(folio);
/* If truncated, we must retry the operation, we may have raced
* with the glock demotion code.
*/
- if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
+ if (!folio_test_uptodate(folio) || folio->mapping != inode->i_mapping) {
ret = VM_FAULT_NOPAGE;
goto out_page_locked;
}
- err = gfs2_allocate_page_backing(page, length);
+ err = gfs2_allocate_folio_backing(folio, length);
if (err)
- ret = block_page_mkwrite_return(err);
+ ret = vmf_fs_error(err);
out_page_locked:
if (ret != VM_FAULT_LOCKED)
- unlock_page(page);
+ folio_unlock(folio);
out_trans_end:
gfs2_trans_end(sdp);
out_trans_fail:
@@ -539,8 +542,8 @@ out_unlock:
out_uninit:
gfs2_holder_uninit(&gh);
if (ret == VM_FAULT_LOCKED) {
- set_page_dirty(page);
- wait_for_stable_page(page);
+ folio_mark_dirty(folio);
+ folio_wait_stable(folio);
}
sb_end_pagefault(inode->i_sb);
return ret;
@@ -557,7 +560,7 @@ static vm_fault_t gfs2_fault(struct vm_fault *vmf)
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
err = gfs2_glock_nq(&gh);
if (err) {
- ret = block_page_mkwrite_return(err);
+ ret = vmf_fs_error(err);
goto out_uninit;
}
ret = filemap_fault(vmf);
@@ -629,6 +632,9 @@ int gfs2_open_common(struct inode *inode, struct file *file)
ret = generic_file_open(inode, file);
if (ret)
return ret;
+
+ if (!gfs2_is_jdata(GFS2_I(inode)))
+ file->f_mode |= FMODE_CAN_ODIRECT;
}
fp = kzalloc(sizeof(struct gfs2_file), GFP_NOFS);
@@ -738,7 +744,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
{
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
- int sync_state = inode->i_state & I_DIRTY;
+ int sync_state = inode_state_read_once(inode) & I_DIRTY;
struct gfs2_inode *ip = GFS2_I(inode);
int ret = 0, ret1 = 0;
@@ -783,9 +789,13 @@ static inline bool should_fault_in_pages(struct iov_iter *i,
if (!user_backed_iter(i))
return false;
+ /*
+ * Try to fault in multiple pages initially. When that doesn't result
+ * in any progress, fall back to a single page.
+ */
size = PAGE_SIZE;
offs = offset_in_page(iocb->ki_pos);
- if (*prev_count != count || !*window_size) {
+ if (*prev_count != count) {
size_t nr_dirtied;
nr_dirtied = max(current->nr_dirtied_pause -
@@ -810,7 +820,7 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
/*
* In this function, we disable page faults when we're holding the
* inode glock while doing I/O. If a page fault occurs, we indicate
- * that the inode glock may be dropped, fault in the pages manually,
+ * that the inode glock should be dropped, fault in the pages manually,
* and retry.
*
* Unlike generic_file_read_iter, for reads, iomap_dio_rw can trigger
@@ -869,12 +879,13 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
struct gfs2_inode *ip = GFS2_I(inode);
size_t prev_count = 0, window_size = 0;
size_t written = 0;
+ bool enough_retries;
ssize_t ret;
/*
* In this function, we disable page faults when we're holding the
* inode glock while doing I/O. If a page fault occurs, we indicate
- * that the inode glock may be dropped, fault in the pages manually,
+ * that the inode glock should be dropped, fault in the pages manually,
* and retry.
*
* For writes, iomap_dio_rw only triggers manual page faults, so we
@@ -912,11 +923,17 @@ retry:
if (ret > 0)
written = ret;
+ enough_retries = prev_count == iov_iter_count(from) &&
+ window_size <= PAGE_SIZE;
if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
gfs2_glock_dq(gh);
window_size -= fault_in_iov_iter_readable(from, window_size);
- if (window_size)
- goto retry;
+ if (window_size) {
+ if (!enough_retries)
+ goto retry;
+ /* fall back to buffered I/O */
+ ret = 0;
+ }
}
out_unlock:
if (gfs2_holder_queued(gh))
@@ -940,7 +957,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
/*
* In this function, we disable page faults when we're holding the
* inode glock while doing I/O. If a page fault occurs, we indicate
- * that the inode glock may be dropped, fault in the pages manually,
+ * that the inode glock should be dropped, fault in the pages manually,
* and retry.
*/
@@ -1007,7 +1024,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
/*
* In this function, we disable page faults when we're holding the
* inode glock while doing I/O. If a page fault occurs, we indicate
- * that the inode glock may be dropped, fault in the pages manually,
+ * that the inode glock should be dropped, fault in the pages manually,
* and retry.
*/
@@ -1018,8 +1035,8 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
}
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh);
-retry:
if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
+retry:
window_size -= fault_in_iov_iter_readable(from, window_size);
if (!window_size) {
ret = -EFAULT;
@@ -1040,15 +1057,12 @@ retry:
goto out_unlock;
}
- current->backing_dev_info = inode_to_bdi(inode);
pagefault_disable();
- ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+ ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops,
+ &gfs2_iomap_write_ops, NULL);
pagefault_enable();
- current->backing_dev_info = NULL;
- if (ret > 0) {
- iocb->ki_pos += ret;
+ if (ret > 0)
written += ret;
- }
if (inode == sdp->sd_rindex)
gfs2_glock_dq_uninit(statfs_gh);
@@ -1109,14 +1123,16 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret)
goto out_unlock;
- ret = file_update_time(file);
- if (ret)
- goto out_unlock;
-
if (iocb->ki_flags & IOCB_DIRECT) {
struct address_space *mapping = file->f_mapping;
ssize_t buffered, ret2;
+ /*
+ * Note that under direct I/O, we don't allow any inode
+ * timestamp updates, so we're not calling file_update_time()
+ * here.
+ */
+
ret = gfs2_file_direct_write(iocb, from, &gh);
if (ret < 0 || !iov_iter_count(from))
goto out_unlock;
@@ -1143,6 +1159,10 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (!ret || ret2 > 0)
ret += ret2;
} else {
+ ret = file_update_time(file);
+ if (ret)
+ goto out_unlock;
+
ret = gfs2_file_buffered_write(iocb, from, &gh);
if (likely(ret > 0))
ret = generic_write_sync(iocb, ret);
@@ -1234,7 +1254,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
struct inode *inode = file_inode(file);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_alloc_parms ap = { .aflags = 0, };
+ struct gfs2_alloc_parms ap = {};
unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
loff_t bytes, max_bytes, max_blks;
int error;
@@ -1422,25 +1442,29 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ int ret;
- if (!(fl->fl_flags & FL_POSIX))
+ if (!(fl->c.flc_flags & FL_POSIX))
return -ENOLCK;
- if (cmd == F_CANCELLK) {
- /* Hack: */
- cmd = F_SETLK;
- fl->fl_type = F_UNLCK;
- }
- if (unlikely(gfs2_withdrawn(sdp))) {
- if (fl->fl_type == F_UNLCK)
+ if (gfs2_withdrawn(sdp)) {
+ if (lock_is_unlock(fl))
locks_lock_file_wait(file, fl);
return -EIO;
}
- if (IS_GETLK(cmd))
- return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
- else if (fl->fl_type == F_UNLCK)
- return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
- else
- return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
+ down_read(&ls->ls_sem);
+ ret = -ENODEV;
+ if (likely(ls->ls_dlm != NULL)) {
+ if (cmd == F_CANCELLK)
+ ret = dlm_posix_cancel(ls->ls_dlm, ip->i_no_addr, file, fl);
+ else if (IS_GETLK(cmd))
+ ret = dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
+ else if (lock_is_unlock(fl))
+ ret = dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
+ else
+ ret = dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
+ }
+ up_read(&ls->ls_sem);
+ return ret;
}
static void __flock_holder_uninit(struct file *file, struct gfs2_holder *fl_gh)
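The ls_sem read lock introduced here protects ls->ls_dlm against the lockspace being torn down concurrently. The release side is not part of this hunk; under that assumption, its counterpart would look roughly like this illustrative sketch:

    /* Hypothetical release-side counterpart, for illustration only. */
    down_write(&ls->ls_sem);
    dlm_release_lockspace(ls->ls_dlm, 2);
    ls->ls_dlm = NULL;
    up_write(&ls->ls_sem);

A lock request racing with release then observes ls_dlm == NULL under the semaphore and fails cleanly with -ENODEV instead of dereferencing a freed lockspace.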
@@ -1469,7 +1493,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
int error = 0;
int sleeptime;
- state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
+ state = lock_is_write(fl) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
flags = GL_EXACT | GL_NOPID;
if (!IS_SETLKW(cmd))
flags |= LM_FLAG_TRY_1CB;
@@ -1481,8 +1505,8 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
if (fl_gh->gh_state == state)
goto out;
locks_init_lock(&request);
- request.fl_type = F_UNLCK;
- request.fl_flags = FL_FLOCK;
+ request.c.flc_type = F_UNLCK;
+ request.c.flc_flags = FL_FLOCK;
locks_lock_file_wait(file, &request);
gfs2_glock_dq(fl_gh);
gfs2_holder_reinit(state, flags, fl_gh);
@@ -1543,10 +1567,10 @@ static void do_unflock(struct file *file, struct file_lock *fl)
static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
{
- if (!(fl->fl_flags & FL_FLOCK))
+ if (!(fl->c.flc_flags & FL_FLOCK))
return -ENOLCK;
- if (fl->fl_type == F_UNLCK) {
+ if (lock_is_unlock(fl)) {
do_unflock(file, fl);
return 0;
} else {
@@ -1567,10 +1591,11 @@ const struct file_operations gfs2_file_fops = {
.fsync = gfs2_fsync,
.lock = gfs2_lock,
.flock = gfs2_flock,
- .splice_read = generic_file_splice_read,
+ .splice_read = copy_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = simple_nosetlease,
.fallocate = gfs2_fallocate,
+ .fop_flags = FOP_ASYNC_LOCK,
};
const struct file_operations gfs2_dir_fops = {
@@ -1583,6 +1608,7 @@ const struct file_operations gfs2_dir_fops = {
.lock = gfs2_lock,
.flock = gfs2_flock,
.llseek = default_llseek,
+ .fop_flags = FOP_ASYNC_LOCK,
};
#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
@@ -1598,7 +1624,7 @@ const struct file_operations gfs2_file_fops_nolock = {
.open = gfs2_open,
.release = gfs2_release,
.fsync = gfs2_fsync,
- .splice_read = generic_file_splice_read,
+ .splice_read = copy_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = generic_setlease,
.fallocate = gfs2_fallocate,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 524f3c96b9a4..92e029104d8a 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -34,8 +34,8 @@
#include <linux/lockref.h>
#include <linux/rhashtable.h>
#include <linux/pid_namespace.h>
-#include <linux/fdtable.h>
#include <linux/file.h>
+#include <linux/random.h>
#include "gfs2.h"
#include "incore.h"
@@ -61,13 +61,10 @@ struct gfs2_glock_iter {
typedef void (*glock_examiner) (struct gfs2_glock * gl);
static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
-static void __gfs2_glock_dq(struct gfs2_holder *gh);
-static void handle_callback(struct gfs2_glock *gl, unsigned int state,
- unsigned long delay, bool remote);
+static void request_demote(struct gfs2_glock *gl, unsigned int state,
+ unsigned long delay, bool remote);
static struct dentry *gfs2_root;
-static struct workqueue_struct *glock_workqueue;
-struct workqueue_struct *gfs2_delete_workqueue;
static LIST_HEAD(lru_list);
static atomic_t lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(lru_lock);
@@ -140,44 +137,43 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu)
kmem_cache_free(gfs2_glock_cachep, gl);
}
-/**
- * glock_blocked_by_withdraw - determine if we can still use a glock
- * @gl: the glock
- *
- * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted
- * when we're withdrawn. For example, to maintain metadata integrity, we should
- * disallow the use of inode and rgrp glocks when withdrawn. Other glocks, like
- * iopen or the transaction glocks may be safely used because none of their
- * metadata goes through the journal. So in general, we should disallow all
- * glocks that are journaled, and allow all the others. One exception is:
- * we need to allow our active journal to be promoted and demoted so others
- * may recover it and we can reacquire it when they're done.
- */
-static bool glock_blocked_by_withdraw(struct gfs2_glock *gl)
+static void __gfs2_glock_free(struct gfs2_glock *gl)
{
+ rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
+ smp_mb();
+ wake_up_glock(gl);
+ call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
+}
+
+void gfs2_glock_free(struct gfs2_glock *gl) {
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- if (likely(!gfs2_withdrawn(sdp)))
- return false;
- if (gl->gl_ops->go_flags & GLOF_NONDISK)
- return false;
- if (!sdp->sd_jdesc ||
- gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr)
- return false;
- return true;
+ __gfs2_glock_free(gl);
+ if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+ wake_up(&sdp->sd_kill_wait);
}
-void gfs2_glock_free(struct gfs2_glock *gl)
-{
+void gfs2_glock_free_later(struct gfs2_glock *gl) {
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- gfs2_glock_assert_withdraw(gl, atomic_read(&gl->gl_revokes) == 0);
- rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
- smp_mb();
- wake_up_glock(gl);
- call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
+ spin_lock(&lru_lock);
+ list_add(&gl->gl_lru, &sdp->sd_dead_glocks);
+ spin_unlock(&lru_lock);
if (atomic_dec_and_test(&sdp->sd_glock_disposal))
- wake_up(&sdp->sd_glock_wait);
+ wake_up(&sdp->sd_kill_wait);
+}
+
+static void gfs2_free_dead_glocks(struct gfs2_sbd *sdp)
+{
+ struct list_head *list = &sdp->sd_dead_glocks;
+
+ while (!list_empty(list)) {
+ struct gfs2_glock *gl;
+
+ gl = list_first_entry(list, struct gfs2_glock, gl_lru);
+ list_del_init(&gl->gl_lru);
+ __gfs2_glock_free(gl);
+ }
}
/**
@@ -193,34 +189,9 @@ struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl)
return gl;
}
-/**
- * demote_ok - Check to see if it's ok to unlock a glock
- * @gl: the glock
- *
- * Returns: 1 if it's ok
- */
-
-static int demote_ok(const struct gfs2_glock *gl)
+static void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
{
- const struct gfs2_glock_operations *glops = gl->gl_ops;
-
- if (gl->gl_state == LM_ST_UNLOCKED)
- return 0;
- if (!list_empty(&gl->gl_holders))
- return 0;
- if (glops->go_demote_ok)
- return glops->go_demote_ok(gl);
- return 1;
-}
-
-
-void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
-{
- if (!(gl->gl_ops->go_flags & GLOF_LRU))
- return;
-
spin_lock(&lru_lock);
-
list_move_tail(&gl->gl_lru, &lru_list);
if (!test_bit(GLF_LRU, &gl->gl_flags)) {
@@ -233,9 +204,6 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
{
- if (!(gl->gl_ops->go_flags & GLOF_LRU))
- return;
-
spin_lock(&lru_lock);
if (test_bit(GLF_LRU, &gl->gl_flags)) {
list_del_init(&gl->gl_lru);
@@ -249,8 +217,10 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
* Enqueue the glock on the work queue. Passes one glock reference on to the
* work queue.
*/
-static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
- if (!queue_delayed_work(glock_workqueue, &gl->gl_work, delay)) {
+static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+ if (!queue_delayed_work(sdp->sd_glock_wq, &gl->gl_work, delay)) {
/*
* We are holding the lockref spinlock, and the work was still
* queued above. The queued work (glock_work_func) takes that
@@ -262,21 +232,14 @@ static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay)
}
}
-static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
- spin_lock(&gl->gl_lockref.lock);
- __gfs2_glock_queue_work(gl, delay);
- spin_unlock(&gl->gl_lockref.lock);
-}
-
static void __gfs2_glock_put(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct address_space *mapping = gfs2_glock2aspace(gl);
lockref_mark_dead(&gl->gl_lockref);
-
- gfs2_glock_remove_from_lru(gl);
spin_unlock(&gl->gl_lockref.lock);
+ gfs2_glock_remove_from_lru(gl);
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
if (mapping) {
truncate_inode_pages_final(mapping);
@@ -287,12 +250,18 @@ static void __gfs2_glock_put(struct gfs2_glock *gl)
sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
}
-/*
- * Cause the glock to be put in work queue context.
- */
-void gfs2_glock_queue_put(struct gfs2_glock *gl)
+static bool __gfs2_glock_put_or_lock(struct gfs2_glock *gl)
{
- gfs2_glock_queue_work(gl, 0);
+ if (lockref_put_or_lock(&gl->gl_lockref))
+ return true;
+ GLOCK_BUG_ON(gl, gl->gl_lockref.count != 1);
+ if (gl->gl_state != LM_ST_UNLOCKED) {
+ gl->gl_lockref.count--;
+ gfs2_glock_add_to_lru(gl);
+ spin_unlock(&gl->gl_lockref.lock);
+ return true;
+ }
+ return false;
}
/**
@@ -303,12 +272,28 @@ void gfs2_glock_queue_put(struct gfs2_glock *gl)
void gfs2_glock_put(struct gfs2_glock *gl)
{
- if (lockref_put_or_lock(&gl->gl_lockref))
+ if (__gfs2_glock_put_or_lock(gl))
return;
__gfs2_glock_put(gl);
}
+/*
+ * gfs2_glock_put_async - Decrement reference count without sleeping
+ * @gl: The glock to put
+ *
+ * Decrement the reference count on a glock immediately unless it is the last
+ * reference. Defer putting the last reference to work queue context.
+ */
+void gfs2_glock_put_async(struct gfs2_glock *gl)
+{
+ if (__gfs2_glock_put_or_lock(gl))
+ return;
+
+ gfs2_glock_queue_work(gl, 0);
+ spin_unlock(&gl->gl_lockref.lock);
+}
+
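The split between the two put variants matters for calling context: dropping the last reference may truncate the glock's address space and call into the lock module (see __gfs2_glock_put() above), which can sleep. A usage sketch, with the surrounding locking purely hypothetical:

    /* In contexts that must not sleep, e.g. with a spinlock held: */
    gfs2_glock_put_async(gl);    /* final put deferred to the workqueue */

    /* In ordinary process context: */
    gfs2_glock_put(gl);          /* may free the glock directly */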
/**
* may_grant - check if it's ok to grant a new lock
* @gl: The glock
@@ -469,14 +454,18 @@ done:
/**
* do_promote - promote as many requests as possible on the current queue
* @gl: The glock
- *
- * Returns: 1 if there is a blocked holder at the head of the list
*/
-static int do_promote(struct gfs2_glock *gl)
+static void do_promote(struct gfs2_glock *gl)
{
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_holder *gh, *current_gh;
+ if (gfs2_withdrawn(sdp)) {
+ do_error(gl, LM_OUT_ERROR);
+ return;
+ }
+
current_gh = find_first_holder(gl);
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
if (test_bit(HIF_HOLDER, &gh->gh_iflags))
@@ -484,13 +473,10 @@ static int do_promote(struct gfs2_glock *gl)
if (!may_grant(gl, current_gh, gh)) {
/*
* If we get here, it means we may not grant this
- * holder for some reason. If this holder is at the
- * head of the list, it means we have a blocked holder
- * at the head, so return 1.
+ * holder for some reason.
*/
- if (list_is_first(&gh->gh_list, &gl->gl_holders))
- return 1;
- do_error(gl, 0);
+ if (current_gh)
+ do_error(gl, 0); /* Fail queued try locks */
break;
}
set_bit(HIF_HOLDER, &gh->gh_iflags);
@@ -499,7 +485,6 @@ static int do_promote(struct gfs2_glock *gl)
if (!current_gh)
current_gh = gh;
}
- return 0;
}
/**
@@ -519,6 +504,23 @@ static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl)
}
/**
+ * find_last_waiter - find the last gh that's waiting for the glock
+ * @gl: the glock
+ *
+ * This is also a fast way of finding out if there are any waiters.
+ */
+
+static inline struct gfs2_holder *find_last_waiter(const struct gfs2_glock *gl)
+{
+ struct gfs2_holder *gh;
+
+ if (list_empty(&gl->gl_holders))
+ return NULL;
+ gh = list_last_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
+ return test_bit(HIF_HOLDER, &gh->gh_iflags) ? NULL : gh;
+}
+
+/**
* state_change - record that the glock is now in a different state
* @gl: the glock
* @new_state: the new state
@@ -526,18 +528,6 @@ static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl)
static void state_change(struct gfs2_glock *gl, unsigned int new_state)
{
- int held1, held2;
-
- held1 = (gl->gl_state != LM_ST_UNLOCKED);
- held2 = (new_state != LM_ST_UNLOCKED);
-
- if (held1 != held2) {
- GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
- if (held2)
- gl->gl_lockref.count++;
- else
- gl->gl_lockref.count--;
- }
if (new_state != gl->gl_target)
/* shorten our minimum hold time */
gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
@@ -546,11 +536,11 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
gl->gl_tchange = jiffies;
}
-static void gfs2_set_demote(struct gfs2_glock *gl)
+static void gfs2_set_demote(int nr, struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- set_bit(GLF_DEMOTE, &gl->gl_flags);
+ set_bit(nr, &gl->gl_flags);
smp_mb();
wake_up(&sdp->sd_async_glock_wait);
}
@@ -573,31 +563,31 @@ static void gfs2_demote_wake(struct gfs2_glock *gl)
static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
- struct gfs2_holder *gh;
- unsigned state = ret & LM_OUT_ST_MASK;
- spin_lock(&gl->gl_lockref.lock);
- trace_gfs2_glock_state_change(gl, state);
- state_change(gl, state);
- gh = find_first_waiter(gl);
+ if (!(ret & ~LM_OUT_ST_MASK)) {
+ unsigned state = ret & LM_OUT_ST_MASK;
+
+ trace_gfs2_glock_state_change(gl, state);
+ state_change(gl, state);
+ }
/* Demote to UN request arrived during demote to SH or DF */
if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
- state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED)
+ gl->gl_state != LM_ST_UNLOCKED &&
+ gl->gl_demote_state == LM_ST_UNLOCKED)
gl->gl_target = LM_ST_UNLOCKED;
/* Check for state != intended state */
- if (unlikely(state != gl->gl_target)) {
- if (gh && (ret & LM_OUT_CANCELED))
- gfs2_holder_wake(gh);
+ if (unlikely(gl->gl_state != gl->gl_target)) {
+ struct gfs2_holder *gh = find_first_waiter(gl);
+
if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
- /* move to back of queue and try next entry */
if (ret & LM_OUT_CANCELED) {
- if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0)
- list_move_tail(&gh->gh_list, &gl->gl_holders);
- gh = find_first_waiter(gl);
- gl->gl_target = gh->gh_state;
- goto retry;
+ list_del_init(&gh->gh_list);
+ trace_gfs2_glock_queue(gh, 0);
+ gfs2_holder_wake(gh);
+ gl->gl_target = gl->gl_state;
+ goto out;
}
/* Some error or failed "try lock" - report it */
if ((ret & LM_OUT_ERROR) ||
@@ -607,10 +597,9 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
goto out;
}
}
- switch(state) {
+ switch(gl->gl_state) {
/* Unlocked due to conversion deadlock, try again */
case LM_ST_UNLOCKED:
-retry:
do_xmote(gl, gh, gl->gl_target);
break;
/* Conversion fails, unlock and try again */
@@ -619,18 +608,21 @@ retry:
do_xmote(gl, gh, LM_ST_UNLOCKED);
break;
default: /* Everything else */
- fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n",
- gl->gl_target, state);
+ fs_err(gl->gl_name.ln_sbd,
+ "glock %u:%llu requested=%u ret=%u\n",
+ gl->gl_name.ln_type, gl->gl_name.ln_number,
+ gl->gl_req, ret);
GLOCK_BUG_ON(gl, 1);
}
- spin_unlock(&gl->gl_lockref.lock);
return;
}
/* Fast path - we got what we asked for */
- if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
+ if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
+ clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
gfs2_demote_wake(gl);
- if (state != LM_ST_UNLOCKED) {
+ }
+ if (gl->gl_state != LM_ST_UNLOCKED) {
if (glops->go_xmote_bh) {
int rv;
@@ -645,18 +637,8 @@ retry:
do_promote(gl);
}
out:
- clear_bit(GLF_LOCK, &gl->gl_flags);
- spin_unlock(&gl->gl_lockref.lock);
-}
-
-static bool is_system_glock(struct gfs2_glock *gl)
-{
- struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
-
- if (gl == m_ip->i_gl)
- return true;
- return false;
+ if (!test_bit(GLF_CANCELING, &gl->gl_flags))
+ clear_bit(GLF_LOCK, &gl->gl_flags);
}
/**
@@ -674,136 +656,86 @@ __acquires(&gl->gl_lockref.lock)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
int ret;
- if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) &&
- gh && !(gh->gh_flags & LM_FLAG_NOEXP))
- goto skip_inval;
+ /*
+ * When a filesystem is withdrawing, the remaining cluster nodes will
+ * take care of recovering the withdrawing node's journal. We only
+ * need to make sure that once we trigger remote recovery, we won't
+ * write to the shared block device anymore. This means that here,
+ *
+ * - no new writes to the filesystem may be triggered (->go_sync()).
+ *
+ * - any cached data should be discarded by calling ->go_inval(), dirty
+ * or not and journaled or unjournaled.
+ *
+ * - no more dlm locking operations should be issued (->lm_lock()).
+ */
- lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
- LM_FLAG_PRIORITY);
GLOCK_BUG_ON(gl, gl->gl_state == target);
GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
- if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
- glops->go_inval) {
- /*
- * If another process is already doing the invalidate, let that
- * finish first. The glock state machine will get back to this
- * holder again later.
- */
- if (test_and_set_bit(GLF_INVALIDATE_IN_PROGRESS,
- &gl->gl_flags))
- return;
- do_error(gl, 0); /* Fail queued try locks */
- }
- gl->gl_req = target;
- set_bit(GLF_BLOCKING, &gl->gl_flags);
- if ((gl->gl_req == LM_ST_UNLOCKED) ||
- (gl->gl_state == LM_ST_EXCLUSIVE) ||
- (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
- clear_bit(GLF_BLOCKING, &gl->gl_flags);
+
+ if (!glops->go_inval || !glops->go_sync)
+ goto skip_inval;
+
spin_unlock(&gl->gl_lockref.lock);
- if (glops->go_sync) {
+ if (!gfs2_withdrawn(sdp)) {
ret = glops->go_sync(gl);
- /* If we had a problem syncing (due to io errors or whatever,
- * we should not invalidate the metadata or tell dlm to
- * release the glock to other nodes.
- */
if (ret) {
if (cmpxchg(&sdp->sd_log_error, 0, ret)) {
- fs_err(sdp, "Error %d syncing glock \n", ret);
+ fs_err(sdp, "Error %d syncing glock\n", ret);
gfs2_dump_glock(NULL, gl, true);
+ gfs2_withdraw(sdp);
}
- goto skip_inval;
}
}
- if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) {
- /*
- * The call to go_sync should have cleared out the ail list.
- * If there are still items, we have a problem. We ought to
- * withdraw, but we can't because the withdraw code also uses
- * glocks. Warn about the error, dump the glock, then fall
- * through and wait for logd to do the withdraw for us.
- */
- if ((atomic_read(&gl->gl_ail_count) != 0) &&
- (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) {
- gfs2_glock_assert_warn(gl,
- !atomic_read(&gl->gl_ail_count));
- gfs2_dump_glock(NULL, gl, true);
- }
+
+ if (target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED)
glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
- clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
- }
+ spin_lock(&gl->gl_lockref.lock);
skip_inval:
- gfs2_glock_hold(gl);
- /*
- * Check for an error encountered since we called go_sync and go_inval.
- * If so, we can't withdraw from the glock code because the withdraw
- * code itself uses glocks (see function signal_our_withdraw) to
- * change the mount to read-only. Most importantly, we must not call
- * dlm to unlock the glock until the journal is in a known good state
- * (after journal replay) otherwise other nodes may use the object
- * (rgrp or dinode) and then later, journal replay will corrupt the
- * file system. The best we can do here is wait for the logd daemon
- * to see sd_log_error and withdraw, and in the meantime, requeue the
- * work for later.
- *
- * We make a special exception for some system glocks, such as the
- * system statfs inode glock, which needs to be granted before the
- * gfs2_quotad daemon can exit, and that exit needs to finish before
- * we can unmount the withdrawn file system.
- *
- * However, if we're just unlocking the lock (say, for unmount, when
- * gfs2_gl_hash_clear calls clear_glock) and recovery is complete
- * then it's okay to tell dlm to unlock it.
- */
- if (unlikely(sdp->sd_log_error && !gfs2_withdrawn(sdp)))
- gfs2_withdraw_delayed(sdp);
- if (glock_blocked_by_withdraw(gl) &&
- (target != LM_ST_UNLOCKED ||
- test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) {
- if (!is_system_glock(gl)) {
- handle_callback(gl, LM_ST_UNLOCKED, 0, false); /* sets demote */
- /*
- * Ordinarily, we would call dlm and its callback would call
- * finish_xmote, which would call state_change() to the new state.
- * Since we withdrew, we won't call dlm, so call state_change
- * manually, but to the UNLOCKED state we desire.
- */
- state_change(gl, LM_ST_UNLOCKED);
+ if (gfs2_withdrawn(sdp)) {
+ if (target != LM_ST_UNLOCKED)
+ target = LM_OUT_ERROR;
+ goto out;
+ }
+
+ if (ls->ls_ops->lm_lock) {
+ set_bit(GLF_PENDING_REPLY, &gl->gl_flags);
+ spin_unlock(&gl->gl_lockref.lock);
+ ret = ls->ls_ops->lm_lock(gl, target, gh ? gh->gh_flags : 0);
+ spin_lock(&gl->gl_lockref.lock);
+
+ if (!ret) {
+ /* The operation will be completed asynchronously. */
+ gl->gl_lockref.count++;
+ return;
+ }
+ clear_bit(GLF_PENDING_REPLY, &gl->gl_flags);
+
+ if (ret == -ENODEV) {
/*
- * We skip telling dlm to do the locking, so we won't get a
- * reply that would otherwise clear GLF_LOCK. So we clear it here.
+ * The lockspace has been released and the lock has
+ * been unlocked implicitly.
*/
- clear_bit(GLF_LOCK, &gl->gl_flags);
- clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
- gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
- goto out;
+ if (target != LM_ST_UNLOCKED) {
+ target = LM_OUT_ERROR;
+ goto out;
+ }
} else {
- clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
- }
- }
-
- if (sdp->sd_lockstruct.ls_ops->lm_lock) {
- /* lock_dlm */
- ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
- if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED &&
- target == LM_ST_UNLOCKED &&
- test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) {
- finish_xmote(gl, target);
- gfs2_glock_queue_work(gl, 0);
- } else if (ret) {
fs_err(sdp, "lm_lock ret %d\n", ret);
GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp));
+ return;
}
- } else { /* lock_nolock */
- finish_xmote(gl, target);
- gfs2_glock_queue_work(gl, 0);
}
+
out:
- spin_lock(&gl->gl_lockref.lock);
+ /* Complete the operation now. */
+ finish_xmote(gl, target);
+ gl->gl_lockref.count++;
+ gfs2_glock_queue_work(gl, 0);
}
/**
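After this rework, do_xmote() follows a single completion protocol, sketched here in condensed form: only a successful ->lm_lock() call completes asynchronously, through gfs2_glock_complete(); every other outcome, including the nolock case with no ->lm_lock() at all, is completed in place.

    set_bit(GLF_PENDING_REPLY, &gl->gl_flags);
    ret = ls->ls_ops->lm_lock(gl, target, flags);
    if (ret == 0) {
        gl->gl_lockref.count++;    /* dropped once the reply is processed */
        return;                    /* gfs2_glock_complete() finishes up */
    }
    clear_bit(GLF_PENDING_REPLY, &gl->gl_flags);
    /* ... error handling ... */
    finish_xmote(gl, target);      /* no reply coming: complete in place */
    gfs2_glock_queue_work(gl, 0);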
@@ -817,15 +749,26 @@ static void run_queue(struct gfs2_glock *gl, const int nonblock)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
- struct gfs2_holder *gh = NULL;
+ struct gfs2_holder *gh;
- if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
+ if (test_bit(GLF_LOCK, &gl->gl_flags))
return;
+ set_bit(GLF_LOCK, &gl->gl_flags);
+ /*
+ * The GLF_DEMOTE_IN_PROGRESS flag is only set intermittently during
+ * locking operations. We have just started a locking operation by
+ * setting the GLF_LOCK flag, so the GLF_DEMOTE_IN_PROGRESS flag must
+ * be clear.
+ */
GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
- if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
- gl->gl_demote_state != gl->gl_state) {
+ if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
+ if (gl->gl_demote_state == gl->gl_state) {
+ gfs2_demote_wake(gl);
+ goto promote;
+ }
+
if (find_first_holder(gl))
goto out_unlock;
if (nonblock)
@@ -833,30 +776,33 @@ __acquires(&gl->gl_lockref.lock)
set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
gl->gl_target = gl->gl_demote_state;
- } else {
- if (test_bit(GLF_DEMOTE, &gl->gl_flags))
- gfs2_demote_wake(gl);
- if (do_promote(gl) == 0)
- goto out_unlock;
- gh = find_first_waiter(gl);
- gl->gl_target = gh->gh_state;
- if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
- do_error(gl, 0); /* Fail queued try locks */
+ do_xmote(gl, NULL, gl->gl_target);
+ return;
}
+
+promote:
+ do_promote(gl);
+ if (find_first_holder(gl))
+ goto out_unlock;
+ gh = find_first_waiter(gl);
+ if (!gh)
+ goto out_unlock;
+ if (nonblock)
+ goto out_sched;
+ gl->gl_target = gh->gh_state;
+ if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
+ do_error(gl, 0); /* Fail queued try locks */
do_xmote(gl, gh, gl->gl_target);
return;
out_sched:
clear_bit(GLF_LOCK, &gl->gl_flags);
- smp_mb__after_atomic();
gl->gl_lockref.count++;
- __gfs2_glock_queue_work(gl, 0);
+ gfs2_glock_queue_work(gl, 0);
return;
out_unlock:
clear_bit(GLF_LOCK, &gl->gl_flags);
- smp_mb__after_atomic();
- return;
}
/**
@@ -872,17 +818,14 @@ void glock_set_object(struct gfs2_glock *gl, void *object)
prev_object = gl->gl_object;
gl->gl_object = object;
spin_unlock(&gl->gl_lockref.lock);
- if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == NULL)) {
- pr_warn("glock=%u/%llx\n",
- gl->gl_name.ln_type,
- (unsigned long long)gl->gl_name.ln_number);
+ if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == NULL))
gfs2_dump_glock(NULL, gl, true);
- }
}
/**
* glock_clear_object - clear the gl_object field of a glock
* @gl: the glock
+ * @object: object the glock currently points at
*/
void glock_clear_object(struct gfs2_glock *gl, void *object)
{
@@ -892,13 +835,8 @@ void glock_clear_object(struct gfs2_glock *gl, void *object)
prev_object = gl->gl_object;
gl->gl_object = NULL;
spin_unlock(&gl->gl_lockref.lock);
- if (gfs2_assert_warn(gl->gl_name.ln_sbd,
- prev_object == object || prev_object == NULL)) {
- pr_warn("glock=%u/%llx\n",
- gl->gl_name.ln_type,
- (unsigned long long)gl->gl_name.ln_number);
+ if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == object))
gfs2_dump_glock(NULL, gl, true);
- }
}
void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation)
@@ -933,48 +871,76 @@ static void gfs2_glock_poke(struct gfs2_glock *gl)
gfs2_holder_uninit(&gh);
}
-static bool gfs2_try_evict(struct gfs2_glock *gl)
+static struct gfs2_inode *gfs2_grab_existing_inode(struct gfs2_glock *gl)
+{
+ struct gfs2_inode *ip;
+
+ spin_lock(&gl->gl_lockref.lock);
+ ip = gl->gl_object;
+ if (ip && !igrab(&ip->i_inode))
+ ip = NULL;
+ spin_unlock(&gl->gl_lockref.lock);
+ if (ip) {
+ wait_on_new_inode(&ip->i_inode);
+ if (is_bad_inode(&ip->i_inode)) {
+ iput(&ip->i_inode);
+ ip = NULL;
+ }
+ }
+ return ip;
+}
+
+static void gfs2_try_to_evict(struct gfs2_glock *gl)
{
struct gfs2_inode *ip;
- bool evicted = false;
/*
* If there is contention on the iopen glock and we have an inode, try
- * to grab and release the inode so that it can be evicted. This will
- * allow the remote node to go ahead and delete the inode without us
- * having to do it, which will avoid rgrp glock thrashing.
+ * to grab and release the inode so that it can be evicted. The
+ * GLF_DEFER_DELETE flag indicates to gfs2_evict_inode() that the inode
+ * should not be deleted locally. This will allow the remote node to
+ * go ahead and delete the inode without us having to do it, which will
+ * avoid rgrp glock thrashing.
*
* The remote node is likely still holding the corresponding inode
* glock, so it will run before we get to verify that the delete has
- * happened below.
+ * happened below. (Verification is triggered by the call to
+ * gfs2_queue_verify_delete() in gfs2_evict_inode().)
*/
- spin_lock(&gl->gl_lockref.lock);
- ip = gl->gl_object;
- if (ip && !igrab(&ip->i_inode))
- ip = NULL;
- spin_unlock(&gl->gl_lockref.lock);
+ ip = gfs2_grab_existing_inode(gl);
if (ip) {
- gl->gl_no_formal_ino = ip->i_no_formal_ino;
- set_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
+ set_bit(GLF_DEFER_DELETE, &gl->gl_flags);
d_prune_aliases(&ip->i_inode);
iput(&ip->i_inode);
+ clear_bit(GLF_DEFER_DELETE, &gl->gl_flags);
/* If the inode was evicted, gl->gl_object will now be NULL. */
- spin_lock(&gl->gl_lockref.lock);
- ip = gl->gl_object;
- if (ip) {
- clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
- if (!igrab(&ip->i_inode))
- ip = NULL;
- }
- spin_unlock(&gl->gl_lockref.lock);
+ ip = gfs2_grab_existing_inode(gl);
if (ip) {
gfs2_glock_poke(ip->i_gl);
iput(&ip->i_inode);
}
- evicted = !ip;
}
- return evicted;
+}
+
+bool gfs2_queue_try_to_evict(struct gfs2_glock *gl)
+{
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+ if (test_and_set_bit(GLF_TRY_TO_EVICT, &gl->gl_flags))
+ return false;
+ return !mod_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, 0);
+}
+
+bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later)
+{
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ unsigned long delay;
+
+ if (test_and_set_bit(GLF_VERIFY_DELETE, &gl->gl_flags))
+ return false;
+ delay = later ? HZ + get_random_long() % (HZ * 9) : 0;
+ return queue_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, delay);
}
static void delete_work_func(struct work_struct *work)
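Both queueing helpers above share one idempotence pattern: a flag bit is claimed with test_and_set_bit() before the shared gl_delete work is touched, so each kind of request is queued at most once. The randomized delay spreads verification uniformly over [1s, 10s) so that cluster nodes do not all probe the resource group at the same moment. The generic shape, with FLAG standing for GLF_TRY_TO_EVICT or GLF_VERIFY_DELETE:

    if (test_and_set_bit(FLAG, &gl->gl_flags))
        return false;        /* an identical request is already pending */
    /* HZ + get_random_long() % (HZ * 9) is uniform over [1s, 10s): */
    delay = later ? HZ + get_random_long() % (HZ * 9) : 0;
    return queue_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, delay);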
@@ -982,49 +948,34 @@ static void delete_work_func(struct work_struct *work)
struct delayed_work *dwork = to_delayed_work(work);
struct gfs2_glock *gl = container_of(dwork, struct gfs2_glock, gl_delete);
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct inode *inode;
- u64 no_addr = gl->gl_name.ln_number;
+ bool verify_delete = test_and_clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags);
- spin_lock(&gl->gl_lockref.lock);
- clear_bit(GLF_PENDING_DELETE, &gl->gl_flags);
- spin_unlock(&gl->gl_lockref.lock);
+ /*
+ * Check for the GLF_VERIFY_DELETE above: this ensures that we won't
+ * immediately process GLF_VERIFY_DELETE work that the call to
+ * gfs2_try_to_evict() below queues.
+ */
- if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
- /*
- * If we can evict the inode, give the remote node trying to
- * delete the inode some time before verifying that the delete
- * has happened. Otherwise, if we cause contention on the inode glock
- * immediately, the remote node will think that we still have
- * the inode in use, and so it will give up waiting.
- *
- * If we can't evict the inode, signal to the remote node that
- * the inode is still in use. We'll later try to delete the
- * inode locally in gfs2_evict_inode.
- *
- * FIXME: We only need to verify that the remote node has
- * deleted the inode because nodes before this remote delete
- * rework won't cooperate. At a later time, when we no longer
- * care about compatibility with such nodes, we can skip this
- * step entirely.
- */
- if (gfs2_try_evict(gl)) {
- if (gfs2_queue_delete_work(gl, 5 * HZ))
+ if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags))
+ gfs2_try_to_evict(gl);
+
+ if (verify_delete) {
+ u64 no_addr = gl->gl_name.ln_number;
+ struct inode *inode;
+
+ inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
+ GFS2_BLKST_UNLINKED);
+ if (IS_ERR(inode)) {
+ if (PTR_ERR(inode) == -EAGAIN &&
+ !test_bit(SDF_KILL, &sdp->sd_flags) &&
+ gfs2_queue_verify_delete(gl, true))
return;
+ } else {
+ d_prune_aliases(inode);
+ iput(inode);
}
- goto out;
}
- inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
- GFS2_BLKST_UNLINKED);
- if (IS_ERR(inode)) {
- if (PTR_ERR(inode) == -EAGAIN &&
- (gfs2_queue_delete_work(gl, 5 * HZ)))
- return;
- } else {
- d_prune_aliases(inode);
- iput(inode);
- }
-out:
gfs2_glock_put(gl);
}
@@ -1034,43 +985,44 @@ static void glock_work_func(struct work_struct *work)
struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
unsigned int drop_refs = 1;
- if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
+ spin_lock(&gl->gl_lockref.lock);
+ if (test_bit(GLF_HAVE_REPLY, &gl->gl_flags)) {
+ clear_bit(GLF_HAVE_REPLY, &gl->gl_flags);
finish_xmote(gl, gl->gl_reply);
drop_refs++;
}
- spin_lock(&gl->gl_lockref.lock);
if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
gl->gl_state != LM_ST_UNLOCKED &&
gl->gl_demote_state != LM_ST_EXCLUSIVE) {
- unsigned long holdtime, now = jiffies;
+ if (gl->gl_name.ln_type == LM_TYPE_INODE) {
+ unsigned long holdtime, now = jiffies;
- holdtime = gl->gl_tchange + gl->gl_hold_time;
- if (time_before(now, holdtime))
- delay = holdtime - now;
+ holdtime = gl->gl_tchange + gl->gl_hold_time;
+ if (time_before(now, holdtime))
+ delay = holdtime - now;
+ }
if (!delay) {
clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
- gfs2_set_demote(gl);
+ gfs2_set_demote(GLF_DEMOTE, gl);
}
}
run_queue(gl, 0);
if (delay) {
/* Keep one glock reference for the work we requeue. */
drop_refs--;
- if (gl->gl_name.ln_type != LM_TYPE_INODE)
- delay = 0;
- __gfs2_glock_queue_work(gl, delay);
+ gfs2_glock_queue_work(gl, delay);
}
- /*
- * Drop the remaining glock references manually here. (Mind that
- * __gfs2_glock_queue_work depends on the lockref spinlock begin held
- * here as well.)
- */
+ /* Drop the remaining glock references manually. */
+ GLOCK_BUG_ON(gl, gl->gl_lockref.count < drop_refs);
gl->gl_lockref.count -= drop_refs;
if (!gl->gl_lockref.count) {
- __gfs2_glock_put(gl);
- return;
+ if (gl->gl_state == LM_ST_UNLOCKED) {
+ __gfs2_glock_put(gl);
+ return;
+ }
+ gfs2_glock_add_to_lru(gl);
}
spin_unlock(&gl->gl_lockref.lock);
}
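The drop_refs arithmetic above is the entire ownership story of glock_work_func(); condensed, with the two condition names as stand-ins for the GLF_HAVE_REPLY and delay checks in the real function:

    unsigned int drop_refs = 1;    /* reference owned by this work item */
    if (reply_was_processed)
        drop_refs++;               /* consume the reply's extra reference */
    if (work_was_requeued)
        drop_refs--;               /* passed on to the requeued work item */
    gl->gl_lockref.count -= drop_refs;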
@@ -1106,6 +1058,8 @@ again:
out:
rcu_read_unlock();
finish_wait(wq, &wait.wait);
+ if (gl)
+ gfs2_glock_remove_from_lru(gl);
return gl;
}
@@ -1126,19 +1080,15 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
const struct gfs2_glock_operations *glops, int create,
struct gfs2_glock **glp)
{
- struct super_block *s = sdp->sd_vfs;
struct lm_lockname name = { .ln_number = number,
.ln_type = glops->go_type,
.ln_sbd = sdp };
struct gfs2_glock *gl, *tmp;
struct address_space *mapping;
- int ret = 0;
gl = find_insert_glock(&name, NULL);
- if (gl) {
- *glp = gl;
- return 0;
- }
+ if (gl)
+ goto found;
if (!create)
return -ENOENT;
@@ -1166,10 +1116,12 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
atomic_inc(&sdp->sd_glock_disposal);
gl->gl_node.next = NULL;
- gl->gl_flags = glops->go_instantiate ? BIT(GLF_INSTANTIATE_NEEDED) : 0;
+ gl->gl_flags = BIT(GLF_INITIAL);
+ if (glops->go_instantiate)
+ gl->gl_flags |= BIT(GLF_INSTANTIATE_NEEDED);
gl->gl_name = name;
+ lockref_init(&gl->gl_lockref);
lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass);
- gl->gl_lockref.count = 1;
gl->gl_state = LM_ST_UNLOCKED;
gl->gl_target = LM_ST_UNLOCKED;
gl->gl_demote_state = LM_ST_EXCLUSIVE;
@@ -1189,32 +1141,31 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
mapping = gfs2_glock2aspace(gl);
if (mapping) {
+ gfp_t gfp_mask;
+
mapping->a_ops = &gfs2_meta_aops;
- mapping->host = s->s_bdev->bd_inode;
+ mapping->host = sdp->sd_inode;
mapping->flags = 0;
- mapping_set_gfp_mask(mapping, GFP_NOFS);
- mapping->private_data = NULL;
+ gfp_mask = mapping_gfp_mask(sdp->sd_inode->i_mapping);
+ mapping_set_gfp_mask(mapping, gfp_mask);
+ mapping->i_private_data = NULL;
mapping->writeback_index = 0;
}
tmp = find_insert_glock(&name, gl);
- if (!tmp) {
- *glp = gl;
- goto out;
- }
- if (IS_ERR(tmp)) {
- ret = PTR_ERR(tmp);
- goto out_free;
- }
- *glp = tmp;
+ if (tmp) {
+ gfs2_glock_dealloc(&gl->gl_rcu);
+ if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+ wake_up(&sdp->sd_kill_wait);
-out_free:
- gfs2_glock_dealloc(&gl->gl_rcu);
- if (atomic_dec_and_test(&sdp->sd_glock_disposal))
- wake_up(&sdp->sd_glock_wait);
+ if (IS_ERR(tmp))
+ return PTR_ERR(tmp);
+ gl = tmp;
+ }
-out:
- return ret;
+found:
+ *glp = gl;
+ return 0;
}
/**
@@ -1223,7 +1174,7 @@ out:
* @state: the state we're requesting
* @flags: the modifier flags
* @gh: the holder structure
- *
+ * @ip: caller's return address for debugging
*/
void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
@@ -1384,7 +1335,7 @@ out:
}
/**
- * handle_callback - process a demote request
+ * request_demote - process a demote request
* @gl: the glock
* @state: the state the caller wants us to change to
* @delay: zero to demote immediately; otherwise pending demote
@@ -1394,13 +1345,10 @@ out:
* practice: LM_ST_SHARED and LM_ST_UNLOCKED
*/
-static void handle_callback(struct gfs2_glock *gl, unsigned int state,
- unsigned long delay, bool remote)
+static void request_demote(struct gfs2_glock *gl, unsigned int state,
+ unsigned long delay, bool remote)
{
- if (delay)
- set_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
- else
- gfs2_set_demote(gl);
+ gfs2_set_demote(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, gl);
if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
gl->gl_demote_state = state;
gl->gl_demote_time = jiffies;
@@ -1432,13 +1380,29 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
va_end(args);
}
+static bool gfs2_should_queue_trylock(struct gfs2_glock *gl,
+ struct gfs2_holder *gh)
+{
+ struct gfs2_holder *current_gh, *gh2;
+
+ current_gh = find_first_holder(gl);
+ if (current_gh && !may_grant(gl, current_gh, gh))
+ return false;
+
+ list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
+ if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
+ continue;
+ if (!(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
+ return false;
+ }
+ return true;
+}
+
static inline bool pid_is_meaningful(const struct gfs2_holder *gh)
{
if (!(gh->gh_flags & GL_NOPID))
return true;
- if (gh->gh_state == LM_ST_UNLOCKED)
- return true;
- return false;
+ return !test_bit(HIF_HOLDER, &gh->gh_iflags);
}
/**
@@ -1452,28 +1416,20 @@ static inline bool pid_is_meaningful(const struct gfs2_holder *gh)
*/
static inline void add_to_queue(struct gfs2_holder *gh)
-__releases(&gl->gl_lockref.lock)
-__acquires(&gl->gl_lockref.lock)
{
struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct list_head *insert_pt = NULL;
struct gfs2_holder *gh2;
- int try_futile = 0;
GLOCK_BUG_ON(gl, gh->gh_owner_pid == NULL);
if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
GLOCK_BUG_ON(gl, true);
- if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
- if (test_bit(GLF_LOCK, &gl->gl_flags)) {
- struct gfs2_holder *current_gh;
-
- current_gh = find_first_holder(gl);
- try_futile = !may_grant(gl, current_gh, gh);
- }
- if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
- goto fail;
+ if ((gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) &&
+ !gfs2_should_queue_trylock(gl, gh)) {
+ gh->gh_error = GLR_TRYFAILED;
+ gfs2_holder_wake(gh);
+ return;
}
list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
@@ -1485,37 +1441,10 @@ __acquires(&gl->gl_lockref.lock)
continue;
goto trap_recursive;
}
- list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
- if (try_futile &&
- !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
-fail:
- gh->gh_error = GLR_TRYFAILED;
- gfs2_holder_wake(gh);
- return;
- }
- if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
- continue;
- if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
- insert_pt = &gh2->gh_list;
- }
trace_gfs2_glock_queue(gh, 1);
gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
- if (likely(insert_pt == NULL)) {
- list_add_tail(&gh->gh_list, &gl->gl_holders);
- if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
- goto do_cancel;
- return;
- }
- list_add_tail(&gh->gh_list, insert_pt);
-do_cancel:
- gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
- if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
- spin_unlock(&gl->gl_lockref.lock);
- if (sdp->sd_lockstruct.ls_ops->lm_cancel)
- sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
- spin_lock(&gl->gl_lockref.lock);
- }
+ list_add_tail(&gh->gh_list, &gl->gl_holders);
return;
trap_recursive:
@@ -1543,26 +1472,44 @@ trap_recursive:
int gfs2_glock_nq(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
- int error = 0;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ int error;
- if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP))
+ if (gfs2_withdrawn(sdp))
return -EIO;
- if (test_bit(GLF_LRU, &gl->gl_flags))
- gfs2_glock_remove_from_lru(gl);
+ if (gh->gh_flags & GL_NOBLOCK) {
+ struct gfs2_holder *current_gh;
+
+ error = -ECHILD;
+ spin_lock(&gl->gl_lockref.lock);
+ if (find_last_waiter(gl))
+ goto unlock;
+ current_gh = find_first_holder(gl);
+ if (!may_grant(gl, current_gh, gh))
+ goto unlock;
+ set_bit(HIF_HOLDER, &gh->gh_iflags);
+ list_add_tail(&gh->gh_list, &gl->gl_holders);
+ trace_gfs2_promote(gh);
+ error = 0;
+unlock:
+ spin_unlock(&gl->gl_lockref.lock);
+ return error;
+ }
gh->gh_error = 0;
spin_lock(&gl->gl_lockref.lock);
add_to_queue(gh);
- if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) &&
- test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) {
- set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+ if (unlikely((LM_FLAG_RECOVER & gh->gh_flags) &&
+ test_and_clear_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags))) {
+ set_bit(GLF_HAVE_REPLY, &gl->gl_flags);
gl->gl_lockref.count++;
- __gfs2_glock_queue_work(gl, 0);
+ gfs2_glock_queue_work(gl, 0);
}
run_queue(gl, 1);
spin_unlock(&gl->gl_lockref.lock);
+ error = 0;
if (!(gh->gh_flags & GL_ASYNC))
error = gfs2_glock_wait(gh);
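GL_NOBLOCK gives gfs2_glock_nq() a strictly non-sleeping fast path: the lock is either granted immediately or the call fails with -ECHILD, the conventional "retry in ref-walk mode" error for RCU-walk lookups. A caller sketch under that assumption:

    struct gfs2_holder gh;
    int error;

    gfs2_holder_init(gl, LM_ST_SHARED, GL_NOBLOCK, &gh);
    error = gfs2_glock_nq(&gh);    /* never blocks with GL_NOBLOCK */
    if (error)
        return error;              /* -ECHILD: caller must retry blocking */
    /* ... fast-path work ...; drop with gfs2_glock_dq(&gh) as usual */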
@@ -1581,12 +1528,6 @@ int gfs2_glock_poll(struct gfs2_holder *gh)
return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
}
-static inline bool needs_demote(struct gfs2_glock *gl)
-{
- return (test_bit(GLF_DEMOTE, &gl->gl_flags) ||
- test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags));
-}
-
static void __gfs2_glock_dq(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
@@ -1595,11 +1536,11 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh)
/*
* This holder should not be cached, so mark it for demote.
- * Note: this should be done before the check for needs_demote
- * below.
+ * Note: this should be done before the glock_needs_demote
+ * check below.
*/
if (gh->gh_flags & GL_NOCACHE)
- handle_callback(gl, LM_ST_UNLOCKED, 0, false);
+ request_demote(gl, LM_ST_UNLOCKED, 0, false);
list_del_init(&gh->gh_list);
clear_bit(HIF_HOLDER, &gh->gh_iflags);
@@ -1609,21 +1550,18 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh)
* If there hasn't been a demote request we are done.
* (Let the remaining holders, if any, keep holding it.)
*/
- if (!needs_demote(gl)) {
+ if (!glock_needs_demote(gl)) {
if (list_empty(&gl->gl_holders))
fast_path = 1;
}
- if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
- gfs2_glock_add_to_lru(gl);
-
if (unlikely(!fast_path)) {
gl->gl_lockref.count++;
if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
!test_bit(GLF_DEMOTE, &gl->gl_flags) &&
gl->gl_name.ln_type == LM_TYPE_INODE)
delay = gl->gl_hold_time;
- __gfs2_glock_queue_work(gl, delay);
+ gfs2_glock_queue_work(gl, delay);
}
}
@@ -1635,7 +1573,6 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh)
void gfs2_glock_dq(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
- struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
spin_lock(&gl->gl_lockref.lock);
if (!gfs2_holder_queued(gh)) {
@@ -1647,29 +1584,19 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
}
if (list_is_first(&gh->gh_list, &gl->gl_holders) &&
- !test_bit(HIF_HOLDER, &gh->gh_iflags)) {
+ !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
+ test_bit(GLF_LOCK, &gl->gl_flags) &&
+ !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
+ !test_bit(GLF_CANCELING, &gl->gl_flags)) {
+ set_bit(GLF_CANCELING, &gl->gl_flags);
spin_unlock(&gl->gl_lockref.lock);
gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl);
wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
spin_lock(&gl->gl_lockref.lock);
- }
-
- /*
- * If we're in the process of file system withdraw, we cannot just
- * dequeue any glocks until our journal is recovered, lest we introduce
- * file system corruption. We need two exceptions to this rule: We need
- * to allow unlocking of nondisk glocks and the glock for our own
- * journal that needs recovery.
- */
- if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
- glock_blocked_by_withdraw(gl) &&
- gh->gh_gl != sdp->sd_jinode_gl) {
- sdp->sd_glock_dqs_held++;
- spin_unlock(&gl->gl_lockref.lock);
- might_sleep();
- wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
- TASK_UNINTERRUPTIBLE);
- spin_lock(&gl->gl_lockref.lock);
+ clear_bit(GLF_CANCELING, &gl->gl_flags);
+ clear_bit(GLF_LOCK, &gl->gl_flags);
+ if (!gfs2_holder_queued(gh))
+ goto out;
}
__gfs2_glock_dq(gh);
@@ -1833,21 +1760,23 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
{
unsigned long delay = 0;
- unsigned long holdtime;
- unsigned long now = jiffies;
gfs2_glock_hold(gl);
spin_lock(&gl->gl_lockref.lock);
- holdtime = gl->gl_tchange + gl->gl_hold_time;
if (!list_empty(&gl->gl_holders) &&
gl->gl_name.ln_type == LM_TYPE_INODE) {
+ unsigned long now = jiffies;
+ unsigned long holdtime;
+
+ holdtime = gl->gl_tchange + gl->gl_hold_time;
+
if (time_before(now, holdtime))
delay = holdtime - now;
- if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
+ if (test_bit(GLF_HAVE_REPLY, &gl->gl_flags))
delay = gl->gl_hold_time;
}
- handle_callback(gl, state, delay, true);
- __gfs2_glock_queue_work(gl, delay);
+ request_demote(gl, state, delay, true);
+ gfs2_glock_queue_work(gl, delay);
spin_unlock(&gl->gl_lockref.lock);
}
@@ -1857,7 +1786,7 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
*
* Glocks are not frozen if (a) the result of the dlm operation is
* an error, (b) the locking operation was an unlock operation or
- * (c) if there is a "noexp" flagged request anywhere in the queue
+ * (c) if there is a "recover" flagged request anywhere in the queue
*
* Returns: 1 if freezing should occur, 0 otherwise
*/
@@ -1874,7 +1803,7 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl)
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
if (test_bit(HIF_HOLDER, &gh->gh_iflags))
continue;
- if (LM_FLAG_NOEXP & gh->gh_flags)
+ if (LM_FLAG_RECOVER & gh->gh_flags)
return 0;
}
@@ -1895,19 +1824,20 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
spin_lock(&gl->gl_lockref.lock);
+ clear_bit(GLF_PENDING_REPLY, &gl->gl_flags);
gl->gl_reply = ret;
if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
if (gfs2_should_freeze(gl)) {
- set_bit(GLF_FROZEN, &gl->gl_flags);
+ set_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags);
spin_unlock(&gl->gl_lockref.lock);
return;
}
}
gl->gl_lockref.count++;
- set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
- __gfs2_glock_queue_work(gl, 0);
+ set_bit(GLF_HAVE_REPLY, &gl->gl_flags);
+ gfs2_glock_queue_work(gl, 0);
spin_unlock(&gl->gl_lockref.lock);
}
@@ -1927,6 +1857,16 @@ static int glock_cmp(void *priv, const struct list_head *a,
return 0;
}
+static bool can_free_glock(struct gfs2_glock *gl)
+{
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+ return !test_bit(GLF_LOCK, &gl->gl_flags) &&
+ !gl->gl_lockref.count &&
+ (!test_bit(GLF_LFLUSH, &gl->gl_flags) ||
+ test_bit(SDF_KILL, &sdp->sd_flags));
+}
+
/**
* gfs2_dispose_glock_lru - Demote a list of glocks
* @list: The list to dispose of
@@ -1941,37 +1881,38 @@ static int glock_cmp(void *priv, const struct list_head *a,
* private)
*/
-static void gfs2_dispose_glock_lru(struct list_head *list)
+static unsigned long gfs2_dispose_glock_lru(struct list_head *list)
__releases(&lru_lock)
__acquires(&lru_lock)
{
struct gfs2_glock *gl;
+ unsigned long freed = 0;
list_sort(NULL, list, glock_cmp);
while(!list_empty(list)) {
gl = list_first_entry(list, struct gfs2_glock, gl_lru);
- list_del_init(&gl->gl_lru);
- clear_bit(GLF_LRU, &gl->gl_flags);
if (!spin_trylock(&gl->gl_lockref.lock)) {
add_back_to_lru:
- list_add(&gl->gl_lru, &lru_list);
- set_bit(GLF_LRU, &gl->gl_flags);
- atomic_inc(&lru_count);
+ list_move(&gl->gl_lru, &lru_list);
continue;
}
- if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
+ if (!can_free_glock(gl)) {
spin_unlock(&gl->gl_lockref.lock);
goto add_back_to_lru;
}
+ list_del_init(&gl->gl_lru);
+ atomic_dec(&lru_count);
+ clear_bit(GLF_LRU, &gl->gl_flags);
+ freed++;
gl->gl_lockref.count++;
- if (demote_ok(gl))
- handle_callback(gl, LM_ST_UNLOCKED, 0, false);
- WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
- __gfs2_glock_queue_work(gl, 0);
+ if (gl->gl_state != LM_ST_UNLOCKED)
+ request_demote(gl, LM_ST_UNLOCKED, 0, false);
+ gfs2_glock_queue_work(gl, 0);
spin_unlock(&gl->gl_lockref.lock);
cond_resched_lock(&lru_lock);
}
+ return freed;
}
/**
@@ -1983,30 +1924,21 @@ add_back_to_lru:
* gfs2_dispose_glock_lru() above.
*/
-static long gfs2_scan_glock_lru(int nr)
+static unsigned long gfs2_scan_glock_lru(unsigned long nr)
{
- struct gfs2_glock *gl;
- LIST_HEAD(skipped);
+ struct gfs2_glock *gl, *next;
LIST_HEAD(dispose);
- long freed = 0;
+ unsigned long freed = 0;
spin_lock(&lru_lock);
- while ((nr-- >= 0) && !list_empty(&lru_list)) {
- gl = list_first_entry(&lru_list, struct gfs2_glock, gl_lru);
-
- /* Test for being demotable */
- if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
+ list_for_each_entry_safe(gl, next, &lru_list, gl_lru) {
+ if (!nr--)
+ break;
+ if (can_free_glock(gl))
list_move(&gl->gl_lru, &dispose);
- atomic_dec(&lru_count);
- freed++;
- continue;
- }
-
- list_move(&gl->gl_lru, &skipped);
}
- list_splice(&skipped, &lru_list);
if (!list_empty(&dispose))
- gfs2_dispose_glock_lru(&dispose);
+ freed = gfs2_dispose_glock_lru(&dispose);
spin_unlock(&lru_lock);
return freed;
@@ -2026,11 +1958,7 @@ static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink,
return vfs_pressure_ratio(atomic_read(&lru_count));
}
-static struct shrinker glock_shrinker = {
- .seeks = DEFAULT_SEEKS,
- .count_objects = gfs2_glock_shrink_count,
- .scan_objects = gfs2_glock_shrink_scan,
-};
+static struct shrinker *glock_shrinker;
/**
* glock_hash_walk - Call a function for glock in a hash bucket
@@ -2063,37 +1991,21 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
rhashtable_walk_exit(&iter);
}
-bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay)
-{
- bool queued;
-
- spin_lock(&gl->gl_lockref.lock);
- queued = queue_delayed_work(gfs2_delete_workqueue,
- &gl->gl_delete, delay);
- if (queued)
- set_bit(GLF_PENDING_DELETE, &gl->gl_flags);
- spin_unlock(&gl->gl_lockref.lock);
- return queued;
-}
-
void gfs2_cancel_delete_work(struct gfs2_glock *gl)
{
- if (cancel_delayed_work(&gl->gl_delete)) {
- clear_bit(GLF_PENDING_DELETE, &gl->gl_flags);
+ clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags);
+ clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags);
+ if (cancel_delayed_work(&gl->gl_delete))
gfs2_glock_put(gl);
- }
-}
-
-bool gfs2_delete_work_queued(const struct gfs2_glock *gl)
-{
- return test_bit(GLF_PENDING_DELETE, &gl->gl_flags);
}
static void flush_delete_work(struct gfs2_glock *gl)
{
if (gl->gl_name.ln_type == LM_TYPE_IOPEN) {
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
if (cancel_delayed_work(&gl->gl_delete)) {
- queue_delayed_work(gfs2_delete_workqueue,
+ queue_delayed_work(sdp->sd_delete_wq,
&gl->gl_delete, 0);
}
}
@@ -2102,7 +2014,7 @@ static void flush_delete_work(struct gfs2_glock *gl)
void gfs2_flush_delete_work(struct gfs2_sbd *sdp)
{
glock_hash_walk(flush_delete_work, sdp);
- flush_workqueue(gfs2_delete_workqueue);
+ flush_workqueue(sdp->sd_delete_wq);
}
/**
@@ -2113,12 +2025,16 @@ void gfs2_flush_delete_work(struct gfs2_sbd *sdp)
static void thaw_glock(struct gfs2_glock *gl)
{
- if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
+ if (!test_and_clear_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags))
return;
if (!lockref_get_not_dead(&gl->gl_lockref))
return;
- set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+
+ gfs2_glock_remove_from_lru(gl);
+ spin_lock(&gl->gl_lockref.lock);
+ set_bit(GLF_HAVE_REPLY, &gl->gl_flags);
gfs2_glock_queue_work(gl, 0);
+ spin_unlock(&gl->gl_lockref.lock);
}
/**
@@ -2135,8 +2051,8 @@ static void clear_glock(struct gfs2_glock *gl)
if (!__lockref_is_dead(&gl->gl_lockref)) {
gl->gl_lockref.count++;
if (gl->gl_state != LM_ST_UNLOCKED)
- handle_callback(gl, LM_ST_UNLOCKED, 0, false);
- __gfs2_glock_queue_work(gl, 0);
+ request_demote(gl, LM_ST_UNLOCKED, 0, false);
+ gfs2_glock_queue_work(gl, 0);
}
spin_unlock(&gl->gl_lockref.lock);
}
@@ -2164,18 +2080,26 @@ static void dump_glock_func(struct gfs2_glock *gl)
dump_glock(NULL, gl, true);
}
-static void withdraw_dq(struct gfs2_glock *gl)
+static void withdraw_glock(struct gfs2_glock *gl)
{
spin_lock(&gl->gl_lockref.lock);
- if (!__lockref_is_dead(&gl->gl_lockref) &&
- glock_blocked_by_withdraw(gl))
+ if (!__lockref_is_dead(&gl->gl_lockref)) {
+ /*
+ * We don't want to write back any more dirty data. Unlock the
+ * remaining inode and resource group glocks; this will cause
+ * their ->go_inval() hooks to toss out all the remaining
+ * cached data, dirty or not.
+ */
+ if (gl->gl_ops->go_inval && gl->gl_state != LM_ST_UNLOCKED)
+ request_demote(gl, LM_ST_UNLOCKED, 0, false);
do_error(gl, LM_OUT_ERROR); /* remove pending waiters */
+ }
spin_unlock(&gl->gl_lockref.lock);
}
-void gfs2_gl_dq_holders(struct gfs2_sbd *sdp)
+void gfs2_withdraw_glocks(struct gfs2_sbd *sdp)
{
- glock_hash_walk(withdraw_dq, sdp);
+ glock_hash_walk(withdraw_glock, sdp);
}
/**
@@ -2187,14 +2111,31 @@ void gfs2_gl_dq_holders(struct gfs2_sbd *sdp)
void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
{
+ unsigned long start = jiffies;
+ bool timed_out = false;
+
set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
- flush_workqueue(glock_workqueue);
+ flush_workqueue(sdp->sd_glock_wq);
glock_hash_walk(clear_glock, sdp);
- flush_workqueue(glock_workqueue);
- wait_event_timeout(sdp->sd_glock_wait,
- atomic_read(&sdp->sd_glock_disposal) == 0,
- HZ * 600);
+ flush_workqueue(sdp->sd_glock_wq);
+
+ while (!timed_out) {
+ wait_event_timeout(sdp->sd_kill_wait,
+ !atomic_read(&sdp->sd_glock_disposal),
+ HZ * 60);
+ if (!atomic_read(&sdp->sd_glock_disposal))
+ break;
+ timed_out = time_after(jiffies, start + (HZ * 600));
+ fs_warn(sdp, "%u glocks left after %u seconds%s\n",
+ atomic_read(&sdp->sd_glock_disposal),
+ jiffies_to_msecs(jiffies - start) / 1000,
+ timed_out ? ":" : "; still waiting");
+ }
+ gfs2_lm_unmount(sdp);
+ gfs2_free_dead_glocks(sdp);
glock_hash_walk(dump_glock_func, sdp);
+ destroy_workqueue(sdp->sd_glock_wq);
+ sdp->sd_glock_wq = NULL;
}
static const char *state2str(unsigned state)
@@ -2219,12 +2160,10 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
*p++ = 't';
if (flags & LM_FLAG_TRY_1CB)
*p++ = 'T';
- if (flags & LM_FLAG_NOEXP)
+ if (flags & LM_FLAG_RECOVER)
*p++ = 'e';
if (flags & LM_FLAG_ANY)
*p++ = 'A';
- if (flags & LM_FLAG_PRIORITY)
- *p++ = 'p';
if (flags & LM_FLAG_NODE_SCOPE)
*p++ = 'n';
if (flags & GL_ASYNC)
@@ -2292,13 +2231,13 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
*p++ = 'y';
if (test_bit(GLF_LFLUSH, gflags))
*p++ = 'f';
- if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags))
- *p++ = 'i';
- if (test_bit(GLF_REPLY_PENDING, gflags))
+ if (test_bit(GLF_PENDING_REPLY, gflags))
+ *p++ = 'R';
+ if (test_bit(GLF_HAVE_REPLY, gflags))
*p++ = 'r';
if (test_bit(GLF_INITIAL, gflags))
- *p++ = 'I';
- if (test_bit(GLF_FROZEN, gflags))
+ *p++ = 'a';
+ if (test_bit(GLF_HAVE_FROZEN_REPLY, gflags))
*p++ = 'F';
if (!list_empty(&gl->gl_holders))
*p++ = 'q';
@@ -2308,14 +2247,18 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
*p++ = 'o';
if (test_bit(GLF_BLOCKING, gflags))
*p++ = 'b';
- if (test_bit(GLF_PENDING_DELETE, gflags))
- *p++ = 'P';
- if (test_bit(GLF_FREEING, gflags))
- *p++ = 'x';
if (test_bit(GLF_INSTANTIATE_NEEDED, gflags))
*p++ = 'n';
if (test_bit(GLF_INSTANTIATE_IN_PROG, gflags))
*p++ = 'N';
+ if (test_bit(GLF_TRY_TO_EVICT, gflags))
+ *p++ = 'e';
+ if (test_bit(GLF_VERIFY_DELETE, gflags))
+ *p++ = 'E';
+ if (test_bit(GLF_DEFER_DELETE, gflags))
+ *p++ = 's';
+ if (test_bit(GLF_CANCELING, gflags))
+ *p++ = 'C';
*p = 0;
return buf;
}
@@ -2459,28 +2402,16 @@ int __init gfs2_glock_init(void)
if (ret < 0)
return ret;
- glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
- WQ_HIGHPRI | WQ_FREEZABLE, 0);
- if (!glock_workqueue) {
- rhashtable_destroy(&gl_hash_table);
- return -ENOMEM;
- }
- gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
- WQ_MEM_RECLAIM | WQ_FREEZABLE,
- 0);
- if (!gfs2_delete_workqueue) {
- destroy_workqueue(glock_workqueue);
+ glock_shrinker = shrinker_alloc(0, "gfs2-glock");
+ if (!glock_shrinker) {
rhashtable_destroy(&gl_hash_table);
return -ENOMEM;
}
- ret = register_shrinker(&glock_shrinker, "gfs2-glock");
- if (ret) {
- destroy_workqueue(gfs2_delete_workqueue);
- destroy_workqueue(glock_workqueue);
- rhashtable_destroy(&gl_hash_table);
- return ret;
- }
+ glock_shrinker->count_objects = gfs2_glock_shrink_count;
+ glock_shrinker->scan_objects = gfs2_glock_shrink_scan;
+
+ shrinker_register(glock_shrinker);
for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++)
init_waitqueue_head(glock_wait_table + i);
@@ -2490,10 +2421,8 @@ int __init gfs2_glock_init(void)
void gfs2_glock_exit(void)
{
- unregister_shrinker(&glock_shrinker);
+ shrinker_free(glock_shrinker);
rhashtable_destroy(&gl_hash_table);
- destroy_workqueue(glock_workqueue);
- destroy_workqueue(gfs2_delete_workqueue);
}
static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n)
@@ -2503,8 +2432,7 @@ static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n)
if (gl) {
if (n == 0)
return;
- if (!lockref_put_not_zero(&gl->gl_lockref))
- gfs2_glock_queue_put(gl);
+ gfs2_glock_put_async(gl);
}
for (;;) {
gl = rhashtable_walk_next(&gi->hti);
@@ -2726,22 +2654,18 @@ static struct file *gfs2_glockfd_next_file(struct gfs2_glockfd_iter *i)
i->file = NULL;
}
- rcu_read_lock();
for(;; i->fd++) {
- struct inode *inode;
-
- i->file = task_lookup_next_fd_rcu(i->task, &i->fd);
+ i->file = fget_task_next(i->task, &i->fd);
if (!i->file) {
i->fd = 0;
break;
}
- inode = file_inode(i->file);
- if (inode->i_sb != i->sb)
- continue;
- if (get_file_rcu(i->file))
+
+ if (file_inode(i->file)->i_sb == i->sb)
break;
+
+ fput(i->file);
}
- rcu_read_unlock();
return i->file;
}
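The iterator is rewritten on top of fget_task_next(), which takes the file reference itself, so the explicit rcu_read_lock()/get_file_rcu() dance goes away. A sketch of the general usage, assuming task and sb are in scope (the increment convention follows the for loop above):

unsigned int fd = 0;
struct file *file;

while ((file = fget_task_next(task, &fd))) {
	if (file_inode(file)->i_sb == sb)
		break;		/* caller now owns the file reference */
	fput(file);		/* wrong superblock: drop it and move on */
	fd++;
}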
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index f37ac087e2c1..55d5985f32a0 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -58,23 +58,19 @@ enum {
* LM_FLAG_TRY_1CB
* Send one blocking callback if TRY is set and the lock is not granted.
*
- * LM_FLAG_NOEXP
+ * LM_FLAG_RECOVER
* GFS sets this flag on lock requests it makes while doing journal recovery.
- * These special requests should not be blocked due to the recovery like
- * ordinary locks would be.
+ * While ordinary requests are blocked until the end of recovery, requests
+ * with this flag set are allowed to proceed.
*
* LM_FLAG_ANY
* A SHARED request may also be granted in DEFERRED, or a DEFERRED request may
* also be granted in SHARED. The preferred state is whichever is compatible
* with other granted locks, or the specified state if no other locks exist.
*
- * LM_FLAG_PRIORITY
- * Override fairness considerations. Suppose a lock is held in a shared state
- * and there is a pending request for the deferred state. A shared lock
- * request with the priority flag would be allowed to bypass the deferred
- * request and directly join the other shared lock. A shared lock request
- * without the priority flag might be forced to wait until the deferred
- * requested had acquired and released the lock.
+ * In addition, when a lock is already held in EX mode locally, a SHARED or
+ * DEFERRED mode request with the LM_FLAG_ANY flag set will be granted.
+ * (The LM_FLAG_ANY flag is currently only used for SHARED mode requests.)
*
* LM_FLAG_NODE_SCOPE
* This holder agrees to share the lock within this node. In other words,
@@ -84,15 +80,15 @@ enum {
#define LM_FLAG_TRY 0x0001
#define LM_FLAG_TRY_1CB 0x0002
-#define LM_FLAG_NOEXP 0x0004
+#define LM_FLAG_RECOVER 0x0004
#define LM_FLAG_ANY 0x0008
-#define LM_FLAG_PRIORITY 0x0010
#define LM_FLAG_NODE_SCOPE 0x0020
#define GL_ASYNC 0x0040
#define GL_EXACT 0x0080
#define GL_SKIP 0x0100
#define GL_NOPID 0x0200
#define GL_NOCACHE 0x0400
+#define GL_NOBLOCK 0x0800
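As a usage sketch of the flags documented above (not part of this patch): a read-side caller that is happy with either SHARED or DEFERRED would combine LM_ST_SHARED with LM_FLAG_ANY through the existing gfs2_glock_nq_init() helper:

struct gfs2_holder gh;
int error;

error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
if (error)
	return error;
/* ... read under the glock ... */
gfs2_glock_dq_uninit(&gh);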
/*
* lm_async_cb return flags
@@ -100,12 +96,22 @@ enum {
* LM_OUT_ST_MASK
* Masks the lower two bits of lock state in the returned value.
*
+ * LM_OUT_TRY_AGAIN
+ * The trylock request failed.
+ *
+ * LM_OUT_DEADLOCK
+ * The lock request failed because it would deadlock.
+ *
* LM_OUT_CANCELED
* The lock request was canceled.
*
+ * LM_OUT_ERROR
+ * The lock request timed out or failed.
*/
#define LM_OUT_ST_MASK 0x00000003
+#define LM_OUT_TRY_AGAIN 0x00000020
+#define LM_OUT_DEADLOCK 0x00000010
#define LM_OUT_CANCELED 0x00000008
#define LM_OUT_ERROR 0x00000004
@@ -130,7 +136,7 @@ struct lm_lockops {
void (*lm_first_done) (struct gfs2_sbd *sdp);
void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid,
unsigned int result);
- void (*lm_unmount) (struct gfs2_sbd *sdp);
+ void (*lm_unmount) (struct gfs2_sbd *sdp, bool clean);
void (*lm_withdraw) (struct gfs2_sbd *sdp);
void (*lm_put_lock) (struct gfs2_glock *gl);
int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
@@ -144,7 +150,6 @@ struct gfs2_glock_aspace {
struct address_space mapping;
};
-extern struct workqueue_struct *gfs2_delete_workqueue;
static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
{
struct gfs2_holder *gh;
@@ -166,21 +171,6 @@ out:
return gh;
}
-static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl)
-{
- return gl->gl_state == LM_ST_EXCLUSIVE;
-}
-
-static inline int gfs2_glock_is_held_dfrd(struct gfs2_glock *gl)
-{
- return gl->gl_state == LM_ST_DEFERRED;
-}
-
-static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl)
-{
- return gl->gl_state == LM_ST_SHARED;
-}
-
static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl)
{
if (gl->gl_ops->go_flags & GLOF_ASPACE) {
@@ -191,40 +181,40 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl)
return NULL;
}
-extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
- const struct gfs2_glock_operations *glops,
- int create, struct gfs2_glock **glp);
-extern struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl);
-extern void gfs2_glock_put(struct gfs2_glock *gl);
-extern void gfs2_glock_queue_put(struct gfs2_glock *gl);
+int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
+ const struct gfs2_glock_operations *glops,
+ int create, struct gfs2_glock **glp);
+struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl);
+void gfs2_glock_put(struct gfs2_glock *gl);
+void gfs2_glock_put_async(struct gfs2_glock *gl);
-extern void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
- u16 flags, struct gfs2_holder *gh,
- unsigned long ip);
+void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
+ u16 flags, struct gfs2_holder *gh,
+ unsigned long ip);
static inline void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
u16 flags, struct gfs2_holder *gh) {
__gfs2_holder_init(gl, state, flags, gh, _RET_IP_);
}
-extern void gfs2_holder_reinit(unsigned int state, u16 flags,
- struct gfs2_holder *gh);
-extern void gfs2_holder_uninit(struct gfs2_holder *gh);
-extern int gfs2_glock_nq(struct gfs2_holder *gh);
-extern int gfs2_glock_poll(struct gfs2_holder *gh);
-extern int gfs2_instantiate(struct gfs2_holder *gh);
-extern int gfs2_glock_holder_ready(struct gfs2_holder *gh);
-extern int gfs2_glock_wait(struct gfs2_holder *gh);
-extern int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs);
-extern void gfs2_glock_dq(struct gfs2_holder *gh);
-extern void gfs2_glock_dq_wait(struct gfs2_holder *gh);
-extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
-extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
- const struct gfs2_glock_operations *glops,
- unsigned int state, u16 flags,
- struct gfs2_holder *gh);
-extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
-extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
-extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl,
+void gfs2_holder_reinit(unsigned int state, u16 flags,
+ struct gfs2_holder *gh);
+void gfs2_holder_uninit(struct gfs2_holder *gh);
+int gfs2_glock_nq(struct gfs2_holder *gh);
+int gfs2_glock_poll(struct gfs2_holder *gh);
+int gfs2_instantiate(struct gfs2_holder *gh);
+int gfs2_glock_holder_ready(struct gfs2_holder *gh);
+int gfs2_glock_wait(struct gfs2_holder *gh);
+int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs);
+void gfs2_glock_dq(struct gfs2_holder *gh);
+void gfs2_glock_dq_wait(struct gfs2_holder *gh);
+void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
+int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
+ const struct gfs2_glock_operations *glops,
+ unsigned int state, u16 flags,
+ struct gfs2_holder *gh);
+int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
+void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
+void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl,
bool fsid);
#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { \
gfs2_dump_glock(NULL, gl, true); \
@@ -238,7 +228,7 @@ extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl,
gfs2_assert_withdraw((gl)->gl_name.ln_sbd, (x)); } } \
while (0)
-extern __printf(2, 3)
+__printf(2, 3)
void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
/**
@@ -266,28 +256,28 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
return error;
}
-extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
-extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
-extern bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay);
-extern void gfs2_cancel_delete_work(struct gfs2_glock *gl);
-extern bool gfs2_delete_work_queued(const struct gfs2_glock *gl);
-extern void gfs2_flush_delete_work(struct gfs2_sbd *sdp);
-extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
-extern void gfs2_gl_dq_holders(struct gfs2_sbd *sdp);
-extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
-extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl);
-extern void gfs2_glock_free(struct gfs2_glock *gl);
-
-extern int __init gfs2_glock_init(void);
-extern void gfs2_glock_exit(void);
-
-extern void gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
-extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
-extern void gfs2_register_debugfs(void);
-extern void gfs2_unregister_debugfs(void);
-
-extern void glock_set_object(struct gfs2_glock *gl, void *object);
-extern void glock_clear_object(struct gfs2_glock *gl, void *object);
+void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
+void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
+bool gfs2_queue_try_to_evict(struct gfs2_glock *gl);
+bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later);
+void gfs2_cancel_delete_work(struct gfs2_glock *gl);
+void gfs2_flush_delete_work(struct gfs2_sbd *sdp);
+void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
+void gfs2_withdraw_glocks(struct gfs2_sbd *sdp);
+void gfs2_glock_thaw(struct gfs2_sbd *sdp);
+void gfs2_glock_free(struct gfs2_glock *gl);
+void gfs2_glock_free_later(struct gfs2_glock *gl);
+
+int __init gfs2_glock_init(void);
+void gfs2_glock_exit(void);
+
+void gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
+void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
+void gfs2_register_debugfs(void);
+void gfs2_unregister_debugfs(void);
+
+void glock_set_object(struct gfs2_glock *gl, void *object);
+void glock_clear_object(struct gfs2_glock *gl, void *object);
extern const struct lm_lockops gfs2_dlm_ops;
@@ -306,7 +296,13 @@ static inline bool gfs2_holder_queued(struct gfs2_holder *gh)
return !list_empty(&gh->gh_list);
}
-extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation);
-extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation);
+void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation);
+bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation);
+
+static inline bool glock_needs_demote(struct gfs2_glock *gl)
+{
+ return (test_bit(GLF_DEMOTE, &gl->gl_flags) ||
+ test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags));
+}
#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index d78b61ecc1cd..2173ccf5034b 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -11,6 +11,7 @@
#include <linux/bio.h>
#include <linux/posix_acl.h>
#include <linux/security.h>
+#include <linux/log2.h>
#include "gfs2.h"
#include "incore.h"
@@ -29,8 +30,6 @@
struct workqueue_struct *gfs2_freeze_wq;
-extern struct workqueue_struct *gfs2_control_wq;
-
static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
@@ -39,12 +38,12 @@ static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
"AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page "
"state 0x%lx\n",
bh, (unsigned long long)bh->b_blocknr, bh->b_state,
- bh->b_page->mapping, bh->b_page->flags);
+ bh->b_folio->mapping, bh->b_folio->flags.f);
fs_err(sdp, "AIL glock %u:%llu mapping %p\n",
gl->gl_name.ln_type, gl->gl_name.ln_number,
gfs2_glock2aspace(gl));
gfs2_lm(sdp, "AIL error\n");
- gfs2_withdraw_delayed(sdp);
+ gfs2_withdraw(sdp);
}
/**
@@ -90,7 +89,7 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl)
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_trans tr;
unsigned int revokes;
- int ret;
+ int ret = 0;
revokes = atomic_read(&gl->gl_ail_count);
@@ -124,15 +123,18 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl)
memset(&tr, 0, sizeof(tr));
set_bit(TR_ONSTACK, &tr.tr_flags);
ret = __gfs2_trans_begin(&tr, sdp, 0, revokes, _RET_IP_);
- if (ret)
+ if (ret) {
+ fs_err(sdp, "Transaction error %d: Unable to write revokes.", ret);
goto flush;
+ }
__gfs2_ail_flush(gl, 0, revokes);
gfs2_trans_end(sdp);
flush:
- gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
- GFS2_LFC_AIL_EMPTY_GL);
- return 0;
+ if (!ret)
+ gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
+ GFS2_LFC_AIL_EMPTY_GL);
+ return ret;
}
void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
@@ -162,7 +164,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
static int gfs2_rgrp_metasync(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct address_space *metamapping = &sdp->sd_aspace;
+ struct address_space *metamapping = gfs2_aspace(sdp);
struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
const unsigned bsize = sdp->sd_sb.sb_bsize;
loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK;
@@ -193,7 +195,7 @@ static int rgrp_go_sync(struct gfs2_glock *gl)
struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
int error;
- if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
+ if (!rgd || !test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
return 0;
GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
@@ -219,18 +221,22 @@ static int rgrp_go_sync(struct gfs2_glock *gl)
static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct address_space *mapping = &sdp->sd_aspace;
+ struct address_space *mapping = gfs2_aspace(sdp);
struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
const unsigned bsize = sdp->sd_sb.sb_bsize;
- loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK;
- loff_t end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;
+ loff_t start, end;
+ if (!rgd)
+ return;
+ start = (rgd->rd_addr * bsize) & PAGE_MASK;
+ end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;
gfs2_rgrp_brelse(rgd);
WARN_ON_ONCE(!(flags & DIO_METADATA));
+ gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
truncate_inode_pages_range(mapping, start, end);
}
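The start/end arithmetic above rounds the resource group's block range out to whole pages before truncating. A quick worked example, assuming 4 KiB blocks and pages:

/*
 * rd_addr = 17, rd_length = 3, bsize = PAGE_SIZE = 4096:
 *   start = (17 * 4096) & PAGE_MASK         = 69632  (first byte of page 17)
 *   end   = PAGE_ALIGN((17 + 3) * 4096) - 1 = 81919  (last byte of page 19)
 * so truncate_inode_pages_range() drops exactly pages 17..19 of the
 * metadata mapping.
 */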
-static void gfs2_rgrp_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
+static void gfs2_rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl,
const char *fs_id_buf)
{
struct gfs2_rgrpd *rgd = gl->gl_object;
@@ -323,7 +329,9 @@ static int inode_go_sync(struct gfs2_glock *gl)
ret = gfs2_inode_metasync(gl);
if (!error)
error = ret;
- gfs2_ail_empty_gl(gl);
+ ret = gfs2_ail_empty_gl(gl);
+ if (!error)
+ error = ret;
/*
* Writeback of the data mapping may cause the dirty flag to be set
* so we have to clear it again here.
@@ -351,6 +359,8 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
{
struct gfs2_inode *ip = gfs2_glock2inode(gl);
+ gfs2_assert_withdraw(gl->gl_name.ln_sbd, !atomic_read(&gl->gl_ail_count));
+
if (flags & DIO_METADATA) {
struct address_space *mapping = gfs2_glock2aspace(gl);
truncate_inode_pages(mapping, 0);
@@ -374,36 +384,24 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
gfs2_clear_glop_pending(ip);
}
-/**
- * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
- * @gl: the glock
- *
- * Returns: 1 if it's ok
- */
-
-static int inode_go_demote_ok(const struct gfs2_glock *gl)
-{
- struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
-
- if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
- return 0;
-
- return 1;
-}
-
static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
const struct gfs2_dinode *str = buf;
- struct timespec64 atime;
+ struct timespec64 atime, iatime;
u16 height, depth;
umode_t mode = be32_to_cpu(str->di_mode);
struct inode *inode = &ip->i_inode;
- bool is_new = inode->i_state & I_NEW;
+ bool is_new = inode_state_read_once(inode) & I_NEW;
- if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
- goto corrupt;
- if (unlikely(!is_new && inode_wrong_type(inode, mode)))
- goto corrupt;
+ if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) {
+ gfs2_consist_inode(ip);
+ return -EIO;
+ }
+ if (unlikely(!is_new && inode_wrong_type(inode, mode))) {
+ gfs2_consist_inode(ip);
+ return -EIO;
+ }
ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
inode->i_mode = mode;
if (is_new) {
@@ -424,12 +422,13 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
gfs2_set_inode_blocks(inode, be64_to_cpu(str->di_blocks));
atime.tv_sec = be64_to_cpu(str->di_atime);
atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
- if (timespec64_compare(&inode->i_atime, &atime) < 0)
- inode->i_atime = atime;
- inode->i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
- inode->i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
- inode->i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
- inode->i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
+ iatime = inode_get_atime(inode);
+ if (timespec64_compare(&iatime, &atime) < 0)
+ inode_set_atime_to_ts(inode, atime);
+ inode_set_mtime(inode, be64_to_cpu(str->di_mtime),
+ be32_to_cpu(str->di_mtime_nsec));
+ inode_set_ctime(inode, be64_to_cpu(str->di_ctime),
+ be32_to_cpu(str->di_ctime_nsec));
ip->i_goal = be64_to_cpu(str->di_goal_meta);
ip->i_generation = be64_to_cpu(str->di_generation);
@@ -439,26 +438,33 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
/* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
gfs2_set_inode_flags(inode);
height = be16_to_cpu(str->di_height);
- if (unlikely(height > GFS2_MAX_META_HEIGHT))
- goto corrupt;
+ if (unlikely(height > sdp->sd_max_height)) {
+ gfs2_consist_inode(ip);
+ return -EIO;
+ }
ip->i_height = (u8)height;
depth = be16_to_cpu(str->di_depth);
- if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
- goto corrupt;
+ if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) {
+ gfs2_consist_inode(ip);
+ return -EIO;
+ }
+ if ((ip->i_diskflags & GFS2_DIF_EXHASH) &&
+ depth < ilog2(sdp->sd_hash_ptrs)) {
+ gfs2_consist_inode(ip);
+ return -EIO;
+ }
ip->i_depth = (u8)depth;
ip->i_entries = be32_to_cpu(str->di_entries);
- if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip))
- goto corrupt;
-
+ if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip)) {
+ gfs2_consist_inode(ip);
+ return -EIO;
+ }
if (S_ISREG(inode->i_mode))
gfs2_set_aops(inode);
return 0;
-corrupt:
- gfs2_consist_inode(ip);
- return -EIO;
}
/**
@@ -468,7 +474,7 @@ corrupt:
* Returns: errno
*/
-int gfs2_inode_refresh(struct gfs2_inode *ip)
+static int gfs2_inode_refresh(struct gfs2_inode *ip)
{
struct buffer_head *dibh;
int error;
@@ -484,7 +490,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
/**
* inode_go_instantiate - read in an inode if necessary
- * @gh: The glock holder
+ * @gl: The glock
*
* Returns: errno
*/
@@ -492,11 +498,18 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
static int inode_go_instantiate(struct gfs2_glock *gl)
{
struct gfs2_inode *ip = gl->gl_object;
+ struct gfs2_glock *io_gl;
+ int error;
if (!ip) /* no inode to populate - read it in later */
return 0;
- return gfs2_inode_refresh(ip);
+ error = gfs2_inode_refresh(ip);
+ if (error)
+ return error;
+ io_gl = ip->i_iopen_gh.gh_gl;
+ io_gl->gl_no_formal_ino = ip->i_no_formal_ino;
+ return 0;
}
static int inode_go_held(struct gfs2_holder *gh)
@@ -527,71 +540,54 @@ static int inode_go_held(struct gfs2_holder *gh)
*
*/
-static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
+static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl,
const char *fs_id_buf)
{
struct gfs2_inode *ip = gl->gl_object;
- struct inode *inode = &ip->i_inode;
- unsigned long nrpages;
+ const struct inode *inode = &ip->i_inode;
if (ip == NULL)
return;
- xa_lock_irq(&inode->i_data.i_pages);
- nrpages = inode->i_data.nrpages;
- xa_unlock_irq(&inode->i_data.i_pages);
-
gfs2_print_dbg(seq, "%s I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu "
"p:%lu\n", fs_id_buf,
(unsigned long long)ip->i_no_formal_ino,
(unsigned long long)ip->i_no_addr,
- IF2DT(ip->i_inode.i_mode), ip->i_flags,
+ IF2DT(inode->i_mode), ip->i_flags,
(unsigned int)ip->i_diskflags,
- (unsigned long long)i_size_read(inode), nrpages);
+ (unsigned long long)i_size_read(inode),
+ inode->i_data.nrpages);
}
/**
- * freeze_go_sync - promote/demote the freeze glock
+ * freeze_go_callback - A cluster node is requesting a freeze
* @gl: the glock
+ * @remote: true if this came from a different cluster node
*/
-static int freeze_go_sync(struct gfs2_glock *gl)
+static void freeze_go_callback(struct gfs2_glock *gl, bool remote)
{
- int error = 0;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ struct super_block *sb = sdp->sd_vfs;
+
+ if (!remote ||
+ (gl->gl_state != LM_ST_SHARED &&
+ gl->gl_state != LM_ST_UNLOCKED) ||
+ gl->gl_demote_state != LM_ST_UNLOCKED)
+ return;
/*
- * We need to check gl_state == LM_ST_SHARED here and not gl_req ==
- * LM_ST_EXCLUSIVE. That's because when any node does a freeze,
- * all the nodes should have the freeze glock in SH mode and they all
- * call do_xmote: One for EX and the others for UN. They ALL must
- * freeze locally, and they ALL must queue freeze work. The freeze_work
- * calls freeze_func, which tries to reacquire the freeze glock in SH,
- * effectively waiting for the thaw on the node who holds it in EX.
- * Once thawed, the work func acquires the freeze glock in
- * SH and everybody goes back to thawed.
+ * Try to get an active super block reference to prevent racing with
+ * unmount (see super_trylock_shared()). But note that unmount isn't
+ * the only place where a write lock on s_umount is taken, and we can
+ * fail here because of things like remount as well.
*/
- if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) &&
- !test_bit(SDF_NORECOVERY, &sdp->sd_flags)) {
- atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
- error = freeze_super(sdp->sd_vfs);
- if (error) {
- fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
- error);
- if (gfs2_withdrawn(sdp)) {
- atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
- return 0;
- }
- gfs2_assert_withdraw(sdp, 0);
- }
- queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work);
- if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
- gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
- GFS2_LFC_FREEZE_GO_SYNC);
- else /* read-only mounts */
- atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
+ if (down_read_trylock(&sb->s_umount)) {
+ atomic_inc(&sb->s_active);
+ up_read(&sb->s_umount);
+ if (!queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work))
+ deactivate_super(sb);
}
- return 0;
}
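The callback uses the standard trick for handing a super block to deferred work without racing unmount: take s_umount shared to prove the sb is live, convert that into an s_active reference, and drop the reference again if the work was already queued. In distilled form (a sketch; the matching deactivate_super() in the work function itself is an assumption here, since its body is outside this hunk):

if (down_read_trylock(&sb->s_umount)) {
	atomic_inc(&sb->s_active);	/* pin; balanced by deactivate_super() */
	up_read(&sb->s_umount);
	if (!queue_work(wq, work))	/* already pending: drop our pin now */
		deactivate_super(sb);
}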
/**
@@ -609,31 +605,18 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl)
if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
- error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
- if (gfs2_assert_withdraw_delayed(sdp, !error))
+ error = gfs2_find_jhead(sdp->sd_jdesc, &head);
+ if (gfs2_assert_withdraw(sdp, !error))
return error;
- if (gfs2_assert_withdraw_delayed(sdp, head.lh_flags &
- GFS2_LOG_HEAD_UNMOUNT))
+ if (gfs2_assert_withdraw(sdp, head.lh_flags &
+ GFS2_LOG_HEAD_UNMOUNT))
return -EIO;
- sdp->sd_log_sequence = head.lh_sequence + 1;
- gfs2_log_pointers_init(sdp, head.lh_blkno);
+ gfs2_log_pointers_init(sdp, &head);
}
return 0;
}
/**
- * freeze_go_demote_ok
- * @gl: the glock
- *
- * Always returns 0
- */
-
-static int freeze_go_demote_ok(const struct gfs2_glock *gl)
-{
- return 0;
-}
-
-/**
* iopen_go_callback - schedule the dcache entry for the inode to be deleted
* @gl: the glock
* @remote: true if this came from a different cluster node
@@ -645,105 +628,29 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
struct gfs2_inode *ip = gl->gl_object;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- if (!remote || sb_rdonly(sdp->sd_vfs))
+ if (!remote || test_bit(SDF_KILL, &sdp->sd_flags))
return;
if (gl->gl_demote_state == LM_ST_UNLOCKED &&
gl->gl_state == LM_ST_SHARED && ip) {
gl->gl_lockref.count++;
- if (!queue_delayed_work(gfs2_delete_workqueue,
- &gl->gl_delete, 0))
+ if (!gfs2_queue_try_to_evict(gl))
gl->gl_lockref.count--;
}
}
-static int iopen_go_demote_ok(const struct gfs2_glock *gl)
-{
- return !gfs2_delete_work_queued(gl);
-}
-
-/**
- * inode_go_free - wake up anyone waiting for dlm's unlock ast to free it
- * @gl: glock being freed
- *
- * For now, this is only used for the journal inode glock. In withdraw
- * situations, we need to wait for the glock to be freed so that we know
- * other nodes may proceed with recovery / journal replay.
- */
-static void inode_go_free(struct gfs2_glock *gl)
-{
- /* Note that we cannot reference gl_object because it's already set
- * to NULL by this point in its lifecycle. */
- if (!test_bit(GLF_FREEING, &gl->gl_flags))
- return;
- clear_bit_unlock(GLF_FREEING, &gl->gl_flags);
- wake_up_bit(&gl->gl_flags, GLF_FREEING);
-}
-
-/**
- * nondisk_go_callback - used to signal when a node did a withdraw
- * @gl: the nondisk glock
- * @remote: true if this came from a different cluster node
- *
- */
-static void nondisk_go_callback(struct gfs2_glock *gl, bool remote)
-{
- struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
-
- /* Ignore the callback unless it's from another node, and it's the
- live lock. */
- if (!remote || gl->gl_name.ln_number != GFS2_LIVE_LOCK)
- return;
-
- /* First order of business is to cancel the demote request. We don't
- * really want to demote a nondisk glock. At best it's just to inform
- * us of another node's withdraw. We'll keep it in SH mode. */
- clear_bit(GLF_DEMOTE, &gl->gl_flags);
- clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
-
- /* Ignore the unlock if we're withdrawn, unmounting, or in recovery. */
- if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) ||
- test_bit(SDF_WITHDRAWN, &sdp->sd_flags) ||
- test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags))
- return;
-
- /* We only care when a node wants us to unlock, because that means
- * they want a journal recovered. */
- if (gl->gl_demote_state != LM_ST_UNLOCKED)
- return;
-
- if (sdp->sd_args.ar_spectator) {
- fs_warn(sdp, "Spectator node cannot recover journals.\n");
- return;
- }
-
- fs_warn(sdp, "Some node has withdrawn; checking for recovery.\n");
- set_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags);
- /*
- * We can't call remote_withdraw directly here or gfs2_recover_journal
- * because this is called from the glock unlock function and the
- * remote_withdraw needs to enqueue and dequeue the same "live" glock
- * we were called from. So we queue it to the control work queue in
- * lock_dlm.
- */
- queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);
-}
-
const struct gfs2_glock_operations gfs2_meta_glops = {
.go_type = LM_TYPE_META,
- .go_flags = GLOF_NONDISK,
};
const struct gfs2_glock_operations gfs2_inode_glops = {
.go_sync = inode_go_sync,
.go_inval = inode_go_inval,
- .go_demote_ok = inode_go_demote_ok,
.go_instantiate = inode_go_instantiate,
.go_held = inode_go_held,
.go_dump = inode_go_dump,
.go_type = LM_TYPE_INODE,
- .go_flags = GLOF_ASPACE | GLOF_LRU | GLOF_LVB,
- .go_free = inode_go_free,
+ .go_flags = GLOF_ASPACE | GLOF_LVB,
};
const struct gfs2_glock_operations gfs2_rgrp_glops = {
@@ -756,41 +663,33 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
};
const struct gfs2_glock_operations gfs2_freeze_glops = {
- .go_sync = freeze_go_sync,
.go_xmote_bh = freeze_go_xmote_bh,
- .go_demote_ok = freeze_go_demote_ok,
+ .go_callback = freeze_go_callback,
.go_type = LM_TYPE_NONDISK,
- .go_flags = GLOF_NONDISK,
};
const struct gfs2_glock_operations gfs2_iopen_glops = {
.go_type = LM_TYPE_IOPEN,
.go_callback = iopen_go_callback,
.go_dump = inode_go_dump,
- .go_demote_ok = iopen_go_demote_ok,
- .go_flags = GLOF_LRU | GLOF_NONDISK,
.go_subclass = 1,
};
const struct gfs2_glock_operations gfs2_flock_glops = {
.go_type = LM_TYPE_FLOCK,
- .go_flags = GLOF_LRU | GLOF_NONDISK,
};
const struct gfs2_glock_operations gfs2_nondisk_glops = {
.go_type = LM_TYPE_NONDISK,
- .go_flags = GLOF_NONDISK,
- .go_callback = nondisk_go_callback,
};
const struct gfs2_glock_operations gfs2_quota_glops = {
.go_type = LM_TYPE_QUOTA,
- .go_flags = GLOF_LVB | GLOF_LRU | GLOF_NONDISK,
+ .go_flags = GLOF_LVB,
};
const struct gfs2_glock_operations gfs2_journal_glops = {
.go_type = LM_TYPE_JOURNAL,
- .go_flags = GLOF_NONDISK,
};
const struct gfs2_glock_operations *gfs2_glops_list[] = {
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index 695898afcaf1..9341423798df 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -22,7 +22,7 @@ extern const struct gfs2_glock_operations gfs2_quota_glops;
extern const struct gfs2_glock_operations gfs2_journal_glops;
extern const struct gfs2_glock_operations *gfs2_glops_list[];
-extern int gfs2_inode_metasync(struct gfs2_glock *gl);
-extern void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync);
+int gfs2_inode_metasync(struct gfs2_glock *gl);
+void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync);
#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c26765080f28..d05d8fe4e456 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -218,20 +218,16 @@ struct gfs2_glock_operations {
int (*go_sync) (struct gfs2_glock *gl);
int (*go_xmote_bh)(struct gfs2_glock *gl);
void (*go_inval) (struct gfs2_glock *gl, int flags);
- int (*go_demote_ok) (const struct gfs2_glock *gl);
int (*go_instantiate) (struct gfs2_glock *gl);
int (*go_held)(struct gfs2_holder *gh);
- void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl,
+ void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl,
const char *fs_id_buf);
void (*go_callback)(struct gfs2_glock *gl, bool remote);
- void (*go_free)(struct gfs2_glock *gl);
const int go_subclass;
const int go_type;
const unsigned long go_flags;
#define GLOF_ASPACE 1 /* address space attached */
#define GLOF_LVB 2 /* Lock Value Block attached */
-#define GLOF_LRU 4 /* LRU managed */
-#define GLOF_NONDISK 8 /* not I/O related */
};
enum {
@@ -321,16 +317,18 @@ enum {
GLF_DEMOTE_IN_PROGRESS = 5,
GLF_DIRTY = 6,
GLF_LFLUSH = 7,
- GLF_INVALIDATE_IN_PROGRESS = 8,
- GLF_REPLY_PENDING = 9,
+ GLF_HAVE_REPLY = 9,
GLF_INITIAL = 10,
- GLF_FROZEN = 11,
+ GLF_HAVE_FROZEN_REPLY = 11,
GLF_INSTANTIATE_IN_PROG = 12, /* instantiate happening now */
GLF_LRU = 13,
GLF_OBJECT = 14, /* Used only for tracing */
GLF_BLOCKING = 15,
- GLF_PENDING_DELETE = 17,
- GLF_FREEING = 18, /* Wait for glock to be freed */
+ GLF_TRY_TO_EVICT = 17, /* iopen glocks only */
+ GLF_VERIFY_DELETE = 18, /* iopen glocks only */
+ GLF_PENDING_REPLY = 19,
+ GLF_DEFER_DELETE = 20, /* iopen glocks only */
+ GLF_CANCELING = 21,
};
struct gfs2_glock {
@@ -373,11 +371,8 @@ struct gfs2_glock {
enum {
GIF_QD_LOCKED = 1,
- GIF_ALLOC_FAILED = 2,
GIF_SW_PAGED = 3,
- GIF_FREE_VFS_INODE = 5,
GIF_GLOP_PENDING = 6,
- GIF_DEFERRED_DELETE = 7,
};
struct gfs2_inode {
@@ -451,7 +446,7 @@ struct gfs2_quota_data {
s64 qd_change_sync;
unsigned int qd_slot;
- unsigned int qd_slot_count;
+ unsigned int qd_slot_ref;
struct buffer_head *qd_bh;
struct gfs2_quota_change *qd_bh_qc;
@@ -522,8 +517,6 @@ struct gfs2_jdesc {
struct list_head jd_revoke_list;
unsigned int jd_replay_tail;
-
- u64 jd_no_addr;
};
struct gfs2_statfs_change_host {
@@ -536,6 +529,7 @@ struct gfs2_statfs_change_host {
#define GFS2_QUOTA_OFF 0
#define GFS2_QUOTA_ACCOUNT 1
#define GFS2_QUOTA_ON 2
+#define GFS2_QUOTA_QUIET 3 /* on but not complaining */
#define GFS2_DATA_DEFAULT GFS2_DATA_ORDERED
#define GFS2_DATA_WRITEBACK 1
@@ -543,8 +537,7 @@ struct gfs2_statfs_change_host {
#define GFS2_ERRORS_DEFAULT GFS2_ERRORS_WITHDRAW
#define GFS2_ERRORS_WITHDRAW 0
-#define GFS2_ERRORS_CONTINUE 1 /* place holder for future feature */
-#define GFS2_ERRORS_RO 2 /* place holder for future feature */
+#define GFS2_ERRORS_DEACTIVATE 1
#define GFS2_ERRORS_PANIC 3
struct gfs2_args {
@@ -560,7 +553,7 @@ struct gfs2_args {
unsigned int ar_data:2; /* ordered/writeback */
unsigned int ar_meta:1; /* mount metafs */
unsigned int ar_discard:1; /* discard requests */
- unsigned int ar_errors:2; /* errors=withdraw | panic */
+ unsigned int ar_errors:2; /* errors=withdraw | deactivate | panic */
unsigned int ar_nobarrier:1; /* do not send barriers */
unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */
unsigned int ar_got_rgrplvb:1; /* Was the rgrplvb opt given? */
@@ -586,6 +579,7 @@ struct gfs2_tune {
unsigned int gt_complain_secs;
unsigned int gt_statfs_quantum;
unsigned int gt_statfs_slow;
+ unsigned int gt_withdraw_helper_timeout;
};
enum {
@@ -599,18 +593,10 @@ enum {
SDF_RORECOVERY = 7, /* read only recovery */
SDF_SKIP_DLM_UNLOCK = 8,
SDF_FORCE_AIL_FLUSH = 9,
- SDF_FS_FROZEN = 10,
- SDF_WITHDRAWING = 11, /* Will withdraw eventually */
- SDF_WITHDRAW_IN_PROG = 12, /* Withdraw is in progress */
- SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */
- SDF_WITHDRAW_RECOVERY = 14, /* Wait for journal recovery when we are
- withdrawing */
-};
-
-enum gfs2_freeze_state {
- SFS_UNFROZEN = 0,
- SFS_STARTING_FREEZE = 1,
- SFS_FROZEN = 2,
+ SDF_FREEZE_INITIATOR = 10,
+ SDF_KILL = 15,
+ SDF_EVICTING = 16,
+ SDF_FROZEN = 17,
};
#define GFS2_FSNAME_LEN 256
@@ -660,6 +646,8 @@ struct lm_lockstruct {
struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */
char *ls_lvb_bits;
+ struct rw_semaphore ls_sem;
+
spinlock_t ls_recover_spin; /* protects following fields */
unsigned long ls_recover_flags; /* DFL_ */
uint32_t ls_recover_mount; /* gen in first recover_done cb */
@@ -718,11 +706,13 @@ struct gfs2_sbd {
struct gfs2_glock *sd_rename_gl;
struct gfs2_glock *sd_freeze_gl;
struct work_struct sd_freeze_work;
- wait_queue_head_t sd_glock_wait;
+ struct work_struct sd_withdraw_work;
+ wait_queue_head_t sd_kill_wait;
wait_queue_head_t sd_async_glock_wait;
atomic_t sd_glock_disposal;
struct completion sd_locking_init;
- struct completion sd_wdack;
+ struct completion sd_withdraw_helper;
+ int sd_withdraw_helper_status;
struct delayed_work sd_control_work;
/* Inode Stuff */
@@ -763,7 +753,6 @@ struct gfs2_sbd {
struct gfs2_jdesc *sd_jdesc;
struct gfs2_holder sd_journal_gh;
struct gfs2_holder sd_jinode_gh;
- struct gfs2_glock *sd_jinode_gl;
struct gfs2_holder sd_sc_gh;
struct buffer_head *sd_sc_bh;
@@ -771,6 +760,11 @@ struct gfs2_sbd {
struct completion sd_journal_ready;
+ /* Workqueue stuff */
+
+ struct workqueue_struct *sd_glock_wq;
+ struct workqueue_struct *sd_delete_wq;
+
/* Daemon stuff */
struct task_struct *sd_logd_process;
@@ -780,7 +774,6 @@ struct gfs2_sbd {
struct list_head sd_quota_list;
atomic_t sd_quota_count;
- struct mutex sd_quota_mutex;
struct mutex sd_quota_sync_mutex;
wait_queue_head_t sd_quota_wait;
@@ -792,7 +785,7 @@ struct gfs2_sbd {
/* Log stuff */
- struct address_space sd_aspace;
+ struct inode *sd_inode;
spinlock_t sd_log_lock;
@@ -821,7 +814,6 @@ struct gfs2_sbd {
atomic_t sd_log_in_flight;
wait_queue_head_t sd_log_flush_wait;
int sd_log_error; /* First log error */
- wait_queue_head_t sd_withdraw_wait;
unsigned int sd_log_tail;
unsigned int sd_log_flush_tail;
@@ -834,8 +826,8 @@ struct gfs2_sbd {
/* For quiescing the filesystem */
struct gfs2_holder sd_freeze_gh;
- atomic_t sd_freeze_state;
struct mutex sd_freeze_mutex;
+ struct list_head sd_dead_glocks;
char sd_fsname[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
char sd_table_name[GFS2_FSNAME_LEN];
@@ -845,9 +837,15 @@ struct gfs2_sbd {
unsigned long sd_last_warning;
struct dentry *debugfs_dir; /* debugfs directory */
- unsigned long sd_glock_dqs_held;
};
+#define GFS2_BAD_INO 1
+
+static inline struct address_space *gfs2_aspace(struct gfs2_sbd *sdp)
+{
+ return sdp->sd_inode->i_mapping;
+}
+
static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which)
{
gl->gl_stats.stats[which]++;
@@ -861,7 +859,7 @@ static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which)
preempt_enable();
}
-extern struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl);
+struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl);
static inline unsigned gfs2_max_stuffed_size(const struct gfs2_inode *ip)
{
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 614db3055c02..36618e353199 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -89,6 +89,19 @@ static int iget_set(struct inode *inode, void *opaque)
return 0;
}
+void gfs2_setup_inode(struct inode *inode)
+{
+ gfp_t gfp_mask;
+
+ /*
+ * Ensure all page cache allocations are done from GFP_NOFS context to
+ * prevent direct reclaim from recursing back into the filesystem and
+ * blowing the stack or deadlocking.
+ */
+ gfp_mask = mapping_gfp_mask(inode->i_mapping);
+ mapping_set_gfp_mask(inode->i_mapping, gfp_mask & ~__GFP_FS);
+}
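With __GFP_FS cleared on the mapping, every later page cache allocation for this inode is implicitly GFP_NOFS. Sketch of the effect (illustrative, not from the patch):

/*
 * After gfs2_setup_inode(inode):
 *
 *   gfp_t gfp = mapping_gfp_mask(inode->i_mapping);  // __GFP_FS is clear
 *   folio = filemap_grab_folio(inode->i_mapping, index);
 *
 * The allocation inside filemap_grab_folio() inherits the mapping's gfp
 * mask, so direct reclaim triggered there cannot re-enter gfs2.
 */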
+
/**
* gfs2_inode_lookup - Lookup an inode
* @sb: The super block
@@ -127,11 +140,12 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
ip = GFS2_I(inode);
- if (inode->i_state & I_NEW) {
+ if (inode_state_read_once(inode) & I_NEW) {
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_glock *io_gl;
int extra_flags = 0;
+ gfs2_setup_inode(inode);
error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE,
&ip->i_gl);
if (unlikely(error))
@@ -185,8 +199,9 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags);
/* Lowest possible timestamp; will be overwritten in gfs2_dinode_in. */
- inode->i_atime.tv_sec = 1LL << (8 * sizeof(inode->i_atime.tv_sec) - 1);
- inode->i_atime.tv_nsec = 0;
+ inode_set_atime(inode,
+ 1LL << (8 * sizeof(inode_get_atime_sec(inode)) - 1),
+ 0);
glock_set_object(ip->i_gl, ip);
@@ -225,6 +240,10 @@ fail:
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
if (gfs2_holder_initialized(&i_gh))
gfs2_glock_dq_uninit(&i_gh);
+ if (ip->i_gl) {
+ gfs2_glock_put(ip->i_gl);
+ ip->i_gl = NULL;
+ }
iget_failed(inode);
return ERR_PTR(error);
}
@@ -261,21 +280,28 @@ fail_iput:
}
-struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
+/**
+ * gfs2_lookup_meta - Look up an inode in a metadata directory
+ * @dip: The directory
+ * @name: The name of the inode
+ */
+struct inode *gfs2_lookup_meta(struct inode *dip, const char *name)
{
struct qstr qstr;
struct inode *inode;
+
gfs2_str2qstr(&qstr, name);
inode = gfs2_lookupi(dip, &qstr, 1);
- /* gfs2_lookupi has inconsistent callers: vfs
- * related routines expect NULL for no entry found,
- * gfs2_lookup_simple callers expect ENOENT
- * and do not check for NULL.
+ if (IS_ERR_OR_NULL(inode))
+ return inode ? inode : ERR_PTR(-ENOENT);
+
+ /*
+ * Must not call back into the filesystem when allocating
+ * pages in the metadata inode's address space.
*/
- if (inode == NULL)
- return ERR_PTR(-ENOENT);
- else
- return inode;
+ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+
+ return inode;
}
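A hypothetical caller, to show the new error convention (the variable and inode names here are illustrative):

struct inode *inode = gfs2_lookup_meta(master_dir, "quota");

if (IS_ERR(inode))
	return PTR_ERR(inode);	/* "not found" now arrives as ERR_PTR(-ENOENT) */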
@@ -320,7 +346,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
}
if (!is_root) {
- error = gfs2_permission(&init_user_ns, dir, MAY_EXEC);
+ error = gfs2_permission(&nop_mnt_idmap, dir, MAY_EXEC);
if (error)
goto out;
}
@@ -350,7 +376,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
{
int error;
- error = gfs2_permission(&init_user_ns, &dip->i_inode,
+ error = gfs2_permission(&nop_mnt_idmap, &dip->i_inode,
MAY_WRITE | MAY_EXEC);
if (error)
return error;
@@ -407,7 +433,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks)
if (error)
goto out_ipreserv;
- error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1, &ip->i_generation);
+ error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1);
if (error)
goto out_trans_end;
@@ -427,6 +453,72 @@ out:
return error;
}
+static void gfs2_final_release_pages(struct gfs2_inode *ip)
+{
+ struct inode *inode = &ip->i_inode;
+ struct gfs2_glock *gl = ip->i_gl;
+
+ /* This can only happen during incomplete inode creation. */
+ if (unlikely(!gl))
+ return;
+
+ truncate_inode_pages(gfs2_glock2aspace(gl), 0);
+ truncate_inode_pages(&inode->i_data, 0);
+
+ if (atomic_read(&gl->gl_revokes) == 0) {
+ clear_bit(GLF_LFLUSH, &gl->gl_flags);
+ clear_bit(GLF_DIRTY, &gl->gl_flags);
+ }
+}
+
+int gfs2_dinode_dealloc(struct gfs2_inode *ip)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ struct gfs2_rgrpd *rgd;
+ struct gfs2_holder gh;
+ int error;
+
+ if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
+ gfs2_consist_inode(ip);
+ return -EIO;
+ }
+
+ gfs2_rindex_update(sdp);
+
+ error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
+ if (error)
+ return error;
+
+ rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
+ if (!rgd) {
+ gfs2_consist_inode(ip);
+ error = -EIO;
+ goto out_qs;
+ }
+
+ error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_SCOPE, &gh);
+ if (error)
+ goto out_qs;
+
+ error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
+ sdp->sd_jdesc->jd_blocks);
+ if (error)
+ goto out_rg_gunlock;
+
+ gfs2_free_di(rgd, ip);
+
+ gfs2_final_release_pages(ip);
+
+ gfs2_trans_end(sdp);
+
+out_rg_gunlock:
+ gfs2_glock_dq_uninit(&gh);
+out_qs:
+ gfs2_quota_unhold(ip);
+ return error;
+}
+
static void gfs2_init_dir(struct buffer_head *dibh,
const struct gfs2_inode *parent)
{
@@ -617,10 +709,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
struct gfs2_inode *dip = GFS2_I(dir), *ip;
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_glock *io_gl;
- int error;
+ int error, dealloc_error;
u32 aflags = 0;
unsigned blocks = 1;
struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, };
+ bool xattr_initialized = false;
if (!name->len || name->len > GFS2_FNAMESIZE)
return -ENAMETOOLONG;
@@ -647,7 +740,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (!IS_ERR(inode)) {
if (S_ISDIR(inode->i_mode)) {
iput(inode);
- inode = ERR_PTR(-EISDIR);
+ inode = NULL;
+ error = -EISDIR;
goto fail_gunlock;
}
d_instantiate(dentry, inode);
@@ -672,6 +766,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = -ENOMEM;
if (!inode)
goto fail_gunlock;
+ gfs2_setup_inode(inode);
ip = GFS2_I(inode);
error = posix_acl_create(dir, &mode, &default_acl, &acl);
@@ -686,7 +781,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
set_nlink(inode, S_ISDIR(mode) ? 2 : 1);
inode->i_rdev = dev;
inode->i_size = size;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ simple_inode_init_ts(inode);
munge_mode_uid_gid(dip, inode);
check_and_update_goal(dip);
ip->i_goal = dip->i_goal;
@@ -732,12 +827,13 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
if (error)
- goto fail_free_inode;
+ goto fail_dealloc_inode;
error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
if (error)
- goto fail_free_inode;
+ goto fail_dealloc_inode;
gfs2_cancel_delete_work(io_gl);
+ io_gl->gl_no_formal_ino = ip->i_no_formal_ino;
retry:
error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr);
@@ -754,13 +850,16 @@ retry:
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
if (error)
goto fail_gunlock3;
+ clear_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags);
error = gfs2_trans_begin(sdp, blocks, 0);
if (error)
goto fail_gunlock3;
- if (blocks > 1)
+ if (blocks > 1) {
gfs2_init_xattr(ip);
+ xattr_initialized = true;
+ }
init_dinode(dip, ip, symname);
gfs2_trans_end(sdp);
@@ -815,7 +914,22 @@ fail_gunlock3:
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
fail_gunlock2:
gfs2_glock_put(io_gl);
+fail_dealloc_inode:
+ dealloc_error = 0;
+ if (ip->i_eattr)
+ dealloc_error = gfs2_ea_dealloc(ip, xattr_initialized);
+ clear_nlink(inode);
+ mark_inode_dirty(inode);
+ if (!dealloc_error)
+ dealloc_error = gfs2_dinode_dealloc(ip);
+ if (dealloc_error)
+ fs_warn(sdp, "%s: %d\n", __func__, dealloc_error);
+ ip->i_no_addr = 0;
fail_free_inode:
+ if (ip->i_gl) {
+ gfs2_glock_put(ip->i_gl);
+ ip->i_gl = NULL;
+ }
gfs2_rs_deltree(&ip->i_res);
gfs2_qa_put(ip);
fail_free_acls:
@@ -825,11 +939,7 @@ fail_gunlock:
gfs2_dir_no_add(&da);
gfs2_glock_dq_uninit(&d_gh);
if (!IS_ERR_OR_NULL(inode)) {
- set_bit(GIF_ALLOC_FAILED, &ip->i_flags);
- clear_nlink(inode);
- if (ip->i_no_addr)
- mark_inode_dirty(inode);
- if (inode->i_state & I_NEW)
+ if (inode_state_read_once(inode) & I_NEW)
iget_failed(inode);
else
iput(inode);
@@ -843,7 +953,7 @@ fail:
/**
* gfs2_create - Create a file
- * @mnt_userns: User namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dir: The directory in which to create the file
* @dentry: The dentry of the new file
* @mode: The mode of the new file
@@ -852,7 +962,7 @@ fail:
* Returns: errno
*/
-static int gfs2_create(struct user_namespace *mnt_userns, struct inode *dir,
+static int gfs2_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl);
@@ -933,7 +1043,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
struct gfs2_sbd *sdp = GFS2_SB(dir);
struct inode *inode = d_inode(old_dentry);
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder ghs[2];
+ struct gfs2_holder d_gh, gh;
struct buffer_head *dibh;
struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, };
int error;
@@ -945,14 +1055,14 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
if (error)
return error;
- gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
+ gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &d_gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
- error = gfs2_glock_nq(ghs); /* parent */
+ error = gfs2_glock_nq(&d_gh);
if (error)
goto out_parent;
- error = gfs2_glock_nq(ghs + 1); /* child */
+ error = gfs2_glock_nq(&gh);
if (error)
goto out_child;
@@ -960,7 +1070,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
if (inode->i_nlink == 0)
goto out_gunlock;
- error = gfs2_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
+ error = gfs2_permission(&nop_mnt_idmap, dir, MAY_WRITE | MAY_EXEC);
if (error)
goto out_gunlock;
@@ -984,9 +1094,6 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
error = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto out_gunlock;
- error = -EINVAL;
- if (!ip->i_inode.i_nlink)
- goto out_gunlock;
error = -EMLINK;
if (ip->i_inode.i_nlink == (u32)-1)
goto out_gunlock;
@@ -1024,7 +1131,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
gfs2_trans_add_meta(ip->i_gl, dibh);
inc_nlink(&ip->i_inode);
- ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_ctime_current(&ip->i_inode);
ihold(inode);
d_instantiate(dentry, inode);
mark_inode_dirty(inode);
@@ -1041,13 +1148,13 @@ out_gunlock_q:
gfs2_quota_unlock(dip);
out_gunlock:
gfs2_dir_no_add(&da);
- gfs2_glock_dq(ghs + 1);
+ gfs2_glock_dq(&gh);
out_child:
- gfs2_glock_dq(ghs);
+ gfs2_glock_dq(&d_gh);
out_parent:
gfs2_qa_put(dip);
- gfs2_holder_uninit(ghs);
- gfs2_holder_uninit(ghs + 1);
+ gfs2_holder_uninit(&d_gh);
+ gfs2_holder_uninit(&gh);
return error;
}
@@ -1078,7 +1185,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
if (IS_APPEND(&dip->i_inode))
return -EPERM;
- error = gfs2_permission(&init_user_ns, &dip->i_inode,
+ error = gfs2_permission(&nop_mnt_idmap, &dip->i_inode,
MAY_WRITE | MAY_EXEC);
if (error)
return error;
@@ -1109,7 +1216,7 @@ static int gfs2_unlink_inode(struct gfs2_inode *dip,
return error;
ip->i_entries = 0;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
if (S_ISDIR(inode->i_mode))
clear_nlink(inode);
else
@@ -1138,7 +1245,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
struct gfs2_sbd *sdp = GFS2_SB(dir);
struct inode *inode = d_inode(dentry);
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder ghs[3];
+ struct gfs2_holder d_gh, r_gh, gh;
struct gfs2_rgrpd *rgd;
int error;
@@ -1148,21 +1255,21 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
error = -EROFS;
- gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
+ gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &d_gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
if (!rgd)
goto out_inodes;
- gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE, ghs + 2);
+ gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE, &r_gh);
- error = gfs2_glock_nq(ghs); /* parent */
+ error = gfs2_glock_nq(&d_gh);
if (error)
goto out_parent;
- error = gfs2_glock_nq(ghs + 1); /* child */
+ error = gfs2_glock_nq(&gh);
if (error)
goto out_child;
@@ -1176,7 +1283,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
goto out_rgrp;
}
- error = gfs2_glock_nq(ghs + 2); /* rgrp */
+ error = gfs2_glock_nq(&r_gh); /* rgrp */
if (error)
goto out_rgrp;
@@ -1192,22 +1299,22 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
gfs2_trans_end(sdp);
out_gunlock:
- gfs2_glock_dq(ghs + 2);
+ gfs2_glock_dq(&r_gh);
out_rgrp:
- gfs2_glock_dq(ghs + 1);
+ gfs2_glock_dq(&gh);
out_child:
- gfs2_glock_dq(ghs);
+ gfs2_glock_dq(&d_gh);
out_parent:
- gfs2_holder_uninit(ghs + 2);
+ gfs2_holder_uninit(&r_gh);
out_inodes:
- gfs2_holder_uninit(ghs + 1);
- gfs2_holder_uninit(ghs);
+ gfs2_holder_uninit(&gh);
+ gfs2_holder_uninit(&d_gh);
return error;
}
/**
* gfs2_symlink - Create a symlink
- * @mnt_userns: User namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dir: The directory to create the symlink in
* @dentry: The dentry to put the symlink in
* @symname: The thing which the link points to
@@ -1215,7 +1322,7 @@ out_inodes:
* Returns: errno
*/
-static int gfs2_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+static int gfs2_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
unsigned int size;
@@ -1229,24 +1336,25 @@ static int gfs2_symlink(struct user_namespace *mnt_userns, struct inode *dir,
/**
* gfs2_mkdir - Make a directory
- * @mnt_userns: User namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dir: The parent directory of the new one
* @dentry: The dentry of the new directory
* @mode: The mode of the new directory
*
- * Returns: errno
+ * Returns: the dentry, or ERR_PTR(errno)
*/
-static int gfs2_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+static struct dentry *gfs2_mkdir(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
unsigned dsize = gfs2_max_stuffed_size(GFS2_I(dir));
- return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0);
+
+ return ERR_PTR(gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0));
}
/**
* gfs2_mknod - Make a special file
- * @mnt_userns: User namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dir: The directory in which the special file will reside
* @dentry: The dentry of the special file
* @mode: The mode of the special file
@@ -1254,7 +1362,7 @@ static int gfs2_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
*
*/
-static int gfs2_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+static int gfs2_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t dev)
{
return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0);
@@ -1275,27 +1383,19 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned flags,
umode_t mode)
{
- struct dentry *d;
bool excl = !!(flags & O_EXCL);
- if (!d_in_lookup(dentry))
- goto skip_lookup;
-
- d = __gfs2_lookup(dir, dentry, file);
- if (IS_ERR(d))
- return PTR_ERR(d);
- if (d != NULL)
- dentry = d;
- if (d_really_is_positive(dentry)) {
- if (!(file->f_mode & FMODE_OPENED))
+ if (d_in_lookup(dentry)) {
+ struct dentry *d = __gfs2_lookup(dir, dentry, file);
+ if (file->f_mode & FMODE_OPENED) {
+ if (IS_ERR(d))
+ return PTR_ERR(d);
+ dput(d);
+ return excl && (flags & O_CREAT) ? -EEXIST : 0;
+ }
+ if (d || d_really_is_positive(dentry))
return finish_no_open(file, d);
- dput(d);
- return excl && (flags & O_CREAT) ? -EEXIST : 0;
}
-
- BUG_ON(d != NULL);
-
-skip_lookup:
if (!(flags & O_CREAT))
return -ENOENT;
@@ -1366,7 +1466,7 @@ static int update_moved_ino(struct gfs2_inode *ip, struct gfs2_inode *ndip,
if (dir_rename)
return gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
- ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_ctime_current(&ip->i_inode);
mark_inode_dirty_sync(&ip->i_inode);
return 0;
}
@@ -1504,7 +1604,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
}
}
} else {
- error = gfs2_permission(&init_user_ns, ndir,
+ error = gfs2_permission(&nop_mnt_idmap, ndir,
MAY_WRITE | MAY_EXEC);
if (error)
goto out_gunlock;
@@ -1541,7 +1641,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
/* Check out the dir to be renamed */
if (dir_rename) {
- error = gfs2_permission(&init_user_ns, d_inode(odentry),
+ error = gfs2_permission(&nop_mnt_idmap, d_inode(odentry),
MAY_WRITE);
if (error)
goto out_gunlock;
@@ -1705,13 +1805,13 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry,
goto out_gunlock;
if (S_ISDIR(old_mode)) {
- error = gfs2_permission(&init_user_ns, odentry->d_inode,
+ error = gfs2_permission(&nop_mnt_idmap, odentry->d_inode,
MAY_WRITE);
if (error)
goto out_gunlock;
}
if (S_ISDIR(new_mode)) {
- error = gfs2_permission(&init_user_ns, ndentry->d_inode,
+ error = gfs2_permission(&nop_mnt_idmap, ndentry->d_inode,
MAY_WRITE);
if (error)
goto out_gunlock;
@@ -1766,7 +1866,7 @@ out:
return error;
}
-static int gfs2_rename2(struct user_namespace *mnt_userns, struct inode *odir,
+static int gfs2_rename2(struct mnt_idmap *idmap, struct inode *odir,
struct dentry *odentry, struct inode *ndir,
struct dentry *ndentry, unsigned int flags)
{
@@ -1841,7 +1941,7 @@ out:
/**
* gfs2_permission
- * @mnt_userns: User namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @inode: The inode
* @mask: The mask to be tested
*
@@ -1852,19 +1952,27 @@ out:
* Returns: errno
*/
-int gfs2_permission(struct user_namespace *mnt_userns, struct inode *inode,
+int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode,
int mask)
{
+ int may_not_block = mask & MAY_NOT_BLOCK;
struct gfs2_inode *ip;
struct gfs2_holder i_gh;
+ struct gfs2_glock *gl;
int error;
gfs2_holder_mark_uninitialized(&i_gh);
ip = GFS2_I(inode);
- if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
- if (mask & MAY_NOT_BLOCK)
+ gl = rcu_dereference_check(ip->i_gl, !may_not_block);
+ if (unlikely(!gl)) {
+ /* inode is getting torn down, must be RCU mode */
+ WARN_ON_ONCE(!may_not_block);
+ return -ECHILD;
+ }
+ if (gfs2_glock_is_locked_by_me(gl) == NULL) {
+ if (may_not_block)
return -ECHILD;
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+ error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
return error;
}
@@ -1872,7 +1980,7 @@ int gfs2_permission(struct user_namespace *mnt_userns, struct inode *inode,
if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
error = -EPERM;
else
- error = generic_permission(&init_user_ns, inode, mask);
+ error = generic_permission(&nop_mnt_idmap, inode, mask);
if (gfs2_holder_initialized(&i_gh))
gfs2_glock_dq_uninit(&i_gh);
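
The may_not_block handling above follows the usual RCU-walk convention: under MAY_NOT_BLOCK a ->permission() implementation may only look at RCU-protected state and must return -ECHILD so the VFS retries in ref-walk mode with proper references held. A reduced sketch of that convention (rcu_dereference_check(), MAY_NOT_BLOCK and -ECHILD are real kernel interfaces; needs_sleeping_lock() is a made-up placeholder for the "am I already holding the glock" test):

	gl = rcu_dereference_check(ip->i_gl, !may_not_block);
	if (!gl) {
		/* teardown can only be observed during an RCU walk */
		WARN_ON_ONCE(!may_not_block);
		return -ECHILD;
	}
	if (needs_sleeping_lock(gl)) {
		if (may_not_block)
			return -ECHILD;	/* VFS retries in ref-walk mode */
		/* safe to block: acquire the lock the slow way */
	}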
@@ -1881,7 +1989,7 @@ int gfs2_permission(struct user_namespace *mnt_userns, struct inode *inode,
static int __gfs2_setattr_simple(struct inode *inode, struct iattr *attr)
{
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(&nop_mnt_idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
@@ -1909,7 +2017,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
kuid_t ouid, nuid;
kgid_t ogid, ngid;
int error;
- struct gfs2_alloc_parms ap;
+ struct gfs2_alloc_parms ap = {};
ouid = inode->i_uid;
ogid = inode->i_gid;
@@ -1966,7 +2074,7 @@ out:
/**
* gfs2_setattr - Change attributes on an inode
- * @mnt_userns: User namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @dentry: The dentry which is changing
* @attr: The structure describing the change
*
@@ -1976,7 +2084,7 @@ out:
* Returns: errno
*/
-static int gfs2_setattr(struct user_namespace *mnt_userns,
+static int gfs2_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -1992,11 +2100,11 @@ static int gfs2_setattr(struct user_namespace *mnt_userns,
if (error)
goto out;
- error = may_setattr(&init_user_ns, inode, attr->ia_valid);
+ error = may_setattr(&nop_mnt_idmap, inode, attr->ia_valid);
if (error)
goto error;
- error = setattr_prepare(&init_user_ns, dentry, attr);
+ error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (error)
goto error;
@@ -2007,7 +2115,7 @@ static int gfs2_setattr(struct user_namespace *mnt_userns,
else {
error = gfs2_setattr_simple(inode, attr);
if (!error && attr->ia_valid & ATTR_MODE)
- error = posix_acl_chmod(&init_user_ns, dentry,
+ error = posix_acl_chmod(&nop_mnt_idmap, dentry,
inode->i_mode);
}
@@ -2022,7 +2130,7 @@ out:
/**
* gfs2_getattr - Read out an inode's attributes
- * @mnt_userns: user namespace of the mount the inode was found from
+ * @idmap: idmap of the mount the inode was found from
* @path: Object to query
* @stat: The inode's stats
* @request_mask: Mask of STATX_xxx flags indicating the caller's interests
@@ -2037,7 +2145,7 @@ out:
* Returns: errno
*/
-static int gfs2_getattr(struct user_namespace *mnt_userns,
+static int gfs2_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
@@ -2066,7 +2174,7 @@ static int gfs2_getattr(struct user_namespace *mnt_userns,
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
if (gfs2_holder_initialized(&gh))
gfs2_glock_dq_uninit(&gh);
@@ -2134,8 +2242,7 @@ loff_t gfs2_seek_hole(struct file *file, loff_t offset)
return vfs_setpos(file, ret, inode->i_sb->s_maxbytes);
}
-static int gfs2_update_time(struct inode *inode, struct timespec64 *time,
- int flags)
+static int gfs2_update_time(struct inode *inode, int flags)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_glock *gl = ip->i_gl;
@@ -2143,14 +2250,15 @@ static int gfs2_update_time(struct inode *inode, struct timespec64 *time,
int error;
gh = gfs2_glock_is_locked_by_me(gl);
- if (gh && !gfs2_glock_is_held_excl(gl)) {
+ if (gh && gl->gl_state != LM_ST_EXCLUSIVE) {
gfs2_glock_dq(gh);
gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, gh);
error = gfs2_glock_nq(gh);
if (error)
return error;
}
- return generic_update_time(inode, time, flags);
+ generic_update_time(inode, flags);
+ return 0;
}
static const struct inode_operations gfs2_file_iops = {
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 0264d514dda7..2fcd96dd1361 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -13,9 +13,9 @@
#include "util.h"
bool gfs2_release_folio(struct folio *folio, gfp_t gfp_mask);
-extern int gfs2_internal_read(struct gfs2_inode *ip,
- char *buf, loff_t *pos, unsigned size);
-extern void gfs2_set_aops(struct inode *inode);
+ssize_t gfs2_internal_read(struct gfs2_inode *ip,
+ char *buf, loff_t *pos, size_t size);
+void gfs2_set_aops(struct inode *inode);
static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
{
@@ -44,19 +44,17 @@ static inline int gfs2_is_dir(const struct gfs2_inode *ip)
static inline void gfs2_set_inode_blocks(struct inode *inode, u64 blocks)
{
- inode->i_blocks = blocks <<
- (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
+ inode->i_blocks = blocks << (inode->i_blkbits - SECTOR_SHIFT);
}
static inline u64 gfs2_get_inode_blocks(const struct inode *inode)
{
- return inode->i_blocks >>
- (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
+ return inode->i_blocks >> (inode->i_blkbits - SECTOR_SHIFT);
}
static inline void gfs2_add_inode_blocks(struct inode *inode, s64 change)
{
- change <<= inode->i_blkbits - GFS2_BASIC_BLOCK_SHIFT;
+ change <<= inode->i_blkbits - SECTOR_SHIFT;
gfs2_assert(GFS2_SB(inode), (change >= 0 || inode->i_blocks >= -change));
inode->i_blocks += change;
}
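
The three helpers above rely on inode->i_blocks being counted in 512-byte sectors (SECTOR_SHIFT == 9) while a GFS2 block is 1 << inode->i_blkbits bytes, so the old sb_bsize_shift/GFS2_BASIC_BLOCK_SHIFT expression and the new one compute the same shift. A self-contained check of the arithmetic, assuming a 4KiB block size purely for illustration:

#include <assert.h>
#include <stdint.h>

#define SECTOR_SHIFT 9

int main(void)
{
	unsigned int blkbits = 12;	/* 4 KiB filesystem blocks */
	uint64_t fs_blocks = 10;

	/* one 4 KiB block is 1 << (12 - 9) = 8 sectors */
	uint64_t i_blocks = fs_blocks << (blkbits - SECTOR_SHIFT);
	assert(i_blocks == 80);

	/* converting back recovers the block count */
	assert((i_blocks >> (blkbits - SECTOR_SHIFT)) == fs_blocks);
	return 0;
}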
@@ -88,33 +86,33 @@ err:
return -EIO;
}
-extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
- u64 no_addr, u64 no_formal_ino,
- unsigned int blktype);
-extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
- u64 no_formal_ino,
- unsigned int blktype);
-
-extern int gfs2_inode_refresh(struct gfs2_inode *ip);
-
-extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
- int is_root);
-extern int gfs2_permission(struct user_namespace *mnt_userns,
- struct inode *inode, int mask);
-extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
-extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
-extern int gfs2_open_common(struct inode *inode, struct file *file);
-extern loff_t gfs2_seek_data(struct file *file, loff_t offset);
-extern loff_t gfs2_seek_hole(struct file *file, loff_t offset);
+void gfs2_setup_inode(struct inode *inode);
+struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
+ u64 no_addr, u64 no_formal_ino,
+ unsigned int blktype);
+struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
+ u64 no_formal_ino,
+ unsigned int blktype);
+int gfs2_dinode_dealloc(struct gfs2_inode *ip);
+
+struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
+ int is_root);
+int gfs2_permission(struct mnt_idmap *idmap,
+ struct inode *inode, int mask);
+struct inode *gfs2_lookup_meta(struct inode *dip, const char *name);
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
+int gfs2_open_common(struct inode *inode, struct file *file);
+loff_t gfs2_seek_data(struct file *file, loff_t offset);
+loff_t gfs2_seek_hole(struct file *file, loff_t offset);
extern const struct file_operations gfs2_file_fops_nolock;
extern const struct file_operations gfs2_dir_fops_nolock;
-extern int gfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa);
-extern int gfs2_fileattr_set(struct user_namespace *mnt_userns,
- struct dentry *dentry, struct fileattr *fa);
-extern void gfs2_set_inode_flags(struct inode *inode);
-
+int gfs2_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
+int gfs2_fileattr_set(struct mnt_idmap *idmap,
+ struct dentry *dentry, struct file_kattr *fa);
+void gfs2_set_inode_flags(struct inode *inode);
+
#ifdef CONFIG_GFS2_FS_LOCKING_DLM
extern const struct file_operations gfs2_file_fops;
extern const struct file_operations gfs2_dir_fops;
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 71911bf9ab34..b8d249925395 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -15,9 +15,6 @@
#include <linux/sched/signal.h>
#include "incore.h"
-#include "glock.h"
-#include "glops.h"
-#include "recovery.h"
#include "util.h"
#include "sys.h"
#include "trace_gfs2.h"
@@ -58,6 +55,7 @@ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
/**
* gfs2_update_reply_times - Update locking statistics
* @gl: The glock to update
+ * @blocking: Whether the request was classified as (potentially) blocking
*
* This assumes that gl->gl_dstamp has been set earlier.
*
@@ -72,12 +70,12 @@ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
* TRY_1CB flags are set are classified as non-blocking. All
* other DLM requests are counted as (potentially) blocking.
*/
-static inline void gfs2_update_reply_times(struct gfs2_glock *gl)
+static inline void gfs2_update_reply_times(struct gfs2_glock *gl,
+ bool blocking)
{
struct gfs2_pcpu_lkstats *lks;
const unsigned gltype = gl->gl_name.ln_type;
- unsigned index = test_bit(GLF_BLOCKING, &gl->gl_flags) ?
- GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
+ unsigned index = blocking ? GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
s64 rtt;
preempt_disable();
@@ -119,9 +117,18 @@ static inline void gfs2_update_request_times(struct gfs2_glock *gl)
static void gdlm_ast(void *arg)
{
struct gfs2_glock *gl = arg;
- unsigned ret = gl->gl_state;
+ bool blocking;
+ unsigned ret;
+
+ blocking = test_bit(GLF_BLOCKING, &gl->gl_flags);
+ gfs2_update_reply_times(gl, blocking);
+ clear_bit(GLF_BLOCKING, &gl->gl_flags);
+
+ /* If the glock is dead, we only react to a dlm_unlock() reply. */
+ if (__lockref_is_dead(&gl->gl_lockref) &&
+ gl->gl_lksb.sb_status != -DLM_EUNLOCK)
+ return;
- gfs2_update_reply_times(gl);
BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
if ((gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) && gl->gl_lksb.sb_lvbptr)
@@ -129,18 +136,19 @@ static void gdlm_ast(void *arg)
switch (gl->gl_lksb.sb_status) {
case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
- if (gl->gl_ops->go_free)
- gl->gl_ops->go_free(gl);
gfs2_glock_free(gl);
return;
case -DLM_ECANCEL: /* Cancel while getting lock */
- ret |= LM_OUT_CANCELED;
+ ret = LM_OUT_CANCELED;
goto out;
case -EAGAIN: /* Try lock fails */
+ ret = LM_OUT_TRY_AGAIN;
+ goto out;
case -EDEADLK: /* Deadlock detected */
+ ret = LM_OUT_DEADLOCK;
goto out;
case -ETIMEDOUT: /* Canceled due to timeout */
- ret |= LM_OUT_ERROR;
+ ret = LM_OUT_ERROR;
goto out;
case 0: /* Success */
break;
@@ -149,20 +157,22 @@ static void gdlm_ast(void *arg)
}
ret = gl->gl_req;
- if (gl->gl_lksb.sb_flags & DLM_SBF_ALTMODE) {
- if (gl->gl_req == LM_ST_SHARED)
- ret = LM_ST_DEFERRED;
- else if (gl->gl_req == LM_ST_DEFERRED)
- ret = LM_ST_SHARED;
- else
- BUG();
- }
- set_bit(GLF_INITIAL, &gl->gl_flags);
+ /*
+ * The GLF_INITIAL flag is initially set for new glocks. Upon the
+ * first successful new (non-conversion) request, we clear this flag to
+ * indicate that a DLM lock exists and that gl->gl_lksb.sb_lkid is the
+ * identifier to use for it.
+ *
+ * Any failed initial requests do not create a DLM lock, so we ignore
+ * the gl->gl_lksb.sb_lkid values that come with such requests.
+ */
+
+ clear_bit(GLF_INITIAL, &gl->gl_flags);
gfs2_glock_complete(gl, ret);
return;
out:
- if (!test_bit(GLF_INITIAL, &gl->gl_flags))
+ if (test_bit(GLF_INITIAL, &gl->gl_flags))
gl->gl_lksb.sb_lkid = 0;
gfs2_glock_complete(gl, ret);
}
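
Taken together, the GLF_INITIAL hunks above give the flag a simple lifecycle; summarized as a sketch (inferred from the comment and the error path, not literal kernel code):

/*
 * glock created:           GLF_INITIAL set, gl_lksb.sb_lkid == 0
 * gdlm_lock():             GLF_INITIAL set -> new request, no DLM_LKF_CONVERT
 * gdlm_ast(), success:     clear GLF_INITIAL; sb_lkid now names the DLM lock
 * gdlm_ast(), failure:     GLF_INITIAL still set -> reset sb_lkid to 0,
 *                          since no DLM lock was created
 * subsequent gdlm_lock():  GLF_INITIAL clear -> conversion of sb_lkid
 */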
@@ -171,6 +181,9 @@ static void gdlm_bast(void *arg, int mode)
{
struct gfs2_glock *gl = arg;
+ if (__lockref_is_dead(&gl->gl_lockref))
+ return;
+
switch (mode) {
case DLM_LOCK_EX:
gfs2_glock_cb(gl, LM_ST_UNLOCKED);
@@ -206,8 +219,21 @@ static int make_mode(struct gfs2_sbd *sdp, const unsigned int lmstate)
return -1;
}
+/* Taken from fs/dlm/lock.c. */
+
+static bool middle_conversion(int cur, int req)
+{
+ return (cur == DLM_LOCK_PR && req == DLM_LOCK_CW) ||
+ (cur == DLM_LOCK_CW && req == DLM_LOCK_PR);
+}
+
+static bool down_conversion(int cur, int req)
+{
+ return !middle_conversion(cur, req) && req < cur;
+}
+
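
For reference, the numeric DLM lock modes are NL(0) < CR(1) < CW(2), PR(3) < PW(4) < EX(5), where CW and PR are incompatible with each other but neither is strictly weaker than the other, which is why a PR<->CW transition is a "middle" conversion rather than an up- or down-conversion. A small self-contained check (mode values as in include/uapi/linux/dlmconstants.h; the helpers mirror the ones added above):

#include <assert.h>
#include <stdbool.h>

#define DLM_LOCK_NL 0
#define DLM_LOCK_CR 1
#define DLM_LOCK_CW 2
#define DLM_LOCK_PR 3
#define DLM_LOCK_PW 4
#define DLM_LOCK_EX 5

static bool middle_conversion(int cur, int req)
{
	return (cur == DLM_LOCK_PR && req == DLM_LOCK_CW) ||
	       (cur == DLM_LOCK_CW && req == DLM_LOCK_PR);
}

static bool down_conversion(int cur, int req)
{
	return !middle_conversion(cur, req) && req < cur;
}

int main(void)
{
	assert(down_conversion(DLM_LOCK_EX, DLM_LOCK_PR));	/* releases rights */
	assert(!down_conversion(DLM_LOCK_PR, DLM_LOCK_CW));	/* "middle" */
	assert(!down_conversion(DLM_LOCK_CR, DLM_LOCK_EX));	/* up-conversion */
	return 0;
}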
static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
- const int req)
+ const int req, bool blocking)
{
u32 lkf = 0;
@@ -222,23 +248,16 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
lkf |= DLM_LKF_NOQUEUEBAST;
}
- if (gfs_flags & LM_FLAG_PRIORITY) {
- lkf |= DLM_LKF_NOORDER;
- lkf |= DLM_LKF_HEADQUE;
- }
-
- if (gfs_flags & LM_FLAG_ANY) {
- if (req == DLM_LOCK_PR)
- lkf |= DLM_LKF_ALTCW;
- else if (req == DLM_LOCK_CW)
- lkf |= DLM_LKF_ALTPR;
- else
- BUG();
- }
-
- if (gl->gl_lksb.sb_lkid != 0) {
+ if (!test_bit(GLF_INITIAL, &gl->gl_flags)) {
lkf |= DLM_LKF_CONVERT;
- if (test_bit(GLF_BLOCKING, &gl->gl_flags))
+
+ /*
+ * The DLM_LKF_QUECVT flag needs to be set for "first come,
+ * first served" semantics, but it must only be set for
+ * "upward" lock conversions or else DLM will reject the
+ * request as invalid.
+ */
+ if (blocking)
lkf |= DLM_LKF_QUECVT;
}
@@ -258,31 +277,43 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
unsigned int flags)
{
struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
- int req;
+ bool blocking;
+ int cur, req;
u32 lkf;
char strname[GDLM_STRNAME_BYTES] = "";
int error;
+ gl->gl_req = req_state;
+ cur = make_mode(gl->gl_name.ln_sbd, gl->gl_state);
req = make_mode(gl->gl_name.ln_sbd, req_state);
- lkf = make_flags(gl, flags, req);
+ blocking = !down_conversion(cur, req) &&
+ !(flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB));
+ lkf = make_flags(gl, flags, req, blocking);
+ if (blocking)
+ set_bit(GLF_BLOCKING, &gl->gl_flags);
gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
- if (gl->gl_lksb.sb_lkid) {
- gfs2_update_request_times(gl);
- } else {
+ if (test_bit(GLF_INITIAL, &gl->gl_flags)) {
memset(strname, ' ', GDLM_STRNAME_BYTES - 1);
strname[GDLM_STRNAME_BYTES - 1] = '\0';
gfs2_reverse_hex(strname + 7, gl->gl_name.ln_type);
gfs2_reverse_hex(strname + 23, gl->gl_name.ln_number);
gl->gl_dstamp = ktime_get_real();
+ } else {
+ gfs2_update_request_times(gl);
}
/*
* Submit the actual lock request.
*/
again:
- error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
- GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
+ down_read(&ls->ls_sem);
+ error = -ENODEV;
+ if (likely(ls->ls_dlm != NULL)) {
+ error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
+ GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
+ }
+ up_read(&ls->ls_sem);
if (error == -EBUSY) {
msleep(20);
goto again;
@@ -294,57 +325,75 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ uint32_t flags = 0;
int error;
- if (gl->gl_lksb.sb_lkid == 0) {
+ BUG_ON(!__lockref_is_dead(&gl->gl_lockref));
+
+ if (test_bit(GLF_INITIAL, &gl->gl_flags)) {
gfs2_glock_free(gl);
return;
}
- clear_bit(GLF_BLOCKING, &gl->gl_flags);
gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_update_request_times(gl);
- /* don't want to call dlm if we've unmounted the lock protocol */
- if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) {
- gfs2_glock_free(gl);
- return;
- }
- /* don't want to skip dlm_unlock writing the lvb when lock has one */
+ /*
+ * When the lockspace is released, all remaining glocks will be
+ * unlocked automatically. This is more efficient than unlocking them
+ * individually, but when the lock is held in DLM_LOCK_EX or
+ * DLM_LOCK_PW mode, the lock value block (LVB) would be lost.
+ */
if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
- !gl->gl_lksb.sb_lvbptr) {
- gfs2_glock_free(gl);
+ (!gl->gl_lksb.sb_lvbptr || gl->gl_state != LM_ST_EXCLUSIVE)) {
+ gfs2_glock_free_later(gl);
return;
}
+ if (gl->gl_lksb.sb_lvbptr)
+ flags |= DLM_LKF_VALBLK;
+
again:
- error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
- NULL, gl);
+ down_read(&ls->ls_sem);
+ error = -ENODEV;
+ if (likely(ls->ls_dlm != NULL)) {
+ error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, flags,
+ NULL, gl);
+ }
+ up_read(&ls->ls_sem);
if (error == -EBUSY) {
msleep(20);
goto again;
}
+ if (error == -ENODEV) {
+ gfs2_glock_free(gl);
+ return;
+ }
+
if (error) {
fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n",
gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number, error);
- return;
}
}
static void gdlm_cancel(struct gfs2_glock *gl)
{
struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
- dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
+
+ down_read(&ls->ls_sem);
+ if (likely(ls->ls_dlm != NULL)) {
+ dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
+ }
+ up_read(&ls->ls_sem);
}
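
The same guard now wraps every dlm_lock()/dlm_unlock() call site in this patch: take ls_sem for reading, test ls_dlm against NULL, and treat a released lockspace as -ENODEV, while gdlm_unmount() clears ls_dlm under the write side so no call can race with dlm_release_lockspace(). A user-space analogue of the pattern, with a pthread rwlock standing in for the rw_semaphore (all names here are illustrative):

#include <errno.h>
#include <pthread.h>
#include <stddef.h>

static pthread_rwlock_t ls_sem = PTHREAD_RWLOCK_INITIALIZER;
static void *ls_dlm;	/* stands in for the lockspace pointer */

/* call-site pattern: the lockspace may be torn down concurrently */
static int guarded_lock_op(int (*op)(void *ls))
{
	int error = -ENODEV;

	pthread_rwlock_rdlock(&ls_sem);
	if (ls_dlm != NULL)
		error = op(ls_dlm);	/* safe: release waits for readers */
	pthread_rwlock_unlock(&ls_sem);
	return error;
}

/* teardown pattern, as in gdlm_unmount(): clear under the write lock */
static void guarded_release(void (*release)(void *ls))
{
	pthread_rwlock_wrlock(&ls_sem);
	if (ls_dlm) {
		release(ls_dlm);
		ls_dlm = NULL;
	}
	pthread_rwlock_unlock(&ls_sem);
}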
/*
* dlm/gfs2 recovery coordination using dlm_recover callbacks
*
- * 0. gfs2 checks for another cluster node withdraw, needing journal replay
* 1. dlm_controld sees lockspace members change
* 2. dlm_controld blocks dlm-kernel locking activity
* 3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep)
@@ -519,7 +568,11 @@ static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name)
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
int error;
- error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
+ down_read(&ls->ls_sem);
+ error = -ENODEV;
+ if (likely(ls->ls_dlm != NULL))
+ error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
+ up_read(&ls->ls_sem);
if (error) {
fs_err(sdp, "%s lkid %x error %d\n",
name, lksb->sb_lkid, error);
@@ -546,9 +599,14 @@ static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags,
memset(strname, 0, GDLM_STRNAME_BYTES);
snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num);
- error = dlm_lock(ls->ls_dlm, mode, lksb, flags,
- strname, GDLM_STRNAME_BYTES - 1,
- 0, sync_wait_cb, ls, NULL);
+ down_read(&ls->ls_sem);
+ error = -ENODEV;
+ if (likely(ls->ls_dlm != NULL)) {
+ error = dlm_lock(ls->ls_dlm, mode, lksb, flags,
+ strname, GDLM_STRNAME_BYTES - 1,
+ 0, sync_wait_cb, ls, NULL);
+ }
+ up_read(&ls->ls_sem);
if (error) {
fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n",
name, lksb->sb_lkid, flags, mode, error);
@@ -593,28 +651,6 @@ static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
&ls->ls_control_lksb, "control_lock");
}
-/**
- * remote_withdraw - react to a node withdrawing from the file system
- * @sdp: The superblock
- */
-static void remote_withdraw(struct gfs2_sbd *sdp)
-{
- struct gfs2_jdesc *jd;
- int ret = 0, count = 0;
-
- list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
- if (jd->jd_jid == sdp->sd_lockstruct.ls_jid)
- continue;
- ret = gfs2_recover_journal(jd, true);
- if (ret)
- break;
- count++;
- }
-
- /* Now drop the additional reference we acquired */
- fs_err(sdp, "Journals checked: %d, ret = %d.\n", count, ret);
-}
-
static void gfs2_control_func(struct work_struct *work)
{
struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work);
@@ -625,13 +661,6 @@ static void gfs2_control_func(struct work_struct *work)
int recover_size;
int i, error;
- /* First check for other nodes that may have done a withdraw. */
- if (test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) {
- remote_withdraw(sdp);
- clear_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags);
- return;
- }
-
spin_lock(&ls->ls_recover_spin);
/*
* No MOUNT_DONE means we're still mounting; control_mount()
@@ -955,14 +984,15 @@ locks_done:
if (sdp->sd_args.ar_spectator) {
fs_info(sdp, "Recovery is required. Waiting for a "
"non-spectator to mount.\n");
+ spin_unlock(&ls->ls_recover_spin);
msleep_interruptible(1000);
} else {
fs_info(sdp, "control_mount wait1 block %u start %u "
"mount %u lvb %u flags %lx\n", block_gen,
start_gen, mount_gen, lvb_gen,
ls->ls_recover_flags);
+ spin_unlock(&ls->ls_recover_spin);
}
- spin_unlock(&ls->ls_recover_spin);
goto restart;
}
@@ -1274,6 +1304,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
*/
INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func);
+ ls->ls_dlm = NULL;
spin_lock_init(&ls->ls_recover_spin);
ls->ls_recover_flags = 0;
ls->ls_recover_mount = 0;
@@ -1308,6 +1339,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
* create/join lockspace
*/
+ init_rwsem(&ls->ls_sem);
error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE,
&gdlm_lockspace_ops, sdp, &ops_result,
&ls->ls_dlm);
@@ -1351,7 +1383,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
return 0;
fail_release:
- dlm_release_lockspace(ls->ls_dlm, 2);
+ dlm_release_lockspace(ls->ls_dlm, DLM_RELEASE_NORMAL);
fail_free:
free_recover_size(ls);
fail:
@@ -1371,7 +1403,15 @@ static void gdlm_first_done(struct gfs2_sbd *sdp)
fs_err(sdp, "mount first_done error %d\n", error);
}
-static void gdlm_unmount(struct gfs2_sbd *sdp)
+/*
+ * gdlm_unmount - release our lockspace
+ * @sdp: the superblock
+ * @clean: Indicates whether the remaining nodes in the cluster should
+ * perform recovery. Recovery is necessary when a node withdraws and
+ * its journal remains dirty. Recovery isn't necessary when a node
+ * cleanly unmounts a filesystem.
+ */
+static void gdlm_unmount(struct gfs2_sbd *sdp, bool clean)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -1387,10 +1427,14 @@ static void gdlm_unmount(struct gfs2_sbd *sdp)
/* mounted_lock and control_lock will be purged in dlm recovery */
release:
+ down_write(&ls->ls_sem);
if (ls->ls_dlm) {
- dlm_release_lockspace(ls->ls_dlm, 2);
+ dlm_release_lockspace(ls->ls_dlm,
+ clean ? DLM_RELEASE_NORMAL :
+ DLM_RELEASE_RECOVER);
ls->ls_dlm = NULL;
}
+ up_write(&ls->ls_sem);
free_recover_size(ls);
}
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 61323deb80bc..8312cd2cdae4 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -31,6 +31,7 @@
#include "dir.h"
#include "trace_gfs2.h"
#include "trans.h"
+#include "aops.h"
static void gfs2_log_shutdown(struct gfs2_sbd *sdp);
@@ -80,15 +81,6 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
brelse(bd->bd_bh);
}
-static int __gfs2_writepage(struct page *page, struct writeback_control *wbc,
- void *data)
-{
- struct address_space *mapping = data;
- int ret = mapping->a_ops->writepage(page, wbc);
- mapping_set_error(mapping, ret);
- return ret;
-}
-
/**
* gfs2_ail1_start_one - Start I/O on a transaction
* @sdp: The superblock
@@ -120,10 +112,8 @@ __acquires(&sdp->sd_ail_lock)
&tr->tr_ail2_list);
continue;
}
- if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
+ if (!cmpxchg(&sdp->sd_log_error, 0, -EIO))
gfs2_io_error_bh(sdp, bh);
- gfs2_withdraw_delayed(sdp);
- }
}
if (gfs2_withdrawn(sdp)) {
@@ -136,11 +126,15 @@ __acquires(&sdp->sd_ail_lock)
continue;
gl = bd->bd_gl;
list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list);
- mapping = bh->b_page->mapping;
+ mapping = bh->b_folio->mapping;
if (!mapping)
continue;
spin_unlock(&sdp->sd_ail_lock);
- ret = write_cache_pages(mapping, wbc, __gfs2_writepage, mapping);
+ BUG_ON(GFS2_SB(mapping->host) != sdp);
+ if (gfs2_is_jdata(GFS2_I(mapping->host)))
+ ret = gfs2_jdata_writeback(mapping, wbc);
+ else
+ ret = mapping->a_ops->writepages(mapping, wbc);
if (need_resched()) {
blk_finish_plug(plug);
cond_resched();
@@ -149,6 +143,7 @@ __acquires(&sdp->sd_ail_lock)
spin_lock(&sdp->sd_ail_lock);
if (ret == -ENODATA) /* if a jdata write into a new hole */
ret = 0; /* ignore it */
+ mapping_set_error(mapping, ret);
if (ret || wbc->nr_to_write <= 0)
break;
return -EBUSY;
@@ -327,10 +322,8 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
continue;
}
if (!buffer_uptodate(bh) &&
- !cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
+ !cmpxchg(&sdp->sd_log_error, 0, -EIO))
gfs2_io_error_bh(sdp, bh);
- gfs2_withdraw_delayed(sdp);
- }
/*
* If we have space for revokes and the bd is no longer on any
* buf list, we can just add a revoke for it immediately and
@@ -352,14 +345,15 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
* @sdp: The superblock
* @max_revokes: If non-zero, add revokes where appropriate
*
- * Tries to empty the ail1 lists, starting with the oldest first
+ * Tries to empty the ail1 lists, starting with the oldest.
+ * Returns %true if the ail1 list is now empty.
*/
-static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes)
+static bool gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes)
{
struct gfs2_trans *tr, *s;
int oldest_tr = 1;
- int ret;
+ bool empty;
spin_lock(&sdp->sd_ail_lock);
list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
@@ -369,15 +363,10 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes)
oldest_tr = 0;
}
gfs2_log_update_flush_tail(sdp);
- ret = list_empty(&sdp->sd_ail1_list);
+ empty = list_empty(&sdp->sd_ail1_list);
spin_unlock(&sdp->sd_ail_lock);
- if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags)) {
- gfs2_lm(sdp, "fatal: I/O error(s)\n");
- gfs2_withdraw(sdp);
- }
-
- return ret;
+ return empty;
}
static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
@@ -790,7 +779,7 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl)
{
if (atomic_dec_return(&gl->gl_revokes) == 0) {
clear_bit(GLF_LFLUSH, &gl->gl_flags);
- gfs2_glock_queue_put(gl);
+ gfs2_glock_put_async(gl);
}
}
@@ -914,9 +903,9 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
{
blk_opf_t op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
- enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
+ struct super_block *sb = sdp->sd_vfs;
- gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));
+ gfs2_assert_withdraw(sdp, sb->s_writers.frozen != SB_FREEZE_COMPLETE);
if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
gfs2_ordered_wait(sdp);
@@ -975,8 +964,9 @@ void gfs2_ail_drain(struct gfs2_sbd *sdp)
static void empty_ail1_list(struct gfs2_sbd *sdp)
{
unsigned long start = jiffies;
+ bool empty = false;
- for (;;) {
+ while (!empty) {
if (time_after(jiffies, start + (HZ * 600))) {
fs_err(sdp, "Error: In %s for 10 minutes! t=%d\n",
__func__, current->journal_info ? 1 : 0);
@@ -985,8 +975,10 @@ static void empty_ail1_list(struct gfs2_sbd *sdp)
}
gfs2_ail1_start(sdp);
gfs2_ail1_wait(sdp);
- if (gfs2_ail1_empty(sdp, 0))
- return;
+ empty = gfs2_ail1_empty(sdp, 0);
+
+ if (gfs2_withdrawn(sdp))
+ break;
}
}
@@ -1036,7 +1028,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
{
struct gfs2_trans *tr = NULL;
unsigned int reserved_blocks = 0, used_blocks = 0;
- enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
+ bool frozen = test_bit(SDF_FROZEN, &sdp->sd_flags);
unsigned int first_log_head;
unsigned int reserved_revokes = 0;
@@ -1048,7 +1040,8 @@ repeat:
* Do this check while holding the log_flush_lock to prevent new
* buffers from being added to the ail via gfs2_pin()
*/
- if (gfs2_withdrawn(sdp) || !test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+ if (gfs2_withdrawn(sdp) ||
+ !test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
goto out;
/* Log might have been flushed while we waited for the flush lock */
@@ -1067,8 +1060,8 @@ repeat:
if (tr) {
sdp->sd_log_tr = NULL;
tr->tr_first = first_log_head;
- if (unlikely (state == SFS_FROZEN)) {
- if (gfs2_assert_withdraw_delayed(sdp,
+ if (unlikely(frozen)) {
+ if (gfs2_assert_withdraw(sdp,
!tr->tr_num_buf_new && !tr->tr_num_databuf_new))
goto out_withdraw;
}
@@ -1092,8 +1085,8 @@ repeat:
if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
- if (unlikely(state == SFS_FROZEN))
- if (gfs2_assert_withdraw_delayed(sdp, !reserved_revokes))
+ if (unlikely(frozen))
+ if (gfs2_assert_withdraw(sdp, !reserved_revokes))
goto out_withdraw;
gfs2_ordered_write(sdp);
@@ -1102,7 +1095,8 @@ repeat:
lops_before_commit(sdp, tr);
if (gfs2_withdrawn(sdp))
goto out_withdraw;
- gfs2_log_submit_bio(&sdp->sd_jdesc->jd_log_bio, REQ_OP_WRITE);
+ if (sdp->sd_jdesc)
+ gfs2_log_submit_bio(&sdp->sd_jdesc->jd_log_bio, REQ_OP_WRITE);
if (gfs2_withdrawn(sdp))
goto out_withdraw;
@@ -1136,8 +1130,6 @@ repeat:
if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
GFS2_LOG_HEAD_FLUSH_FREEZE))
gfs2_log_shutdown(sdp);
- if (flags & GFS2_LOG_HEAD_FLUSH_FREEZE)
- atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
}
out_end:
@@ -1149,13 +1141,11 @@ out_end:
reserved_blocks += (reserved_revokes - sdp->sd_ldptrs) / sdp->sd_inptrs;
out:
if (used_blocks != reserved_blocks) {
- gfs2_assert_withdraw_delayed(sdp, used_blocks < reserved_blocks);
+ gfs2_assert_withdraw(sdp, used_blocks < reserved_blocks);
gfs2_log_release(sdp, reserved_blocks - used_blocks);
}
up_write(&sdp->sd_log_flush_lock);
gfs2_trans_free(sdp, tr);
- if (gfs2_withdrawing(sdp))
- gfs2_withdraw(sdp);
trace_gfs2_log_flush(sdp, 0, flags);
return;
@@ -1230,6 +1220,21 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
gfs2_log_unlock(sdp);
}
+static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
+{
+ return atomic_read(&sdp->sd_log_pinned) +
+ atomic_read(&sdp->sd_log_blks_needed) >=
+ atomic_read(&sdp->sd_log_thresh1);
+}
+
+static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
+{
+ return sdp->sd_jdesc->jd_blocks -
+ atomic_read(&sdp->sd_log_blks_free) +
+ atomic_read(&sdp->sd_log_blks_needed) >=
+ atomic_read(&sdp->sd_log_thresh2);
+}
+
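
Both predicates compare current demand against a tunable threshold: thresh1 gates a journal flush on pinned plus requested blocks, and thresh2 gates an AIL flush on journal blocks in use plus requested blocks. A worked example with figures chosen purely for illustration (they are not the gfs2 defaults):

#include <assert.h>
#include <stdbool.h>

int main(void)
{
	/* hypothetical journal geometry and tunables */
	unsigned jd_blocks = 8192, thresh1 = 2048, thresh2 = 6144;
	unsigned log_pinned = 1500, log_blks_needed = 600, log_blks_free = 2000;

	/* gfs2_jrnl_flush_reqd(): pinned + needed crosses thresh1 */
	bool jrnl_flush = log_pinned + log_blks_needed >= thresh1;
	assert(jrnl_flush);	/* 1500 + 600 = 2100 >= 2048 */

	/* gfs2_ail_flush_reqd(): used + needed crosses thresh2 */
	bool ail_flush = jd_blocks - log_blks_free + log_blks_needed >= thresh2;
	assert(ail_flush);	/* 8192 - 2000 + 600 = 6792 >= 6144 */
	return 0;
}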
/**
* gfs2_log_commit - Commit a transaction to the log
* @sdp: the filesystem
@@ -1249,9 +1254,7 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
log_refund(sdp, tr);
- if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
- ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
- atomic_read(&sdp->sd_log_thresh2)))
+ if (gfs2_ail_flush_reqd(sdp) || gfs2_jrnl_flush_reqd(sdp))
wake_up(&sdp->sd_logd_waitq);
}
@@ -1274,24 +1277,6 @@ static void gfs2_log_shutdown(struct gfs2_sbd *sdp)
gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
}
-static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
-{
- return (atomic_read(&sdp->sd_log_pinned) +
- atomic_read(&sdp->sd_log_blks_needed) >=
- atomic_read(&sdp->sd_log_thresh1));
-}
-
-static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
-{
- unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
-
- if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags))
- return 1;
-
- return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
- atomic_read(&sdp->sd_log_thresh2);
-}
-
/**
* gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
* @data: Pointer to GFS2 superblock
@@ -1304,24 +1289,11 @@ int gfs2_logd(void *data)
{
struct gfs2_sbd *sdp = data;
unsigned long t = 1;
- DEFINE_WAIT(wait);
+ set_freezable();
while (!kthread_should_stop()) {
-
- if (gfs2_withdrawn(sdp)) {
- msleep_interruptible(HZ);
- continue;
- }
- /* Check for errors writing to the journal */
- if (sdp->sd_log_error) {
- gfs2_lm(sdp,
- "GFS2: fsid=%s: error %d: "
- "withdrawing the file system to "
- "prevent further damage.\n",
- sdp->sd_fsname, sdp->sd_log_error);
- gfs2_withdraw(sdp);
- continue;
- }
+ if (gfs2_withdrawn(sdp))
+ break;
if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
gfs2_ail1_empty(sdp, 0);
@@ -1329,7 +1301,9 @@ int gfs2_logd(void *data)
GFS2_LFC_LOGD_JFLUSH_REQD);
}
- if (gfs2_ail_flush_reqd(sdp)) {
+ if (test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
+ gfs2_ail_flush_reqd(sdp)) {
+ clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
gfs2_ail1_start(sdp);
gfs2_ail1_wait(sdp);
gfs2_ail1_empty(sdp, 0);
@@ -1339,19 +1313,13 @@ int gfs2_logd(void *data)
t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
- try_to_freeze();
-
- do {
- prepare_to_wait(&sdp->sd_logd_waitq, &wait,
- TASK_INTERRUPTIBLE);
- if (!gfs2_ail_flush_reqd(sdp) &&
- !gfs2_jrnl_flush_reqd(sdp) &&
- !kthread_should_stop())
- t = schedule_timeout(t);
- } while(t && !gfs2_ail_flush_reqd(sdp) &&
- !gfs2_jrnl_flush_reqd(sdp) &&
- !kthread_should_stop());
- finish_wait(&sdp->sd_logd_waitq, &wait);
+ t = wait_event_freezable_timeout(sdp->sd_logd_waitq,
+ test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
+ gfs2_ail_flush_reqd(sdp) ||
+ gfs2_jrnl_flush_reqd(sdp) ||
+ gfs2_withdrawn(sdp) ||
+ kthread_should_stop(),
+ t);
}
return 0;
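
The open-coded prepare_to_wait()/schedule_timeout() loop collapses into a single wait_event_freezable_timeout(), which sleeps until the condition becomes true or the timeout elapses, cooperates with the freezer, and (broadly) returns the remaining jiffies, or 0 on timeout. The resulting shape of the logd loop, sketched with the or'ed wakeup tests factored into a made-up logd_work_pending() helper:

	for (;;) {
		if (kthread_should_stop() || gfs2_withdrawn(sdp))
			break;
		flush_as_needed(sdp, t == 0);	/* hypothetical: the branches above */
		t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
		t = wait_event_freezable_timeout(sdp->sd_logd_waitq,
						 logd_work_pending(sdp) ||
						 kthread_should_stop(),
						 t);
		/* t == 0 means the timeout expired, which forces a periodic
		 * journal flush on the next iteration */
	}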
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 653cffcbf869..fc30ebdad83a 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -44,17 +44,6 @@ __releases(&sdp->sd_log_lock)
spin_unlock(&sdp->sd_log_lock);
}
-static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
- unsigned int value)
-{
- if (++value == sdp->sd_jdesc->jd_blocks) {
- value = 0;
- }
- sdp->sd_log_tail = value;
- sdp->sd_log_flush_tail = value;
- sdp->sd_log_head = value;
-}
-
static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
@@ -70,29 +59,29 @@ static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
}
}
-extern void gfs2_ordered_del_inode(struct gfs2_inode *ip);
-extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct);
-extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
-extern bool gfs2_log_is_empty(struct gfs2_sbd *sdp);
-extern void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes);
-extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
-extern bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
- unsigned int *extra_revokes);
-extern void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
- unsigned int *extra_revokes);
-extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
- u64 seq, u32 tail, u32 lblock, u32 flags,
- blk_opf_t op_flags);
-extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
- u32 type);
-extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
-extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
-extern void log_flush_wait(struct gfs2_sbd *sdp);
+void gfs2_ordered_del_inode(struct gfs2_inode *ip);
+unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct);
+void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
+bool gfs2_log_is_empty(struct gfs2_sbd *sdp);
+void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes);
+void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
+bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+ unsigned int *extra_revokes);
+void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+ unsigned int *extra_revokes);
+void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
+ u64 seq, u32 tail, u32 lblock, u32 flags,
+ blk_opf_t op_flags);
+void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
+ u32 type);
+void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
+void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
+void log_flush_wait(struct gfs2_sbd *sdp);
-extern int gfs2_logd(void *data);
-extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
-extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl);
-extern void gfs2_flush_revokes(struct gfs2_sbd *sdp);
-extern void gfs2_ail_drain(struct gfs2_sbd *sdp);
+int gfs2_logd(void *data);
+void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
+void gfs2_glock_remove_revoke(struct gfs2_glock *gl);
+void gfs2_flush_revokes(struct gfs2_sbd *sdp);
+void gfs2_ail_drain(struct gfs2_sbd *sdp);
#endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 1902413d5d12..97ebe457c00a 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -49,7 +49,7 @@ void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
if (test_set_buffer_pinned(bh))
gfs2_assert_withdraw(sdp, 0);
if (!buffer_uptodate(bh))
- gfs2_io_error_bh_wd(sdp, bh);
+ gfs2_io_error_bh(sdp, bh);
bd = bh->b_private;
/* If this buffer is in the AIL and it has already been written
* to in-place disk block, remove it from the AIL.
@@ -157,7 +157,9 @@ u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lblock)
/**
* gfs2_end_log_write_bh - end log write of pagecache data with buffers
* @sdp: The superblock
- * @bvec: The bio_vec
+ * @folio: The folio
+ * @offset: The first byte within the folio that completed
+ * @size: The number of bytes that completed
* @error: The i/o status
*
* This finds the relevant buffers and unlocks them and sets the
@@ -166,17 +168,13 @@ u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lblock)
* that is pinned in the pagecache.
*/
-static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp,
- struct bio_vec *bvec,
- blk_status_t error)
+static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct folio *folio,
+ size_t offset, size_t size, blk_status_t error)
{
struct buffer_head *bh, *next;
- struct page *page = bvec->bv_page;
- unsigned size;
- bh = page_buffers(page);
- size = bvec->bv_len;
- while (bh_offset(bh) < bvec->bv_offset)
+ bh = folio_buffers(folio);
+ while (bh_offset(bh) < offset)
bh = bh->b_this_page;
do {
if (error)
@@ -186,7 +184,7 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp,
size -= bh->b_size;
brelse(bh);
bh = next;
- } while(bh && size);
+ } while (bh && size);
}
/**
@@ -203,23 +201,24 @@ static void gfs2_end_log_write(struct bio *bio)
{
struct gfs2_sbd *sdp = bio->bi_private;
struct bio_vec *bvec;
- struct page *page;
struct bvec_iter_all iter_all;
if (bio->bi_status) {
- if (!cmpxchg(&sdp->sd_log_error, 0, (int)bio->bi_status))
+ int err = blk_status_to_errno(bio->bi_status);
+
+ if (!cmpxchg(&sdp->sd_log_error, 0, err))
fs_err(sdp, "Error %d writing to journal, jid=%u\n",
- bio->bi_status, sdp->sd_jdesc->jd_jid);
- gfs2_withdraw_delayed(sdp);
- /* prevent more writes to the journal */
- clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
- wake_up(&sdp->sd_logd_waitq);
+ err, sdp->sd_jdesc->jd_jid);
+ gfs2_withdraw(sdp);
}
bio_for_each_segment_all(bvec, bio, iter_all) {
- page = bvec->bv_page;
- if (page_has_buffers(page))
- gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
+ struct page *page = bvec->bv_page;
+ struct folio *folio = page_folio(page);
+
+ if (folio && folio_buffers(folio))
+ gfs2_end_log_write_bh(sdp, folio, bvec->bv_offset,
+ bvec->bv_len, bio->bi_status);
else
mempool_free(page, gfs2_page_pool);
}
@@ -359,8 +358,8 @@ static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
dblock = gfs2_log_bmap(sdp->sd_jdesc, sdp->sd_log_flush_head);
gfs2_log_incr_head(sdp);
- gfs2_log_write(sdp, sdp->sd_jdesc, bh->b_page, bh->b_size,
- bh_offset(bh), dblock);
+ gfs2_log_write(sdp, sdp->sd_jdesc, folio_page(bh->b_folio, 0),
+ bh->b_size, bh_offset(bh), dblock);
}
/**
@@ -391,46 +390,40 @@ static void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
* Simply unlock the pages in the bio. The main thread will wait on them and
* process them in order as necessary.
*/
-
static void gfs2_end_log_read(struct bio *bio)
{
- struct page *page;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
+ int error = blk_status_to_errno(bio->bi_status);
+ struct folio_iter fi;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- page = bvec->bv_page;
- if (bio->bi_status) {
- int err = blk_status_to_errno(bio->bi_status);
-
- SetPageError(page);
- mapping_set_error(page->mapping, err);
- }
- unlock_page(page);
+ bio_for_each_folio_all(fi, bio) {
+ /* We're abusing wb_err to get the error to gfs2_find_jhead */
+ filemap_set_wb_err(fi.folio->mapping, error);
+ folio_end_read(fi.folio, !error);
}
bio_put(bio);
}
/**
- * gfs2_jhead_pg_srch - Look for the journal head in a given page.
+ * gfs2_jhead_folio_search - Look for the journal head in a given folio.
* @jd: The journal descriptor
* @head: The journal head to start from
- * @page: The page to look in
+ * @folio: The folio to look in
*
* Returns: 1 if found, 0 otherwise.
*/
-
-static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
- struct gfs2_log_header_host *head,
- struct page *page)
+static bool gfs2_jhead_folio_search(struct gfs2_jdesc *jd,
+ struct gfs2_log_header_host *head,
+ struct folio *folio)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct gfs2_log_header_host lh;
- void *kaddr = kmap_atomic(page);
+ void *kaddr;
unsigned int offset;
bool ret = false;
+ VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
+ kaddr = kmap_local_folio(folio, 0);
for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
if (lh.lh_sequence >= head->lh_sequence)
@@ -441,7 +434,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
}
}
}
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
return ret;
}
@@ -455,7 +448,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
* Find the folio with 'index' in the journal's mapping. Search the folio for
* the journal head if requested (cleanup == false). Release refs on the
* folio so the page cache can reclaim it. We grabbed a
- * reference on this folio twice, first when we did a find_or_create_page()
+ * reference on this folio twice, first when we did a filemap_grab_folio()
* to obtain the folio to add it to the bio and second when we do a
* filemap_get_folio() here to get the folio to wait on while I/O on it is being
* completed.
@@ -474,13 +467,13 @@ static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
folio = filemap_get_folio(jd->jd_inode->i_mapping, index);
folio_wait_locked(folio);
- if (folio_test_error(folio))
+ if (!folio_test_uptodate(folio))
*done = true;
if (!*done)
- *done = gfs2_jhead_pg_srch(jd, head, &folio->page);
+ *done = gfs2_jhead_folio_search(jd, head, folio);
- /* filemap_get_folio() and the earlier find_or_create_page() */
+ /* filemap_get_folio() and the earlier filemap_grab_folio() */
folio_put_refs(folio, 2);
}
@@ -491,7 +484,7 @@ static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs)
new = bio_alloc(prev->bi_bdev, nr_iovecs, prev->bi_opf, GFP_NOIO);
bio_clone_blkg_association(new, prev);
new->bi_iter.bi_sector = bio_end_sector(prev);
- bio_chain(new, prev);
+ bio_chain(prev, new);
submit_bio(prev);
return new;
}
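
The swapped arguments above are the actual fix: bio_chain(bio, parent) folds @bio's completion into @parent, so @parent's bi_end_io only runs once both bios have completed (and @bio's own bi_end_io is taken over by the chaining). Since prev is submitted immediately while new lives on as the bio the caller keeps filling, prev has to be the first argument. A sketch of the contract as relied on here:

/* bio_chain(child, parent): the child's completion is accounted to the
 * parent; the parent's bi_end_io runs only after both have completed */
static struct bio *chain_and_submit(struct bio *prev, struct bio *new)
{
	bio_chain(prev, new);	/* prev is the child, new the parent */
	submit_bio(prev);	/* start I/O on the already-filled bio */
	return new;		/* caller continues adding pages to new */
}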
@@ -500,15 +493,13 @@ static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs)
* gfs2_find_jhead - find the head of a log
* @jd: The journal descriptor
* @head: The log descriptor for the head of the log is returned here
- * @keep_cache: If set inode pages will not be truncated
*
* Do a search of a journal by reading it in large chunks using bios and find
* the valid log entry with the highest sequence number. (i.e. the log head)
*
* Returns: 0 on success, errno otherwise
*/
-int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
- bool keep_cache)
+int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct address_space *mapping = jd->jd_inode->i_mapping;
@@ -518,9 +509,9 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
unsigned int shift = PAGE_SHIFT - bsize_shift;
unsigned int max_blocks = 2 * 1024 * 1024 >> bsize_shift;
struct gfs2_journal_extent *je;
- int sz, ret = 0;
+ int ret = 0;
struct bio *bio = NULL;
- struct page *page = NULL;
+ struct folio *folio = NULL;
bool done = false;
errseq_t since;
@@ -533,11 +524,11 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
u64 dblock = je->dblock;
for (; block < je->lblock + je->blocks; block++, dblock++) {
- if (!page) {
- page = find_or_create_page(mapping,
- block >> shift, GFP_NOFS);
- if (!page) {
- ret = -ENOMEM;
+ if (!folio) {
+ folio = filemap_grab_folio(mapping,
+ block >> shift);
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
done = true;
goto out;
}
@@ -548,8 +539,7 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
sector_t sector = dblock << sdp->sd_fsb2bb_shift;
if (bio_end_sector(bio) == sector) {
- sz = bio_add_page(bio, page, bsize, off);
- if (sz == bsize)
+ if (bio_add_folio(bio, folio, bsize, off))
goto block_added;
}
if (off) {
@@ -569,12 +559,11 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
bio = gfs2_log_alloc_bio(sdp, dblock, gfs2_end_log_read);
bio->bi_opf = REQ_OP_READ;
add_block_to_new_bio:
- sz = bio_add_page(bio, page, bsize, off);
- BUG_ON(sz != bsize);
+ bio_add_folio_nofail(bio, folio, bsize, off);
block_added:
off += bsize;
- if (off == PAGE_SIZE)
- page = NULL;
+ if (off == folio_size(folio))
+ folio = NULL;
if (blocks_submitted <= blocks_read + max_blocks) {
/* Keep at least one bio in flight */
continue;
@@ -598,8 +587,7 @@ out:
if (!ret)
ret = filemap_check_wb_err(mapping, since);
- if (!keep_cache)
- truncate_inode_pages(mapping, 0);
+ truncate_inode_pages(mapping, 0);
return ret;
}
@@ -622,15 +610,13 @@ static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
static void gfs2_check_magic(struct buffer_head *bh)
{
- void *kaddr;
__be32 *ptr;
clear_buffer_escaped(bh);
- kaddr = kmap_atomic(bh->b_page);
- ptr = kaddr + bh_offset(bh);
+ ptr = kmap_local_folio(bh->b_folio, bh_offset(bh));
if (*ptr == cpu_to_be32(GFS2_MAGIC))
set_buffer_escaped(bh);
- kunmap_atomic(kaddr);
+ kunmap_local(ptr);
}
static int blocknr_cmp(void *priv, const struct list_head *a,
@@ -696,14 +682,12 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
lock_buffer(bd2->bd_bh);
if (buffer_escaped(bd2->bd_bh)) {
- void *kaddr;
+ void *p;
+
page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
- ptr = page_address(page);
- kaddr = kmap_atomic(bd2->bd_bh->b_page);
- memcpy(ptr, kaddr + bh_offset(bd2->bd_bh),
- bd2->bd_bh->b_size);
- kunmap_atomic(kaddr);
- *(__be32 *)ptr = 0;
+ p = page_address(page);
+ memcpy_from_page(p, bd2->bd_bh->b_page, bh_offset(bd2->bd_bh), bd2->bd_bh->b_size);
+ *(__be32 *)p = 0;
clear_buffer_escaped(bd2->bd_bh);
unlock_buffer(bd2->bd_bh);
brelse(bd2->bd_bh);
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 1412ffba1d44..be740bf33666 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -11,16 +11,18 @@
#include "incore.h"
extern const struct gfs2_log_operations *gfs2_log_ops[];
-extern void gfs2_log_incr_head(struct gfs2_sbd *sdp);
-extern u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lbn);
-extern void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
- struct page *page, unsigned size, unsigned offset,
- u64 blkno);
-extern void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf);
-extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
-extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
- struct gfs2_log_header_host *head, bool keep_cache);
-extern void gfs2_drain_revokes(struct gfs2_sbd *sdp);
+
+void gfs2_log_incr_head(struct gfs2_sbd *sdp);
+u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lbn);
+void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
+ struct page *page, unsigned size, unsigned offset,
+ u64 blkno);
+void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf);
+void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
+int gfs2_find_jhead(struct gfs2_jdesc *jd,
+ struct gfs2_log_header_host *head);
+void gfs2_drain_revokes(struct gfs2_sbd *sdp);
+
static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
{
return sdp->sd_ldptrs;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index afcb32854f14..9d65719353fa 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -51,7 +51,6 @@ static void gfs2_init_glock_once(void *foo)
{
struct gfs2_glock *gl = foo;
- spin_lock_init(&gl->gl_lockref.lock);
INIT_LIST_HEAD(&gl->gl_holders);
INIT_LIST_HEAD(&gl->gl_lru);
INIT_LIST_HEAD(&gl->gl_ail_list);
@@ -111,7 +110,6 @@ static int __init init_gfs2_fs(void)
gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
sizeof(struct gfs2_inode),
0, SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|
SLAB_ACCOUNT,
gfs2_init_inode_once);
if (!gfs2_inode_cachep)
@@ -147,14 +145,15 @@ static int __init init_gfs2_fs(void)
if (!gfs2_trans_cachep)
goto fail_cachep8;
- error = register_shrinker(&gfs2_qd_shrinker, "gfs2-qd");
+ error = gfs2_qd_shrinker_init();
if (error)
goto fail_shrinker;
error = -ENOMEM;
- gfs_recovery_wq = alloc_workqueue("gfs_recovery",
- WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
- if (!gfs_recovery_wq)
+ gfs2_recovery_wq = alloc_workqueue("gfs2_recovery",
+ WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU,
+ 0);
+ if (!gfs2_recovery_wq)
goto fail_wq1;
gfs2_control_wq = alloc_workqueue("gfs2_control",
@@ -162,7 +161,7 @@ static int __init init_gfs2_fs(void)
if (!gfs2_control_wq)
goto fail_wq2;
- gfs2_freeze_wq = alloc_workqueue("freeze_workqueue", 0, 0);
+ gfs2_freeze_wq = alloc_workqueue("gfs2_freeze", WQ_PERCPU, 0);
if (!gfs2_freeze_wq)
goto fail_wq3;
@@ -194,9 +193,9 @@ fail_mempool:
fail_wq3:
destroy_workqueue(gfs2_control_wq);
fail_wq2:
- destroy_workqueue(gfs_recovery_wq);
+ destroy_workqueue(gfs2_recovery_wq);
fail_wq1:
- unregister_shrinker(&gfs2_qd_shrinker);
+ gfs2_qd_shrinker_exit();
fail_shrinker:
kmem_cache_destroy(gfs2_trans_cachep);
fail_cachep8:
@@ -229,12 +228,12 @@ fail_lru:
static void __exit exit_gfs2_fs(void)
{
- unregister_shrinker(&gfs2_qd_shrinker);
+ gfs2_qd_shrinker_exit();
gfs2_glock_exit();
gfs2_unregister_debugfs();
unregister_filesystem(&gfs2_fs_type);
unregister_filesystem(&gfs2meta_fs_type);
- destroy_workqueue(gfs_recovery_wq);
+ destroy_workqueue(gfs2_recovery_wq);
destroy_workqueue(gfs2_control_wq);
destroy_workqueue(gfs2_freeze_wq);
list_lru_destroy(&gfs2_qd_lru);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 3c41b864ee5b..e4356198d8d8 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -30,16 +30,16 @@
#include "util.h"
#include "trace_gfs2.h"
-static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
+static void gfs2_aspace_write_folio(struct folio *folio,
+ struct writeback_control *wbc)
{
struct buffer_head *bh, *head;
int nr_underway = 0;
blk_opf_t write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc);
- BUG_ON(!PageLocked(page));
- BUG_ON(!page_has_buffers(page));
+ BUG_ON(!folio_test_locked(folio));
- head = page_buffers(page);
+ head = folio_buffers(folio);
bh = head;
do {
@@ -55,7 +55,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
if (wbc->sync_mode != WB_SYNC_NONE) {
lock_buffer(bh);
} else if (!trylock_buffer(bh)) {
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
continue;
}
if (test_clear_buffer_dirty(bh)) {
@@ -66,11 +66,11 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
} while ((bh = bh->b_this_page) != head);
/*
- * The page and its buffers are protected by PageWriteback(), so we can
- * drop the bh refcounts early.
+ * The folio and its buffers are protected from truncation by
+ * the writeback flag, so we can drop the bh refcounts early.
*/
- BUG_ON(PageWriteback(page));
- set_page_writeback(page);
+ BUG_ON(folio_test_writeback(folio));
+ folio_start_writeback(folio);
do {
struct buffer_head *next = bh->b_this_page;
@@ -80,26 +80,38 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
}
bh = next;
} while (bh != head);
- unlock_page(page);
+ folio_unlock(folio);
if (nr_underway == 0)
- end_page_writeback(page);
+ folio_end_writeback(folio);
+}
- return 0;
+static int gfs2_aspace_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct folio *folio = NULL;
+ int error;
+
+ while ((folio = writeback_iter(mapping, wbc, folio, &error)))
+ gfs2_aspace_write_folio(folio, wbc);
+
+ return error;
}
const struct address_space_operations gfs2_meta_aops = {
.dirty_folio = block_dirty_folio,
.invalidate_folio = block_invalidate_folio,
- .writepage = gfs2_aspace_writepage,
+ .writepages = gfs2_aspace_writepages,
.release_folio = gfs2_release_folio,
+ .migrate_folio = buffer_migrate_folio_norefs,
};
const struct address_space_operations gfs2_rgrp_aops = {
.dirty_folio = block_dirty_folio,
.invalidate_folio = block_invalidate_folio,
- .writepage = gfs2_aspace_writepage,
+ .writepages = gfs2_aspace_writepages,
.release_folio = gfs2_release_folio,
+ .migrate_folio = buffer_migrate_folio_norefs,
};
/**
@@ -115,50 +127,45 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
{
struct address_space *mapping = gfs2_glock2aspace(gl);
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct page *page;
+ struct folio *folio;
struct buffer_head *bh;
unsigned int shift;
unsigned long index;
unsigned int bufnum;
if (mapping == NULL)
- mapping = &sdp->sd_aspace;
+ mapping = gfs2_aspace(sdp);
shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
index = blkno >> shift; /* convert block to page */
bufnum = blkno - (index << shift); /* block buf index within page */
if (create) {
- for (;;) {
- page = grab_cache_page(mapping, index);
- if (page)
- break;
- yield();
- }
- if (!page_has_buffers(page))
- create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ mapping_gfp_mask(mapping) | __GFP_NOFAIL);
+ bh = folio_buffers(folio);
+ if (!bh)
+ bh = create_empty_buffers(folio,
+ sdp->sd_sb.sb_bsize, 0);
} else {
- page = find_get_page_flags(mapping, index,
- FGP_LOCK|FGP_ACCESSED);
- if (!page)
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED, 0);
+ if (IS_ERR(folio))
return NULL;
- if (!page_has_buffers(page)) {
- bh = NULL;
- goto out_unlock;
- }
+ bh = folio_buffers(folio);
}
- /* Locate header for our buffer within our page */
- for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
- /* Do nothing */;
- get_bh(bh);
+ if (!bh)
+ goto out_unlock;
+ bh = get_nth_bh(bh, bufnum);
if (!buffer_mapped(bh))
map_bh(bh, sdp->sd_vfs, blkno);
out_unlock:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return bh;
}
@@ -193,15 +200,14 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
static void gfs2_meta_read_endio(struct bio *bio)
{
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
- bio_for_each_segment_all(bvec, bio, iter_all) {
- struct page *page = bvec->bv_page;
- struct buffer_head *bh = page_buffers(page);
- unsigned int len = bvec->bv_len;
+ bio_for_each_folio_all(fi, bio) {
+ struct folio *folio = fi.folio;
+ struct buffer_head *bh = folio_buffers(folio);
+ size_t len = fi.length;
- while (bh_offset(bh) < bvec->bv_offset)
+ while (bh_offset(bh) < fi.offset)
bh = bh->b_this_page;
do {
struct buffer_head *next = bh->b_this_page;
@@ -224,10 +230,10 @@ static void gfs2_submit_bhs(blk_opf_t opf, struct buffer_head *bhs[], int num)
struct bio *bio;
bio = bio_alloc(bh->b_bdev, num, opf, GFP_NOIO);
- bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+ bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> SECTOR_SHIFT);
while (num > 0) {
bh = *bhs;
- if (!bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh))) {
+ if (!bio_add_folio(bio, bh->b_folio, bh->b_size, bh_offset(bh))) {
BUG_ON(bio->bi_iter.bi_size == 0);
break;
}
@@ -257,7 +263,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
struct buffer_head *bh, *bhs[2];
int num = 0;
- if (unlikely(gfs2_withdrawn(sdp)) && !gfs2_withdraw_in_prog(sdp)) {
+ if (gfs2_withdrawn(sdp)) {
*bhp = NULL;
return -EIO;
}
@@ -296,7 +302,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
if (unlikely(!buffer_uptodate(bh))) {
struct gfs2_trans *tr = current->journal_info;
if (tr && test_bit(TR_TOUCHED, &tr->tr_flags))
- gfs2_io_error_bh_wd(sdp, bh);
+ gfs2_io_error_bh(sdp, bh);
brelse(bh);
*bhp = NULL;
return -EIO;
@@ -315,7 +321,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
- if (unlikely(gfs2_withdrawn(sdp)) && !gfs2_withdraw_in_prog(sdp))
+ if (gfs2_withdrawn(sdp))
return -EIO;
wait_on_buffer(bh);
@@ -323,10 +329,10 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
if (!buffer_uptodate(bh)) {
struct gfs2_trans *tr = current->journal_info;
if (tr && test_bit(TR_TOUCHED, &tr->tr_flags))
- gfs2_io_error_bh_wd(sdp, bh);
+ gfs2_io_error_bh(sdp, bh);
return -EIO;
}
- if (unlikely(gfs2_withdrawn(sdp)) && !gfs2_withdraw_in_prog(sdp))
+ if (gfs2_withdrawn(sdp))
return -EIO;
return 0;
@@ -334,7 +340,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
void gfs2_remove_from_journal(struct buffer_head *bh, int meta)
{
- struct address_space *mapping = bh->b_page->mapping;
+ struct address_space *mapping = bh->b_folio->mapping;
struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
struct gfs2_bufdata *bd = bh->b_private;
struct gfs2_trans *tr = current->journal_info;
@@ -405,26 +411,20 @@ static struct buffer_head *gfs2_getjdatabuf(struct gfs2_inode *ip, u64 blkno)
{
struct address_space *mapping = ip->i_inode.i_mapping;
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct page *page;
+ struct folio *folio;
struct buffer_head *bh;
unsigned int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
unsigned long index = blkno >> shift; /* convert block to page */
unsigned int bufnum = blkno - (index << shift);
- page = find_get_page_flags(mapping, index, FGP_LOCK|FGP_ACCESSED);
- if (!page)
- return NULL;
- if (!page_has_buffers(page)) {
- unlock_page(page);
- put_page(page);
+ folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED, 0);
+ if (IS_ERR(folio))
return NULL;
- }
- /* Locate header for our buffer within our page */
- for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
- /* Do nothing */;
- get_bh(bh);
- unlock_page(page);
- put_page(page);
+ bh = folio_buffers(folio);
+ if (bh)
+ bh = get_nth_bh(bh, bufnum);
+ folio_unlock(folio);
+ folio_put(folio);
return bh;
}
@@ -442,11 +442,9 @@ void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
struct buffer_head *bh;
int ty;
- if (!ip->i_gl) {
- /* This can only happen during incomplete inode creation. */
- BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
+ /* This can only happen during incomplete inode creation. */
+ if (!ip->i_gl)
return;
- }
gfs2_ail1_wipe(sdp, bstart, blen);
while (blen) {
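The gfs2_getjdatabuf() rewrite above is an instance of a common folio lookup shape: find the locked folio if it is cached, then take a reference on the nth buffer attached to it. A minimal sketch of that pattern, with an invented helper name:

#include <linux/pagemap.h>
#include <linux/buffer_head.h>

/*
 * Look up a cached block buffer.  Returns the nth buffer_head of the
 * folio at @index with an extra reference, or NULL if the folio (or
 * its buffers) is not present in the page cache.
 */
static struct buffer_head *example_get_cached_bh(struct address_space *mapping,
						 pgoff_t index, unsigned int nth)
{
	struct folio *folio;
	struct buffer_head *bh;

	folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED, 0);
	if (IS_ERR(folio))
		return NULL;			/* not in the page cache */
	bh = folio_buffers(folio);
	if (bh)
		bh = get_nth_bh(bh, nth);	/* takes a bh reference */
	folio_unlock(folio);
	folio_put(folio);
	return bh;				/* caller must brelse() */
}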
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index d0a58cdd433a..b7c8a6684d02 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -44,27 +44,25 @@ static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping)
struct gfs2_glock_aspace *gla =
container_of(mapping, struct gfs2_glock_aspace, mapping);
return gla->glock.gl_name.ln_sbd;
- } else if (mapping->a_ops == &gfs2_rgrp_aops)
- return container_of(mapping, struct gfs2_sbd, sd_aspace);
- else
+ } else
return inode->i_sb->s_fs_info;
}
-extern struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
-extern int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
- int rahead, struct buffer_head **bhp);
-extern int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
-extern struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno,
- int create);
+struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
+int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
+ int rahead, struct buffer_head **bhp);
+int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
+struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno,
+ int create);
enum {
REMOVE_JDATA = 0,
REMOVE_META = 1,
};
-extern void gfs2_remove_from_journal(struct buffer_head *bh, int meta);
-extern void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
-extern int gfs2_meta_buffer(struct gfs2_inode *ip, u32 mtype, u64 num,
- struct buffer_head **bhp);
+void gfs2_remove_from_journal(struct buffer_head *bh, int meta);
+void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
+int gfs2_meta_buffer(struct gfs2_inode *ip, u32 mtype, u64 num,
+ struct buffer_head **bhp);
static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
struct buffer_head **bhp)
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index c0cf1d2d0ef5..e7a88b717991 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -60,19 +60,21 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
gt->gt_new_files_jdata = 0;
gt->gt_max_readahead = BIT(18);
gt->gt_complain_secs = 10;
+ gt->gt_withdraw_helper_timeout = 5;
}
void free_sbd(struct gfs2_sbd *sdp)
{
- if (sdp->sd_lkstats)
- free_percpu(sdp->sd_lkstats);
+ struct super_block *sb = sdp->sd_vfs;
+
+ free_percpu(sdp->sd_lkstats);
+ sb->s_fs_info = NULL;
kfree(sdp);
}
static struct gfs2_sbd *init_sbd(struct super_block *sb)
{
struct gfs2_sbd *sdp;
- struct address_space *mapping;
sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL);
if (!sdp)
@@ -87,11 +89,11 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
gfs2_tune_init(&sdp->sd_tune);
- init_waitqueue_head(&sdp->sd_glock_wait);
+ init_waitqueue_head(&sdp->sd_kill_wait);
init_waitqueue_head(&sdp->sd_async_glock_wait);
atomic_set(&sdp->sd_glock_disposal, 0);
init_completion(&sdp->sd_locking_init);
- init_completion(&sdp->sd_wdack);
+ init_completion(&sdp->sd_withdraw_helper);
spin_lock_init(&sdp->sd_statfs_spin);
spin_lock_init(&sdp->sd_rindex_spin);
@@ -103,23 +105,12 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
init_completion(&sdp->sd_journal_ready);
INIT_LIST_HEAD(&sdp->sd_quota_list);
- mutex_init(&sdp->sd_quota_mutex);
mutex_init(&sdp->sd_quota_sync_mutex);
init_waitqueue_head(&sdp->sd_quota_wait);
spin_lock_init(&sdp->sd_bitmap_lock);
INIT_LIST_HEAD(&sdp->sd_sc_inodes_list);
- mapping = &sdp->sd_aspace;
-
- address_space_init_once(mapping);
- mapping->a_ops = &gfs2_rgrp_aops;
- mapping->host = sb->s_bdev->bd_inode;
- mapping->flags = 0;
- mapping_set_gfp_mask(mapping, GFP_NOFS);
- mapping->private_data = NULL;
- mapping->writeback_index = 0;
-
spin_lock_init(&sdp->sd_log_lock);
atomic_set(&sdp->sd_log_pinned, 0);
INIT_LIST_HEAD(&sdp->sd_log_revokes);
@@ -135,8 +126,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
init_rwsem(&sdp->sd_log_flush_lock);
atomic_set(&sdp->sd_log_in_flight, 0);
init_waitqueue_head(&sdp->sd_log_flush_wait);
- atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
mutex_init(&sdp->sd_freeze_mutex);
+ INIT_LIST_HEAD(&sdp->sd_dead_glocks);
return sdp;
@@ -173,7 +164,7 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
return -EINVAL;
}
- if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE ||
+ if (sb->sb_bsize < SECTOR_SIZE || sb->sb_bsize > PAGE_SIZE ||
(sb->sb_bsize & (sb->sb_bsize - 1))) {
pr_warn("Invalid block size\n");
return -EINVAL;
@@ -185,22 +176,10 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
return 0;
}
-static void end_bio_io_page(struct bio *bio)
-{
- struct page *page = bio->bi_private;
-
- if (!bio->bi_status)
- SetPageUptodate(page);
- else
- pr_warn("error %d reading superblock\n", bio->bi_status);
- unlock_page(page);
-}
-
-static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
+static void gfs2_sb_in(struct gfs2_sbd *sdp, const struct gfs2_sb *str)
{
struct gfs2_sb_host *sb = &sdp->sd_sb;
struct super_block *s = sdp->sd_vfs;
- const struct gfs2_sb *str = buf;
sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
@@ -215,7 +194,7 @@ static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
- memcpy(&s->s_uuid, str->sb_uuid, 16);
+ super_set_uuid(s, str->sb_uuid, 16);
}
/**
@@ -239,36 +218,22 @@ static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
{
- struct super_block *sb = sdp->sd_vfs;
- struct gfs2_sb *p;
- struct page *page;
- struct bio *bio;
+ struct gfs2_sb *sb;
+ int err;
- page = alloc_page(GFP_NOFS);
- if (unlikely(!page))
+ sb = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (unlikely(!sb))
return -ENOMEM;
-
- ClearPageUptodate(page);
- ClearPageDirty(page);
- lock_page(page);
-
- bio = bio_alloc(sb->s_bdev, 1, REQ_OP_READ | REQ_META, GFP_NOFS);
- bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9);
- bio_add_page(bio, page, PAGE_SIZE, 0);
-
- bio->bi_end_io = end_bio_io_page;
- bio->bi_private = page;
- submit_bio(bio);
- wait_on_page_locked(page);
- bio_put(bio);
- if (!PageUptodate(page)) {
- __free_page(page);
- return -EIO;
- }
- p = kmap(page);
- gfs2_sb_in(sdp, p);
- kunmap(page);
- __free_page(page);
+ err = bdev_rw_virt(sdp->sd_vfs->s_bdev,
+ sector << (sdp->sd_vfs->s_blocksize_bits - SECTOR_SHIFT),
+ sb, PAGE_SIZE, REQ_OP_READ | REQ_META);
+ if (err) {
+ pr_warn("error %d reading superblock\n", err);
+ kfree(sb);
+ return err;
+ }
+ gfs2_sb_in(sdp, sb);
+ kfree(sb);
return gfs2_check_sb(sdp, silent);
}
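The rewritten gfs2_read_super() leans on bdev_rw_virt() doing the bio allocation, submission, and wait internally for a kernel-virtual buffer. Sketched standalone below; the function name is made up, but the call shape matches the hunk above.

#include <linux/blkdev.h>

/* Synchronously read one filesystem block into a kernel buffer. */
static int example_read_block(struct super_block *sb, sector_t fs_block,
			      void *buf, size_t len)
{
	sector_t sector = fs_block << (sb->s_blocksize_bits - SECTOR_SHIFT);

	return bdev_rw_virt(sb->s_bdev, sector, buf, len,
			    REQ_OP_READ | REQ_META);
}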
@@ -293,8 +258,7 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
return error;
}
- sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
- GFS2_BASIC_BLOCK_SHIFT;
+ sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - SECTOR_SHIFT;
sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift);
sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
sizeof(struct gfs2_dinode)) / sizeof(u64);
@@ -407,7 +371,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
error = gfs2_glock_nq_num(sdp,
GFS2_MOUNT_LOCK, &gfs2_nondisk_glops,
LM_ST_EXCLUSIVE,
- LM_FLAG_NOEXP | GL_NOCACHE | GL_NOPID,
+ LM_FLAG_RECOVER | GL_NOCACHE | GL_NOPID,
mount_gh);
if (error) {
fs_err(sdp, "can't acquire mount glock: %d\n", error);
@@ -417,7 +381,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
error = gfs2_glock_nq_num(sdp,
GFS2_LIVE_LOCK, &gfs2_nondisk_glops,
LM_ST_SHARED,
- LM_FLAG_NOEXP | GL_EXACT | GL_NOPID,
+ LM_FLAG_RECOVER | GL_EXACT | GL_NOPID,
&sdp->sd_live_gh);
if (error) {
fs_err(sdp, "can't acquire live glock: %d\n", error);
@@ -434,7 +398,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
error = gfs2_glock_get(sdp, GFS2_FREEZE_LOCK, &gfs2_freeze_glops,
CREATE, &sdp->sd_freeze_gl);
if (error) {
- fs_err(sdp, "can't create transaction glock: %d\n", error);
+ fs_err(sdp, "can't create freeze glock: %d\n", error);
goto fail_rename;
}
@@ -522,7 +486,9 @@ static int init_sb(struct gfs2_sbd *sdp, int silent)
sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
goto out;
}
- sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
+ ret = -EINVAL;
+ if (!sb_set_blocksize(sb, sdp->sd_sb.sb_bsize))
+ goto out;
/* Get the root inode */
no_addr = sdp->sd_sb.sb_root_dir.no_addr;
@@ -577,8 +543,6 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
mutex_lock(&sdp->sd_jindex_mutex);
for (;;) {
- struct gfs2_inode *jip;
-
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
if (error)
break;
@@ -619,8 +583,6 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
d_mark_dontcache(jd->jd_inode);
spin_lock(&sdp->sd_jindex_spin);
jd->jd_jid = sdp->sd_journals++;
- jip = GFS2_I(jd->jd_inode);
- jd->jd_no_addr = jip->i_no_addr;
list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
spin_unlock(&sdp->sd_jindex_spin);
}
@@ -649,7 +611,7 @@ static int init_statfs(struct gfs2_sbd *sdp)
struct gfs2_jdesc *jd;
struct gfs2_inode *ip;
- sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs");
+ sdp->sd_statfs_inode = gfs2_lookup_meta(master, "statfs");
if (IS_ERR(sdp->sd_statfs_inode)) {
error = PTR_ERR(sdp->sd_statfs_inode);
fs_err(sdp, "can't read in statfs inode: %d\n", error);
@@ -658,7 +620,7 @@ static int init_statfs(struct gfs2_sbd *sdp)
if (sdp->sd_args.ar_spectator)
goto out;
- pn = gfs2_lookup_simple(master, "per_node");
+ pn = gfs2_lookup_meta(master, "per_node");
if (IS_ERR(pn)) {
error = PTR_ERR(pn);
fs_err(sdp, "can't find per_node directory: %d\n", error);
@@ -675,7 +637,7 @@ static int init_statfs(struct gfs2_sbd *sdp)
goto free_local;
}
sprintf(buf, "statfs_change%u", jd->jd_jid);
- lsi->si_sc_inode = gfs2_lookup_simple(pn, buf);
+ lsi->si_sc_inode = gfs2_lookup_meta(pn, buf);
if (IS_ERR(lsi->si_sc_inode)) {
error = PTR_ERR(lsi->si_sc_inode);
fs_err(sdp, "can't find local \"sc\" file#%u: %d\n",
@@ -734,15 +696,13 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
struct inode *master = d_inode(sdp->sd_master_dir);
struct gfs2_holder ji_gh;
struct gfs2_inode *ip;
- int jindex = 1;
int error = 0;
- if (undo) {
- jindex = 0;
+ gfs2_holder_mark_uninitialized(&ji_gh);
+ if (undo)
goto fail_statfs;
- }
- sdp->sd_jindex = gfs2_lookup_simple(master, "jindex");
+ sdp->sd_jindex = gfs2_lookup_meta(master, "jindex");
if (IS_ERR(sdp->sd_jindex)) {
fs_err(sdp, "can't lookup journal index: %d\n", error);
return PTR_ERR(sdp->sd_jindex);
@@ -782,7 +742,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid,
&gfs2_journal_glops,
LM_ST_EXCLUSIVE,
- LM_FLAG_NOEXP | GL_NOCACHE | GL_NOPID,
+ LM_FLAG_RECOVER | GL_NOPID,
&sdp->sd_journal_gh);
if (error) {
fs_err(sdp, "can't acquire journal glock: %d\n", error);
@@ -790,9 +750,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
}
ip = GFS2_I(sdp->sd_jdesc->jd_inode);
- sdp->sd_jinode_gl = ip->i_gl;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
- LM_FLAG_NOEXP | GL_EXACT |
+ LM_FLAG_RECOVER | GL_EXACT |
GL_NOCACHE | GL_NOPID,
&sdp->sd_jinode_gh);
if (error) {
@@ -852,24 +811,20 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
sdp->sd_log_idle = 1;
set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags);
gfs2_glock_dq_uninit(&ji_gh);
- jindex = 0;
INIT_WORK(&sdp->sd_freeze_work, gfs2_freeze_func);
return 0;
fail_statfs:
uninit_statfs(sdp);
fail_jinode_gh:
- /* A withdraw may have done dq/uninit so now we need to check it */
- if (!sdp->sd_args.ar_spectator &&
- gfs2_holder_initialized(&sdp->sd_jinode_gh))
+ if (!sdp->sd_args.ar_spectator)
gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
fail_journal_gh:
- if (!sdp->sd_args.ar_spectator &&
- gfs2_holder_initialized(&sdp->sd_journal_gh))
+ if (!sdp->sd_args.ar_spectator)
gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
fail_jindex:
gfs2_jindex_free(sdp);
- if (jindex)
+ if (gfs2_holder_initialized(&ji_gh))
gfs2_glock_dq_uninit(&ji_gh);
fail:
iput(sdp->sd_jindex);
@@ -892,7 +847,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
goto fail;
/* Read in the resource index inode */
- sdp->sd_rindex = gfs2_lookup_simple(master, "rindex");
+ sdp->sd_rindex = gfs2_lookup_meta(master, "rindex");
if (IS_ERR(sdp->sd_rindex)) {
error = PTR_ERR(sdp->sd_rindex);
fs_err(sdp, "can't get resource index inode: %d\n", error);
@@ -901,7 +856,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
sdp->sd_rindex_uptodate = 0;
/* Read in the quota inode */
- sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota");
+ sdp->sd_quota_inode = gfs2_lookup_meta(master, "quota");
if (IS_ERR(sdp->sd_quota_inode)) {
error = PTR_ERR(sdp->sd_quota_inode);
fs_err(sdp, "can't get quota file inode: %d\n", error);
@@ -945,7 +900,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
if (undo)
goto fail_qc_gh;
- pn = gfs2_lookup_simple(master, "per_node");
+ pn = gfs2_lookup_meta(master, "per_node");
if (IS_ERR(pn)) {
error = PTR_ERR(pn);
fs_err(sdp, "can't find per_node directory: %d\n", error);
@@ -953,7 +908,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
}
sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
- sdp->sd_qc_inode = gfs2_lookup_simple(pn, buf);
+ sdp->sd_qc_inode = gfs2_lookup_meta(pn, buf);
if (IS_ERR(sdp->sd_qc_inode)) {
error = PTR_ERR(sdp->sd_qc_inode);
fs_err(sdp, "can't find local \"qc\" file: %d\n", error);
@@ -1078,8 +1033,8 @@ hostdata_error:
void gfs2_lm_unmount(struct gfs2_sbd *sdp)
{
const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops;
- if (likely(!gfs2_withdrawn(sdp)) && lm->lm_unmount)
- lm->lm_unmount(sdp);
+ if (!gfs2_withdrawn(sdp) && lm->lm_unmount)
+ lm->lm_unmount(sdp, true);
}
static int wait_on_journal(struct gfs2_sbd *sdp)
@@ -1107,29 +1062,46 @@ static int init_threads(struct gfs2_sbd *sdp)
struct task_struct *p;
int error = 0;
- p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
+ p = kthread_create(gfs2_logd, sdp, "gfs2_logd/%s", sdp->sd_fsname);
if (IS_ERR(p)) {
error = PTR_ERR(p);
- fs_err(sdp, "can't start logd thread: %d\n", error);
+ fs_err(sdp, "can't create logd thread: %d\n", error);
return error;
}
+ get_task_struct(p);
sdp->sd_logd_process = p;
- p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
+ p = kthread_create(gfs2_quotad, sdp, "gfs2_quotad/%s", sdp->sd_fsname);
if (IS_ERR(p)) {
error = PTR_ERR(p);
- fs_err(sdp, "can't start quotad thread: %d\n", error);
+ fs_err(sdp, "can't create quotad thread: %d\n", error);
goto fail;
}
+ get_task_struct(p);
sdp->sd_quotad_process = p;
+
+ wake_up_process(sdp->sd_logd_process);
+ wake_up_process(sdp->sd_quotad_process);
return 0;
fail:
- kthread_stop(sdp->sd_logd_process);
+ kthread_stop_put(sdp->sd_logd_process);
sdp->sd_logd_process = NULL;
return error;
}
+void gfs2_destroy_threads(struct gfs2_sbd *sdp)
+{
+ if (sdp->sd_logd_process) {
+ kthread_stop_put(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
+ }
+ if (sdp->sd_quotad_process) {
+ kthread_stop_put(sdp->sd_quotad_process);
+ sdp->sd_quotad_process = NULL;
+ }
+}
+
/**
* gfs2_fill_super - Read in superblock
* @sb: The VFS superblock
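The init_threads()/gfs2_destroy_threads() pairing above implements a pin-then-publish thread lifecycle: create the thread stopped, pin its task_struct so a racing exit cannot free it, publish the pointer, and only then let it run. A condensed sketch of the same pattern with invented names, not part of the patch:

#include <linux/kthread.h>

struct example_ctx {
	struct task_struct *daemon;
	const char *name;
};

static int example_start_daemon(struct example_ctx *ctx, int (*fn)(void *))
{
	struct task_struct *p;

	p = kthread_create(fn, ctx, "example/%s", ctx->name);
	if (IS_ERR(p))
		return PTR_ERR(p);
	get_task_struct(p);	/* pin: survives the thread exiting on its own */
	ctx->daemon = p;
	wake_up_process(p);	/* run only after the pointer is published */
	return 0;
}

static void example_stop_daemon(struct example_ctx *ctx)
{
	if (ctx->daemon) {
		kthread_stop_put(ctx->daemon);	/* kthread_stop() + put_task_struct() */
		ctx->daemon = NULL;
	}
}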
@@ -1143,7 +1115,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
int silent = fc->sb_flags & SB_SILENT;
struct gfs2_sbd *sdp;
struct gfs2_holder mount_gh;
- struct gfs2_holder freeze_gh;
+ struct address_space *mapping;
int error;
sdp = init_sbd(sb);
@@ -1165,7 +1137,8 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_flags |= SB_NOSEC;
sb->s_magic = GFS2_MAGIC;
sb->s_op = &gfs2_super_ops;
- sb->s_d_op = &gfs2_dops;
+
+ set_default_d_op(sb, &gfs2_dops);
sb->s_export_op = &gfs2_export_ops;
sb->s_qcop = &gfs2_quotactl_ops;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
@@ -1175,10 +1148,12 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
/* Set up the buffer cache and fill in some fake block size values
to allow us to read in the on-disk superblock. */
- sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK);
+ sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, SECTOR_SIZE);
+ error = -EINVAL;
+ if (!sdp->sd_sb.sb_bsize)
+ goto fail_free;
sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
- sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
- GFS2_BASIC_BLOCK_SHIFT;
+ sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - SECTOR_SHIFT;
sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift);
sdp->sd_tune.gt_logd_secs = sdp->sd_args.ar_commit;
@@ -1191,15 +1166,41 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
sdp->sd_tune.gt_statfs_quantum = 30;
}
+ /* Set up an address space for metadata writes */
+ sdp->sd_inode = new_inode(sb);
+ error = -ENOMEM;
+ if (!sdp->sd_inode)
+ goto fail_free;
+ sdp->sd_inode->i_ino = GFS2_BAD_INO;
+ sdp->sd_inode->i_size = OFFSET_MAX;
+
+ mapping = gfs2_aspace(sdp);
+ mapping->a_ops = &gfs2_rgrp_aops;
+ gfs2_setup_inode(sdp->sd_inode);
+
error = init_names(sdp, silent);
if (error)
- goto fail_free;
+ goto fail_iput;
snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name);
+ error = -ENOMEM;
+ sdp->sd_glock_wq = alloc_workqueue("gfs2-glock/%s",
+ WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE | WQ_PERCPU,
+ 0,
+ sdp->sd_fsname);
+ if (!sdp->sd_glock_wq)
+ goto fail_iput;
+
+ sdp->sd_delete_wq = alloc_workqueue("gfs2-delete/%s",
+ WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU, 0,
+ sdp->sd_fsname);
+ if (!sdp->sd_delete_wq)
+ goto fail_glock_wq;
+
error = gfs2_sys_fs_add(sdp);
if (error)
- goto fail_free;
+ goto fail_delete_wq;
gfs2_create_debugfs_file(sdp);
@@ -1207,6 +1208,8 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
if (error)
goto fail_debug;
+ INIT_WORK(&sdp->sd_withdraw_work, gfs2_withdraw_func);
+
error = init_locking(sdp, &mount_gh, DO);
if (error)
goto fail_lm;
@@ -1260,27 +1263,20 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
if (!sb_rdonly(sb)) {
error = init_threads(sdp);
- if (error) {
- gfs2_withdraw_delayed(sdp);
+ if (error)
goto fail_per_node;
- }
}
- error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
+ error = gfs2_freeze_lock_shared(sdp);
if (error)
goto fail_per_node;
if (!sb_rdonly(sb))
error = gfs2_make_fs_rw(sdp);
- gfs2_freeze_unlock(&freeze_gh);
if (error) {
- if (sdp->sd_quotad_process)
- kthread_stop(sdp->sd_quotad_process);
- sdp->sd_quotad_process = NULL;
- if (sdp->sd_logd_process)
- kthread_stop(sdp->sd_logd_process);
- sdp->sd_logd_process = NULL;
+ gfs2_freeze_unlock(sdp);
+ gfs2_destroy_threads(sdp);
fs_err(sdp, "can't make FS RW: %d\n", error);
goto fail_per_node;
}
@@ -1309,9 +1305,15 @@ fail_lm:
fail_debug:
gfs2_delete_debugfs_file(sdp);
gfs2_sys_fs_del(sdp);
+fail_delete_wq:
+ destroy_workqueue(sdp->sd_delete_wq);
+fail_glock_wq:
+ if (sdp->sd_glock_wq)
+ destroy_workqueue(sdp->sd_glock_wq);
+fail_iput:
+ iput(sdp->sd_inode);
fail_free:
free_sbd(sdp);
- sb->s_fs_info = NULL;
return error;
}
@@ -1378,6 +1380,7 @@ static const struct constant_table gfs2_param_quota[] = {
{"off", GFS2_QUOTA_OFF},
{"account", GFS2_QUOTA_ACCOUNT},
{"on", GFS2_QUOTA_ON},
+ {"quiet", GFS2_QUOTA_QUIET},
{}
};
@@ -1393,12 +1396,14 @@ static const struct constant_table gfs2_param_data[] = {
};
enum opt_errors {
- Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW,
- Opt_errors_panic = GFS2_ERRORS_PANIC,
+ Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW,
+ Opt_errors_deactivate = GFS2_ERRORS_DEACTIVATE,
+ Opt_errors_panic = GFS2_ERRORS_PANIC,
};
static const struct constant_table gfs2_param_errors[] = {
{"withdraw", Opt_errors_withdraw },
+ {"deactivate", Opt_errors_deactivate },
{"panic", Opt_errors_panic },
{}
};
@@ -1585,12 +1590,6 @@ static int gfs2_reconfigure(struct fs_context *fc)
fc->sb_flags |= SB_RDONLY;
if ((sb->s_flags ^ fc->sb_flags) & SB_RDONLY) {
- struct gfs2_holder freeze_gh;
-
- error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
- if (error)
- return -EINVAL;
-
if (fc->sb_flags & SB_RDONLY) {
gfs2_make_fs_ro(sdp);
} else {
@@ -1598,7 +1597,6 @@ static int gfs2_reconfigure(struct fs_context *fc)
if (error)
errorfc(fc, "unable to remount read-write");
}
- gfs2_freeze_unlock(&freeze_gh);
}
sdp->sd_args = *newargs;
@@ -1720,6 +1718,55 @@ static int gfs2_meta_init_fs_context(struct fs_context *fc)
return 0;
}
+/**
+ * gfs2_evict_inodes - evict inodes cooperatively
+ * @sb: the superblock
+ *
+ * When evicting an inode with a zero link count, we are trying to upgrade the
+ * inode's iopen glock from SH to EX mode in order to determine if we can
+ * delete the inode. The other nodes are supposed to evict the inode from
+ * their caches if they can, and to poke the inode's inode glock if they cannot
+ * do so. Either behavior allows gfs2_upgrade_iopen_glock() to proceed
+ * quickly, but if the other nodes are not cooperating, the lock upgrading
+ * attempt will time out. Since inodes are evicted sequentially, this can add
+ * up quickly.
+ *
+ * Function evict_inodes() tries to keep s_inode_list_lock held over a long
+ * time, which prevents other inodes from being evicted concurrently.
+ * This precludes the cooperative behavior we are looking for. This special
+ * version of evict_inodes() avoids that.
+ *
+ * Modeled after drop_pagecache_sb().
+ */
+static void gfs2_evict_inodes(struct super_block *sb)
+{
+ struct inode *inode, *toput_inode = NULL;
+ struct gfs2_sbd *sdp = sb->s_fs_info;
+
+ set_bit(SDF_EVICTING, &sdp->sd_flags);
+
+ spin_lock(&sb->s_inode_list_lock);
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+ spin_lock(&inode->i_lock);
+ if ((inode_state_read(inode) & (I_FREEING | I_WILL_FREE | I_NEW)) &&
+ !need_resched()) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+ __iget(inode);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(&sb->s_inode_list_lock);
+
+ iput(toput_inode);
+ toput_inode = inode;
+
+ cond_resched();
+ spin_lock(&sb->s_inode_list_lock);
+ }
+ spin_unlock(&sb->s_inode_list_lock);
+ iput(toput_inode);
+}
+
static void gfs2_kill_sb(struct super_block *sb)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
@@ -1735,6 +1782,18 @@ static void gfs2_kill_sb(struct super_block *sb)
sdp->sd_root_dir = NULL;
sdp->sd_master_dir = NULL;
shrink_dcache_sb(sb);
+
+ gfs2_evict_inodes(sb);
+
+ /*
+ * Flush and then drain the delete workqueue here (via
+ * destroy_workqueue()) to ensure that any delete work that
+ * may be running will also see the SDF_KILL flag.
+ */
+ set_bit(SDF_KILL, &sdp->sd_flags);
+ gfs2_flush_delete_work(sdp);
+ destroy_workqueue(sdp->sd_delete_wq);
+
kill_block_super(sb);
}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 1ed17226d9ed..b1692f12a602 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -106,58 +106,68 @@ static inline void spin_unlock_bucket(unsigned int hash)
static void gfs2_qd_dealloc(struct rcu_head *rcu)
{
struct gfs2_quota_data *qd = container_of(rcu, struct gfs2_quota_data, qd_rcu);
+ struct gfs2_sbd *sdp = qd->qd_sbd;
+
kmem_cache_free(gfs2_quotad_cachep, qd);
+ if (atomic_dec_and_test(&sdp->sd_quota_count))
+ wake_up(&sdp->sd_kill_wait);
}
-static void gfs2_qd_dispose(struct list_head *list)
+static void gfs2_qd_dispose(struct gfs2_quota_data *qd)
{
- struct gfs2_quota_data *qd;
- struct gfs2_sbd *sdp;
-
- while (!list_empty(list)) {
- qd = list_first_entry(list, struct gfs2_quota_data, qd_lru);
- sdp = qd->qd_gl->gl_name.ln_sbd;
-
- list_del(&qd->qd_lru);
+ struct gfs2_sbd *sdp = qd->qd_sbd;
- /* Free from the filesystem-specific list */
- spin_lock(&qd_lock);
- list_del(&qd->qd_list);
- spin_unlock(&qd_lock);
+ spin_lock(&qd_lock);
+ list_del(&qd->qd_list);
+ spin_unlock(&qd_lock);
- spin_lock_bucket(qd->qd_hash);
- hlist_bl_del_rcu(&qd->qd_hlist);
- spin_unlock_bucket(qd->qd_hash);
+ spin_lock_bucket(qd->qd_hash);
+ hlist_bl_del_rcu(&qd->qd_hlist);
+ spin_unlock_bucket(qd->qd_hash);
+ if (!gfs2_withdrawn(sdp)) {
gfs2_assert_warn(sdp, !qd->qd_change);
- gfs2_assert_warn(sdp, !qd->qd_slot_count);
+ gfs2_assert_warn(sdp, !qd->qd_slot_ref);
gfs2_assert_warn(sdp, !qd->qd_bh_count);
+ }
- gfs2_glock_put(qd->qd_gl);
- atomic_dec(&sdp->sd_quota_count);
+ gfs2_glock_put(qd->qd_gl);
+ call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
+}
+
+static void gfs2_qd_list_dispose(struct list_head *list)
+{
+ struct gfs2_quota_data *qd;
+
+ while (!list_empty(list)) {
+ qd = list_first_entry(list, struct gfs2_quota_data, qd_lru);
+ list_del(&qd->qd_lru);
- /* Delete it from the common reclaim list */
- call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
+ gfs2_qd_dispose(qd);
}
}
static enum lru_status gfs2_qd_isolate(struct list_head *item,
- struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
+ struct list_lru_one *lru, void *arg)
{
struct list_head *dispose = arg;
- struct gfs2_quota_data *qd = list_entry(item, struct gfs2_quota_data, qd_lru);
+ struct gfs2_quota_data *qd =
+ list_entry(item, struct gfs2_quota_data, qd_lru);
+ enum lru_status status;
if (!spin_trylock(&qd->qd_lockref.lock))
return LRU_SKIP;
+ status = LRU_SKIP;
if (qd->qd_lockref.count == 0) {
lockref_mark_dead(&qd->qd_lockref);
list_lru_isolate_move(lru, &qd->qd_lru, dispose);
+ status = LRU_REMOVED;
}
spin_unlock(&qd->qd_lockref.lock);
- return LRU_REMOVED;
+ return status;
}
static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
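The gfs2_qd_isolate() change above matters because the LRU walker counts LRU_REMOVED results: an item must only be reported as removed when it was actually taken off the list. A sketch of the callback contract, with an invented object layout:

#include <linux/list_lru.h>

struct example_obj {
	spinlock_t lock;
	int refcount;
	struct list_head lru;
};

/*
 * list_lru walk callback: contended or still-referenced items report
 * LRU_SKIP; only items moved onto the dispose list report LRU_REMOVED,
 * otherwise the walker's accounting goes wrong.
 */
static enum lru_status example_isolate(struct list_head *item,
				       struct list_lru_one *lru, void *arg)
{
	struct list_head *dispose = arg;
	struct example_obj *obj = container_of(item, struct example_obj, lru);
	enum lru_status status = LRU_SKIP;

	if (!spin_trylock(&obj->lock))
		return LRU_SKIP;
	if (obj->refcount == 0) {
		list_lru_isolate_move(lru, &obj->lru, dispose);
		status = LRU_REMOVED;
	}
	spin_unlock(&obj->lock);
	return status;
}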
@@ -172,7 +182,7 @@ static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
freed = list_lru_shrink_walk(&gfs2_qd_lru, sc,
gfs2_qd_isolate, &dispose);
- gfs2_qd_dispose(&dispose);
+ gfs2_qd_list_dispose(&dispose);
return freed;
}
@@ -183,13 +193,26 @@ static unsigned long gfs2_qd_shrink_count(struct shrinker *shrink,
return vfs_pressure_ratio(list_lru_shrink_count(&gfs2_qd_lru, sc));
}
-struct shrinker gfs2_qd_shrinker = {
- .count_objects = gfs2_qd_shrink_count,
- .scan_objects = gfs2_qd_shrink_scan,
- .seeks = DEFAULT_SEEKS,
- .flags = SHRINKER_NUMA_AWARE,
-};
+static struct shrinker *gfs2_qd_shrinker;
+
+int __init gfs2_qd_shrinker_init(void)
+{
+ gfs2_qd_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "gfs2-qd");
+ if (!gfs2_qd_shrinker)
+ return -ENOMEM;
+
+ gfs2_qd_shrinker->count_objects = gfs2_qd_shrink_count;
+ gfs2_qd_shrinker->scan_objects = gfs2_qd_shrink_scan;
+
+ shrinker_register(gfs2_qd_shrinker);
+
+ return 0;
+}
+void gfs2_qd_shrinker_exit(void)
+{
+ shrinker_free(gfs2_qd_shrinker);
+}
static u64 qd2index(struct gfs2_quota_data *qd)
{
@@ -200,12 +223,7 @@ static u64 qd2index(struct gfs2_quota_data *qd)
static u64 qd2offset(struct gfs2_quota_data *qd)
{
- u64 offset;
-
- offset = qd2index(qd);
- offset *= sizeof(struct gfs2_quota);
-
- return offset;
+ return qd2index(qd) * sizeof(struct gfs2_quota);
}
static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, struct kqid qid)
@@ -218,8 +236,7 @@ static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, str
return NULL;
qd->qd_sbd = sdp;
- qd->qd_lockref.count = 1;
- spin_lock_init(&qd->qd_lockref.lock);
+ lockref_init(&qd->qd_lockref);
qd->qd_id = qid;
qd->qd_slot = -1;
INIT_LIST_HEAD(&qd->qd_lru);
@@ -250,7 +267,7 @@ static struct gfs2_quota_data *gfs2_qd_search_bucket(unsigned int hash,
if (qd->qd_sbd != sdp)
continue;
if (lockref_get_not_dead(&qd->qd_lockref)) {
- list_lru_del(&gfs2_qd_lru, &qd->qd_lru);
+ list_lru_del_obj(&gfs2_qd_lru, &qd->qd_lru);
return qd;
}
}
@@ -297,22 +314,33 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
}
-static void qd_hold(struct gfs2_quota_data *qd)
+static void __qd_hold(struct gfs2_quota_data *qd)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
- gfs2_assert(sdp, !__lockref_is_dead(&qd->qd_lockref));
- lockref_get(&qd->qd_lockref);
+ struct gfs2_sbd *sdp = qd->qd_sbd;
+ gfs2_assert(sdp, qd->qd_lockref.count > 0);
+ qd->qd_lockref.count++;
}
static void qd_put(struct gfs2_quota_data *qd)
{
+ struct gfs2_sbd *sdp;
+
if (lockref_put_or_lock(&qd->qd_lockref))
return;
+ BUG_ON(__lockref_is_dead(&qd->qd_lockref));
+ sdp = qd->qd_sbd;
+ if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
+ lockref_mark_dead(&qd->qd_lockref);
+ spin_unlock(&qd->qd_lockref.lock);
+
+ gfs2_qd_dispose(qd);
+ return;
+ }
+
qd->qd_lockref.count = 0;
- list_lru_add(&gfs2_qd_lru, &qd->qd_lru);
+ list_lru_add_obj(&gfs2_qd_lru, &qd->qd_lru);
spin_unlock(&qd->qd_lockref.lock);
-
}
static int slot_get(struct gfs2_quota_data *qd)
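The new qd_put() above follows the general lockref put-side idiom: the fast path drops a reference without taking the lock, and only the final put falls through with the spinlock held, where the caller decides between caching the object and disposing of it. A generic sketch with invented names:

#include <linux/lockref.h>
#include <linux/list_lru.h>

struct example_obj {
	struct lockref ref;
	struct list_head lru;
};

static void example_put(struct example_obj *obj, struct list_lru *lru,
			bool shutting_down)
{
	if (lockref_put_or_lock(&obj->ref))
		return;			/* fast path: not the last reference */

	/* Last reference: obj->ref.lock is held here. */
	if (shutting_down) {
		lockref_mark_dead(&obj->ref);	/* blocks lockref_get_not_dead() */
		spin_unlock(&obj->ref.lock);
		/* ... dispose of obj ... */
		return;
	}
	obj->ref.count = 0;	/* cache it: resurrectable from the LRU */
	list_lru_add_obj(lru, &obj->lru);
	spin_unlock(&obj->ref.lock);
}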
@@ -322,20 +350,19 @@ static int slot_get(struct gfs2_quota_data *qd)
int error = 0;
spin_lock(&sdp->sd_bitmap_lock);
- if (qd->qd_slot_count != 0)
- goto out;
-
- error = -ENOSPC;
- bit = find_first_zero_bit(sdp->sd_quota_bitmap, sdp->sd_quota_slots);
- if (bit < sdp->sd_quota_slots) {
+ if (qd->qd_slot_ref == 0) {
+ bit = find_first_zero_bit(sdp->sd_quota_bitmap,
+ sdp->sd_quota_slots);
+ if (bit >= sdp->sd_quota_slots) {
+ error = -ENOSPC;
+ goto out;
+ }
set_bit(bit, sdp->sd_quota_bitmap);
qd->qd_slot = bit;
- error = 0;
-out:
- qd->qd_slot_count++;
}
+ qd->qd_slot_ref++;
+out:
spin_unlock(&sdp->sd_bitmap_lock);
-
return error;
}
@@ -344,8 +371,8 @@ static void slot_hold(struct gfs2_quota_data *qd)
struct gfs2_sbd *sdp = qd->qd_sbd;
spin_lock(&sdp->sd_bitmap_lock);
- gfs2_assert(sdp, qd->qd_slot_count);
- qd->qd_slot_count++;
+ gfs2_assert(sdp, qd->qd_slot_ref);
+ qd->qd_slot_ref++;
spin_unlock(&sdp->sd_bitmap_lock);
}
@@ -354,8 +381,8 @@ static void slot_put(struct gfs2_quota_data *qd)
struct gfs2_sbd *sdp = qd->qd_sbd;
spin_lock(&sdp->sd_bitmap_lock);
- gfs2_assert(sdp, qd->qd_slot_count);
- if (!--qd->qd_slot_count) {
+ gfs2_assert(sdp, qd->qd_slot_ref);
+ if (!--qd->qd_slot_ref) {
BUG_ON(!test_and_clear_bit(qd->qd_slot, sdp->sd_quota_bitmap));
qd->qd_slot = -1;
}
@@ -364,20 +391,21 @@ static void slot_put(struct gfs2_quota_data *qd)
static int bh_get(struct gfs2_quota_data *qd)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+ struct gfs2_sbd *sdp = qd->qd_sbd;
struct inode *inode = sdp->sd_qc_inode;
struct gfs2_inode *ip = GFS2_I(inode);
unsigned int block, offset;
- struct buffer_head *bh;
+ struct buffer_head *bh = NULL;
struct iomap iomap = { };
int error;
- mutex_lock(&sdp->sd_quota_mutex);
-
- if (qd->qd_bh_count++) {
- mutex_unlock(&sdp->sd_quota_mutex);
+ spin_lock(&qd->qd_lockref.lock);
+ if (qd->qd_bh_count) {
+ qd->qd_bh_count++;
+ spin_unlock(&qd->qd_lockref.lock);
return 0;
}
+ spin_unlock(&qd->qd_lockref.lock);
block = qd->qd_slot / sdp->sd_qc_per_block;
offset = qd->qd_slot % sdp->sd_qc_per_block;
@@ -386,115 +414,101 @@ static int bh_get(struct gfs2_quota_data *qd)
(loff_t)block << inode->i_blkbits,
i_blocksize(inode), &iomap);
if (error)
- goto fail;
+ return error;
error = -ENOENT;
if (iomap.type != IOMAP_MAPPED)
- goto fail;
+ return error;
error = gfs2_meta_read(ip->i_gl, iomap.addr >> inode->i_blkbits,
DIO_WAIT, 0, &bh);
if (error)
- goto fail;
+ return error;
error = -EIO;
if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC))
- goto fail_brelse;
-
- qd->qd_bh = bh;
- qd->qd_bh_qc = (struct gfs2_quota_change *)
- (bh->b_data + sizeof(struct gfs2_meta_header) +
- offset * sizeof(struct gfs2_quota_change));
-
- mutex_unlock(&sdp->sd_quota_mutex);
+ goto out;
- return 0;
+ spin_lock(&qd->qd_lockref.lock);
+ if (qd->qd_bh == NULL) {
+ qd->qd_bh = bh;
+ qd->qd_bh_qc = (struct gfs2_quota_change *)
+ (bh->b_data + sizeof(struct gfs2_meta_header) +
+ offset * sizeof(struct gfs2_quota_change));
+ bh = NULL;
+ }
+ qd->qd_bh_count++;
+ spin_unlock(&qd->qd_lockref.lock);
+ error = 0;
-fail_brelse:
+out:
brelse(bh);
-fail:
- qd->qd_bh_count--;
- mutex_unlock(&sdp->sd_quota_mutex);
return error;
}
static void bh_put(struct gfs2_quota_data *qd)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+ struct gfs2_sbd *sdp = qd->qd_sbd;
+ struct buffer_head *bh = NULL;
- mutex_lock(&sdp->sd_quota_mutex);
+ spin_lock(&qd->qd_lockref.lock);
gfs2_assert(sdp, qd->qd_bh_count);
if (!--qd->qd_bh_count) {
- brelse(qd->qd_bh);
+ bh = qd->qd_bh;
qd->qd_bh = NULL;
qd->qd_bh_qc = NULL;
}
- mutex_unlock(&sdp->sd_quota_mutex);
+ spin_unlock(&qd->qd_lockref.lock);
+ brelse(bh);
}
-static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd,
- u64 *sync_gen)
+static bool qd_grab_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd,
+ u64 sync_gen)
{
+ bool ret = false;
+
+ spin_lock(&qd->qd_lockref.lock);
if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
!test_bit(QDF_CHANGE, &qd->qd_flags) ||
- (sync_gen && (qd->qd_sync_gen >= *sync_gen)))
- return 0;
+ qd->qd_sync_gen >= sync_gen)
+ goto out;
- if (!lockref_get_not_dead(&qd->qd_lockref))
- return 0;
+ if (__lockref_is_dead(&qd->qd_lockref))
+ goto out;
+ qd->qd_lockref.count++;
list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
set_bit(QDF_LOCKED, &qd->qd_flags);
qd->qd_change_sync = qd->qd_change;
slot_hold(qd);
- return 1;
+ ret = true;
+
+out:
+ spin_unlock(&qd->qd_lockref.lock);
+ return ret;
}
-static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
+static void qd_ungrab_sync(struct gfs2_quota_data *qd)
{
- struct gfs2_quota_data *qd = NULL, *iter;
- int error;
-
- *qdp = NULL;
-
- if (sb_rdonly(sdp->sd_vfs))
- return 0;
-
- spin_lock(&qd_lock);
-
- list_for_each_entry(iter, &sdp->sd_quota_list, qd_list) {
- if (qd_check_sync(sdp, iter, &sdp->sd_quota_sync_gen)) {
- qd = iter;
- break;
- }
- }
-
- spin_unlock(&qd_lock);
-
- if (qd) {
- gfs2_assert_warn(sdp, qd->qd_change_sync);
- error = bh_get(qd);
- if (error) {
- clear_bit(QDF_LOCKED, &qd->qd_flags);
- slot_put(qd);
- qd_put(qd);
- return error;
- }
- }
-
- *qdp = qd;
-
- return 0;
+ clear_bit(QDF_LOCKED, &qd->qd_flags);
+ slot_put(qd);
+ qd_put(qd);
}
-static void qd_unlock(struct gfs2_quota_data *qd)
+static void qdsb_put(struct gfs2_quota_data *qd)
{
- gfs2_assert_warn(qd->qd_gl->gl_name.ln_sbd,
- test_bit(QDF_LOCKED, &qd->qd_flags));
- clear_bit(QDF_LOCKED, &qd->qd_flags);
bh_put(qd);
slot_put(qd);
qd_put(qd);
}
+static void qd_unlock(struct gfs2_quota_data *qd)
+{
+ spin_lock(&qd->qd_lockref.lock);
+ gfs2_assert_warn(qd->qd_sbd, test_bit(QDF_LOCKED, &qd->qd_flags));
+ clear_bit(QDF_LOCKED, &qd->qd_flags);
+ spin_unlock(&qd->qd_lockref.lock);
+ qdsb_put(qd);
+}
+
static int qdsb_get(struct gfs2_sbd *sdp, struct kqid qid,
struct gfs2_quota_data **qdp)
{
@@ -521,13 +535,6 @@ fail:
return error;
}
-static void qdsb_put(struct gfs2_quota_data *qd)
-{
- bh_put(qd);
- slot_put(qd);
- qd_put(qd);
-}
-
/**
* gfs2_qa_get - make sure we have a quota allocations data structure,
* if necessary
@@ -591,6 +598,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
if (gfs2_assert_warn(sdp, !ip->i_qadata->qa_qd_num) ||
gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) {
error = -EIO;
+ gfs2_qa_put(ip);
goto out;
}
@@ -663,72 +671,81 @@ static int sort_qd(const void *a, const void *b)
static void do_qc(struct gfs2_quota_data *qd, s64 change)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+ struct gfs2_sbd *sdp = qd->qd_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
struct gfs2_quota_change *qc = qd->qd_bh_qc;
+ bool needs_put = false;
s64 x;
- mutex_lock(&sdp->sd_quota_mutex);
gfs2_trans_add_meta(ip->i_gl, qd->qd_bh);
- if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
- qc->qc_change = 0;
+ /*
+ * The QDF_CHANGE flag indicates that the slot in the quota change file
+ * is used. Here, we use the value of qc->qc_change when the slot is
+ * used, and we assume a value of 0 otherwise.
+ */
+
+ spin_lock(&qd->qd_lockref.lock);
+
+ x = 0;
+ if (test_bit(QDF_CHANGE, &qd->qd_flags))
+ x = be64_to_cpu(qc->qc_change);
+ x += change;
+ qd->qd_change += change;
+
+ if (!x && test_bit(QDF_CHANGE, &qd->qd_flags)) {
+ /* The slot in the quota change file becomes unused. */
+ clear_bit(QDF_CHANGE, &qd->qd_flags);
+ qc->qc_flags = 0;
+ qc->qc_id = 0;
+ needs_put = true;
+ } else if (x && !test_bit(QDF_CHANGE, &qd->qd_flags)) {
+ /* The slot in the quota change file becomes used. */
+ set_bit(QDF_CHANGE, &qd->qd_flags);
+ __qd_hold(qd);
+ slot_hold(qd);
+
qc->qc_flags = 0;
if (qd->qd_id.type == USRQUOTA)
qc->qc_flags = cpu_to_be32(GFS2_QCF_USER);
qc->qc_id = cpu_to_be32(from_kqid(&init_user_ns, qd->qd_id));
}
-
- x = be64_to_cpu(qc->qc_change) + change;
qc->qc_change = cpu_to_be64(x);
- spin_lock(&qd_lock);
- qd->qd_change = x;
- spin_unlock(&qd_lock);
+ spin_unlock(&qd->qd_lockref.lock);
- if (!x) {
- gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
- clear_bit(QDF_CHANGE, &qd->qd_flags);
- qc->qc_flags = 0;
- qc->qc_id = 0;
+ if (needs_put) {
slot_put(qd);
qd_put(qd);
- } else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) {
- qd_hold(qd);
- slot_hold(qd);
}
-
if (change < 0) /* Reset quiet flag if we freed some blocks */
clear_bit(QDF_QMSG_QUIET, &qd->qd_flags);
- mutex_unlock(&sdp->sd_quota_mutex);
}
-static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
+static int gfs2_write_buf_to_page(struct gfs2_sbd *sdp, unsigned long index,
unsigned off, void *buf, unsigned bytes)
{
+ struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
struct inode *inode = &ip->i_inode;
- struct gfs2_sbd *sdp = GFS2_SB(inode);
struct address_space *mapping = inode->i_mapping;
- struct page *page;
+ struct folio *folio;
struct buffer_head *bh;
- void *kaddr;
u64 blk;
unsigned bsize = sdp->sd_sb.sb_bsize, bnum = 0, boff = 0;
unsigned to_write = bytes, pg_off = off;
- int done = 0;
blk = index << (PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift);
boff = off % bsize;
- page = find_or_create_page(mapping, index, GFP_NOFS);
- if (!page)
- return -ENOMEM;
- if (!page_has_buffers(page))
- create_empty_buffers(page, bsize, 0);
+ folio = filemap_grab_folio(mapping, index);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ bh = folio_buffers(folio);
+ if (!bh)
+ bh = create_empty_buffers(folio, bsize, 0);
- bh = page_buffers(page);
- while (!done) {
- /* Find the beginning block within the page */
+ for (;;) {
+ /* Find the beginning block within the folio */
if (pg_off >= ((bnum * bsize) + bsize)) {
bh = bh->b_this_page;
bnum++;
@@ -741,16 +758,14 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
goto unlock_out;
/* If it's a newly allocated disk block, zero it */
if (buffer_new(bh))
- zero_user(page, bnum * bsize, bh->b_size);
+ folio_zero_range(folio, bnum * bsize,
+ bh->b_size);
}
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
set_buffer_uptodate(bh);
if (bh_read(bh, REQ_META | REQ_PRIO) < 0)
goto unlock_out;
- if (gfs2_is_jdata(ip))
- gfs2_trans_add_data(ip->i_gl, bh);
- else
- gfs2_ordered_add_inode(ip);
+ gfs2_trans_add_data(ip->i_gl, bh);
/* If we need to write to the next block as well */
if (to_write > (bsize - boff)) {
@@ -759,31 +774,29 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
boff = pg_off % bsize;
continue;
}
- done = 1;
+ break;
}
- /* Write to the page, now that we have setup the buffer(s) */
- kaddr = kmap_atomic(page);
- memcpy(kaddr + off, buf, bytes);
- flush_dcache_page(page);
- kunmap_atomic(kaddr);
- unlock_page(page);
- put_page(page);
+ /* Write to the folio, now that we have setup the buffer(s) */
+ memcpy_to_folio(folio, off, buf, bytes);
+ flush_dcache_folio(folio);
+ folio_unlock(folio);
+ folio_put(folio);
return 0;
unlock_out:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return -EIO;
}
-static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
+static int gfs2_write_disk_quota(struct gfs2_sbd *sdp, struct gfs2_quota *qp,
loff_t loc)
{
unsigned long pg_beg;
unsigned pg_off, nbytes, overflow = 0;
- int pg_oflow = 0, error;
+ int error;
void *ptr;
nbytes = sizeof(struct gfs2_quota);
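The gfs2_write_buf_to_page() conversion above replaces the kmap_atomic()/memcpy()/flush_dcache_page() sequence with folio helpers. Reduced to its core, with the buffer read-in and journal hookup elided and an invented helper name:

#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/highmem.h>

/* Copy a small buffer into a (possibly newly created) cached folio. */
static int example_write_to_folio(struct address_space *mapping, pgoff_t index,
				  size_t off, const void *buf, size_t bytes,
				  unsigned int blocksize)
{
	struct folio *folio;

	folio = filemap_grab_folio(mapping, index);	/* returns it locked */
	if (IS_ERR(folio))
		return PTR_ERR(folio);
	if (!folio_buffers(folio))
		create_empty_buffers(folio, blocksize, 0);

	/* ... bring the covered buffers uptodate, add them to the journal ... */

	memcpy_to_folio(folio, off, buf, bytes);	/* maps highmem pages as needed */
	flush_dcache_folio(folio);
	folio_unlock(folio);
	folio_put(folio);
	return 0;
}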
@@ -792,17 +805,15 @@ static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
pg_off = offset_in_page(loc);
/* If the quota straddles a page boundary, split the write in two */
- if ((pg_off + nbytes) > PAGE_SIZE) {
- pg_oflow = 1;
+ if ((pg_off + nbytes) > PAGE_SIZE)
overflow = (pg_off + nbytes) - PAGE_SIZE;
- }
ptr = qp;
- error = gfs2_write_buf_to_page(ip, pg_beg, pg_off, ptr,
+ error = gfs2_write_buf_to_page(sdp, pg_beg, pg_off, ptr,
nbytes - overflow);
/* If there's an overflow, write the remaining bytes to the next page */
- if (!error && pg_oflow)
- error = gfs2_write_buf_to_page(ip, pg_beg + 1, 0,
+ if (!error && overflow)
+ error = gfs2_write_buf_to_page(sdp, pg_beg + 1, 0,
ptr + nbytes - overflow,
overflow);
return error;
@@ -810,7 +821,7 @@ static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
/**
* gfs2_adjust_quota - adjust record of current block usage
- * @ip: The quota inode
+ * @sdp: The superblock
* @loc: Offset of the entry in the quota file
* @change: The amount of usage change to record
* @qd: The quota data
@@ -822,12 +833,12 @@ static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
* Returns: 0 or -ve on error
*/
-static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
+static int gfs2_adjust_quota(struct gfs2_sbd *sdp, loff_t loc,
s64 change, struct gfs2_quota_data *qd,
struct qc_dqblk *fdq)
{
+ struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
struct inode *inode = &ip->i_inode;
- struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_quota q;
int err;
u64 size;
@@ -844,10 +855,10 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
return err;
loc -= sizeof(q); /* gfs2_internal_read would've advanced the loc ptr */
- err = -EIO;
be64_add_cpu(&q.qu_value, change);
if (((s64)be64_to_cpu(q.qu_value)) < 0)
q.qu_value = 0; /* Never go negative on quota usage */
+ spin_lock(&qd->qd_lockref.lock);
qd->qd_qb.qb_value = q.qu_value;
if (fdq) {
if (fdq->d_fieldmask & QC_SPC_SOFT) {
@@ -863,13 +874,14 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
qd->qd_qb.qb_value = q.qu_value;
}
}
+ spin_unlock(&qd->qd_lockref.lock);
- err = gfs2_write_disk_quota(ip, &q, loc);
+ err = gfs2_write_disk_quota(sdp, &q, loc);
if (!err) {
size = loc + sizeof(struct gfs2_quota);
if (size > inode->i_size)
i_size_write(inode, size);
- inode->i_mtime = inode->i_atime = current_time(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
mark_inode_dirty(inode);
set_bit(QDF_REFRESH, &qd->qd_flags);
}
@@ -877,11 +889,12 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
return err;
}
-static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
+static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda,
+ u64 sync_gen)
{
- struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_name.ln_sbd;
+ struct gfs2_sbd *sdp = (*qda)->qd_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
- struct gfs2_alloc_parms ap = { .aflags = 0, };
+ struct gfs2_alloc_parms ap = {};
unsigned int data_blocks, ind_blocks;
struct gfs2_holder *ghs, i_gh;
unsigned int qx, x;
@@ -891,18 +904,12 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
unsigned int nalloc = 0, blocks;
int error;
- error = gfs2_qa_get(ip);
- if (error)
- return error;
-
gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
&data_blocks, &ind_blocks);
ghs = kmalloc_array(num_qd, sizeof(struct gfs2_holder), GFP_NOFS);
- if (!ghs) {
- error = -ENOMEM;
- goto out;
- }
+ if (!ghs)
+ return -ENOMEM;
sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
inode_lock(&ip->i_inode);
@@ -951,7 +958,8 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
for (x = 0; x < num_qd; x++) {
qd = qda[x];
offset = qd2offset(qd);
- error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, qd, NULL);
+ error = gfs2_adjust_quota(sdp, offset, qd->qd_change_sync, qd,
+ NULL);
if (error)
goto out_end_trans;
@@ -959,8 +967,6 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
set_bit(QDF_REFRESH, &qd->qd_flags);
}
- error = 0;
-
out_end_trans:
gfs2_trans_end(sdp);
out_ipres:
@@ -974,8 +980,15 @@ out_dq:
kfree(ghs);
gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl,
GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_DO_SYNC);
-out:
- gfs2_qa_put(ip);
+ if (!error) {
+ for (x = 0; x < num_qd; x++) {
+ qd = qda[x];
+ spin_lock(&qd->qd_lockref.lock);
+ if (qd->qd_sync_gen < sync_gen)
+ qd->qd_sync_gen = sync_gen;
+ spin_unlock(&qd->qd_lockref.lock);
+ }
+ }
return error;
}
@@ -999,7 +1012,9 @@ static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd)
qlvb->qb_limit = q.qu_limit;
qlvb->qb_warn = q.qu_warn;
qlvb->qb_value = q.qu_value;
+ spin_lock(&qd->qd_lockref.lock);
qd->qd_qb = *qlvb;
+ spin_unlock(&qd->qd_lockref.lock);
return 0;
}
@@ -1007,11 +1022,12 @@ static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd)
static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
struct gfs2_holder *q_gh)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+ struct gfs2_sbd *sdp = qd->qd_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
struct gfs2_holder i_gh;
int error;
+ gfs2_assert_warn(sdp, sdp == qd->qd_gl->gl_name.ln_sbd);
restart:
error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
if (error)
@@ -1020,7 +1036,9 @@ restart:
if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
force_refresh = FORCE;
+ spin_lock(&qd->qd_lockref.lock);
qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
+ spin_unlock(&qd->qd_lockref.lock);
if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
gfs2_glock_dq_uninit(q_gh);
@@ -1057,9 +1075,9 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_quota_data *qd;
u32 x;
- int error = 0;
+ int error;
- if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
+ if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
return 0;
error = gfs2_quota_hold(ip, uid, gid);
@@ -1087,56 +1105,56 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
return error;
}
-static int need_sync(struct gfs2_quota_data *qd)
+static bool need_sync(struct gfs2_quota_data *qd)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+ struct gfs2_sbd *sdp = qd->qd_sbd;
struct gfs2_tune *gt = &sdp->sd_tune;
- s64 value;
+ s64 value, change, limit;
unsigned int num, den;
- int do_sync = 1;
+ int ret = false;
+ spin_lock(&qd->qd_lockref.lock);
if (!qd->qd_qb.qb_limit)
- return 0;
+ goto out;
- spin_lock(&qd_lock);
- value = qd->qd_change;
- spin_unlock(&qd_lock);
+ change = qd->qd_change;
+ if (change <= 0)
+ goto out;
+ value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
+ limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit);
+ if (value >= limit)
+ goto out;
spin_lock(&gt->gt_spin);
num = gt->gt_quota_scale_num;
den = gt->gt_quota_scale_den;
spin_unlock(&gt->gt_spin);
- if (value < 0)
- do_sync = 0;
- else if ((s64)be64_to_cpu(qd->qd_qb.qb_value) >=
- (s64)be64_to_cpu(qd->qd_qb.qb_limit))
- do_sync = 0;
- else {
- value *= gfs2_jindex_size(sdp) * num;
- value = div_s64(value, den);
- value += (s64)be64_to_cpu(qd->qd_qb.qb_value);
- if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit))
- do_sync = 0;
- }
+ change *= gfs2_jindex_size(sdp) * num;
+ change = div_s64(change, den);
+ if (value + change < limit)
+ goto out;
- return do_sync;
+ ret = true;
+out:
+ spin_unlock(&qd->qd_lockref.lock);
+ return ret;
}
void gfs2_quota_unlock(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct gfs2_quota_data *qda[4];
+ struct gfs2_quota_data *qda[2 * GFS2_MAXQUOTAS];
unsigned int count = 0;
u32 x;
- int found;
if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
return;
for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
struct gfs2_quota_data *qd;
- int sync;
+ bool sync;
+ int error;
qd = ip->i_qadata->qa_qd[x];
sync = need_sync(qd);
@@ -1146,17 +1164,16 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
continue;
spin_lock(&qd_lock);
- found = qd_check_sync(sdp, qd, NULL);
+ sync = qd_grab_sync(sdp, qd, U64_MAX);
spin_unlock(&qd_lock);
- if (!found)
+ if (!sync)
continue;
gfs2_assert_warn(sdp, qd->qd_change_sync);
- if (bh_get(qd)) {
- clear_bit(QDF_LOCKED, &qd->qd_flags);
- slot_put(qd);
- qd_put(qd);
+ error = bh_get(qd);
+ if (error) {
+ qd_ungrab_sync(qd);
continue;
}
@@ -1164,7 +1181,9 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
}
if (count) {
- do_sync(count, qda);
+ u64 sync_gen = READ_ONCE(sdp->sd_quota_sync_gen);
+
+ do_sync(count, qda, sync_gen);
for (x = 0; x < count; x++)
qd_unlock(qda[x]);
}
@@ -1174,16 +1193,16 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
#define MAX_LINE 256
-static int print_message(struct gfs2_quota_data *qd, char *type)
+static void print_message(struct gfs2_quota_data *qd, char *type)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
-
- fs_info(sdp, "quota %s for %s %u\n",
- type,
- (qd->qd_id.type == USRQUOTA) ? "user" : "group",
- from_kqid(&init_user_ns, qd->qd_id));
+ struct gfs2_sbd *sdp = qd->qd_sbd;
- return 0;
+ if (sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) {
+ fs_info(sdp, "quota %s for %s %u\n",
+ type,
+ (qd->qd_id.type == USRQUOTA) ? "user" : "group",
+ from_kqid(&init_user_ns, qd->qd_id));
+ }
}
/**
@@ -1223,12 +1242,12 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
qid_eq(qd->qd_id, make_kqid_gid(gid))))
continue;
+ spin_lock(&qd->qd_lockref.lock);
warn = (s64)be64_to_cpu(qd->qd_qb.qb_warn);
limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit);
value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
- spin_lock(&qd_lock);
value += qd->qd_change;
- spin_unlock(&qd_lock);
+ spin_unlock(&qd->qd_lockref.lock);
if (limit > 0 && (limit - value) < ap->allowed)
ap->allowed = limit - value;
@@ -1253,7 +1272,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
* HZ)) {
quota_send_warning(qd->qd_id,
sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
- error = print_message(qd, "warning");
+ print_message(qd, "warning");
+ error = 0;
qd->qd_last_warn = jiffies;
}
}
@@ -1267,7 +1287,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
u32 x;
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON ||
+ if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ||
gfs2_assert_warn(sdp, change))
return;
if (ip->i_diskflags & GFS2_DIF_SYSTEM)
@@ -1291,40 +1311,55 @@ int gfs2_quota_sync(struct super_block *sb, int type)
struct gfs2_sbd *sdp = sb->s_fs_info;
struct gfs2_quota_data **qda;
unsigned int max_qd = PAGE_SIZE / sizeof(struct gfs2_holder);
- unsigned int num_qd;
- unsigned int x;
+ u64 sync_gen;
int error = 0;
+ if (sb_rdonly(sdp->sd_vfs))
+ return 0;
+
qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL);
if (!qda)
return -ENOMEM;
mutex_lock(&sdp->sd_quota_sync_mutex);
- sdp->sd_quota_sync_gen++;
+ sync_gen = sdp->sd_quota_sync_gen + 1;
do {
- num_qd = 0;
+ struct gfs2_quota_data *iter;
+ unsigned int num_qd = 0;
+ unsigned int x;
- for (;;) {
- error = qd_fish(sdp, qda + num_qd);
- if (error || !qda[num_qd])
- break;
- if (++num_qd == max_qd)
- break;
+ spin_lock(&qd_lock);
+ list_for_each_entry(iter, &sdp->sd_quota_list, qd_list) {
+ if (qd_grab_sync(sdp, iter, sync_gen)) {
+ qda[num_qd++] = iter;
+ if (num_qd == max_qd)
+ break;
+ }
}
+ spin_unlock(&qd_lock);
- if (num_qd) {
- if (!error)
- error = do_sync(num_qd, qda);
+ if (!num_qd)
+ break;
+
+ for (x = 0; x < num_qd; x++) {
+ error = bh_get(qda[x]);
if (!error)
- for (x = 0; x < num_qd; x++)
- qda[x]->qd_sync_gen =
- sdp->sd_quota_sync_gen;
+ continue;
+
+ while (x < num_qd)
+ qd_ungrab_sync(qda[--num_qd]);
+ break;
+ }
- for (x = 0; x < num_qd; x++)
- qd_unlock(qda[x]);
+ if (!error) {
+ WRITE_ONCE(sdp->sd_quota_sync_gen, sync_gen);
+ error = do_sync(num_qd, qda, sync_gen);
}
- } while (!error && num_qd == max_qd);
+
+ for (x = 0; x < num_qd; x++)
+ qd_unlock(qda[x]);
+ } while (!error);
mutex_unlock(&sdp->sd_quota_sync_mutex);
kfree(qda);
@@ -1359,6 +1394,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
unsigned int found = 0;
unsigned int hash;
unsigned int bm_size;
+ struct buffer_head *bh;
u64 dblock;
u32 extlen = 0;
int error;
@@ -1378,8 +1414,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
return error;
for (x = 0; x < blocks; x++) {
- struct buffer_head *bh;
- const struct gfs2_quota_change *qc;
+ struct gfs2_quota_change *qc;
unsigned int y;
if (!extlen) {
@@ -1392,15 +1427,13 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
if (!bh)
goto fail;
- if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) {
- brelse(bh);
- goto fail;
- }
+ if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC))
+ goto fail_brelse;
- qc = (const struct gfs2_quota_change *)(bh->b_data + sizeof(struct gfs2_meta_header));
+ qc = (struct gfs2_quota_change *)(bh->b_data + sizeof(struct gfs2_meta_header));
for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots;
y++, slot++) {
- struct gfs2_quota_data *qd;
+ struct gfs2_quota_data *old_qd, *qd;
s64 qc_change = be64_to_cpu(qc->qc_change);
u32 qc_flags = be32_to_cpu(qc->qc_flags);
enum quota_type qtype = (qc_flags & GFS2_QCF_USER) ?
@@ -1413,29 +1446,51 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
hash = gfs2_qd_hash(sdp, qc_id);
qd = qd_alloc(hash, sdp, qc_id);
- if (qd == NULL) {
- brelse(bh);
- goto fail;
- }
+ if (qd == NULL)
+ goto fail_brelse;
+ qd->qd_lockref.count = 0;
set_bit(QDF_CHANGE, &qd->qd_flags);
qd->qd_change = qc_change;
qd->qd_slot = slot;
- qd->qd_slot_count = 1;
+ qd->qd_slot_ref = 1;
spin_lock(&qd_lock);
+ spin_lock_bucket(hash);
+ old_qd = gfs2_qd_search_bucket(hash, sdp, qc_id);
+ if (old_qd) {
+ fs_err(sdp, "Corruption found in quota_change%u"
+ "file: duplicate identifier in "
+ "slot %u\n",
+ sdp->sd_jdesc->jd_jid, slot);
+
+ spin_unlock_bucket(hash);
+ spin_unlock(&qd_lock);
+ qd_put(old_qd);
+
+ gfs2_glock_put(qd->qd_gl);
+ kmem_cache_free(gfs2_quotad_cachep, qd);
+
+ /* zero out the duplicate slot */
+ lock_buffer(bh);
+ memset(qc, 0, sizeof(*qc));
+ mark_buffer_dirty(bh);
+ unlock_buffer(bh);
+
+ continue;
+ }
BUG_ON(test_and_set_bit(slot, sdp->sd_quota_bitmap));
list_add(&qd->qd_list, &sdp->sd_quota_list);
atomic_inc(&sdp->sd_quota_count);
- spin_unlock(&qd_lock);
-
- spin_lock_bucket(hash);
hlist_bl_add_head_rcu(&qd->qd_hlist, &qd_hash_table[hash]);
spin_unlock_bucket(hash);
+ spin_unlock(&qd_lock);
found++;
}
+ if (buffer_dirty(bh))
+ sync_dirty_buffer(bh);
brelse(bh);
dblock++;
extlen--;
@@ -1446,6 +1501,10 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
return 0;
+fail_brelse:
+ if (buffer_dirty(bh))
+ sync_dirty_buffer(bh);
+ brelse(bh);
fail:
gfs2_quota_cleanup(sdp);
return error;
@@ -1453,36 +1512,36 @@ fail:
void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
{
- struct list_head *head = &sdp->sd_quota_list;
struct gfs2_quota_data *qd;
+ LIST_HEAD(dispose);
+ int count;
- spin_lock(&qd_lock);
- while (!list_empty(head)) {
- qd = list_last_entry(head, struct gfs2_quota_data, qd_list);
-
- list_del(&qd->qd_list);
-
- /* Also remove if this qd exists in the reclaim list */
- list_lru_del(&gfs2_qd_lru, &qd->qd_lru);
- atomic_dec(&sdp->sd_quota_count);
- spin_unlock(&qd_lock);
-
- spin_lock_bucket(qd->qd_hash);
- hlist_bl_del_rcu(&qd->qd_hlist);
- spin_unlock_bucket(qd->qd_hash);
-
- gfs2_assert_warn(sdp, !qd->qd_change);
- gfs2_assert_warn(sdp, !qd->qd_slot_count);
- gfs2_assert_warn(sdp, !qd->qd_bh_count);
+ BUG_ON(!test_bit(SDF_NORECOVERY, &sdp->sd_flags) &&
+ test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
- gfs2_glock_put(qd->qd_gl);
- call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
+ spin_lock(&qd_lock);
+ list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
+ spin_lock(&qd->qd_lockref.lock);
+ if (qd->qd_lockref.count != 0) {
+ spin_unlock(&qd->qd_lockref.lock);
+ continue;
+ }
+ lockref_mark_dead(&qd->qd_lockref);
+ spin_unlock(&qd->qd_lockref.lock);
- spin_lock(&qd_lock);
+ list_lru_del_obj(&gfs2_qd_lru, &qd->qd_lru);
+ list_add(&qd->qd_lru, &dispose);
}
spin_unlock(&qd_lock);
- gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
+ gfs2_qd_list_dispose(&dispose);
+
+ wait_event_timeout(sdp->sd_kill_wait,
+ (count = atomic_read(&sdp->sd_quota_count)) == 0,
+ HZ * 60);
+
+ if (count != 0)
+ fs_err(sdp, "%d left-over quota data objects\n", count);
kvfree(sdp->sd_quota_bitmap);
sdp->sd_quota_bitmap = NULL;
@@ -1499,20 +1558,6 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
}
}
-static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg,
- int (*fxn)(struct super_block *sb, int type),
- unsigned long t, unsigned long *timeo,
- unsigned int *new_timeo)
-{
- if (t >= *timeo) {
- int error = fxn(sdp->sd_vfs, 0);
- quotad_error(sdp, msg, error);
- *timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ;
- } else {
- *timeo -= t;
- }
-}
-
void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) {
if (!sdp->sd_statfs_force_sync) {
sdp->sd_statfs_force_sync = 1;
@@ -1530,42 +1575,51 @@ void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) {
int gfs2_quotad(void *data)
{
struct gfs2_sbd *sdp = data;
- struct gfs2_tune *tune = &sdp->sd_tune;
- unsigned long statfs_timeo = 0;
- unsigned long quotad_timeo = 0;
- unsigned long t = 0;
- DEFINE_WAIT(wait);
+ unsigned long now = jiffies;
+ unsigned long statfs_deadline = now;
+ unsigned long quotad_deadline = now;
+ set_freezable();
while (!kthread_should_stop()) {
+ unsigned long t;
if (gfs2_withdrawn(sdp))
- goto bypass;
- /* Update the master statfs file */
- if (sdp->sd_statfs_force_sync) {
- int error = gfs2_statfs_sync(sdp->sd_vfs, 0);
+ break;
+
+ now = jiffies;
+ if (sdp->sd_statfs_force_sync ||
+ time_after(now, statfs_deadline)) {
+ unsigned int quantum;
+ int error;
+
+ /* Update the master statfs file */
+ error = gfs2_statfs_sync(sdp->sd_vfs, 0);
quotad_error(sdp, "statfs", error);
- statfs_timeo = gfs2_tune_get(sdp, gt_statfs_quantum) * HZ;
+
+ quantum = gfs2_tune_get(sdp, gt_statfs_quantum);
+ statfs_deadline = now + quantum * HZ;
}
- else
- quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t,
- &statfs_timeo,
- &tune->gt_statfs_quantum);
+ if (time_after(now, quotad_deadline)) {
+ unsigned int quantum;
+ int error;
- /* Update quota file */
- quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t,
- &quotad_timeo, &tune->gt_quota_quantum);
+ /* Update the quota file */
+ error = gfs2_quota_sync(sdp->sd_vfs, 0);
+ quotad_error(sdp, "sync", error);
- try_to_freeze();
+ quantum = gfs2_tune_get(sdp, gt_quota_quantum);
+ quotad_deadline = now + quantum * HZ;
+ }
-bypass:
- t = min(quotad_timeo, statfs_timeo);
+ t = min(statfs_deadline - now, quotad_deadline - now);
+ wait_event_freezable_timeout(sdp->sd_quota_wait,
+ sdp->sd_statfs_force_sync ||
+ gfs2_withdrawn(sdp) ||
+ kthread_should_stop(),
+ t);
- prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE);
- if (!sdp->sd_statfs_force_sync)
- t -= schedule_timeout(t);
- else
+ if (sdp->sd_statfs_force_sync)
t = 0;
- finish_wait(&sdp->sd_quota_wait, &wait);
}
return 0;
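The rewrite above drops the relative-countdown bookkeeping of quotad_check_timeo() in favour of absolute deadlines compared with time_after(). A minimal userspace sketch, assuming the kernel's definition of time_after(), of why this unsigned arithmetic stays correct across jiffies wraparound:

#include <stdio.h>

typedef unsigned long jiffies_t;

/* Same shape as the kernel macro: ((long)((b) - (a)) < 0). */
static int time_after(jiffies_t a, jiffies_t b)
{
	return (long)(b - a) < 0;
}

int main(void)
{
	jiffies_t now = (jiffies_t)-10;	/* 10 ticks before wraparound */
	jiffies_t deadline = now + 30;	/* wraps past zero */

	printf("expired now? %d\n", time_after(now, deadline));        /* 0 */
	printf("expired later? %d\n", time_after(now + 40, deadline)); /* 1 */
	printf("sleep for %lu ticks\n", deadline - now);               /* 30 */
	return 0;
}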
@@ -1578,6 +1632,8 @@ static int gfs2_quota_get_state(struct super_block *sb, struct qc_state *state)
memset(state, 0, sizeof(*state));
switch (sdp->sd_args.ar_quota) {
+ case GFS2_QUOTA_QUIET:
+ fallthrough;
case GFS2_QUOTA_ON:
state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED;
state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED;
@@ -1706,7 +1762,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
if (gfs2_is_stuffed(ip))
alloc_required = 1;
if (alloc_required) {
- struct gfs2_alloc_parms ap = { .aflags = 0, };
+ struct gfs2_alloc_parms ap = {};
gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
&data_blocks, &ind_blocks);
blocks = 1 + data_blocks + ind_blocks;
@@ -1724,7 +1780,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
goto out_release;
/* Apply changes */
- error = gfs2_adjust_quota(ip, offset, 0, qd, fdq);
+ error = gfs2_adjust_quota(sdp, offset, 0, qd, fdq);
if (!error)
clear_bit(QDF_QMSG_QUIET, &qd->qd_flags);
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 21ada332d555..988f38dc5b2c 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -15,27 +15,27 @@ struct gfs2_sbd;
#define NO_UID_QUOTA_CHANGE INVALID_UID
#define NO_GID_QUOTA_CHANGE INVALID_GID
-extern int gfs2_qa_get(struct gfs2_inode *ip);
-extern void gfs2_qa_put(struct gfs2_inode *ip);
-extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
-extern void gfs2_quota_unhold(struct gfs2_inode *ip);
+int gfs2_qa_get(struct gfs2_inode *ip);
+void gfs2_qa_put(struct gfs2_inode *ip);
+int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
+void gfs2_quota_unhold(struct gfs2_inode *ip);
-extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
-extern void gfs2_quota_unlock(struct gfs2_inode *ip);
+int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
+void gfs2_quota_unlock(struct gfs2_inode *ip);
-extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
- struct gfs2_alloc_parms *ap);
-extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
- kuid_t uid, kgid_t gid);
+int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
+ struct gfs2_alloc_parms *ap);
+void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
+ kuid_t uid, kgid_t gid);
-extern int gfs2_quota_sync(struct super_block *sb, int type);
-extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid);
+int gfs2_quota_sync(struct super_block *sb, int type);
+int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid);
-extern int gfs2_quota_init(struct gfs2_sbd *sdp);
-extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
-extern int gfs2_quotad(void *data);
+int gfs2_quota_init(struct gfs2_sbd *sdp);
+void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
+int gfs2_quotad(void *data);
-extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp);
+void gfs2_wake_up_statfs(struct gfs2_sbd *sdp);
static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
struct gfs2_alloc_parms *ap)
@@ -44,13 +44,13 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
int ret;
ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
- if (capable(CAP_SYS_RESOURCE) ||
- sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
+ if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ||
+ capable(CAP_SYS_RESOURCE))
return 0;
ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
if (ret)
return ret;
- if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
+ if (sdp->sd_args.ar_quota == GFS2_QUOTA_ACCOUNT)
return 0;
ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap);
if (ret)
@@ -59,8 +59,10 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
}
extern const struct quotactl_ops gfs2_quotactl_ops;
-extern struct shrinker gfs2_qd_shrinker;
+int __init gfs2_qd_shrinker_init(void);
+void gfs2_qd_shrinker_exit(void);
extern struct list_lru gfs2_qd_lru;
-extern void __init gfs2_quota_hash_init(void);
+
+void __init gfs2_quota_hash_init(void);
#endif /* __QUOTA_DOT_H__ */
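The switch from an exported struct shrinker to an init/exit pair matches the dynamically allocated shrinker API. A plausible shape for the new functions, assuming shrinker_alloc()/shrinker_register()/shrinker_free() and the existing count/scan callbacks in quota.c (the bodies are not shown in this diff):

static struct shrinker *gfs2_qd_shrinker;

int __init gfs2_qd_shrinker_init(void)
{
	gfs2_qd_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "gfs2-qd");
	if (!gfs2_qd_shrinker)
		return -ENOMEM;

	/* gfs2_qd_shrink_count/scan are assumed to be the existing
	 * count_objects/scan_objects callbacks in quota.c. */
	gfs2_qd_shrinker->count_objects = gfs2_qd_shrink_count;
	gfs2_qd_shrinker->scan_objects = gfs2_qd_shrink_scan;
	shrinker_register(gfs2_qd_shrinker);
	return 0;
}

void gfs2_qd_shrinker_exit(void)
{
	shrinker_free(gfs2_qd_shrinker);
}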
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 2bb085a72e8e..8c8202c68b64 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -27,7 +27,7 @@
#include "util.h"
#include "dir.h"
-struct workqueue_struct *gfs_recovery_wq;
+struct workqueue_struct *gfs2_recovery_wq;
int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
struct buffer_head **bh)
@@ -118,6 +118,7 @@ void gfs2_revoke_clean(struct gfs2_jdesc *jd)
int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
unsigned int blkno, struct gfs2_log_header_host *head)
{
+ const u32 zero = 0;
u32 hash, crc;
if (lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
@@ -126,7 +127,7 @@ int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
return 1;
hash = crc32(~0, lh, LH_V1_SIZE - 4);
- hash = ~crc32_le_shift(hash, 4); /* assume lh_hash is zero */
+ hash = ~crc32(hash, &zero, 4); /* assume lh_hash is zero */
if (be32_to_cpu(lh->lh_hash) != hash)
return 1;
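The crc32_le_shift() replacement above works because extending a CRC over N zero bytes is exactly what the shift helper computed, so running four zero bytes through crc32() stands in for the zeroed lh_hash field. A self-contained userspace sketch with a toy bitwise crc32_le (the kernel uses a table-driven version with the same semantics):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t crc32_le(uint32_t crc, const void *p, size_t len)
{
	const uint8_t *b = p;

	while (len--) {
		crc ^= *b++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0xedb88320u & -(crc & 1));
	}
	return crc;
}

int main(void)
{
	uint8_t lh[64];			/* stand-in for the log header */
	const uint32_t zero = 0;
	uint32_t hash;

	memset(lh, 0xab, sizeof(lh));

	/* Same shape as __get_log_header(): CRC everything before the
	 * 4-byte lh_hash field, then feed four zero bytes in place of
	 * the field itself. */
	hash = crc32_le(~0u, lh, sizeof(lh) - 4);
	hash = ~crc32_le(hash, &zero, 4);
	printf("computed lh_hash = %08x\n", hash);
	return 0;
}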
@@ -263,16 +264,12 @@ static void clean_journal(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
- u32 lblock = head->lh_blkno;
- gfs2_replay_incr_blk(jd, &lblock);
- gfs2_write_log_header(sdp, jd, head->lh_sequence + 1, 0, lblock,
+ gfs2_replay_incr_blk(jd, &head->lh_blkno);
+ head->lh_sequence++;
+ gfs2_write_log_header(sdp, jd, head->lh_sequence, 0, head->lh_blkno,
GFS2_LOG_HEAD_UNMOUNT | GFS2_LOG_HEAD_RECOVERY,
REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC);
- if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) {
- sdp->sd_log_flush_head = lblock;
- gfs2_log_incr_head(sdp);
- }
}
@@ -404,7 +401,7 @@ void gfs2_recover_func(struct work_struct *work)
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct gfs2_log_header_host head;
- struct gfs2_holder j_gh, ji_gh, thaw_gh;
+ struct gfs2_holder j_gh, ji_gh;
ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep;
int ro = 0;
unsigned int pass;
@@ -420,14 +417,15 @@ void gfs2_recover_func(struct work_struct *work)
if (sdp->sd_args.ar_spectator)
goto fail;
if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
- fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
+ fs_info(sdp, "jid=%u: Trying to acquire journal glock...\n",
jd->jd_jid);
jlocked = 1;
- /* Acquire the journal lock so we can do recovery */
+ /* Acquire the journal glock so we can do recovery */
error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
LM_ST_EXCLUSIVE,
- LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
+ LM_FLAG_RECOVER | LM_FLAG_TRY |
+ GL_NOCACHE,
&j_gh);
switch (error) {
case 0:
@@ -443,7 +441,8 @@ void gfs2_recover_func(struct work_struct *work)
}
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
- LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
+ LM_FLAG_RECOVER | GL_NOCACHE,
+ &ji_gh);
if (error)
goto fail_gunlock_j;
} else {
@@ -457,7 +456,7 @@ void gfs2_recover_func(struct work_struct *work)
if (error)
goto fail_gunlock_ji;
- error = gfs2_find_jhead(jd, &head, true);
+ error = gfs2_find_jhead(jd, &head);
if (error)
goto fail_gunlock_ji;
t_jhd = ktime_get();
@@ -465,14 +464,14 @@ void gfs2_recover_func(struct work_struct *work)
ktime_ms_delta(t_jhd, t_jlck));
if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
- fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
- jd->jd_jid);
+ mutex_lock(&sdp->sd_freeze_mutex);
- /* Acquire a shared hold on the freeze lock */
-
- error = gfs2_freeze_lock(sdp, &thaw_gh, LM_FLAG_PRIORITY);
- if (error)
+ if (test_bit(SDF_FROZEN, &sdp->sd_flags)) {
+ mutex_unlock(&sdp->sd_freeze_mutex);
+ fs_warn(sdp, "jid=%u: Can't replay: filesystem "
+ "is frozen\n", jd->jd_jid);
goto fail_gunlock_ji;
+ }
if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) {
ro = 1;
@@ -496,7 +495,7 @@ void gfs2_recover_func(struct work_struct *work)
fs_warn(sdp, "jid=%u: Can't replay: read-only block "
"device\n", jd->jd_jid);
error = -EROFS;
- goto fail_gunlock_thaw;
+ goto fail_gunlock_nofreeze;
}
t_tlck = ktime_get();
@@ -514,7 +513,7 @@ void gfs2_recover_func(struct work_struct *work)
lops_after_scan(jd, error, pass);
if (error) {
up_read(&sdp->sd_log_flush_lock);
- goto fail_gunlock_thaw;
+ goto fail_gunlock_nofreeze;
}
}
@@ -522,7 +521,7 @@ void gfs2_recover_func(struct work_struct *work)
clean_journal(jd, &head);
up_read(&sdp->sd_log_flush_lock);
- gfs2_freeze_unlock(&thaw_gh);
+ mutex_unlock(&sdp->sd_freeze_mutex);
t_rep = ktime_get();
fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, "
"jhead:%lldms, tlck:%lldms, replay:%lldms]\n",
@@ -533,6 +532,9 @@ void gfs2_recover_func(struct work_struct *work)
ktime_ms_delta(t_rep, t_tlck));
}
+ if (jd->jd_jid == sdp->sd_lockstruct.ls_jid)
+ gfs2_log_pointers_init(sdp, &head);
+
gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
if (jlocked) {
@@ -543,8 +545,8 @@ void gfs2_recover_func(struct work_struct *work)
fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
goto done;
-fail_gunlock_thaw:
- gfs2_freeze_unlock(&thaw_gh);
+fail_gunlock_nofreeze:
+ mutex_unlock(&sdp->sd_freeze_mutex);
fail_gunlock_ji:
if (jlocked) {
gfs2_glock_dq_uninit(&ji_gh);
@@ -570,7 +572,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
return -EBUSY;
/* we have JDF_RECOVERY, queue should always succeed */
- rv = queue_work(gfs_recovery_wq, &jd->jd_work);
+ rv = queue_work(gfs2_recovery_wq, &jd->jd_work);
BUG_ON(!rv);
if (wait)
@@ -580,3 +582,13 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
return wait ? jd->jd_recover_error : 0;
}
+void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
+ struct gfs2_log_header_host *head)
+{
+ sdp->sd_log_sequence = head->lh_sequence + 1;
+ gfs2_replay_incr_blk(sdp->sd_jdesc, &head->lh_blkno);
+ sdp->sd_log_tail = head->lh_blkno;
+ sdp->sd_log_flush_head = head->lh_blkno;
+ sdp->sd_log_flush_tail = head->lh_blkno;
+ sdp->sd_log_head = head->lh_blkno;
+}
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index 0d30f8e804f4..5a5ba72ecd75 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -9,7 +9,7 @@
#include "incore.h"
-extern struct workqueue_struct *gfs_recovery_wq;
+extern struct workqueue_struct *gfs2_recovery_wq;
static inline void gfs2_replay_incr_blk(struct gfs2_jdesc *jd, u32 *blk)
{
@@ -17,17 +17,19 @@ static inline void gfs2_replay_incr_blk(struct gfs2_jdesc *jd, u32 *blk)
*blk = 0;
}
-extern int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
+int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
struct buffer_head **bh);
-extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where);
-extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where);
-extern void gfs2_revoke_clean(struct gfs2_jdesc *jd);
+int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where);
+int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where);
+void gfs2_revoke_clean(struct gfs2_jdesc *jd);
-extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait);
-extern void gfs2_recover_func(struct work_struct *work);
-extern int __get_log_header(struct gfs2_sbd *sdp,
- const struct gfs2_log_header *lh, unsigned int blkno,
+int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait);
+void gfs2_recover_func(struct work_struct *work);
+int __get_log_header(struct gfs2_sbd *sdp,
+ const struct gfs2_log_header *lh, unsigned int blkno,
+ struct gfs2_log_header_host *head);
+void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
struct gfs2_log_header_host *head);
#endif /* __RECOVERY_DOT_H__ */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index f602fb844951..b14e54b38ee8 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -159,13 +159,13 @@ static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone)
}
/**
- * gfs2_bit_search
+ * gfs2_bit_search - search bitmap for a state
* @ptr: Pointer to bitmap data
* @mask: Mask to use (normally 0x55555.... but adjusted for search start)
* @state: The state we are searching for
*
- * We xor the bitmap data with a patter which is the bitwise opposite
- * of what we are looking for, this gives rise to a pattern of ones
+ * We xor the bitmap data with a pattern which is the bitwise opposite
+ * of what we are looking for. This gives rise to a pattern of ones
* wherever there is a match. Since we have two bits per entry, we
take this pattern, shift it down by one place and then AND it with
* the original. All the even bit positions (0,2,4, etc) then represent
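The two-bit search described in this comment is easier to see with concrete values. A minimal userspace sketch (illustrative only; the real gfs2_bit_search() also folds in a caller-supplied mask for unaligned starts):

#include <stdint.h>
#include <stdio.h>

static uint64_t bit_search(uint64_t bitmap, unsigned int state)
{
	/* ~state replicated into every 2-bit slot: state 1 (0b01)
	 * becomes 0xaaaa... (0b10 repeated). XOR then yields 0b11 in
	 * every slot that matches. */
	static const uint64_t search[] = {
		0xffffffffffffffffULL,	/* state 0 */
		0xaaaaaaaaaaaaaaaaULL,	/* state 1 */
		0x5555555555555555ULL,	/* state 2 */
		0x0000000000000000ULL,	/* state 3 */
	};
	uint64_t tmp = bitmap ^ search[state];

	/* Fold each slot: only slots that were 0b11 keep their low bit,
	 * so matches land on the even bit positions. */
	tmp &= tmp >> 1;
	return tmp & 0x5555555555555555ULL;
}

int main(void)
{
	/* Entries (low to high): 1, 3, 1, 0 -> 0b00011101 */
	uint64_t bitmap = 0x1d;

	printf("matches for state 1: %#llx\n",
	       (unsigned long long)bit_search(bitmap, 1)); /* 0x11 */
	return 0;
}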
@@ -814,11 +814,11 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
bi = rgd->rd_bits + (length - 1);
if ((bi->bi_start + bi->bi_bytes) * GFS2_NBBY != rgd->rd_data) {
gfs2_lm(sdp,
- "ri_addr = %llu\n"
- "ri_length = %u\n"
- "ri_data0 = %llu\n"
- "ri_data = %u\n"
- "ri_bitbytes = %u\n"
+ "ri_addr=%llu "
+ "ri_length=%u "
+ "ri_data0=%llu "
+ "ri_data=%u "
+ "ri_bitbytes=%u "
"start=%u len=%u offset=%u\n",
(unsigned long long)rgd->rd_addr,
rgd->rd_length,
@@ -1188,7 +1188,7 @@ static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd)
/**
* gfs2_rgrp_go_instantiate - Read in a RG's header and bitmaps
- * @gh: the glock holder representing the rgrpd to read in
+ * @gl: the glock representing the rgrpd to read in
*
* Read in all of a Resource Group's header and bitmap blocks.
* Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps.
@@ -1879,7 +1879,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
*/
ip = gl->gl_object;
- if (ip || !gfs2_queue_delete_work(gl, 0))
+ if (ip || !gfs2_queue_verify_delete(gl, false))
gfs2_glock_put(gl);
else
found++;
@@ -1967,7 +1967,7 @@ static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
}
/**
- * gfs2_rgrp_used_recently
+ * gfs2_rgrp_used_recently - test if an rgrp has been used recently
* @rs: The block reservation with the rgrp to test
* @msecs: The time limit in milliseconds
*
@@ -1987,10 +1987,8 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
{
const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- u32 skip;
- get_random_bytes(&skip, sizeof(skip));
- return skip % sdp->sd_rgrps;
+ return get_random_u32() % sdp->sd_rgrps;
}
static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
@@ -2306,7 +2304,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
(unsigned long long)rgd->rd_addr, rgd->rd_flags,
rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
rgd->rd_requested, rgd->rd_reserved, rgd->rd_extfail_pt);
- if (rgd->rd_sbd->sd_args.ar_rgrplvb) {
+ if (rgd->rd_sbd->sd_args.ar_rgrplvb && rgd->rd_rgl) {
struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
gfs2_print_dbg(seq, "%s L: f:%02x b:%u i:%u\n", fs_id_buf,
@@ -2411,13 +2409,12 @@ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
* @bn: Used to return the starting block number
* @nblocks: requested number of blocks/extent length (value/result)
* @dinode: 1 if we're allocating a dinode block, else 0
- * @generation: the generation number of the inode
*
* Returns: 0 or error
*/
int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
- bool dinode, u64 *generation)
+ bool dinode)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct buffer_head *dibh;
@@ -2477,10 +2474,13 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
rbm.rgd->rd_free -= *nblocks;
spin_unlock(&rbm.rgd->rd_rsspin);
if (dinode) {
+ u64 generation;
+
rbm.rgd->rd_dinodes++;
- *generation = rbm.rgd->rd_igeneration++;
- if (*generation == 0)
- *generation = rbm.rgd->rd_igeneration++;
+ generation = rbm.rgd->rd_igeneration++;
+ if (generation == 0)
+ generation = rbm.rgd->rd_igeneration++;
+ ip->i_generation = generation;
}
gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
@@ -2584,8 +2584,8 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
- rgrp_unlock_local(rgd);
be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);
+ rgrp_unlock_local(rgd);
gfs2_statfs_change(sdp, 0, +1, -1);
trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 00b30cf893af..8d20e99385db 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -22,38 +22,38 @@ struct gfs2_rgrpd;
struct gfs2_sbd;
struct gfs2_holder;
-extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
+void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
-extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact);
-extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
-extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
+struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact);
+struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
+struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
-extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
-extern int gfs2_rindex_update(struct gfs2_sbd *sdp);
-extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
-extern int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl);
-extern void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd);
+void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
+int gfs2_rindex_update(struct gfs2_sbd *sdp);
+void gfs2_free_clones(struct gfs2_rgrpd *rgd);
+int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl);
+void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd);
-extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
+struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
#define GFS2_AF_ORLOV 1
-extern int gfs2_inplace_reserve(struct gfs2_inode *ip,
- struct gfs2_alloc_parms *ap);
-extern void gfs2_inplace_release(struct gfs2_inode *ip);
-
-extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
- bool dinode, u64 *generation);
-
-extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
-extern void gfs2_rs_delete(struct gfs2_inode *ip);
-extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
- u64 bstart, u32 blen, int meta);
-extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
- u64 bstart, u32 blen);
-extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
-extern void gfs2_unlink_di(struct inode *inode);
-extern int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr,
- unsigned int type);
+int gfs2_inplace_reserve(struct gfs2_inode *ip,
+ struct gfs2_alloc_parms *ap);
+void gfs2_inplace_release(struct gfs2_inode *ip);
+
+int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
+ bool dinode);
+
+void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
+void gfs2_rs_delete(struct gfs2_inode *ip);
+void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
+ u64 bstart, u32 blen, int meta);
+void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
+ u64 bstart, u32 blen);
+void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
+void gfs2_unlink_di(struct inode *inode);
+int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr,
+ unsigned int type);
struct gfs2_rgrp_list {
unsigned int rl_rgrps;
@@ -62,18 +62,19 @@ struct gfs2_rgrp_list {
struct gfs2_holder *rl_ghs;
};
-extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
- u64 block);
-extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist,
- unsigned int state, u16 flags);
-extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
-extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
-extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
- const char *fs_id_buf);
-extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
- struct buffer_head *bh,
- const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
-extern int gfs2_fitrim(struct file *filp, void __user *argp);
+void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
+ u64 block);
+void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist,
+ unsigned int state, u16 flags);
+void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
+u64 gfs2_ri_total(struct gfs2_sbd *sdp);
+void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
+ const char *fs_id_buf);
+int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
+ struct buffer_head *bh,
+ const struct gfs2_bitmap *bi, unsigned minlen,
+ u64 *ptrimmed);
+int gfs2_fitrim(struct file *filp, void __user *argp);
/* This is how to tell if a reservation is in the rgrp tree: */
static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs)
@@ -88,9 +89,9 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
return first <= block && block < last;
}
-extern void check_and_update_goal(struct gfs2_inode *ip);
+void check_and_update_goal(struct gfs2_inode *ip);
-extern void rgrp_lock_local(struct gfs2_rgrpd *rgd);
-extern void rgrp_unlock_local(struct gfs2_rgrpd *rgd);
+void rgrp_lock_local(struct gfs2_rgrpd *rgd);
+void rgrp_unlock_local(struct gfs2_rgrpd *rgd);
#endif /* __RGRP_DOT_H__ */
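For callers, the gfs2_alloc_blocks() prototype change is mechanical; a sketch of the before/after call shape (variable names hypothetical):

	u64 bn;
	unsigned int n = 1;
	int error;

	/* Before: error = gfs2_alloc_blocks(ip, &bn, &n, true, &generation); */

	/* After: the generation out-parameter is gone; when dinode == true
	 * the allocator stores the new generation in ip->i_generation
	 * itself (see the rgrp.c hunk above). */
	error = gfs2_alloc_blocks(ip, &bn, &n, true);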
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 999cc146d708..f6cd907b3ec6 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -44,10 +44,10 @@
#include "xattr.h"
#include "lops.h"
-enum dinode_demise {
- SHOULD_DELETE_DINODE,
- SHOULD_NOT_DELETE_DINODE,
- SHOULD_DEFER_EVICTION,
+enum evict_behavior {
+ EVICT_SHOULD_DELETE,
+ EVICT_SHOULD_SKIP_DELETE,
+ EVICT_SHOULD_DEFER_DELETE,
};
/**
@@ -67,9 +67,13 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp)
sdp->sd_journals = 0;
spin_unlock(&sdp->sd_jindex_spin);
+ down_write(&sdp->sd_log_flush_lock);
sdp->sd_jdesc = NULL;
+ up_write(&sdp->sd_log_flush_lock);
+
while (!list_empty(&list)) {
jd = list_first_entry(&list, struct gfs2_jdesc, jd_list);
+ BUG_ON(jd->jd_log_bio);
gfs2_free_journal_extents(jd);
list_del(&jd->jd_list);
iput(jd->jd_inode);
@@ -130,28 +134,22 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
{
struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
struct gfs2_glock *j_gl = ip->i_gl;
- struct gfs2_log_header_host head;
int error;
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
if (gfs2_withdrawn(sdp))
return -EIO;
- error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
- if (error || gfs2_withdrawn(sdp))
- return error;
-
- if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
- gfs2_consist(sdp);
+ if (sdp->sd_log_sequence == 0) {
+ fs_err(sdp, "unknown status of our own journal jid %d",
+ sdp->sd_lockstruct.ls_jid);
return -EIO;
}
- /* Initialize some head of the log stuff */
- sdp->sd_log_sequence = head.lh_sequence + 1;
- gfs2_log_pointers_init(sdp, head.lh_blkno);
-
error = gfs2_quota_init(sdp);
- if (!error && !gfs2_withdrawn(sdp))
+ if (!error && gfs2_withdrawn(sdp))
+ error = -EIO;
+ if (!error)
set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
return error;
}
@@ -328,7 +326,12 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
struct lfcc *lfcc;
LIST_HEAD(list);
struct gfs2_log_header_host lh;
- int error;
+ int error, error2;
+
+ /*
+ * Grab all the journal glocks in SH mode. We are *probably* doing
+ * that to prevent recovery.
+ */
list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
@@ -345,17 +348,19 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
list_add(&lfcc->list, &list);
}
+ gfs2_freeze_unlock(sdp);
+
error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE,
- LM_FLAG_NOEXP | GL_NOPID,
+ LM_FLAG_RECOVER | GL_NOPID,
&sdp->sd_freeze_gh);
if (error)
- goto out;
+ goto relock_shared;
list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
error = gfs2_jdesc_check(jd);
if (error)
break;
- error = gfs2_find_jhead(jd, &lh, false);
+ error = gfs2_find_jhead(jd, &lh);
if (error)
break;
if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
@@ -364,8 +369,14 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
}
}
- if (error)
- gfs2_freeze_unlock(&sdp->sd_freeze_gh);
+ if (!error)
+ goto out; /* success */
+
+ gfs2_freeze_unlock(sdp);
+
+relock_shared:
+ error2 = gfs2_freeze_lock_shared(sdp);
+ gfs2_assert_withdraw(sdp, !error2);
out:
while (!list_empty(&list)) {
@@ -393,9 +404,9 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
str->di_nlink = cpu_to_be32(inode->i_nlink);
str->di_size = cpu_to_be64(i_size_read(inode));
str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode));
- str->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
- str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
- str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);
+ str->di_atime = cpu_to_be64(inode_get_atime_sec(inode));
+ str->di_mtime = cpu_to_be64(inode_get_mtime_sec(inode));
+ str->di_ctime = cpu_to_be64(inode_get_ctime_sec(inode));
str->di_goal_meta = cpu_to_be64(ip->i_goal);
str->di_goal_data = cpu_to_be64(ip->i_goal);
@@ -410,9 +421,9 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
str->di_entries = cpu_to_be32(ip->i_entries);
str->di_eattr = cpu_to_be64(ip->i_eattr);
- str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
- str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
- str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
+ str->di_atime_nsec = cpu_to_be32(inode_get_atime_nsec(inode));
+ str->di_mtime_nsec = cpu_to_be32(inode_get_mtime_nsec(inode));
+ str->di_ctime_nsec = cpu_to_be32(inode_get_ctime_nsec(inode));
}
/**
@@ -459,7 +470,7 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
* @flags: The type of dirty
*
* Unfortunately it can be called under any combination of inode
- * glock and transaction lock, so we have to check carefully.
+ * glock and freeze glock, so we have to check carefully.
*
* At the moment this deals only with atime - it should be possible
* to expand that role in future, once a review of the locking has
@@ -476,13 +487,11 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
int need_endtrans = 0;
int ret;
- if (unlikely(!ip->i_gl)) {
- /* This can only happen during incomplete inode creation. */
- BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
+ /* This can only happen during incomplete inode creation. */
+ if (unlikely(!ip->i_gl))
return;
- }
- if (unlikely(gfs2_withdrawn(sdp)))
+ if (gfs2_withdrawn(sdp))
return;
if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
@@ -529,38 +538,32 @@ void gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
- gfs2_flush_delete_work(sdp);
- if (!log_write_allowed && current == sdp->sd_quotad_process)
- fs_warn(sdp, "The quotad daemon is withdrawing.\n");
- else if (sdp->sd_quotad_process)
- kthread_stop(sdp->sd_quotad_process);
- sdp->sd_quotad_process = NULL;
+ if (!test_bit(SDF_KILL, &sdp->sd_flags))
+ gfs2_flush_delete_work(sdp);
- if (!log_write_allowed && current == sdp->sd_logd_process)
- fs_warn(sdp, "The logd daemon is withdrawing.\n");
- else if (sdp->sd_logd_process)
- kthread_stop(sdp->sd_logd_process);
- sdp->sd_logd_process = NULL;
+ gfs2_destroy_threads(sdp);
if (log_write_allowed) {
gfs2_quota_sync(sdp->sd_vfs, 0);
gfs2_statfs_sync(sdp->sd_vfs, 0);
+ /* We do two log flushes here. The first one commits dirty inodes
+ * and rgrps to the journal, but queues up revokes to the ail list.
+ * The second flush writes out and removes the revokes.
+ *
+ * The first must be done before the FLUSH_SHUTDOWN code
+ * clears the LIVE flag, otherwise it will not be able to start
+ * a transaction to write its revokes, and the error will cause
+ * a withdraw of the file system. */
+ gfs2_log_flush(sdp, NULL, GFS2_LFC_MAKE_FS_RO);
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
GFS2_LFC_MAKE_FS_RO);
wait_event_timeout(sdp->sd_log_waitq,
gfs2_log_is_empty(sdp),
HZ * 5);
gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp));
- } else {
- wait_event_timeout(sdp->sd_log_waitq,
- gfs2_log_is_empty(sdp),
- HZ * 5);
}
gfs2_quota_cleanup(sdp);
-
- if (!log_write_allowed)
- sdp->sd_vfs->s_flags |= SB_RDONLY;
}
/**
@@ -591,15 +594,23 @@ restart:
}
spin_unlock(&sdp->sd_jindex_spin);
- if (!sb_rdonly(sb)) {
+ if (!sb_rdonly(sb))
gfs2_make_fs_ro(sdp);
+ else {
+ if (gfs2_withdrawn(sdp))
+ gfs2_destroy_threads(sdp);
+
+ gfs2_quota_cleanup(sdp);
}
- WARN_ON(gfs2_withdrawing(sdp));
+
+ flush_work(&sdp->sd_withdraw_work);
/* At this point, we're through modifying the disk */
/* Release stuff */
+ gfs2_freeze_unlock(sdp);
+
iput(sdp->sd_jindex);
iput(sdp->sd_statfs_inode);
iput(sdp->sd_rindex);
@@ -625,12 +636,9 @@ restart:
gfs2_jindex_free(sdp);
/* Take apart glock structures and buffer lists */
gfs2_gl_hash_clear(sdp);
- truncate_inode_pages_final(&sdp->sd_aspace);
+ iput(sdp->sd_inode);
gfs2_delete_debugfs_file(sdp);
- /* Unmount the locking protocol */
- gfs2_lm_unmount(sdp);
- /* At this point, we're through participating in the lockspace */
gfs2_sys_fs_del(sdp);
free_sbd(sdp);
}
@@ -654,59 +662,94 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
return sdp->sd_log_error;
}
-void gfs2_freeze_func(struct work_struct *work)
+static int gfs2_do_thaw(struct gfs2_sbd *sdp, enum freeze_holder who, const void *freeze_owner)
{
+ struct super_block *sb = sdp->sd_vfs;
int error;
- struct gfs2_holder freeze_gh;
+
+ error = gfs2_freeze_lock_shared(sdp);
+ if (error)
+ goto fail;
+ error = thaw_super(sb, who, freeze_owner);
+ if (!error)
+ return 0;
+
+fail:
+ fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n", error);
+ gfs2_assert_withdraw(sdp, 0);
+ return error;
+}
+
+void gfs2_freeze_func(struct work_struct *work)
+{
struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work);
struct super_block *sb = sdp->sd_vfs;
+ int error;
- atomic_inc(&sb->s_active);
- error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
- if (error) {
- gfs2_assert_withdraw(sdp, 0);
- } else {
- atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
- error = thaw_super(sb);
- if (error) {
- fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n",
- error);
- gfs2_assert_withdraw(sdp, 0);
- }
- gfs2_freeze_unlock(&freeze_gh);
- }
+ mutex_lock(&sdp->sd_freeze_mutex);
+ error = -EBUSY;
+ if (test_bit(SDF_FROZEN, &sdp->sd_flags))
+ goto freeze_failed;
+
+ error = freeze_super(sb, FREEZE_HOLDER_USERSPACE, NULL);
+ if (error)
+ goto freeze_failed;
+
+ gfs2_freeze_unlock(sdp);
+ set_bit(SDF_FROZEN, &sdp->sd_flags);
+
+ error = gfs2_do_thaw(sdp, FREEZE_HOLDER_USERSPACE, NULL);
+ if (error)
+ goto out;
+
+ clear_bit(SDF_FROZEN, &sdp->sd_flags);
+ goto out;
+
+freeze_failed:
+ fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", error);
+
+out:
+ mutex_unlock(&sdp->sd_freeze_mutex);
deactivate_super(sb);
- clear_bit_unlock(SDF_FS_FROZEN, &sdp->sd_flags);
- wake_up_bit(&sdp->sd_flags, SDF_FS_FROZEN);
- return;
}
/**
- * gfs2_freeze - prevent further writes to the filesystem
+ * gfs2_freeze_super - prevent further writes to the filesystem
* @sb: the VFS structure for the filesystem
+ * @who: freeze flags
+ * @freeze_owner: owner of the freeze
*
*/
-static int gfs2_freeze(struct super_block *sb)
+static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
int error;
- mutex_lock(&sdp->sd_freeze_mutex);
- if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) {
- error = -EBUSY;
- goto out;
+ if (!mutex_trylock(&sdp->sd_freeze_mutex))
+ return -EBUSY;
+ if (test_bit(SDF_FROZEN, &sdp->sd_flags)) {
+ mutex_unlock(&sdp->sd_freeze_mutex);
+ return -EBUSY;
}
for (;;) {
- if (gfs2_withdrawn(sdp)) {
- error = -EINVAL;
+ error = freeze_super(sb, who, freeze_owner);
+ if (error) {
+ fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
+ error);
goto out;
}
error = gfs2_lock_fs_check_clean(sdp);
- if (!error)
+ if (!error) {
+ set_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
+ set_bit(SDF_FROZEN, &sdp->sd_flags);
break;
+ }
+
+ (void)gfs2_do_thaw(sdp, who, freeze_owner);
if (error == -EBUSY)
fs_err(sdp, "waiting for recovery before freeze\n");
@@ -720,32 +763,58 @@ static int gfs2_freeze(struct super_block *sb)
fs_err(sdp, "retrying...\n");
msleep(1000);
}
- set_bit(SDF_FS_FROZEN, &sdp->sd_flags);
+
out:
mutex_unlock(&sdp->sd_freeze_mutex);
return error;
}
+static int gfs2_freeze_fs(struct super_block *sb)
+{
+ struct gfs2_sbd *sdp = sb->s_fs_info;
+
+ if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
+ gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
+ GFS2_LFC_FREEZE_GO_SYNC);
+ if (gfs2_withdrawn(sdp))
+ return -EIO;
+ }
+ return 0;
+}
+
/**
- * gfs2_unfreeze - reallow writes to the filesystem
+ * gfs2_thaw_super - reallow writes to the filesystem
* @sb: the VFS structure for the filesystem
+ * @who: freeze flags
+ * @freeze_owner: owner of the freeze
*
*/
-static int gfs2_unfreeze(struct super_block *sb)
+static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
+ int error;
- mutex_lock(&sdp->sd_freeze_mutex);
- if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
- !gfs2_holder_initialized(&sdp->sd_freeze_gh)) {
+ if (!mutex_trylock(&sdp->sd_freeze_mutex))
+ return -EBUSY;
+ if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) {
mutex_unlock(&sdp->sd_freeze_mutex);
return -EINVAL;
}
- gfs2_freeze_unlock(&sdp->sd_freeze_gh);
+ atomic_inc(&sb->s_active);
+ gfs2_freeze_unlock(sdp);
+
+ error = gfs2_do_thaw(sdp, who, freeze_owner);
+
+ if (!error) {
+ clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
+ clear_bit(SDF_FROZEN, &sdp->sd_flags);
+ }
mutex_unlock(&sdp->sd_freeze_mutex);
- return wait_on_bit(&sdp->sd_flags, SDF_FS_FROZEN, TASK_INTERRUPTIBLE);
+ deactivate_super(sb);
+ return error;
}
/**
@@ -911,6 +980,7 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = sc.sc_dinodes + sc.sc_free;
buf->f_ffree = sc.sc_free;
buf->f_namelen = GFS2_FNAMESIZE;
+ buf->f_fsid = uuid_to_fsid(sb->s_uuid.b);
return 0;
}
@@ -933,11 +1003,12 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
static int gfs2_drop_inode(struct inode *inode)
{
struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
if (inode->i_nlink &&
gfs2_holder_initialized(&ip->i_iopen_gh)) {
struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
- if (test_bit(GLF_DEMOTE, &gl->gl_flags))
+ if (glock_needs_demote(gl))
clear_nlink(inode);
}
@@ -952,22 +1023,18 @@ static int gfs2_drop_inode(struct inode *inode)
struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
gfs2_glock_hold(gl);
- if (!gfs2_queue_delete_work(gl, 0))
- gfs2_glock_queue_put(gl);
+ if (!gfs2_queue_verify_delete(gl, true))
+ gfs2_glock_put_async(gl);
return 0;
}
- return generic_drop_inode(inode);
-}
+ /*
+ * No longer cache inodes when trying to evict them all.
+ */
+ if (test_bit(SDF_EVICTING, &sdp->sd_flags))
+ return 1;
-static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
-{
- do {
- if (d1 == d2)
- return 1;
- d1 = d1->d_parent;
- } while (!IS_ROOT(d1));
- return 0;
+ return inode_generic_drop(inode);
}
/**
@@ -982,9 +1049,16 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
{
struct gfs2_sbd *sdp = root->d_sb->s_fs_info;
struct gfs2_args *args = &sdp->sd_args;
- int val;
+ unsigned int logd_secs, statfs_slow, statfs_quantum, quota_quantum;
- if (is_ancestor(root, sdp->sd_master_dir))
+ spin_lock(&sdp->sd_tune.gt_spin);
+ logd_secs = sdp->sd_tune.gt_logd_secs;
+ quota_quantum = sdp->sd_tune.gt_quota_quantum;
+ statfs_quantum = sdp->sd_tune.gt_statfs_quantum;
+ statfs_slow = sdp->sd_tune.gt_statfs_slow;
+ spin_unlock(&sdp->sd_tune.gt_spin);
+
+ if (is_subdir(root, sdp->sd_master_dir))
seq_puts(s, ",meta");
if (args->ar_lockproto[0])
seq_show_option(s, "lockproto", args->ar_lockproto);
@@ -1012,6 +1086,9 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
case GFS2_QUOTA_ON:
state = "on";
break;
+ case GFS2_QUOTA_QUIET:
+ state = "quiet";
+ break;
default:
state = "unknown";
break;
@@ -1037,17 +1114,14 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
}
if (args->ar_discard)
seq_puts(s, ",discard");
- val = sdp->sd_tune.gt_logd_secs;
- if (val != 30)
- seq_printf(s, ",commit=%d", val);
- val = sdp->sd_tune.gt_statfs_quantum;
- if (val != 30)
- seq_printf(s, ",statfs_quantum=%d", val);
- else if (sdp->sd_tune.gt_statfs_slow)
+ if (logd_secs != 30)
+ seq_printf(s, ",commit=%d", logd_secs);
+ if (statfs_quantum != 30)
+ seq_printf(s, ",statfs_quantum=%d", statfs_quantum);
+ else if (statfs_slow)
seq_puts(s, ",statfs_quantum=0");
- val = sdp->sd_tune.gt_quota_quantum;
- if (val != 60)
- seq_printf(s, ",quota_quantum=%d", val);
+ if (quota_quantum != 60)
+ seq_printf(s, ",quota_quantum=%d", quota_quantum);
if (args->ar_statfs_percent)
seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent);
if (args->ar_errors != GFS2_ERRORS_DEFAULT) {
@@ -1057,6 +1131,9 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
case GFS2_ERRORS_WITHDRAW:
state = "withdraw";
break;
+ case GFS2_ERRORS_DEACTIVATE:
+ state = "deactivate";
+ break;
case GFS2_ERRORS_PANIC:
state = "panic";
break;
@@ -1077,76 +1154,6 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
return 0;
}
-static void gfs2_final_release_pages(struct gfs2_inode *ip)
-{
- struct inode *inode = &ip->i_inode;
- struct gfs2_glock *gl = ip->i_gl;
-
- if (unlikely(!gl)) {
- /* This can only happen during incomplete inode creation. */
- BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
- return;
- }
-
- truncate_inode_pages(gfs2_glock2aspace(gl), 0);
- truncate_inode_pages(&inode->i_data, 0);
-
- if (atomic_read(&gl->gl_revokes) == 0) {
- clear_bit(GLF_LFLUSH, &gl->gl_flags);
- clear_bit(GLF_DIRTY, &gl->gl_flags);
- }
-}
-
-static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
-{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct gfs2_rgrpd *rgd;
- struct gfs2_holder gh;
- int error;
-
- if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
- gfs2_consist_inode(ip);
- return -EIO;
- }
-
- error = gfs2_rindex_update(sdp);
- if (error)
- return error;
-
- error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
- if (error)
- return error;
-
- rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
- if (!rgd) {
- gfs2_consist_inode(ip);
- error = -EIO;
- goto out_qs;
- }
-
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
- LM_FLAG_NODE_SCOPE, &gh);
- if (error)
- goto out_qs;
-
- error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
- sdp->sd_jdesc->jd_blocks);
- if (error)
- goto out_rg_gunlock;
-
- gfs2_free_di(rgd, ip);
-
- gfs2_final_release_pages(ip);
-
- gfs2_trans_end(sdp);
-
-out_rg_gunlock:
- gfs2_glock_dq_uninit(&gh);
-out_qs:
- gfs2_quota_unhold(ip);
- return error;
-}
-
/**
* gfs2_glock_put_eventually
* @gl: The glock to put
@@ -1158,58 +1165,60 @@ out_qs:
static void gfs2_glock_put_eventually(struct gfs2_glock *gl)
{
if (current->flags & PF_MEMALLOC)
- gfs2_glock_queue_put(gl);
+ gfs2_glock_put_async(gl);
else
gfs2_glock_put(gl);
}
-static bool gfs2_upgrade_iopen_glock(struct inode *inode)
+static enum evict_behavior gfs2_upgrade_iopen_glock(struct inode *inode)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_holder *gh = &ip->i_iopen_gh;
- long timeout = 5 * HZ;
int error;
gh->gh_flags |= GL_NOCACHE;
gfs2_glock_dq_wait(gh);
/*
- * If there are no other lock holders, we'll get the lock immediately.
+ * If there are no other lock holders, we will immediately get
+ * exclusive access to the iopen glock here.
+ *
* Otherwise, the other nodes holding the lock will be notified about
- * our locking request. If they don't have the inode open, they'll
- * evict the cached inode and release the lock. Otherwise, if they
- * poke the inode glock, we'll take this as an indication that they
- * still need the iopen glock and that they'll take care of deleting
- * the inode when they're done. As a last resort, if another node
- * keeps holding the iopen glock without showing any activity on the
- * inode glock, we'll eventually time out.
+ * our locking request (see iopen_go_callback()). If they do not have
+ * the inode open, they are expected to evict the cached inode and
+ * release the lock, allowing us to proceed.
+ *
+ * Otherwise, if they cannot evict the inode, they are expected to poke
+ * the inode glock (note: not the iopen glock). We will notice that
+ * and stop waiting for the iopen glock immediately. The other node(s)
+ * are then expected to take care of deleting the inode when they no
+ * longer use it.
*
- * Note that we're passing the LM_FLAG_TRY_1CB flag to the first
- * locking request as an optimization to notify lock holders as soon as
- * possible. Without that flag, they'd be notified implicitly by the
- * second locking request.
+ * As a last resort, if another node keeps holding the iopen glock
+ * without showing any activity on the inode glock, we will eventually
+ * time out and fail the iopen glock upgrade.
*/
- gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, gh);
- error = gfs2_glock_nq(gh);
- if (error != GLR_TRYFAILED)
- return !error;
-
gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh);
error = gfs2_glock_nq(gh);
if (error)
- return false;
+ return EVICT_SHOULD_SKIP_DELETE;
- timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
+ wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
!test_bit(HIF_WAIT, &gh->gh_iflags) ||
- test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags),
- timeout);
+ glock_needs_demote(ip->i_gl),
+ 5 * HZ);
if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) {
gfs2_glock_dq(gh);
- return false;
+ if (glock_needs_demote(ip->i_gl))
+ return EVICT_SHOULD_SKIP_DELETE;
+ return EVICT_SHOULD_DEFER_DELETE;
}
- return gfs2_glock_holder_ready(gh) == 0;
+ error = gfs2_glock_holder_ready(gh);
+ if (error)
+ return EVICT_SHOULD_SKIP_DELETE;
+ return EVICT_SHOULD_DELETE;
}
/**
@@ -1222,58 +1231,47 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode)
*
* Returns: the fate of the dinode
*/
-static enum dinode_demise evict_should_delete(struct inode *inode,
- struct gfs2_holder *gh)
+static enum evict_behavior evict_should_delete(struct inode *inode,
+ struct gfs2_holder *gh)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct super_block *sb = inode->i_sb;
struct gfs2_sbd *sdp = sb->s_fs_info;
int ret;
- if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags)))
- goto should_delete;
-
- if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags))
- return SHOULD_DEFER_EVICTION;
+ if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
+ test_bit(GLF_DEFER_DELETE, &ip->i_iopen_gh.gh_gl->gl_flags))
+ return EVICT_SHOULD_DEFER_DELETE;
/* Deletes should never happen under memory pressure anymore. */
if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
- return SHOULD_DEFER_EVICTION;
+ return EVICT_SHOULD_DEFER_DELETE;
/* Must not read inode block until block type has been verified */
ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, gh);
- if (unlikely(ret)) {
- glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
- ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
- gfs2_glock_dq_uninit(&ip->i_iopen_gh);
- return SHOULD_DEFER_EVICTION;
- }
+ if (unlikely(ret))
+ return EVICT_SHOULD_SKIP_DELETE;
if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino))
- return SHOULD_NOT_DELETE_DINODE;
+ return EVICT_SHOULD_SKIP_DELETE;
ret = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
if (ret)
- return SHOULD_NOT_DELETE_DINODE;
+ return EVICT_SHOULD_SKIP_DELETE;
ret = gfs2_instantiate(gh);
if (ret)
- return SHOULD_NOT_DELETE_DINODE;
+ return EVICT_SHOULD_SKIP_DELETE;
/*
* The inode may have been recreated in the meantime.
*/
if (inode->i_nlink)
- return SHOULD_NOT_DELETE_DINODE;
+ return EVICT_SHOULD_SKIP_DELETE;
-should_delete:
if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
- test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
- if (!gfs2_upgrade_iopen_glock(inode)) {
- gfs2_holder_uninit(&ip->i_iopen_gh);
- return SHOULD_NOT_DELETE_DINODE;
- }
- }
- return SHOULD_DELETE_DINODE;
+ test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
+ return gfs2_upgrade_iopen_glock(inode);
+ return EVICT_SHOULD_DELETE;
}
/**
@@ -1293,7 +1291,7 @@ static int evict_unlinked_inode(struct inode *inode)
}
if (ip->i_eattr) {
- ret = gfs2_ea_dealloc(ip);
+ ret = gfs2_ea_dealloc(ip, true);
if (ret)
goto out;
}
@@ -1304,9 +1302,6 @@ static int evict_unlinked_inode(struct inode *inode)
goto out;
}
- if (ip->i_gl)
- gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
-
/*
* As soon as we clear the bitmap for the dinode, gfs2_create_inode()
* can get called to recreate it, or even gfs2_inode_lookup() if the
@@ -1320,6 +1315,9 @@ static int evict_unlinked_inode(struct inode *inode)
*/
ret = gfs2_dinode_dealloc(ip);
+ if (!ret && ip->i_gl)
+ gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
+
out:
return ret;
}
@@ -1384,16 +1382,35 @@ static void gfs2_evict_inode(struct inode *inode)
struct gfs2_sbd *sdp = sb->s_fs_info;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
+ enum evict_behavior behavior;
int ret;
+ gfs2_holder_mark_uninitialized(&gh);
if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr)
goto out;
- gfs2_holder_mark_uninitialized(&gh);
- ret = evict_should_delete(inode, &gh);
- if (ret == SHOULD_DEFER_EVICTION)
+ /*
+ * In case of an incomplete mount, gfs2_evict_inode() may be called for
+ * system files without having an active journal to write to. In that
+ * case, skip the filesystem evict.
+ */
+ if (!sdp->sd_jdesc)
goto out;
- if (ret == SHOULD_DELETE_DINODE)
+
+ behavior = evict_should_delete(inode, &gh);
+ if (behavior == EVICT_SHOULD_DEFER_DELETE &&
+ !test_bit(SDF_KILL, &sdp->sd_flags)) {
+ struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl;
+
+ if (io_gl) {
+ gfs2_glock_hold(io_gl);
+ if (!gfs2_queue_verify_delete(io_gl, true))
+ gfs2_glock_put(io_gl);
+ goto out;
+ }
+ behavior = EVICT_SHOULD_SKIP_DELETE;
+ }
+ if (behavior == EVICT_SHOULD_DELETE)
ret = evict_unlinked_inode(inode);
else
ret = evict_linked_inode(inode);
@@ -1401,13 +1418,11 @@ static void gfs2_evict_inode(struct inode *inode)
if (gfs2_rs_active(&ip->i_res))
gfs2_rs_deltree(&ip->i_res);
- if (gfs2_holder_initialized(&gh)) {
- glock_clear_object(ip->i_gl, ip);
- gfs2_glock_dq_uninit(&gh);
- }
if (ret && ret != GLR_TRYFAILED && ret != -EROFS)
fs_warn(sdp, "gfs2_evict_inode: %d\n", ret);
out:
+ if (gfs2_holder_initialized(&gh))
+ gfs2_glock_dq_uninit(&gh);
truncate_inode_pages_final(&inode->i_data);
if (ip->i_qadata)
gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
@@ -1427,9 +1442,8 @@ out:
if (ip->i_gl) {
glock_clear_object(ip->i_gl, ip);
wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
- gfs2_glock_add_to_lru(ip->i_gl);
gfs2_glock_put_eventually(ip->i_gl);
- ip->i_gl = NULL;
+ rcu_assign_pointer(ip->i_gl, NULL);
}
}
@@ -1441,11 +1455,13 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
if (!ip)
return NULL;
ip->i_no_addr = 0;
+ ip->i_no_formal_ino = 0;
ip->i_flags = 0;
ip->i_gl = NULL;
gfs2_holder_mark_uninitialized(&ip->i_iopen_gh);
memset(&ip->i_res, 0, sizeof(ip->i_res));
RB_CLEAR_NODE(&ip->i_res.rs_node);
+ ip->i_diskflags = 0;
ip->i_rahead = 0;
return &ip->i_inode;
}
@@ -1455,7 +1471,7 @@ static void gfs2_free_inode(struct inode *inode)
kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode));
}
-extern void free_local_statfs_inodes(struct gfs2_sbd *sdp)
+void free_local_statfs_inodes(struct gfs2_sbd *sdp)
{
struct local_statfs_inode *lsi, *safe;
@@ -1470,8 +1486,8 @@ extern void free_local_statfs_inodes(struct gfs2_sbd *sdp)
}
}
-extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
- unsigned int index)
+struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
+ unsigned int index)
{
struct local_statfs_inode *lsi;
@@ -1492,8 +1508,9 @@ const struct super_operations gfs2_super_ops = {
.evict_inode = gfs2_evict_inode,
.put_super = gfs2_put_super,
.sync_fs = gfs2_sync_fs,
- .freeze_super = gfs2_freeze,
- .thaw_super = gfs2_unfreeze,
+ .freeze_super = gfs2_freeze_super,
+ .freeze_fs = gfs2_freeze_fs,
+ .thaw_super = gfs2_thaw_super,
.statfs = gfs2_statfs,
.drop_inode = gfs2_drop_inode,
.show_options = gfs2_show_options,
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 58d13fd77aed..173f1e74c2a9 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -15,7 +15,7 @@
#define GFS2_FS_FORMAT_MIN (1801)
#define GFS2_FS_FORMAT_MAX (1802)
-extern void gfs2_lm_unmount(struct gfs2_sbd *sdp);
+void gfs2_lm_unmount(struct gfs2_sbd *sdp);
static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
{
@@ -26,31 +26,32 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
return x;
}
-extern void gfs2_jindex_free(struct gfs2_sbd *sdp);
+void gfs2_jindex_free(struct gfs2_sbd *sdp);
-extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
-extern int gfs2_jdesc_check(struct gfs2_jdesc *jd);
-extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
- struct gfs2_inode **ipp);
+struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
+int gfs2_jdesc_check(struct gfs2_jdesc *jd);
+int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
+ struct gfs2_inode **ipp);
-extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
-extern void gfs2_make_fs_ro(struct gfs2_sbd *sdp);
-extern void gfs2_online_uevent(struct gfs2_sbd *sdp);
-extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
-extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
- s64 dinodes);
-extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc,
- const void *buf);
-extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc,
- void *buf);
-extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh);
-extern int gfs2_statfs_sync(struct super_block *sb, int type);
-extern void gfs2_freeze_func(struct work_struct *work);
+int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
+void gfs2_make_fs_ro(struct gfs2_sbd *sdp);
+void gfs2_online_uevent(struct gfs2_sbd *sdp);
+void gfs2_destroy_threads(struct gfs2_sbd *sdp);
+int gfs2_statfs_init(struct gfs2_sbd *sdp);
+void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
+ s64 dinodes);
+void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc,
+ const void *buf);
+void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc,
+ void *buf);
+void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh);
+int gfs2_statfs_sync(struct super_block *sb, int type);
+void gfs2_freeze_func(struct work_struct *work);
-extern void free_local_statfs_inodes(struct gfs2_sbd *sdp);
-extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
- unsigned int index);
-extern void free_sbd(struct gfs2_sbd *sdp);
+void free_local_statfs_inodes(struct gfs2_sbd *sdp);
+struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
+ unsigned int index);
+void free_sbd(struct gfs2_sbd *sdp);
extern struct file_system_type gfs2_fs_type;
extern struct file_system_type gfs2meta_fs_type;
@@ -58,8 +59,8 @@ extern const struct export_operations gfs2_export_ops;
extern const struct super_operations gfs2_super_ops;
extern const struct dentry_operations gfs2_dops;
-extern const struct xattr_handler *gfs2_xattr_handlers_max[];
-extern const struct xattr_handler **gfs2_xattr_handlers_min;
+extern const struct xattr_handler * const gfs2_xattr_handlers_max[];
+extern const struct xattr_handler * const *gfs2_xattr_handlers_min;
#endif /* __SUPER_DOT_H__ */
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index d87ea98cf535..7051db9dbea0 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -59,7 +59,7 @@ static struct kset *gfs2_kset;
static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%u:%u\n",
+ return sysfs_emit(buf, "%u:%u\n",
MAJOR(sdp->sd_vfs->s_dev), MINOR(sdp->sd_vfs->s_dev));
}
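The snprintf(buf, PAGE_SIZE, ...) to sysfs_emit(buf, ...) conversions in this file all follow the same pattern: sysfs hands show() a full page, and sysfs_emit() checks the buffer alignment itself, so the callbacks stop passing PAGE_SIZE around. A minimal sketch of the pattern with a hypothetical attribute (not from this patch):

#include <linux/kobject.h>
#include <linux/sysfs.h>

static ssize_t example_show(struct kobject *kobj,
			    struct kobj_attribute *attr, char *buf)
{
	/* No PAGE_SIZE bookkeeping: sysfs_emit() validates buf. */
	return sysfs_emit(buf, "%d\n", 42);
}

static struct kobj_attribute example_attr = __ATTR_RO(example);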
@@ -68,7 +68,7 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
unsigned long f = sdp->sd_flags;
ssize_t s;
- s = snprintf(buf, PAGE_SIZE,
+ s = sysfs_emit(buf,
"Journal Checked: %d\n"
"Journal Live: %d\n"
"Journal ID: %d\n"
@@ -82,11 +82,9 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
"RO Recovery: %d\n"
"Skip DLM Unlock: %d\n"
"Force AIL Flush: %d\n"
+ "FS Freeze Initiator: %d\n"
"FS Frozen: %d\n"
- "Withdrawing: %d\n"
- "Withdraw In Prog: %d\n"
- "Remote Withdraw: %d\n"
- "Withdraw Recovery: %d\n"
+ "Killing: %d\n"
"sd_log_error: %d\n"
"sd_log_flush_lock: %d\n"
"sd_log_num_revoke: %u\n"
@@ -96,7 +94,10 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
"sd_log_flush_head: %d\n"
"sd_log_flush_tail: %d\n"
"sd_log_blks_reserved: %d\n"
- "sd_log_revokes_available: %d\n",
+ "sd_log_revokes_available: %d\n"
+ "sd_log_pinned: %d\n"
+ "sd_log_thresh1: %d\n"
+ "sd_log_thresh2: %d\n",
test_bit(SDF_JOURNAL_CHECKED, &f),
test_bit(SDF_JOURNAL_LIVE, &f),
(sdp->sd_jdesc ? sdp->sd_jdesc->jd_jid : 0),
@@ -110,11 +111,9 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
test_bit(SDF_RORECOVERY, &f),
test_bit(SDF_SKIP_DLM_UNLOCK, &f),
test_bit(SDF_FORCE_AIL_FLUSH, &f),
- test_bit(SDF_FS_FROZEN, &f),
- test_bit(SDF_WITHDRAWING, &f),
- test_bit(SDF_WITHDRAW_IN_PROG, &f),
- test_bit(SDF_REMOTE_WITHDRAW, &f),
- test_bit(SDF_WITHDRAW_RECOVERY, &f),
+ test_bit(SDF_FREEZE_INITIATOR, &f),
+ test_bit(SDF_FROZEN, &f),
+ test_bit(SDF_KILL, &f),
sdp->sd_log_error,
rwsem_is_locked(&sdp->sd_log_flush_lock),
sdp->sd_log_num_revoke,
@@ -124,13 +123,16 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
sdp->sd_log_flush_head,
sdp->sd_log_flush_tail,
sdp->sd_log_blks_reserved,
- atomic_read(&sdp->sd_log_revokes_available));
+ atomic_read(&sdp->sd_log_revokes_available),
+ atomic_read(&sdp->sd_log_pinned),
+ atomic_read(&sdp->sd_log_thresh1),
+ atomic_read(&sdp->sd_log_thresh2));
return s;
}
static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
+ return sysfs_emit(buf, "%s\n", sdp->sd_fsname);
}
static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
@@ -140,7 +142,7 @@ static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
buf[0] = '\0';
if (uuid_is_null(&s->s_uuid))
return 0;
- return snprintf(buf, PAGE_SIZE, "%pUB\n", &s->s_uuid);
+ return sysfs_emit(buf, "%pUB\n", &s->s_uuid);
}
static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
@@ -148,7 +150,7 @@ static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
struct super_block *sb = sdp->sd_vfs;
int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 0 : 1;
- return snprintf(buf, PAGE_SIZE, "%d\n", frozen);
+ return sysfs_emit(buf, "%d\n", frozen);
}
static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
@@ -164,10 +166,10 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
switch (n) {
case 0:
- error = thaw_super(sdp->sd_vfs);
+ error = thaw_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE, NULL);
break;
case 1:
- error = freeze_super(sdp->sd_vfs);
+ error = freeze_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE, NULL);
break;
default:
return -EINVAL;
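
freeze_super() and thaw_super() now identify the freeze holder; the calls above pass FREEZE_HOLDER_USERSPACE because this request arrives via sysfs. A minimal sketch of the calling convention, assuming the three-argument form used in this patch (holder type plus an opaque owner, NULL here):

#include <linux/fs.h>

/* Sketch (kernel context): toggle a superblock freeze as a
 * userspace-initiated holder, mirroring freeze_store() above. */
static int example_freeze_toggle(struct super_block *sb, bool freeze)
{
	if (freeze)
		return freeze_super(sb, FREEZE_HOLDER_USERSPACE, NULL);
	return thaw_super(sb, FREEZE_HOLDER_USERSPACE, NULL);
}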
@@ -184,7 +186,7 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
{
unsigned int b = gfs2_withdrawn(sdp);
- return snprintf(buf, PAGE_SIZE, "%u\n", b);
+ return sysfs_emit(buf, "%u\n", b);
}
static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
@@ -326,7 +328,7 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len
return -EINVAL;
if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags))
fs_info(sdp, "demote interface used\n");
- rv = gfs2_glock_get(sdp, glnum, glops, 0, &gl);
+ rv = gfs2_glock_get(sdp, glnum, glops, NO_CREATE, &gl);
if (rv)
return rv;
gfs2_glock_cb(gl, glmode);
@@ -387,7 +389,7 @@ static struct kobj_type gfs2_ktype = {
static ssize_t proto_name_show(struct gfs2_sbd *sdp, char *buf)
{
const struct lm_lockops *ops = sdp->sd_lockstruct.ls_ops;
- return sprintf(buf, "%s\n", ops->lm_proto_name);
+ return sysfs_emit(buf, "%s\n", ops->lm_proto_name);
}
static ssize_t block_show(struct gfs2_sbd *sdp, char *buf)
@@ -398,7 +400,7 @@ static ssize_t block_show(struct gfs2_sbd *sdp, char *buf)
if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))
val = 1;
- ret = sprintf(buf, "%d\n", val);
+ ret = sysfs_emit(buf, "%d\n", val);
return ret;
}
@@ -423,33 +425,27 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
return len;
}
-static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf)
-{
- int val = completion_done(&sdp->sd_wdack) ? 1 : 0;
-
- return sprintf(buf, "%d\n", val);
-}
-
-static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+static ssize_t withdraw_helper_status_store(struct gfs2_sbd *sdp,
+ const char *buf,
+ size_t len)
{
int ret, val;
ret = kstrtoint(buf, 0, &val);
if (ret)
return ret;
-
- if ((val == 1) &&
- !strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
- complete(&sdp->sd_wdack);
- else
+ if (val < 0 || val > 1)
return -EINVAL;
+
+ sdp->sd_withdraw_helper_status = val;
+ complete(&sdp->sd_withdraw_helper);
return len;
}
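
The withdraw file thus becomes write-only and serves purely as the reply channel for the userspace withdraw helper: writing 0 reports that the shared block device was deactivated, writing 1 that deactivation failed, and either completes sd_withdraw_helper so the kernel stops waiting. A hypothetical helper-side acknowledgment (the sysfs path under $DEVPATH/lock_module is supplied by the event handler; error handling kept minimal):

#include <fcntl.h>
#include <unistd.h>

/* Hypothetical: acknowledge a withdraw by writing the status code to
 * the "withdraw" sysfs file the kernel waits on above. */
static int ack_withdraw(const char *withdraw_file, int status)
{
	char c = status ? '1' : '0';
	int fd = open(withdraw_file, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, &c, 1) != 1) {
		close(fd);
		return -1;
	}
	return close(fd);
}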
static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
- return sprintf(buf, "%d\n", ls->ls_first);
+ return sysfs_emit(buf, "%d\n", ls->ls_first);
}
static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
@@ -482,7 +478,7 @@ out:
static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
- return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags));
+ return sysfs_emit(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags));
}
int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid)
@@ -540,18 +536,18 @@ out:
static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
- return sprintf(buf, "%d\n", ls->ls_recover_jid_done);
+ return sysfs_emit(buf, "%d\n", ls->ls_recover_jid_done);
}
static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
- return sprintf(buf, "%d\n", ls->ls_recover_jid_status);
+ return sysfs_emit(buf, "%d\n", ls->ls_recover_jid_status);
}
static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf)
{
- return sprintf(buf, "%d\n", sdp->sd_lockstruct.ls_jid);
+ return sysfs_emit(buf, "%d\n", sdp->sd_lockstruct.ls_jid);
}
static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
@@ -589,7 +585,7 @@ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
GDLM_ATTR(block, 0644, block_show, block_store);
-GDLM_ATTR(withdraw, 0644, wdack_show, wdack_store);
+GDLM_ATTR(withdraw, 0200, NULL, withdraw_helper_status_store);
GDLM_ATTR(jid, 0644, jid_show, jid_store);
GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store);
GDLM_ATTR(first_done, 0444, first_done_show, NULL);
@@ -616,7 +612,7 @@ static struct attribute *lock_module_attrs[] = {
static ssize_t quota_scale_show(struct gfs2_sbd *sdp, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%u %u\n",
+ return sysfs_emit(buf, "%u %u\n",
sdp->sd_tune.gt_quota_scale_num,
sdp->sd_tune.gt_quota_scale_den);
}
@@ -669,7 +665,7 @@ static struct gfs2_attr tune_attr_##name = __ATTR(name, 0644, show, store)
#define TUNE_ATTR_2(name, store) \
static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
{ \
- return snprintf(buf, PAGE_SIZE, "%u\n", sdp->sd_tune.gt_##name); \
+ return sysfs_emit(buf, "%u\n", sdp->sd_tune.gt_##name); \
} \
TUNE_ATTR_3(name, name##_show, store)
@@ -688,6 +684,7 @@ TUNE_ATTR(statfs_slow, 0);
TUNE_ATTR(new_files_jdata, 0);
TUNE_ATTR(statfs_quantum, 1);
TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
+TUNE_ATTR(withdraw_helper_timeout, 1);
static struct attribute *tune_attrs[] = {
&tune_attr_quota_warn_period.attr,
@@ -698,6 +695,7 @@ static struct attribute *tune_attrs[] = {
&tune_attr_statfs_quantum.attr,
&tune_attr_quota_scale.attr,
&tune_attr_new_files_jdata.attr,
+ &tune_attr_withdraw_helper_timeout.attr,
NULL,
};
@@ -754,7 +752,6 @@ fail_reg:
fs_err(sdp, "error %d adding sysfs files\n", error);
kobject_put(&sdp->sd_kobj);
wait_for_completion(&sdp->sd_kobj_unregister);
- sb->s_fs_info = NULL;
return error;
}
@@ -767,10 +764,10 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
wait_for_completion(&sdp->sd_kobj_unregister);
}
-static int gfs2_uevent(struct kobject *kobj, struct kobj_uevent_env *env)
+static int gfs2_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
{
- struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
- struct super_block *s = sdp->sd_vfs;
+ const struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
+ const struct super_block *s = sdp->sd_vfs;
add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index a5deb9f86831..fcfbf68ec725 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -52,13 +52,19 @@
{(1UL << GLF_DEMOTE_IN_PROGRESS), "p" }, \
{(1UL << GLF_DIRTY), "y" }, \
{(1UL << GLF_LFLUSH), "f" }, \
- {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \
- {(1UL << GLF_REPLY_PENDING), "r" }, \
- {(1UL << GLF_INITIAL), "I" }, \
- {(1UL << GLF_FROZEN), "F" }, \
+ {(1UL << GLF_PENDING_REPLY), "R" }, \
+ {(1UL << GLF_HAVE_REPLY), "r" }, \
+ {(1UL << GLF_INITIAL), "a" }, \
+ {(1UL << GLF_HAVE_FROZEN_REPLY), "F" }, \
{(1UL << GLF_LRU), "L" }, \
{(1UL << GLF_OBJECT), "o" }, \
- {(1UL << GLF_BLOCKING), "b" })
+ {(1UL << GLF_BLOCKING), "b" }, \
+ {(1UL << GLF_INSTANTIATE_NEEDED), "n" }, \
+ {(1UL << GLF_INSTANTIATE_IN_PROG), "N" }, \
+ {(1UL << GLF_TRY_TO_EVICT), "e" }, \
+ {(1UL << GLF_VERIFY_DELETE), "E" }, \
+ {(1UL << GLF_DEFER_DELETE), "s" }, \
+ {(1UL << GLF_CANCELING), "C" })
#ifndef NUMPTY
#define NUMPTY
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 63fec11ef2ce..6df65540e13d 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -49,7 +49,7 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp,
}
BUG_ON(blocks == 0 && revokes == 0);
- if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+ if (gfs2_withdrawn(sdp))
return -EROFS;
tr->tr_ip = ip;
@@ -85,25 +85,30 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp,
*/
down_read(&sdp->sd_log_flush_lock);
+ if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)))
+ goto out_not_live;
if (gfs2_log_try_reserve(sdp, tr, &extra_revokes))
goto reserved;
+
up_read(&sdp->sd_log_flush_lock);
gfs2_log_reserve(sdp, tr, &extra_revokes);
down_read(&sdp->sd_log_flush_lock);
-
-reserved:
- gfs2_log_release_revokes(sdp, extra_revokes);
if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
- gfs2_log_release_revokes(sdp, tr->tr_revokes);
- up_read(&sdp->sd_log_flush_lock);
+ revokes = tr->tr_revokes + extra_revokes;
+ gfs2_log_release_revokes(sdp, revokes);
gfs2_log_release(sdp, tr->tr_reserved);
- sb_end_intwrite(sdp->sd_vfs);
- return -EROFS;
+ goto out_not_live;
}
+reserved:
+ gfs2_log_release_revokes(sdp, extra_revokes);
current->journal_info = tr;
-
return 0;
+
+out_not_live:
+ up_read(&sdp->sd_log_flush_lock);
+ sb_end_intwrite(sdp->sd_vfs);
+ return -EROFS;
}
int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
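
The reworked reservation path above is a classic try-then-block-then-revalidate shape: attempt a non-blocking reservation under the shared flush lock; on failure, drop the lock, perform the blocking reservation, retake the lock, and re-check the journal-liveness condition the lock protects, releasing the reservation again if the journal died in the meantime. A reduced, self-contained sketch of that control flow (the reserve functions are stand-ins, not the gfs2 log API):

#include <pthread.h>
#include <stdbool.h>

static pthread_rwlock_t flush_lock = PTHREAD_RWLOCK_INITIALIZER;
static bool journal_live = true;

static bool try_reserve(void)      { return false; }	/* fast path failed */
static void blocking_reserve(void) { }			/* may sleep */
static void release_reserve(void)  { }

static int trans_begin_sketch(void)
{
	pthread_rwlock_rdlock(&flush_lock);
	if (!journal_live)
		goto out_not_live;
	if (try_reserve())
		return 0;			/* lock and reservation held */

	pthread_rwlock_unlock(&flush_lock);
	blocking_reserve();
	pthread_rwlock_rdlock(&flush_lock);
	if (!journal_live) {
		release_reserve();		/* undo what we blocked for */
		goto out_not_live;
	}
	return 0;

out_not_live:
	pthread_rwlock_unlock(&flush_lock);
	return -1;				/* -EROFS in the real code */
}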
@@ -226,14 +231,35 @@ out:
unlock_buffer(bh);
}
+void gfs2_trans_add_databufs(struct gfs2_glock *gl, struct folio *folio,
+ size_t from, size_t len)
+{
+ struct buffer_head *head = folio_buffers(folio);
+ unsigned int bsize = head->b_size;
+ struct buffer_head *bh;
+ size_t to = from + len;
+ size_t start, end;
+
+ for (bh = head, start = 0; bh != head || !start;
+ bh = bh->b_this_page, start = end) {
+ end = start + bsize;
+ if (end <= from)
+ continue;
+ if (start >= to)
+ break;
+ set_buffer_uptodate(bh);
+ gfs2_trans_add_data(gl, bh);
+ }
+}
+
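
gfs2_trans_add_databufs() walks the folio's buffer list, which is circular: b_this_page of the last buffer points back at the head. The "bh != head || !start" condition admits the first iteration (bh == head, start == 0) and terminates once the walk wraps around. The same single-pass traversal of a circular ring, in isolation:

#include <stdio.h>

struct ring { int val; struct ring *next; };

/* Sketch: visit each node of a circular list exactly once, using a
 * first-pass flag in the spirit of the !start test above. */
static void walk_ring(struct ring *head)
{
	struct ring *r = head;
	int first = 1;

	for (; r != head || first; r = r->next, first = 0)
		printf("%d\n", r->val);
}

int main(void)
{
	struct ring c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };

	c.next = &a;		/* close the ring */
	walk_ring(&a);		/* prints 1 2 3, then stops */
	return 0;
}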
void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ struct super_block *sb = sdp->sd_vfs;
struct gfs2_bufdata *bd;
struct gfs2_meta_header *mh;
struct gfs2_trans *tr = current->journal_info;
- enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
lock_buffer(bh);
if (buffer_pinned(bh)) {
@@ -245,12 +271,12 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
if (bd == NULL) {
gfs2_log_unlock(sdp);
unlock_buffer(bh);
- lock_page(bh->b_page);
+ folio_lock(bh->b_folio);
if (bh->b_private == NULL)
bd = gfs2_alloc_bufdata(gl, bh);
else
bd = bh->b_private;
- unlock_page(bh->b_page);
+ folio_unlock(bh->b_folio);
lock_buffer(bh);
gfs2_log_lock(sdp);
}
@@ -267,13 +293,15 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
(unsigned long long)bd->bd_bh->b_blocknr);
BUG();
}
- if (unlikely(state == SFS_FROZEN)) {
- fs_info(sdp, "GFS2:adding buf while frozen\n");
- gfs2_assert_withdraw(sdp, 0);
- }
- if (unlikely(gfs2_withdrawn(sdp))) {
+ if (gfs2_withdrawn(sdp)) {
fs_info(sdp, "GFS2:adding buf while withdrawn! 0x%llx\n",
(unsigned long long)bd->bd_bh->b_blocknr);
+ goto out_unlock;
+ }
+ if (unlikely(sb->s_writers.frozen == SB_FREEZE_COMPLETE)) {
+ fs_info(sdp, "GFS2:adding buf while frozen\n");
+ gfs2_withdraw(sdp);
+ goto out_unlock;
}
gfs2_pin(sdp, bd->bd_bh);
mh->__pad0 = cpu_to_be64(0);
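
The frozen check now keys off the VFS writer state rather than the removed gfs2-private sd_freeze_state: journaling a buffer after s_writers.frozen has reached SB_FREEZE_COMPLETE means a write escaped the freeze barrier, and the file system withdraws instead of asserting. The predicate in isolation (a sketch; kernel context assumed):

/* Sketch: "fully frozen" as tested in gfs2_trans_add_meta() above. */
static bool sb_fully_frozen(const struct super_block *sb)
{
	return sb->s_writers.frozen == SB_FREEZE_COMPLETE;
}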
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index c76ad9a4c75a..790c55f59e61 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -34,17 +34,19 @@ static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned
return rgd->rd_length;
}
-extern int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp,
- unsigned int blocks, unsigned int revokes,
- unsigned long ip);
-extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
- unsigned int revokes);
-
-extern void gfs2_trans_end(struct gfs2_sbd *sdp);
-extern void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh);
-extern void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh);
-extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
-extern void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
-extern void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr);
+int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp,
+ unsigned int blocks, unsigned int revokes,
+ unsigned long ip);
+int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
+ unsigned int revokes);
+
+void gfs2_trans_end(struct gfs2_sbd *sdp);
+void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh);
+void gfs2_trans_add_databufs(struct gfs2_glock *gl, struct folio *folio,
+ size_t from, size_t len);
+void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh);
+void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
+void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
+void gfs2_trans_free(struct gfs2_sbd *sdp, struct gfs2_trans *tr);
#endif /* __TRANS_DOT_H__ */
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 7a6aeffcdf5c..02603200846d 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -9,6 +9,7 @@
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
+#include <linux/kthread.h>
#include <linux/crc32.h>
#include <linux/gfs2_ondisk.h>
#include <linux/delay.h>
@@ -57,7 +58,7 @@ int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
struct gfs2_inode *ip;
ip = GFS2_I(jd->jd_inode);
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP |
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_RECOVER |
GL_EXACT | GL_NOCACHE, &j_gh);
if (error) {
if (verbose)
@@ -72,7 +73,7 @@ int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
"mount.\n");
goto out_unlock;
}
- error = gfs2_find_jhead(jd, &head, false);
+ error = gfs2_find_jhead(jd, &head);
if (error) {
if (verbose)
fs_err(sdp, "Error parsing journal for spectator "
@@ -93,222 +94,53 @@ out_unlock:
}
/**
- * gfs2_freeze_lock - hold the freeze glock
+ * gfs2_freeze_lock_shared - hold the freeze glock in shared mode
* @sdp: the superblock
- * @freeze_gh: pointer to the requested holder
- * @caller_flags: any additional flags needed by the caller
*/
-int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh,
- int caller_flags)
+int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp)
{
- int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags;
+ int flags = LM_FLAG_RECOVER | GL_EXACT;
int error;
error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags,
- freeze_gh);
+ &sdp->sd_freeze_gh);
if (error && error != GLR_TRYFAILED)
- fs_err(sdp, "can't lock the freeze lock: %d\n", error);
+ fs_err(sdp, "can't lock the freeze glock: %d\n", error);
return error;
}
-void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh)
+void gfs2_freeze_unlock(struct gfs2_sbd *sdp)
{
- if (gfs2_holder_initialized(freeze_gh))
- gfs2_glock_dq_uninit(freeze_gh);
+ if (gfs2_holder_initialized(&sdp->sd_freeze_gh))
+ gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
}
-static void signal_our_withdraw(struct gfs2_sbd *sdp)
+static void do_withdraw(struct gfs2_sbd *sdp)
{
- struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl;
- struct inode *inode;
- struct gfs2_inode *ip;
- struct gfs2_glock *i_gl;
- u64 no_formal_ino;
- int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
- int ret = 0;
- int tries;
-
- if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc)
+ down_write(&sdp->sd_log_flush_lock);
+ if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
+ up_write(&sdp->sd_log_flush_lock);
return;
-
- gfs2_ail_drain(sdp); /* frees all transactions */
- inode = sdp->sd_jdesc->jd_inode;
- ip = GFS2_I(inode);
- i_gl = ip->i_gl;
- no_formal_ino = ip->i_no_formal_ino;
-
- /* Prevent any glock dq until withdraw recovery is complete */
- set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
- /*
- * Don't tell dlm we're bailing until we have no more buffers in the
- * wind. If journal had an IO error, the log code should just purge
- * the outstanding buffers rather than submitting new IO. Making the
- * file system read-only will flush the journal, etc.
- *
- * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown
- * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write
- * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and
- * therefore we need to clear SDF_JOURNAL_LIVE manually.
- */
- clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
- if (!sb_rdonly(sdp->sd_vfs)) {
- struct gfs2_holder freeze_gh;
-
- gfs2_holder_mark_uninitialized(&freeze_gh);
- if (sdp->sd_freeze_gl &&
- !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
- ret = gfs2_freeze_lock(sdp, &freeze_gh,
- log_write_allowed ? 0 : LM_FLAG_TRY);
- if (ret == GLR_TRYFAILED)
- ret = 0;
- }
- if (!ret)
- gfs2_make_fs_ro(sdp);
- /*
- * Dequeue any pending non-system glock holders that can no
- * longer be granted because the file system is withdrawn.
- */
- gfs2_gl_dq_holders(sdp);
- gfs2_freeze_unlock(&freeze_gh);
- }
-
- if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
- if (!ret)
- ret = -EIO;
- clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
- goto skip_recovery;
- }
- /*
- * Drop the glock for our journal so another node can recover it.
- */
- if (gfs2_holder_initialized(&sdp->sd_journal_gh)) {
- gfs2_glock_dq_wait(&sdp->sd_journal_gh);
- gfs2_holder_uninit(&sdp->sd_journal_gh);
- }
- sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE;
- gfs2_glock_dq(&sdp->sd_jinode_gh);
- if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) {
- /* Make sure gfs2_unfreeze works if partially-frozen */
- flush_work(&sdp->sd_freeze_work);
- atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
- thaw_super(sdp->sd_vfs);
- } else {
- wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE,
- TASK_UNINTERRUPTIBLE);
- }
-
- /*
- * holder_uninit to force glock_put, to force dlm to let go
- */
- gfs2_holder_uninit(&sdp->sd_jinode_gh);
-
- /*
- * Note: We need to be careful here:
- * Our iput of jd_inode will evict it. The evict will dequeue its
- * glock, but the glock dq will wait for the withdraw unless we have
- * exception code in glock_dq.
- */
- iput(inode);
- sdp->sd_jdesc->jd_inode = NULL;
- /*
- * Wait until the journal inode's glock is freed. This allows try locks
- * on other nodes to be successful, otherwise we remain the owner of
- * the glock as far as dlm is concerned.
- */
- if (i_gl->gl_ops->go_free) {
- set_bit(GLF_FREEING, &i_gl->gl_flags);
- wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
}
+ clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+ up_write(&sdp->sd_log_flush_lock);
- /*
- * Dequeue the "live" glock, but keep a reference so it's never freed.
- */
- gfs2_glock_hold(live_gl);
- gfs2_glock_dq_wait(&sdp->sd_live_gh);
- /*
- * We enqueue the "live" glock in EX so that all other nodes
- * get a demote request and act on it. We don't really want the
- * lock in EX, so we send a "try" lock with 1CB to produce a callback.
- */
- fs_warn(sdp, "Requesting recovery of jid %d.\n",
- sdp->sd_lockstruct.ls_jid);
- gfs2_holder_reinit(LM_ST_EXCLUSIVE,
- LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | GL_NOPID,
- &sdp->sd_live_gh);
- msleep(GL_GLOCK_MAX_HOLD);
- /*
- * This will likely fail in a cluster, but succeed standalone:
- */
- ret = gfs2_glock_nq(&sdp->sd_live_gh);
+ gfs2_ail_drain(sdp); /* frees all transactions */
- /*
- * If we actually got the "live" lock in EX mode, there are no other
- * nodes available to replay our journal. So we try to replay it
- * ourselves. We hold the "live" glock to prevent other mounters
- * during recovery, then just dequeue it and reacquire it in our
- * normal SH mode. Just in case the problem that caused us to
- * withdraw prevents us from recovering our journal (e.g. io errors
- * and such) we still check if the journal is clean before proceeding
- * but we may wait forever until another mounter does the recovery.
- */
- if (ret == 0) {
- fs_warn(sdp, "No other mounters found. Trying to recover our "
- "own journal jid %d.\n", sdp->sd_lockstruct.ls_jid);
- if (gfs2_recover_journal(sdp->sd_jdesc, 1))
- fs_warn(sdp, "Unable to recover our journal jid %d.\n",
- sdp->sd_lockstruct.ls_jid);
- gfs2_glock_dq_wait(&sdp->sd_live_gh);
- gfs2_holder_reinit(LM_ST_SHARED,
- LM_FLAG_NOEXP | GL_EXACT | GL_NOPID,
- &sdp->sd_live_gh);
- gfs2_glock_nq(&sdp->sd_live_gh);
- }
+ wake_up(&sdp->sd_logd_waitq);
+ wake_up(&sdp->sd_quota_wait);
- gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */
- clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
+ wait_event_timeout(sdp->sd_log_waitq,
+ gfs2_log_is_empty(sdp),
+ HZ * 5);
- /*
- * At this point our journal is evicted, so we need to get a new inode
- * for it. Once done, we need to call gfs2_find_jhead which
- * calls gfs2_map_journal_extents to map it for us again.
- *
- * Note that we don't really want it to look up a FREE block. The
- * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup
- * which would otherwise fail because it requires grabbing an rgrp
- * glock, which would fail with -EIO because we're withdrawing.
- */
- inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN,
- sdp->sd_jdesc->jd_no_addr, no_formal_ino,
- GFS2_BLKST_FREE);
- if (IS_ERR(inode)) {
- fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n",
- sdp->sd_lockstruct.ls_jid, PTR_ERR(inode));
- goto skip_recovery;
- }
- sdp->sd_jdesc->jd_inode = inode;
- d_mark_dontcache(inode);
+ sdp->sd_vfs->s_flags |= SB_RDONLY;
/*
- * Now wait until recovery is complete.
+ * Dequeue any pending non-system glock holders that can no
+ * longer be granted because the file system is withdrawn.
*/
- for (tries = 0; tries < 10; tries++) {
- ret = check_journal_clean(sdp, sdp->sd_jdesc, false);
- if (!ret)
- break;
- msleep(HZ);
- fs_warn(sdp, "Waiting for journal recovery jid %d.\n",
- sdp->sd_lockstruct.ls_jid);
- }
-skip_recovery:
- if (!ret)
- fs_warn(sdp, "Journal recovery complete for jid %d.\n",
- sdp->sd_lockstruct.ls_jid);
- else
- fs_warn(sdp, "Journal recovery skipped for jid %d until next "
- "mount.\n", sdp->sd_lockstruct.ls_jid);
- fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held);
- sdp->sd_glock_dqs_held = 0;
- wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY);
+ gfs2_withdraw_glocks(sdp);
}
void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...)
@@ -327,50 +159,108 @@ void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...)
va_end(args);
}
-int gfs2_withdraw(struct gfs2_sbd *sdp)
+/**
+ * gfs2_offline_uevent - run gfs2_withdraw_helper
+ * @sdp: The GFS2 superblock
+ *
+ * Returns: %true if the helper reported that the shared block device
+ * was deactivated.
+ */
+static bool gfs2_offline_uevent(struct gfs2_sbd *sdp)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
- const struct lm_lockops *lm = ls->ls_ops;
+ long timeout;
- if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
- test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
- if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags))
- return -1;
+ /* Skip protocol "lock_nolock", which doesn't require shared storage. */
+ if (!ls->ls_ops->lm_lock)
+ return false;
+
+ /*
+ * The gfs2_withdraw_helper replies by writing one of the following
+ * status codes to "/sys$DEVPATH/lock_module/withdraw":
+ *
+ * 0 - The shared block device has been marked inactive. Future write
+ * operations will fail.
+ *
+ * 1 - The shared block device may still be active and carry out
+ * write operations.
+ *
+ * If the "offline" uevent isn't reacted upon in time, the event
+ * handler is assumed to have failed.
+ */
- wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG,
- TASK_UNINTERRUPTIBLE);
- return -1;
+ sdp->sd_withdraw_helper_status = -1;
+ kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
+ timeout = gfs2_tune_get(sdp, gt_withdraw_helper_timeout) * HZ;
+ wait_for_completion_timeout(&sdp->sd_withdraw_helper, timeout);
+ if (sdp->sd_withdraw_helper_status == -1) {
+ fs_err(sdp, "%s timed out\n", "gfs2_withdraw_helper");
+ } else {
+ fs_err(sdp, "%s %s with status %d\n",
+ "gfs2_withdraw_helper",
+ sdp->sd_withdraw_helper_status == 0 ?
+ "succeeded" : "failed",
+ sdp->sd_withdraw_helper_status);
}
+ return sdp->sd_withdraw_helper_status == 0;
+}
+
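
The sentinel-plus-completion handshake in gfs2_offline_uevent() deserves a note: the status is preset to -1, the uevent fires, and wait_for_completion_timeout() bounds the wait, so a status still equal to -1 afterwards means the helper never answered. The pattern reduced to its parts, with illustrative names in place of the gfs2 symbols:

#include <linux/completion.h>
#include <linux/jiffies.h>
#include <linux/kobject.h>

struct example_dev {
	struct kobject kobj;
	struct completion helper_done;
	int helper_status;
};

/* Sketch: distinguish "helper answered" from "helper timed out" by
 * whether the sentinel status was overwritten. */
static int ask_helper(struct example_dev *dev)
{
	dev->helper_status = -1;			/* sentinel */
	reinit_completion(&dev->helper_done);
	kobject_uevent(&dev->kobj, KOBJ_OFFLINE);	/* fire the event */
	wait_for_completion_timeout(&dev->helper_done, 30 * HZ);
	return dev->helper_status;			/* -1 on timeout */
}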
+void gfs2_withdraw_func(struct work_struct *work)
+{
+ struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_withdraw_work);
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ const struct lm_lockops *lm = ls->ls_ops;
+ bool device_inactive;
- set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
+ if (test_bit(SDF_KILL, &sdp->sd_flags))
+ return;
- if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
- fs_err(sdp, "about to withdraw this file system\n");
- BUG_ON(sdp->sd_args.ar_debug);
+ BUG_ON(sdp->sd_args.ar_debug);
- signal_our_withdraw(sdp);
+ /*
+ * Try to deactivate the shared block device so that no more I/O will
+ * go through. If successful, we can immediately trigger remote
+ * recovery. Otherwise, we must first empty out all our local caches.
+ */
- kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
+ device_inactive = gfs2_offline_uevent(sdp);
- if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
- wait_for_completion(&sdp->sd_wdack);
+ if (sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE && !device_inactive)
+ panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname);
- if (lm->lm_unmount) {
- fs_err(sdp, "telling LM to unmount\n");
- lm->lm_unmount(sdp);
+ if (lm->lm_unmount) {
+ if (device_inactive) {
+ lm->lm_unmount(sdp, false);
+ do_withdraw(sdp);
+ } else {
+ do_withdraw(sdp);
+ lm->lm_unmount(sdp, false);
}
- set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
- fs_err(sdp, "File system withdrawn\n");
+ } else {
+ do_withdraw(sdp);
+ }
+
+ fs_err(sdp, "file system withdrawn\n");
+}
+
+void gfs2_withdraw(struct gfs2_sbd *sdp)
+{
+ if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW ||
+ sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE) {
+ if (test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags))
+ return;
+
dump_stack();
- clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
- smp_mb__after_atomic();
- wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG);
+ /*
+ * There is no need to withdraw when the superblock hasn't been
+ * fully initialized yet.
+ */
+ if (!(sdp->sd_vfs->s_flags & SB_BORN))
+ return;
+ fs_err(sdp, "about to withdraw this file system\n");
+ schedule_work(&sdp->sd_withdraw_work);
+ return;
}
if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname);
-
- return -1;
}
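
gfs2_withdraw() itself is now a cheap, idempotent trigger: test_and_set_bit() ensures the heavyweight teardown is scheduled at most once however many errors fire concurrently, and the teardown runs from a workqueue where it may sleep. The one-shot-trigger pattern in isolation, with illustrative names:

#include <linux/bitops.h>
#include <linux/workqueue.h>

static void teardown_fn(struct work_struct *work)
{
	/* process context: may sleep, take locks, do I/O */
}

static DECLARE_WORK(teardown_work, teardown_fn);
static unsigned long state_flags;
#define STATE_DEAD 0

/* Sketch: an error trigger that defers sleepable teardown exactly once. */
static void mark_dead(void)
{
	if (test_and_set_bit(STATE_DEAD, &state_flags))
		return;				/* already triggered */
	schedule_work(&teardown_work);
}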
/*
@@ -378,28 +268,17 @@ int gfs2_withdraw(struct gfs2_sbd *sdp)
*/
void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
- const char *function, char *file, unsigned int line,
- bool delayed)
+ const char *function, char *file, unsigned int line)
{
if (gfs2_withdrawn(sdp))
return;
fs_err(sdp,
- "fatal: assertion \"%s\" failed\n"
- " function = %s, file = %s, line = %u\n",
+ "fatal: assertion \"%s\" failed - "
+ "function = %s, file = %s, line = %u\n",
assertion, function, file, line);
- /*
- * If errors=panic was specified on mount, it won't help to delay the
- * withdraw.
- */
- if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
- delayed = false;
-
- if (delayed)
- gfs2_withdraw_delayed(sdp);
- else
- gfs2_withdraw(sdp);
+ gfs2_withdraw(sdp);
dump_stack();
}
@@ -416,7 +295,8 @@ void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
return;
if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW)
- fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n",
+ fs_warn(sdp, "warning: assertion \"%s\" failed - "
+ "function = %s, file = %s, line = %u\n",
assertion, function, file, line);
if (sdp->sd_args.ar_debug)
@@ -425,10 +305,10 @@ void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
dump_stack();
if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
- panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
- "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
+ panic("GFS2: fsid=%s: warning: assertion \"%s\" failed - "
+ "function = %s, file = %s, line = %u\n",
sdp->sd_fsname, assertion,
- sdp->sd_fsname, function, file, line);
+ function, file, line);
sdp->sd_last_warning = jiffies;
}
@@ -441,7 +321,8 @@ void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function,
char *file, unsigned int line)
{
gfs2_lm(sdp,
- "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n",
+ "fatal: filesystem consistency error - "
+ "function = %s, file = %s, line = %u\n",
function, file, line);
gfs2_withdraw(sdp);
}
@@ -456,9 +337,9 @@ void gfs2_consist_inode_i(struct gfs2_inode *ip,
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
gfs2_lm(sdp,
- "fatal: filesystem consistency error\n"
- " inode = %llu %llu\n"
- " function = %s, file = %s, line = %u\n",
+ "fatal: filesystem consistency error - "
+ "inode = %llu %llu, "
+ "function = %s, file = %s, line = %u\n",
(unsigned long long)ip->i_no_formal_ino,
(unsigned long long)ip->i_no_addr,
function, file, line);
@@ -479,9 +360,9 @@ void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
gfs2_lm(sdp,
- "fatal: filesystem consistency error\n"
- " RG = %llu\n"
- " function = %s, file = %s, line = %u\n",
+ "fatal: filesystem consistency error - "
+ "RG = %llu, "
+ "function = %s, file = %s, line = %u\n",
(unsigned long long)rgd->rd_addr,
function, file, line);
gfs2_dump_glock(NULL, rgd->rd_gl, 1);
@@ -490,46 +371,36 @@ void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
/*
* gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
- * Returns: -1 if this call withdrew the machine,
- * -2 if it was already withdrawn
*/
-int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
- const char *type, const char *function, char *file,
- unsigned int line)
+void gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
+ const char *function, char *file,
+ unsigned int line)
{
- int me;
-
gfs2_lm(sdp,
- "fatal: invalid metadata block\n"
- " bh = %llu (%s)\n"
- " function = %s, file = %s, line = %u\n",
- (unsigned long long)bh->b_blocknr, type,
+ "fatal: invalid metadata block - "
+ "bh = %llu (bad magic number), "
+ "function = %s, file = %s, line = %u\n",
+ (unsigned long long)bh->b_blocknr,
function, file, line);
- me = gfs2_withdraw(sdp);
- return (me) ? -1 : -2;
+ gfs2_withdraw(sdp);
}
/*
* gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
- * Returns: -1 if this call withdrew the machine,
- * -2 if it was already withdrawn
*/
-int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
- u16 type, u16 t, const char *function,
- char *file, unsigned int line)
+void gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
+ u16 type, u16 t, const char *function,
+ char *file, unsigned int line)
{
- int me;
-
gfs2_lm(sdp,
- "fatal: invalid metadata block\n"
- " bh = %llu (type: exp=%u, found=%u)\n"
- " function = %s, file = %s, line = %u\n",
+ "fatal: invalid metadata block - "
+ "bh = %llu (type: exp=%u, found=%u), "
+ "function = %s, file = %s, line = %u\n",
(unsigned long long)bh->b_blocknr, type, t,
function, file, line);
- me = gfs2_withdraw(sdp);
- return (me) ? -1 : -2;
+ gfs2_withdraw(sdp);
}
/*
@@ -538,33 +409,29 @@ int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
- * 0 if it was already withdrawn
*/
-int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
- unsigned int line)
+void gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
+ unsigned int line)
{
gfs2_lm(sdp,
- "fatal: I/O error\n"
- " function = %s, file = %s, line = %u\n",
+ "fatal: I/O error - "
+ "function = %s, file = %s, line = %u\n",
function, file, line);
- return gfs2_withdraw(sdp);
+ gfs2_withdraw(sdp);
}
/*
- * gfs2_io_error_bh_i - Flag a buffer I/O error
- * @withdraw: withdraw the filesystem
+ * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw
*/
void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
- const char *function, char *file, unsigned int line,
- bool withdraw)
+ const char *function, char *file, unsigned int line)
{
if (gfs2_withdrawn(sdp))
return;
- fs_err(sdp, "fatal: I/O error\n"
- " block = %llu\n"
- " function = %s, file = %s, line = %u\n",
+ fs_err(sdp, "fatal: I/O error - "
+ "block = %llu, "
+ "function = %s, file = %s, line = %u\n",
(unsigned long long)bh->b_blocknr, function, file, line);
- if (withdraw)
- gfs2_withdraw(sdp);
+ gfs2_withdraw(sdp);
}
-
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 78ec190f4155..ffcc47d6b0b4 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -37,24 +37,14 @@ do { \
void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
- const char *function, char *file, unsigned int line,
- bool delayed);
+ const char *function, char *file, unsigned int line);
#define gfs2_assert_withdraw(sdp, assertion) \
({ \
bool _bool = (assertion); \
if (unlikely(!_bool)) \
gfs2_assert_withdraw_i((sdp), #assertion, \
- __func__, __FILE__, __LINE__, false); \
- !_bool; \
- })
-
-#define gfs2_assert_withdraw_delayed(sdp, assertion) \
- ({ \
- bool _bool = (assertion); \
- if (unlikely(!_bool)) \
- gfs2_assert_withdraw_i((sdp), #assertion, \
- __func__, __FILE__, __LINE__, true); \
+ __func__, __FILE__, __LINE__); \
!_bool; \
})
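
Note that the statement expression still evaluates to !_bool, i.e. true exactly when the assertion failed, so a caller can withdraw and branch in one step. An illustrative caller (not taken from this patch):

/* Illustrative: the macro's value lets the withdraw and the error
 * return read as a single check. */
static int example_check(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd,
			 struct buffer_head *bh)
{
	if (gfs2_assert_withdraw(sdp, bd->bd_bh == bh))
		return -EIO;
	return 0;
}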
@@ -91,9 +81,9 @@ void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
gfs2_consist_rgrpd_i((rgd), __func__, __FILE__, __LINE__)
-int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
- const char *type, const char *function,
- char *file, unsigned int line);
+void gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
+ const char *function,
+ char *file, unsigned int line);
static inline int gfs2_meta_check(struct gfs2_sbd *sdp,
struct buffer_head *bh)
@@ -108,10 +98,10 @@ static inline int gfs2_meta_check(struct gfs2_sbd *sdp,
return 0;
}
-int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
- u16 type, u16 t,
- const char *function,
- char *file, unsigned int line);
+void gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
+ u16 type, u16 t,
+ const char *function,
+ char *file, unsigned int line);
static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
struct buffer_head *bh,
@@ -122,12 +112,16 @@ static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
u32 magic = be32_to_cpu(mh->mh_magic);
u16 t = be32_to_cpu(mh->mh_type);
- if (unlikely(magic != GFS2_MAGIC))
- return gfs2_meta_check_ii(sdp, bh, "magic number", function,
- file, line);
- if (unlikely(t != type))
- return gfs2_metatype_check_ii(sdp, bh, type, t, function,
- file, line);
+ if (unlikely(magic != GFS2_MAGIC)) {
+ gfs2_meta_check_ii(sdp, bh, function,
+ file, line);
+ return -EIO;
+ }
+ if (unlikely(t != type)) {
+ gfs2_metatype_check_ii(sdp, bh, type, t, function,
+ file, line);
+ return -EIO;
+ }
return 0;
}
@@ -144,28 +138,23 @@ static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type,
}
-int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
- char *file, unsigned int line);
+void gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
+ char *file, unsigned int line);
-extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
- bool verbose);
-extern int gfs2_freeze_lock(struct gfs2_sbd *sdp,
- struct gfs2_holder *freeze_gh, int caller_flags);
-extern void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh);
+int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
+ bool verbose);
+int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp);
+void gfs2_freeze_unlock(struct gfs2_sbd *sdp);
#define gfs2_io_error(sdp) \
gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__)
void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
- const char *function, char *file, unsigned int line,
- bool withdraw);
-
-#define gfs2_io_error_bh_wd(sdp, bh) \
-gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true)
+ const char *function, char *file, unsigned int line);
#define gfs2_io_error_bh(sdp, bh) \
-gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false)
+gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__)
extern struct kmem_cache *gfs2_glock_cachep;
@@ -190,37 +179,12 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
}
/**
- * gfs2_withdraw_delayed - withdraw as soon as possible without deadlocks
- * @sdp: the superblock
- */
-static inline void gfs2_withdraw_delayed(struct gfs2_sbd *sdp)
-{
- set_bit(SDF_WITHDRAWING, &sdp->sd_flags);
-}
-
-/**
- * gfs2_withdrawn - test whether the file system is withdrawing or withdrawn
+ * gfs2_withdrawn - test whether the file system is withdrawn
* @sdp: the superblock
*/
static inline bool gfs2_withdrawn(struct gfs2_sbd *sdp)
{
- return test_bit(SDF_WITHDRAWN, &sdp->sd_flags) ||
- test_bit(SDF_WITHDRAWING, &sdp->sd_flags);
-}
-
-/**
- * gfs2_withdrawing - check if a withdraw is pending
- * @sdp: the superblock
- */
-static inline bool gfs2_withdrawing(struct gfs2_sbd *sdp)
-{
- return test_bit(SDF_WITHDRAWING, &sdp->sd_flags) &&
- !test_bit(SDF_WITHDRAWN, &sdp->sd_flags);
-}
-
-static inline bool gfs2_withdraw_in_prog(struct gfs2_sbd *sdp)
-{
- return test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
+ return unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags));
}
#define gfs2_tune_get(sdp, field) \
@@ -228,6 +192,8 @@ gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
__printf(2, 3)
void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...);
-int gfs2_withdraw(struct gfs2_sbd *sdp);
+
+void gfs2_withdraw_func(struct work_struct *work);
+void gfs2_withdraw(struct gfs2_sbd *sdp);
#endif /* __UTIL_DOT_H__ */
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 518c0677e12a..df9c93de94c7 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -96,30 +96,34 @@ static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
return -EIO;
for (ea = GFS2_EA_BH2FIRST(bh);; prev = ea, ea = GFS2_EA2NEXT(ea)) {
- if (!GFS2_EA_REC_LEN(ea))
- goto fail;
+ if (!GFS2_EA_REC_LEN(ea)) {
+ gfs2_consist_inode(ip);
+ return -EIO;
+ }
if (!(bh->b_data <= (char *)ea && (char *)GFS2_EA2NEXT(ea) <=
- bh->b_data + bh->b_size))
- goto fail;
- if (!gfs2_eatype_valid(sdp, ea->ea_type))
- goto fail;
+ bh->b_data + bh->b_size)) {
+ gfs2_consist_inode(ip);
+ return -EIO;
+ }
+ if (!gfs2_eatype_valid(sdp, ea->ea_type)) {
+ gfs2_consist_inode(ip);
+ return -EIO;
+ }
error = ea_call(ip, bh, ea, prev, data);
if (error)
return error;
if (GFS2_EA_IS_LAST(ea)) {
if ((char *)GFS2_EA2NEXT(ea) !=
- bh->b_data + bh->b_size)
- goto fail;
+ bh->b_data + bh->b_size) {
+ gfs2_consist_inode(ip);
+ return -EIO;
+ }
break;
}
}
return error;
-
-fail:
- gfs2_consist_inode(ip);
- return -EIO;
}
static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
@@ -311,7 +315,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
ea->ea_num_ptrs = 0;
}
- ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_ctime_current(&ip->i_inode);
__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
gfs2_trans_end(sdp);
@@ -639,7 +643,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
u64 block;
int error;
- error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
+ error = gfs2_alloc_blocks(ip, &block, &n, 0);
if (error)
return error;
gfs2_trans_remove_revoke(sdp, block, 1);
@@ -701,7 +705,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
int mh_size = sizeof(struct gfs2_meta_header);
unsigned int n = 1;
- error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
+ error = gfs2_alloc_blocks(ip, &block, &n, 0);
if (error)
return error;
gfs2_trans_remove_revoke(sdp, block, 1);
@@ -763,7 +767,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
if (error)
goto out_end_trans;
- ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_ctime_current(&ip->i_inode);
__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
out_end_trans:
@@ -888,7 +892,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
if (es->es_el)
ea_set_remove_stuffed(ip, es->es_el);
- ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_ctime_current(&ip->i_inode);
__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
gfs2_trans_end(GFS2_SB(&ip->i_inode));
@@ -1002,7 +1006,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
} else {
u64 blk;
unsigned int n = 1;
- error = gfs2_alloc_blocks(ip, &blk, &n, 0, NULL);
+ error = gfs2_alloc_blocks(ip, &blk, &n, 0);
if (error)
return error;
gfs2_trans_remove_revoke(sdp, blk, 1);
@@ -1106,7 +1110,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
ea->ea_type = GFS2_EATYPE_UNUSED;
}
- ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_ctime_current(&ip->i_inode);
__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
gfs2_trans_end(GFS2_SB(&ip->i_inode));
@@ -1225,7 +1229,7 @@ int __gfs2_xattr_set(struct inode *inode, const char *name,
}
static int gfs2_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *name, const void *value,
size_t size, int flags)
@@ -1379,7 +1383,7 @@ out:
return error;
}
-static int ea_dealloc_block(struct gfs2_inode *ip)
+static int ea_dealloc_block(struct gfs2_inode *ip, bool initialized)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd;
@@ -1412,7 +1416,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
ip->i_eattr = 0;
gfs2_add_inode_blocks(&ip->i_inode, -1);
- if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) {
+ if (initialized) {
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
gfs2_trans_add_meta(ip->i_gl, dibh);
@@ -1431,11 +1435,12 @@ out_gunlock:
/**
* gfs2_ea_dealloc - deallocate the extended attribute fork
* @ip: the inode
+ * @initialized: xattrs have been initialized
*
* Returns: errno
*/
-int gfs2_ea_dealloc(struct gfs2_inode *ip)
+int gfs2_ea_dealloc(struct gfs2_inode *ip, bool initialized)
{
int error;
@@ -1447,7 +1452,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
if (error)
return error;
- if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) {
+ if (initialized) {
error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
if (error)
goto out_quota;
@@ -1459,7 +1464,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
}
}
- error = ea_dealloc_block(ip);
+ error = ea_dealloc_block(ip, initialized);
out_quota:
gfs2_quota_unhold(ip);
@@ -1494,16 +1499,14 @@ static const struct xattr_handler gfs2_xattr_trusted_handler = {
.set = gfs2_xattr_set,
};
-const struct xattr_handler *gfs2_xattr_handlers_max[] = {
+const struct xattr_handler * const gfs2_xattr_handlers_max[] = {
/* GFS2_FS_FORMAT_MAX */
&gfs2_xattr_trusted_handler,
/* GFS2_FS_FORMAT_MIN */
&gfs2_xattr_user_handler,
&gfs2_xattr_security_handler,
- &posix_acl_access_xattr_handler,
- &posix_acl_default_xattr_handler,
NULL,
};
-const struct xattr_handler **gfs2_xattr_handlers_min = gfs2_xattr_handlers_max + 1;
+const struct xattr_handler * const *gfs2_xattr_handlers_min = gfs2_xattr_handlers_max + 1;
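
The max/min pair is one NULL-terminated table ordered so that handlers valid only for the newest on-disk format come first; gfs2_xattr_handlers_min aliases the same array one element in, letting older-format mounts skip the format-max-only handler without a second table. The pattern in miniature, with illustrative contents:

/* Sketch: one array, two views; entries before the "min" offset are
 * exposed only to newest-format consumers. */
static const char * const features_max[] = {
	"format-max-only",	/* newest on-disk format only */
	"common-feature-a",
	"common-feature-b",
	NULL,
};
static const char * const *features_min = features_max + 1;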
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index 2aed9d7d483d..3c9788e0e137 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -50,14 +50,14 @@ struct gfs2_ea_location {
struct gfs2_ea_header *el_prev;
};
-extern int __gfs2_xattr_set(struct inode *inode, const char *name,
- const void *value, size_t size,
- int flags, int type);
-extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
-extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
+int __gfs2_xattr_set(struct inode *inode, const char *name,
+ const void *value, size_t size,
+ int flags, int type);
+ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
+int gfs2_ea_dealloc(struct gfs2_inode *ip, bool initialized);
/* Exported to acl.c */
-extern int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data);
+int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data);
#endif /* __EATTR_DOT_H__ */