summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-10-23 11:47:42 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-10-23 11:47:42 -0700
commit0adc313c4f20639f7e235b8d6719d96a2024cf91 (patch)
tree3100bd1b91229bc53a8eb838a1788299837bb056
parent0613ed91901b5f87afcd28b4560fb0aa37a0db13 (diff)
parentbedb0f056faa94e953e7b3da5a77d25e0008364b (diff)
Merge tag 'gfs2-for-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2
Pull gfs2 updates from Andreas Gruenbacher: - Use iomap for non-journaled buffered I/O. This largely eliminates buffer heads on filesystems where the block size matches the page size. Many thanks to Christoph Hellwig for this patch! - Fixes for some more journaled data filesystem bugs, found by running xfstests with data journaling on for all files (chattr +j $MNT) (Bob Peterson) - gfs2_evict_inode refactoring (Bob Peterson) - Use the statfs data in the journal during recovery instead of reading it in from the local statfs inodes (Abhi Das) - Several other minor fixes by various people * tag 'gfs2-for-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2: (30 commits) gfs2: Recover statfs info in journal head gfs2: lookup local statfs inodes prior to journal recovery gfs2: Add fields for statfs info in struct gfs2_log_header_host gfs2: Ignore subsequent errors after withdraw in rgrp_go_sync gfs2: Eliminate gl_vm gfs2: Only access gl_delete for iopen glocks gfs2: Fix comments to glock_hash_walk gfs2: eliminate GLF_QUEUED flag in favor of list_empty(gl_holders) gfs2: Ignore journal log writes for jdata holes gfs2: simplify gfs2_block_map gfs2: Only set PageChecked if we have a transaction gfs2: don't lock sd_ail_lock in gfs2_releasepage gfs2: make gfs2_ail1_empty_one return the count of active items gfs2: Wipe jdata and ail1 in gfs2_journal_wipe, formerly gfs2_meta_wipe gfs2: enhance log_blocks trace point to show log blocks free gfs2: add missing log_blocks trace points in gfs2_write_revokes gfs2: rename gfs2_write_full_page to gfs2_write_jdata_page, remove parm gfs2: add validation checks for size of superblock gfs2: use-after-free in sysfs deregistration gfs2: Fix NULL pointer dereference in gfs2_rgrp_dump ...
-rw-r--r--fs/gfs2/aops.c68
-rw-r--r--fs/gfs2/bmap.c62
-rw-r--r--fs/gfs2/bmap.h1
-rw-r--r--fs/gfs2/glock.c52
-rw-r--r--fs/gfs2/glops.c36
-rw-r--r--fs/gfs2/incore.h29
-rw-r--r--fs/gfs2/log.c89
-rw-r--r--fs/gfs2/log.h2
-rw-r--r--fs/gfs2/lops.c2
-rw-r--r--fs/gfs2/lops.h1
-rw-r--r--fs/gfs2/meta_io.c81
-rw-r--r--fs/gfs2/meta_io.h2
-rw-r--r--fs/gfs2/ops_fstype.c173
-rw-r--r--fs/gfs2/recovery.c108
-rw-r--r--fs/gfs2/rgrp.c19
-rw-r--r--fs/gfs2/rgrp.h2
-rw-r--r--fs/gfs2/super.c220
-rw-r--r--fs/gfs2/super.h5
-rw-r--r--fs/gfs2/sys.c5
-rw-r--r--fs/gfs2/trace_gfs2.h7
-rw-r--r--fs/gfs2/util.c2
-rw-r--r--fs/gfs2/util.h10
22 files changed, 675 insertions, 301 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index d4af283fc888..9cd2ecad07db 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -91,22 +91,13 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
struct inode *inode = page->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- loff_t i_size = i_size_read(inode);
- pgoff_t end_index = i_size >> PAGE_SHIFT;
- unsigned offset;
+ struct iomap_writepage_ctx wpc = { };
if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
goto out;
if (current->journal_info)
goto redirty;
- /* Is the page fully outside i_size? (truncate in progress) */
- offset = i_size & (PAGE_SIZE-1);
- if (page->index > end_index || (page->index == end_index && !offset)) {
- page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
- goto out;
- }
-
- return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
+ return iomap_writepage(page, wbc, &wpc, &gfs2_writeback_ops);
redirty:
redirty_page_for_writepage(wbc, page);
@@ -115,11 +106,16 @@ out:
return 0;
}
-/* This is the same as calling block_write_full_page, but it also
+/**
+ * gfs2_write_jdata_page - gfs2 jdata-specific version of block_write_full_page
+ * @page: The page to write
+ * @wbc: The writeback control
+ *
+ * This is the same as calling block_write_full_page, but it also
* writes pages outside of i_size
*/
-static int gfs2_write_full_page(struct page *page, get_block_t *get_block,
- struct writeback_control *wbc)
+static int gfs2_write_jdata_page(struct page *page,
+ struct writeback_control *wbc)
{
struct inode * const inode = page->mapping->host;
loff_t i_size = i_size_read(inode);
@@ -137,7 +133,7 @@ static int gfs2_write_full_page(struct page *page, get_block_t *get_block,
if (page->index == end_index && offset)
zero_user_segment(page, offset, PAGE_SIZE);
- return __block_write_full_page(inode, page, get_block, wbc,
+ return __block_write_full_page(inode, page, gfs2_get_block_noalloc, wbc,
end_buffer_async_write);
}
@@ -166,7 +162,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
}
gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize);
}
- return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc);
+ return gfs2_write_jdata_page(page, wbc);
}
/**
@@ -208,7 +204,8 @@ static int gfs2_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
- int ret = mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
+ struct iomap_writepage_ctx wpc = { };
+ int ret;
/*
* Even if we didn't write any pages here, we might still be holding
@@ -216,9 +213,9 @@ static int gfs2_writepages(struct address_space *mapping,
* want balance_dirty_pages() to loop indefinitely trying to write out
* pages held in the ail that it can't find.
*/
+ ret = iomap_writepages(mapping, wbc, &wpc, &gfs2_writeback_ops);
if (ret == 0)
set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
-
return ret;
}
@@ -470,12 +467,13 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
static int __gfs2_readpage(void *file, struct page *page)
{
- struct gfs2_inode *ip = GFS2_I(page->mapping->host);
- struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
+ struct inode *inode = page->mapping->host;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
int error;
- if (i_blocksize(page->mapping->host) == PAGE_SIZE &&
- !page_has_buffers(page)) {
+ if (!gfs2_is_jdata(ip) ||
+ (i_blocksize(inode) == PAGE_SIZE && !page_has_buffers(page))) {
error = iomap_readpage(page, &gfs2_iomap_ops);
} else if (gfs2_is_stuffed(ip)) {
error = stuffed_readpage(ip, page);
@@ -563,8 +561,12 @@ static void gfs2_readahead(struct readahead_control *rac)
struct inode *inode = rac->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
- if (!gfs2_is_stuffed(ip))
+ if (gfs2_is_stuffed(ip))
+ ;
+ else if (gfs2_is_jdata(ip))
mpage_readahead(rac, gfs2_block_map);
+ else
+ iomap_readahead(rac, &gfs2_iomap_ops);
}
/**
@@ -621,7 +623,8 @@ out:
static int jdata_set_page_dirty(struct page *page)
{
- SetPageChecked(page);
+ if (current->journal_info)
+ SetPageChecked(page);
return __set_page_dirty_buffers(page);
}
@@ -663,8 +666,11 @@ static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
if (bd) {
if (!list_empty(&bd->bd_list) && !buffer_pinned(bh))
list_del_init(&bd->bd_list);
- else
+ else {
+ spin_lock(&sdp->sd_ail_lock);
gfs2_remove_from_journal(bh, REMOVE_JDATA);
+ spin_unlock(&sdp->sd_ail_lock);
+ }
}
bh->b_bdev = NULL;
clear_buffer_mapped(bh);
@@ -736,7 +742,6 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
*/
gfs2_log_lock(sdp);
- spin_lock(&sdp->sd_ail_lock);
head = bh = page_buffers(page);
do {
if (atomic_read(&bh->b_count))
@@ -748,7 +753,6 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
goto cannot_release;
bh = bh->b_this_page;
} while(bh != head);
- spin_unlock(&sdp->sd_ail_lock);
head = bh = page_buffers(page);
do {
@@ -774,7 +778,6 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
return try_to_free_buffers(page);
cannot_release:
- spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
return 0;
}
@@ -784,12 +787,13 @@ static const struct address_space_operations gfs2_aops = {
.writepages = gfs2_writepages,
.readpage = gfs2_readpage,
.readahead = gfs2_readahead,
+ .set_page_dirty = iomap_set_page_dirty,
+ .releasepage = iomap_releasepage,
+ .invalidatepage = iomap_invalidatepage,
.bmap = gfs2_bmap,
- .invalidatepage = gfs2_invalidatepage,
- .releasepage = gfs2_releasepage,
.direct_IO = noop_direct_IO,
- .migratepage = buffer_migrate_page,
- .is_partially_uptodate = block_is_partially_uptodate,
+ .migratepage = iomap_migrate_page,
+ .is_partially_uptodate = iomap_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
};
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 0f69fbd4af66..8dff9cbd0a87 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -56,7 +56,6 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
u64 block, struct page *page)
{
struct inode *inode = &ip->i_inode;
- struct buffer_head *bh;
int release = 0;
if (!page || page->index) {
@@ -80,20 +79,21 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
SetPageUptodate(page);
}
- if (!page_has_buffers(page))
- create_empty_buffers(page, BIT(inode->i_blkbits),
- BIT(BH_Uptodate));
+ if (gfs2_is_jdata(ip)) {
+ struct buffer_head *bh;
- bh = page_buffers(page);
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, BIT(inode->i_blkbits),
+ BIT(BH_Uptodate));
- if (!buffer_mapped(bh))
- map_bh(bh, inode->i_sb, block);
+ bh = page_buffers(page);
+ if (!buffer_mapped(bh))
+ map_bh(bh, inode->i_sb, block);
- set_buffer_uptodate(bh);
- if (gfs2_is_jdata(ip))
+ set_buffer_uptodate(bh);
gfs2_trans_add_data(ip->i_gl, bh);
- else {
- mark_buffer_dirty(bh);
+ } else {
+ set_page_dirty(page);
gfs2_ordered_add_inode(ip);
}
@@ -1158,7 +1158,8 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
struct metapath mp = { .mp_aheight = 1, };
int ret;
- iomap->flags |= IOMAP_F_BUFFER_HEAD;
+ if (gfs2_is_jdata(ip))
+ iomap->flags |= IOMAP_F_BUFFER_HEAD;
trace_gfs2_iomap_start(ip, pos, length, flags);
if (gfs2_iomap_need_write_lock(flags)) {
@@ -1291,6 +1292,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
loff_t length = bh_map->b_size;
struct metapath mp = { .mp_aheight = 1, };
struct iomap iomap = { };
+ int flags = create ? IOMAP_WRITE : 0;
int ret;
clear_buffer_mapped(bh_map);
@@ -1298,15 +1300,14 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
clear_buffer_boundary(bh_map);
trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
- if (create) {
- ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
- if (!ret && iomap.type == IOMAP_HOLE)
+ ret = gfs2_iomap_get(inode, pos, length, flags, &iomap, &mp);
+ if (!ret && iomap.type == IOMAP_HOLE) {
+ if (create)
ret = gfs2_iomap_alloc(inode, &iomap, &mp);
- release_metapath(&mp);
- } else {
- ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
- release_metapath(&mp);
+ else
+ ret = -ENODATA;
}
+ release_metapath(&mp);
if (ret)
goto out;
@@ -2518,3 +2519,26 @@ out:
gfs2_trans_end(sdp);
return error;
}
+
+static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode,
+ loff_t offset)
+{
+ struct metapath mp = { .mp_aheight = 1, };
+ int ret;
+
+ if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode))))
+ return -EIO;
+
+ if (offset >= wpc->iomap.offset &&
+ offset < wpc->iomap.offset + wpc->iomap.length)
+ return 0;
+
+ memset(&wpc->iomap, 0, sizeof(wpc->iomap));
+ ret = gfs2_iomap_get(inode, offset, INT_MAX, 0, &wpc->iomap, &mp);
+ release_metapath(&mp);
+ return ret;
+}
+
+const struct iomap_writeback_ops gfs2_writeback_ops = {
+ .map_blocks = gfs2_map_blocks,
+};
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index b88fd45ab79f..aed4632d47d3 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -44,6 +44,7 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
}
extern const struct iomap_ops gfs2_iomap_ops;
+extern const struct iomap_writeback_ops gfs2_writeback_ops;
extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
extern int gfs2_block_map(struct inode *inode, sector_t lblock,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f13b136654ca..5441c17562c5 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -270,7 +270,12 @@ static void __gfs2_glock_put(struct gfs2_glock *gl)
gfs2_glock_remove_from_lru(gl);
spin_unlock(&gl->gl_lockref.lock);
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
- GLOCK_BUG_ON(gl, mapping && mapping->nrpages && !gfs2_withdrawn(sdp));
+ if (mapping) {
+ truncate_inode_pages_final(mapping);
+ if (!gfs2_withdrawn(sdp))
+ GLOCK_BUG_ON(gl, mapping->nrpages ||
+ mapping->nrexceptional);
+ }
trace_gfs2_glock_put(gl);
sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
}
@@ -453,9 +458,6 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
else
gl->gl_lockref.count--;
}
- if (held1 && held2 && list_empty(&gl->gl_holders))
- clear_bit(GLF_QUEUED, &gl->gl_flags);
-
if (new_state != gl->gl_target)
/* shorten our minimum hold time */
gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
@@ -1049,7 +1051,8 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
gl->gl_object = NULL;
gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
- INIT_DELAYED_WORK(&gl->gl_delete, delete_work_func);
+ if (gl->gl_name.ln_type == LM_TYPE_IOPEN)
+ INIT_DELAYED_WORK(&gl->gl_delete, delete_work_func);
mapping = gfs2_glock2aspace(gl);
if (mapping) {
@@ -1345,7 +1348,6 @@ fail:
if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
insert_pt = &gh2->gh_list;
}
- set_bit(GLF_QUEUED, &gl->gl_flags);
trace_gfs2_glock_queue(gh, 1);
gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
@@ -1646,16 +1648,15 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
unsigned long now = jiffies;
gfs2_glock_hold(gl);
+ spin_lock(&gl->gl_lockref.lock);
holdtime = gl->gl_tchange + gl->gl_hold_time;
- if (test_bit(GLF_QUEUED, &gl->gl_flags) &&
+ if (!list_empty(&gl->gl_holders) &&
gl->gl_name.ln_type == LM_TYPE_INODE) {
if (time_before(now, holdtime))
delay = holdtime - now;
if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
delay = gl->gl_hold_time;
}
-
- spin_lock(&gl->gl_lockref.lock);
handle_callback(gl, state, delay, true);
__gfs2_glock_queue_work(gl, delay);
spin_unlock(&gl->gl_lockref.lock);
@@ -1842,10 +1843,9 @@ static struct shrinker glock_shrinker = {
};
/**
- * examine_bucket - Call a function for glock in a hash bucket
+ * glock_hash_walk - Call a function for glock in a hash bucket
* @examiner: the function
* @sdp: the filesystem
- * @bucket: the bucket
*
* Note that the function can be called multiple times on the same
* object. So the user must ensure that the function can cope with
@@ -1901,9 +1901,11 @@ bool gfs2_delete_work_queued(const struct gfs2_glock *gl)
static void flush_delete_work(struct gfs2_glock *gl)
{
- if (cancel_delayed_work(&gl->gl_delete)) {
- queue_delayed_work(gfs2_delete_workqueue,
- &gl->gl_delete, 0);
+ if (gl->gl_name.ln_type == LM_TYPE_IOPEN) {
+ if (cancel_delayed_work(&gl->gl_delete)) {
+ queue_delayed_work(gfs2_delete_workqueue,
+ &gl->gl_delete, 0);
+ }
}
gfs2_glock_queue_work(gl, 0);
}
@@ -2100,7 +2102,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
*p++ = 'I';
if (test_bit(GLF_FROZEN, gflags))
*p++ = 'F';
- if (test_bit(GLF_QUEUED, gflags))
+ if (!list_empty(&gl->gl_holders))
*p++ = 'q';
if (test_bit(GLF_LRU, gflags))
*p++ = 'L';
@@ -2415,7 +2417,7 @@ static const struct seq_operations gfs2_glstats_seq_ops = {
.show = gfs2_glstats_seq_show,
};
-static const struct seq_operations gfs2_sbstats_seq_ops = {
+static const struct seq_operations gfs2_sbstats_sops = {
.start = gfs2_sbstats_seq_start,
.next = gfs2_sbstats_seq_next,
.stop = gfs2_sbstats_seq_stop,
@@ -2468,16 +2470,6 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file)
return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops);
}
-static int gfs2_sbstats_open(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &gfs2_sbstats_seq_ops);
- if (ret == 0) {
- struct seq_file *seq = file->private_data;
- seq->private = inode->i_private; /* sdp */
- }
- return ret;
-}
-
static const struct file_operations gfs2_glocks_fops = {
.owner = THIS_MODULE,
.open = gfs2_glocks_open,
@@ -2494,13 +2486,7 @@ static const struct file_operations gfs2_glstats_fops = {
.release = gfs2_glocks_release,
};
-static const struct file_operations gfs2_sbstats_fops = {
- .owner = THIS_MODULE,
- .open = gfs2_sbstats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
+DEFINE_SEQ_ATTRIBUTE(gfs2_sbstats);
void gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
{
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index de1d5f1d9ff8..aa3f5236befb 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -178,6 +178,9 @@ static int rgrp_go_sync(struct gfs2_glock *gl)
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct address_space *mapping = &sdp->sd_aspace;
struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
+ const unsigned bsize = sdp->sd_sb.sb_bsize;
+ loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK;
+ loff_t end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;
int error;
if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
@@ -186,18 +189,13 @@ static int rgrp_go_sync(struct gfs2_glock *gl)
gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_RGRP_GO_SYNC);
- filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
- error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
- WARN_ON_ONCE(error);
+ filemap_fdatawrite_range(mapping, start, end);
+ error = filemap_fdatawait_range(mapping, start, end);
+ WARN_ON_ONCE(error && !gfs2_withdrawn(sdp));
mapping_set_error(mapping, error);
if (!error)
error = gfs2_ail_empty_gl(gl);
-
- spin_lock(&gl->gl_lockref.lock);
- rgd = gl->gl_object;
- if (rgd)
- gfs2_free_clones(rgd);
- spin_unlock(&gl->gl_lockref.lock);
+ gfs2_free_clones(rgd);
return error;
}
@@ -216,15 +214,23 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct address_space *mapping = &sdp->sd_aspace;
struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
+ const unsigned bsize = sdp->sd_sb.sb_bsize;
+ loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK;
+ loff_t end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;
- if (rgd)
- gfs2_rgrp_brelse(rgd);
-
+ gfs2_rgrp_brelse(rgd);
WARN_ON_ONCE(!(flags & DIO_METADATA));
- truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
+ truncate_inode_pages_range(mapping, start, end);
+ rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
+}
+
+static void gfs2_rgrp_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
+ const char *fs_id_buf)
+{
+ struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
if (rgd)
- rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
+ gfs2_rgrp_dump(seq, rgd, fs_id_buf);
}
static struct gfs2_inode *gfs2_glock2inode(struct gfs2_glock *gl)
@@ -712,7 +718,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
.go_sync = rgrp_go_sync,
.go_inval = rgrp_go_inval,
.go_lock = gfs2_rgrp_go_lock,
- .go_dump = gfs2_rgrp_dump,
+ .go_dump = gfs2_rgrp_go_dump,
.go_type = LM_TYPE_RGRP,
.go_flags = GLOF_LVB,
};
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index ca2ec02436ec..d7707307f4b1 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -41,6 +41,10 @@ struct gfs2_log_header_host {
u32 lh_flags; /* GFS2_LOG_HEAD_... */
u32 lh_tail; /* Block number of log tail */
u32 lh_blkno;
+
+ s64 lh_local_total;
+ s64 lh_local_free;
+ s64 lh_local_dinodes;
};
/*
@@ -340,7 +344,6 @@ enum {
GLF_REPLY_PENDING = 9,
GLF_INITIAL = 10,
GLF_FROZEN = 11,
- GLF_QUEUED = 12,
GLF_LRU = 13,
GLF_OBJECT = 14, /* Used only for tracing */
GLF_BLOCKING = 15,
@@ -378,17 +381,10 @@ struct gfs2_glock {
atomic_t gl_ail_count;
atomic_t gl_revokes;
struct delayed_work gl_work;
- union {
- /* For iopen glocks only */
- struct {
- struct delayed_work gl_delete;
- u64 gl_no_formal_ino;
- };
- /* For rgrp glocks only */
- struct {
- loff_t start;
- loff_t end;
- } gl_vm;
+ /* For iopen glocks only */
+ struct {
+ struct delayed_work gl_delete;
+ u64 gl_no_formal_ino;
};
struct rcu_head gl_rcu;
struct rhash_head gl_node;
@@ -701,10 +697,18 @@ struct gfs2_pcpu_lkstats {
struct gfs2_lkstats lkstats[10];
};
+/* List of local (per node) statfs inodes */
+struct local_statfs_inode {
+ struct list_head si_list;
+ struct inode *si_sc_inode;
+ unsigned int si_jid; /* journal id this statfs inode corresponds to */
+};
+
struct gfs2_sbd {
struct super_block *sd_vfs;
struct gfs2_pcpu_lkstats __percpu *sd_lkstats;
struct kobject sd_kobj;
+ struct completion sd_kobj_unregister;
unsigned long sd_flags; /* SDF_... */
struct gfs2_sb_host sd_sb;
@@ -751,6 +755,7 @@ struct gfs2_sbd {
struct inode *sd_jindex;
struct inode *sd_statfs_inode;
struct inode *sd_sc_inode;
+ struct list_head sd_sc_inodes_list;
struct inode *sd_qc_inode;
struct inode *sd_rindex;
struct inode *sd_quota_inode;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 3763c9ff1406..9133b3178677 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -70,7 +70,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct)
*
*/
-static void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
+void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
{
bd->bd_tr = NULL;
list_del_init(&bd->bd_ail_st_list);
@@ -244,13 +244,15 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
* @tr: the transaction
* @max_revokes: If nonzero, issue revokes for the bd items for written buffers
*
+ * returns: the transaction's count of remaining active items
*/
-static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
int *max_revokes)
{
struct gfs2_bufdata *bd, *s;
struct buffer_head *bh;
+ int active_count = 0;
list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list,
bd_ail_st_list) {
@@ -265,8 +267,10 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
* If the ail buffer is not busy and caught an error, flag it
* for others.
*/
- if (!sdp->sd_log_error && buffer_busy(bh))
+ if (!sdp->sd_log_error && buffer_busy(bh)) {
+ active_count++;
continue;
+ }
if (!buffer_uptodate(bh) &&
!cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
gfs2_io_error_bh(sdp, bh);
@@ -285,6 +289,7 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
}
list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
}
+ return active_count;
}
/**
@@ -303,8 +308,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes)
spin_lock(&sdp->sd_ail_lock);
list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
- gfs2_ail1_empty_one(sdp, tr, &max_revokes);
- if (list_empty(&tr->tr_ail1_list) && oldest_tr)
+ if (!gfs2_ail1_empty_one(sdp, tr, &max_revokes) && oldest_tr)
list_move(&tr->tr_list, &sdp->sd_ail2_list);
else
oldest_tr = 0;
@@ -716,16 +720,24 @@ void gfs2_write_revokes(struct gfs2_sbd *sdp)
atomic_dec(&sdp->sd_log_blks_free);
/* If no blocks have been reserved, we need to also
* reserve a block for the header */
- if (!sdp->sd_log_blks_reserved)
+ if (!sdp->sd_log_blks_reserved) {
atomic_dec(&sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, -2);
+ } else {
+ trace_gfs2_log_blocks(sdp, -1);
+ }
}
gfs2_ail1_empty(sdp, max_revokes);
gfs2_log_unlock(sdp);
if (!sdp->sd_log_num_revoke) {
atomic_inc(&sdp->sd_log_blks_free);
- if (!sdp->sd_log_blks_reserved)
+ if (!sdp->sd_log_blks_reserved) {
atomic_inc(&sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, 2);
+ } else {
+ trace_gfs2_log_blocks(sdp, 1);
+ }
}
}
@@ -902,7 +914,7 @@ static void empty_ail1_list(struct gfs2_sbd *sdp)
}
/**
- * drain_bd - drain the buf and databuf queue for a failed transaction
+ * trans_drain - drain the buf and databuf queue for a failed transaction
* @tr: the transaction to drain
*
* When this is called, we're taking an error exit for a log write that failed
@@ -954,10 +966,8 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
goto out;
/* Log might have been flushed while we waited for the flush lock */
- if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) {
- up_write(&sdp->sd_log_flush_lock);
- return;
- }
+ if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags))
+ goto out;
trace_gfs2_log_flush(sdp, 1, flags);
if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
@@ -971,25 +981,25 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
if (unlikely (state == SFS_FROZEN))
if (gfs2_assert_withdraw_delayed(sdp,
!tr->tr_num_buf_new && !tr->tr_num_databuf_new))
- goto out;
+ goto out_withdraw;
}
if (unlikely(state == SFS_FROZEN))
if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke))
- goto out;
+ goto out_withdraw;
if (gfs2_assert_withdraw_delayed(sdp,
sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke))
- goto out;
+ goto out_withdraw;
gfs2_ordered_write(sdp);
if (gfs2_withdrawn(sdp))
- goto out;
+ goto out_withdraw;
lops_before_commit(sdp, tr);
if (gfs2_withdrawn(sdp))
- goto out;
+ goto out_withdraw;
gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE);
if (gfs2_withdrawn(sdp))
- goto out;
+ goto out_withdraw;
if (sdp->sd_log_head != sdp->sd_log_flush_head) {
log_flush_wait(sdp);
@@ -1000,7 +1010,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
log_write_header(sdp, flags);
}
if (gfs2_withdrawn(sdp))
- goto out;
+ goto out_withdraw;
lops_after_commit(sdp, tr);
gfs2_log_lock(sdp);
@@ -1020,7 +1030,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
if (!sdp->sd_log_idle) {
empty_ail1_list(sdp);
if (gfs2_withdrawn(sdp))
- goto out;
+ goto out_withdraw;
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
trace_gfs2_log_blocks(sdp, -1);
log_write_header(sdp, flags);
@@ -1033,27 +1043,30 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
}
-out:
- if (gfs2_withdrawn(sdp)) {
- trans_drain(tr);
- /**
- * If the tr_list is empty, we're withdrawing during a log
- * flush that targets a transaction, but the transaction was
- * never queued onto any of the ail lists. Here we add it to
- * ail1 just so that ail_drain() will find and free it.
- */
- spin_lock(&sdp->sd_ail_lock);
- if (tr && list_empty(&tr->tr_list))
- list_add(&tr->tr_list, &sdp->sd_ail1_list);
- spin_unlock(&sdp->sd_ail_lock);
- ail_drain(sdp); /* frees all transactions */
- tr = NULL;
- }
-
+out_end:
trace_gfs2_log_flush(sdp, 0, flags);
+out:
up_write(&sdp->sd_log_flush_lock);
-
gfs2_trans_free(sdp, tr);
+ if (gfs2_withdrawing(sdp))
+ gfs2_withdraw(sdp);
+ return;
+
+out_withdraw:
+ trans_drain(tr);
+ /**
+ * If the tr_list is empty, we're withdrawing during a log
+ * flush that targets a transaction, but the transaction was
+ * never queued onto any of the ail lists. Here we add it to
+ * ail1 just so that ail_drain() will find and free it.
+ */
+ spin_lock(&sdp->sd_ail_lock);
+ if (tr && list_empty(&tr->tr_list))
+ list_add(&tr->tr_list, &sdp->sd_ail1_list);
+ spin_unlock(&sdp->sd_ail_lock);
+ ail_drain(sdp); /* frees all transactions */
+ tr = NULL;
+ goto out_end;
}
/**
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 8965c751a303..79f97290146e 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -63,7 +63,7 @@ static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
extern void gfs2_ordered_del_inode(struct gfs2_inode *ip);
extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct);
-
+extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index ed1da4323967..ed69298dd824 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -823,7 +823,7 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
*
*/
-static void gfs2_meta_sync(struct gfs2_glock *gl)
+void gfs2_meta_sync(struct gfs2_glock *gl)
{
struct address_space *mapping = gfs2_glock2aspace(gl);
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 9c5e4e491e03..4a3d8aecdf82 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -27,6 +27,7 @@ extern void gfs2_log_submit_bio(struct bio **biop, int opf);
extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head, bool keep_cache);
+extern void gfs2_meta_sync(struct gfs2_glock *gl);
static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
{
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 9856cc2e0795..2db573e31f78 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -348,38 +348,109 @@ void gfs2_remove_from_journal(struct buffer_head *bh, int meta)
brelse(bh);
}
if (bd) {
- spin_lock(&sdp->sd_ail_lock);
if (bd->bd_tr) {
gfs2_trans_add_revoke(sdp, bd);
} else if (was_pinned) {
bh->b_private = NULL;
kmem_cache_free(gfs2_bufdata_cachep, bd);
+ } else if (!list_empty(&bd->bd_ail_st_list) &&
+ !list_empty(&bd->bd_ail_gl_list)) {
+ gfs2_remove_from_ail(bd);
}
- spin_unlock(&sdp->sd_ail_lock);
}
clear_buffer_dirty(bh);
clear_buffer_uptodate(bh);
}
/**
- * gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore
+ * gfs2_ail1_wipe - remove deleted/freed buffers from the ail1 list
+ * @sdp: superblock
+ * @bstart: starting block address of buffers to remove
+ * @blen: length of buffers to be removed
+ *
+ * This function is called from gfs2_journal wipe, whose job is to remove
+ * buffers, corresponding to deleted blocks, from the journal. If we find any
+ * bufdata elements on the system ail1 list, they haven't been written to
+ * the journal yet. So we remove them.
+ */
+static void gfs2_ail1_wipe(struct gfs2_sbd *sdp, u64 bstart, u32 blen)
+{
+ struct gfs2_trans *tr, *s;
+ struct gfs2_bufdata *bd, *bs;
+ struct buffer_head *bh;
+ u64 end = bstart + blen;
+
+ gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
+ list_for_each_entry_safe(tr, s, &sdp->sd_ail1_list, tr_list) {
+ list_for_each_entry_safe(bd, bs, &tr->tr_ail1_list,
+ bd_ail_st_list) {
+ bh = bd->bd_bh;
+ if (bh->b_blocknr < bstart || bh->b_blocknr >= end)
+ continue;
+
+ gfs2_remove_from_journal(bh, REMOVE_JDATA);
+ }
+ }
+ spin_unlock(&sdp->sd_ail_lock);
+ gfs2_log_unlock(sdp);
+}
+
+static struct buffer_head *gfs2_getjdatabuf(struct gfs2_inode *ip, u64 blkno)
+{
+ struct address_space *mapping = ip->i_inode.i_mapping;
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ struct page *page;
+ struct buffer_head *bh;
+ unsigned int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
+ unsigned long index = blkno >> shift; /* convert block to page */
+ unsigned int bufnum = blkno - (index << shift);
+
+ page = find_get_page_flags(mapping, index, FGP_LOCK|FGP_ACCESSED);
+ if (!page)
+ return NULL;
+ if (!page_has_buffers(page)) {
+ unlock_page(page);
+ put_page(page);
+ return NULL;
+ }
+ /* Locate header for our buffer within our page */
+ for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
+ /* Do nothing */;
+ get_bh(bh);
+ unlock_page(page);
+ put_page(page);
+ return bh;
+}
+
+/**
+ * gfs2_journal_wipe - make inode's buffers so they aren't dirty/pinned anymore
* @ip: the inode who owns the buffers
* @bstart: the first buffer in the run
* @blen: the number of buffers in the run
*
*/
-void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
+void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct buffer_head *bh;
+ int ty;
+ gfs2_ail1_wipe(sdp, bstart, blen);
while (blen) {
+ ty = REMOVE_META;
bh = gfs2_getbuf(ip->i_gl, bstart, NO_CREATE);
+ if (!bh && gfs2_is_jdata(ip)) {
+ bh = gfs2_getjdatabuf(ip, bstart);
+ ty = REMOVE_JDATA;
+ }
if (bh) {
lock_buffer(bh);
gfs2_log_lock(sdp);
- gfs2_remove_from_journal(bh, REMOVE_META);
+ spin_lock(&sdp->sd_ail_lock);
+ gfs2_remove_from_journal(bh, ty);
+ spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
unlock_buffer(bh);
brelse(bh);
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index eafb74e861c6..4a8c01929b79 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -60,7 +60,7 @@ enum {
};
extern void gfs2_remove_from_journal(struct buffer_head *bh, int meta);
-extern void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
+extern void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
struct buffer_head **bhp);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 6d18d2c91add..7a7e3c10a9a9 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -110,6 +110,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
spin_lock_init(&sdp->sd_trunc_lock);
spin_lock_init(&sdp->sd_bitmap_lock);
+ INIT_LIST_HEAD(&sdp->sd_sc_inodes_list);
+
mapping = &sdp->sd_aspace;
address_space_init_once(mapping);
@@ -169,15 +171,19 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
return -EINVAL;
}
- /* If format numbers match exactly, we're done. */
-
- if (sb->sb_fs_format == GFS2_FORMAT_FS &&
- sb->sb_multihost_format == GFS2_FORMAT_MULTI)
- return 0;
+ if (sb->sb_fs_format != GFS2_FORMAT_FS ||
+ sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
+ fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
+ return -EINVAL;
+ }
- fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
+ if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE ||
+ (sb->sb_bsize & (sb->sb_bsize - 1))) {
+ pr_warn("Invalid superblock size\n");
+ return -EINVAL;
+ }
- return -EINVAL;
+ return 0;
}
static void end_bio_io_page(struct bio *bio)
@@ -604,6 +610,90 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
return error;
}
+/**
+ * init_statfs - look up and initialize master and local (per node) statfs inodes
+ * @sdp: The GFS2 superblock
+ *
+ * This should be called after the jindex is initialized in init_journal() and
+ * before gfs2_journal_recovery() is called because we need to be able to write
+ * to these inodes during recovery.
+ *
+ * Returns: errno
+ */
+static int init_statfs(struct gfs2_sbd *sdp)
+{
+ int error = 0;
+ struct inode *master = d_inode(sdp->sd_master_dir);
+ struct inode *pn = NULL;
+ char buf[30];
+ struct gfs2_jdesc *jd;
+ struct gfs2_inode *ip;
+
+ sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs");
+ if (IS_ERR(sdp->sd_statfs_inode)) {
+ error = PTR_ERR(sdp->sd_statfs_inode);
+ fs_err(sdp, "can't read in statfs inode: %d\n", error);
+ goto fail;
+ }
+
+ pn = gfs2_lookup_simple(master, "per_node");
+ if (IS_ERR(pn)) {
+ error = PTR_ERR(pn);
+ fs_err(sdp, "can't find per_node directory: %d\n", error);
+ goto put_statfs;
+ }
+
+ /* For each jid, lookup the corresponding local statfs inode in the
+ * per_node metafs directory and save it in the sdp->sd_sc_inodes_list. */
+ list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+ struct local_statfs_inode *lsi =
+ kmalloc(sizeof(struct local_statfs_inode), GFP_NOFS);
+ if (!lsi) {
+ error = -ENOMEM;
+ goto free_local;
+ }
+ sprintf(buf, "statfs_change%u", jd->jd_jid);
+ lsi->si_sc_inode = gfs2_lookup_simple(pn, buf);
+ if (IS_ERR(lsi->si_sc_inode)) {
+ error = PTR_ERR(lsi->si_sc_inode);
+ fs_err(sdp, "can't find local \"sc\" file#%u: %d\n",
+ jd->jd_jid, error);
+ goto free_local;
+ }
+ lsi->si_jid = jd->jd_jid;
+ if (jd->jd_jid == sdp->sd_jdesc->jd_jid)
+ sdp->sd_sc_inode = lsi->si_sc_inode;
+
+ list_add_tail(&lsi->si_list, &sdp->sd_sc_inodes_list);
+ }
+
+ iput(pn);
+ ip = GFS2_I(sdp->sd_sc_inode);
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
+ &sdp->sd_sc_gh);
+ if (error) {
+ fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
+ goto free_local;
+ }
+ return 0;
+
+free_local:
+ free_local_statfs_inodes(sdp);
+ iput(pn);
+put_statfs:
+ iput(sdp->sd_statfs_inode);
+fail:
+ return error;
+}
+
+/* Uninitialize and free up memory used by the list of statfs inodes */
+static void uninit_statfs(struct gfs2_sbd *sdp)
+{
+ gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
+ free_local_statfs_inodes(sdp);
+ iput(sdp->sd_statfs_inode);
+}
+
static int init_journal(struct gfs2_sbd *sdp, int undo)
{
struct inode *master = d_inode(sdp->sd_master_dir);
@@ -690,6 +780,11 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
}
trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free));
+ /* Lookup statfs inodes here so journal recovery can use them. */
+ error = init_statfs(sdp);
+ if (error)
+ goto fail_jinode_gh;
+
if (sdp->sd_lockstruct.ls_first) {
unsigned int x;
for (x = 0; x < sdp->sd_journals; x++) {
@@ -698,14 +793,14 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
if (sdp->sd_args.ar_spectator) {
error = check_journal_clean(sdp, jd, true);
if (error)
- goto fail_jinode_gh;
+ goto fail_statfs;
continue;
}
error = gfs2_recover_journal(jd, true);
if (error) {
fs_err(sdp, "error recovering journal %u: %d\n",
x, error);
- goto fail_jinode_gh;
+ goto fail_statfs;
}
}
@@ -714,7 +809,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
error = gfs2_recover_journal(sdp->sd_jdesc, true);
if (error) {
fs_err(sdp, "error recovering my journal: %d\n", error);
- goto fail_jinode_gh;
+ goto fail_statfs;
}
}
@@ -725,6 +820,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
INIT_WORK(&sdp->sd_freeze_work, gfs2_freeze_func);
return 0;
+fail_statfs:
+ uninit_statfs(sdp);
fail_jinode_gh:
/* A withdraw may have done dq/uninit so now we need to check it */
if (!sdp->sd_args.ar_spectator &&
@@ -758,20 +855,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
if (error)
goto fail;
- /* Read in the master statfs inode */
- sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs");
- if (IS_ERR(sdp->sd_statfs_inode)) {
- error = PTR_ERR(sdp->sd_statfs_inode);
- fs_err(sdp, "can't read in statfs inode: %d\n", error);
- goto fail_journal;
- }
-
/* Read in the resource index inode */
sdp->sd_rindex = gfs2_lookup_simple(master, "rindex");
if (IS_ERR(sdp->sd_rindex)) {
error = PTR_ERR(sdp->sd_rindex);
fs_err(sdp, "can't get resource index inode: %d\n", error);
- goto fail_statfs;
+ goto fail_journal;
}
sdp->sd_rindex_uptodate = 0;
@@ -800,8 +889,6 @@ fail_qinode:
fail_rindex:
gfs2_clear_rgrpd(sdp);
iput(sdp->sd_rindex);
-fail_statfs:
- iput(sdp->sd_statfs_inode);
fail_journal:
init_journal(sdp, UNDO);
fail:
@@ -829,14 +916,6 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
return error;
}
- sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
- sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf);
- if (IS_ERR(sdp->sd_sc_inode)) {
- error = PTR_ERR(sdp->sd_sc_inode);
- fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
- goto fail;
- }
-
sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
sdp->sd_qc_inode = gfs2_lookup_simple(pn, buf);
if (IS_ERR(sdp->sd_qc_inode)) {
@@ -848,33 +927,21 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
iput(pn);
pn = NULL;
- ip = GFS2_I(sdp->sd_sc_inode);
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
- &sdp->sd_sc_gh);
- if (error) {
- fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
- goto fail_qc_i;
- }
-
ip = GFS2_I(sdp->sd_qc_inode);
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
&sdp->sd_qc_gh);
if (error) {
fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
- goto fail_ut_gh;
+ goto fail_qc_i;
}
return 0;
fail_qc_gh:
gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
-fail_ut_gh:
- gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
fail_qc_i:
iput(sdp->sd_qc_inode);
fail_ut_i:
- iput(sdp->sd_sc_inode);
-fail:
iput(pn);
return error;
}
@@ -1062,26 +1129,14 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
}
error = init_names(sdp, silent);
- if (error) {
- /* In this case, we haven't initialized sysfs, so we have to
- manually free the sdp. */
- free_sbd(sdp);
- sb->s_fs_info = NULL;
- return error;
- }
+ if (error)
+ goto fail_free;
snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name);
error = gfs2_sys_fs_add(sdp);
- /*
- * If we hit an error here, gfs2_sys_fs_add will have called function
- * kobject_put which causes the sysfs usage count to go to zero, which
- * causes sysfs to call function gfs2_sbd_release, which frees sdp.
- * Subsequent error paths here will call gfs2_sys_fs_del, which also
- * kobject_put to free sdp.
- */
if (error)
- return error;
+ goto fail_free;
gfs2_create_debugfs_file(sdp);
@@ -1179,9 +1234,9 @@ fail_lm:
gfs2_lm_unmount(sdp);
fail_debug:
gfs2_delete_debugfs_file(sdp);
- /* gfs2_sys_fs_del must be the last thing we do, since it causes
- * sysfs to call function gfs2_sbd_release, which frees sdp. */
gfs2_sys_fs_del(sdp);
+fail_free:
+ free_sbd(sdp);
sb->s_fs_info = NULL;
return error;
}
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 390ea79d682c..b5cbe21efdfb 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -144,6 +144,10 @@ int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
head->lh_tail = be32_to_cpu(lh->lh_tail);
head->lh_blkno = be32_to_cpu(lh->lh_blkno);
+ head->lh_local_total = be64_to_cpu(lh->lh_local_total);
+ head->lh_local_free = be64_to_cpu(lh->lh_local_free);
+ head->lh_local_dinodes = be64_to_cpu(lh->lh_local_dinodes);
+
return 0;
}
/**
@@ -292,6 +296,109 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
}
+/**
+ * update_statfs_inode - Update the master statfs inode or zero out the local
+ * statfs inode for a given journal.
+ * @jd: The journal
+ * @head: If NULL, @inode is the local statfs inode and we need to zero it out.
+ * Otherwise, it @head contains the statfs change info that needs to be
+ * synced to the master statfs inode (pointed to by @inode).
+ * @inode: statfs inode to update.
+ */
+static int update_statfs_inode(struct gfs2_jdesc *jd,
+ struct gfs2_log_header_host *head,
+ struct inode *inode)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+ struct gfs2_inode *ip;
+ struct buffer_head *bh;
+ struct gfs2_statfs_change_host sc;
+ int error = 0;
+
+ BUG_ON(!inode);
+ ip = GFS2_I(inode);
+
+ error = gfs2_meta_inode_buffer(ip, &bh);
+ if (error)
+ goto out;
+
+ spin_lock(&sdp->sd_statfs_spin);
+
+ if (head) { /* Update the master statfs inode */
+ gfs2_statfs_change_in(&sc, bh->b_data + sizeof(struct gfs2_dinode));
+ sc.sc_total += head->lh_local_total;
+ sc.sc_free += head->lh_local_free;
+ sc.sc_dinodes += head->lh_local_dinodes;
+ gfs2_statfs_change_out(&sc, bh->b_data + sizeof(struct gfs2_dinode));
+
+ fs_info(sdp, "jid=%u: Updated master statfs Total:%lld, "
+ "Free:%lld, Dinodes:%lld after change "
+ "[%+lld,%+lld,%+lld]\n", jd->jd_jid, sc.sc_total,
+ sc.sc_free, sc.sc_dinodes, head->lh_local_total,
+ head->lh_local_free, head->lh_local_dinodes);
+ } else { /* Zero out the local statfs inode */
+ memset(bh->b_data + sizeof(struct gfs2_dinode), 0,
+ sizeof(struct gfs2_statfs_change));
+ /* If it's our own journal, reset any in-memory changes too */
+ if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) {
+ memset(&sdp->sd_statfs_local, 0,
+ sizeof(struct gfs2_statfs_change_host));
+ }
+ }
+ spin_unlock(&sdp->sd_statfs_spin);
+
+ mark_buffer_dirty(bh);
+ brelse(bh);
+ gfs2_meta_sync(ip->i_gl);
+
+out:
+ return error;
+}
+
+/**
+ * recover_local_statfs - Update the master and local statfs changes for this
+ * journal.
+ *
+ * Previously, statfs updates would be read in from the local statfs inode and
+ * synced to the master statfs inode during recovery.
+ *
+ * We now use the statfs updates in the journal head to update the master statfs
+ * inode instead of reading in from the local statfs inode. To preserve backward
+ * compatibility with kernels that can't do this, we still need to keep the
+ * local statfs inode up to date by writing changes to it. At some point in the
+ * future, we can do away with the local statfs inodes altogether and keep the
+ * statfs changes solely in the journal.
+ *
+ * @jd: the journal
+ * @head: the journal head
+ *
+ * Returns: errno
+ */
+static void recover_local_statfs(struct gfs2_jdesc *jd,
+ struct gfs2_log_header_host *head)
+{
+ int error;
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+
+ if (!head->lh_local_total && !head->lh_local_free
+ && !head->lh_local_dinodes) /* No change */
+ goto zero_local;
+
+ /* First update the master statfs inode with the changes we
+ * found in the journal. */
+ error = update_statfs_inode(jd, head, sdp->sd_statfs_inode);
+ if (error)
+ goto out;
+
+zero_local:
+ /* Zero out the local statfs inode so any changes in there
+ * are not re-recovered. */
+ error = update_statfs_inode(jd, NULL,
+ find_local_statfs_inode(sdp, jd->jd_jid));
+out:
+ return;
+}
+
void gfs2_recover_func(struct work_struct *work)
{
struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
@@ -411,6 +518,7 @@ void gfs2_recover_func(struct work_struct *work)
goto fail_gunlock_thaw;
}
+ recover_local_statfs(jd, &head);
clean_journal(jd, &head);
up_read(&sdp->sd_log_flush_lock);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 074f228ea839..ee491bb9c1cc 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -878,7 +878,6 @@ static int rgd_insert(struct gfs2_rgrpd *rgd)
static int read_rindex_entry(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- const unsigned bsize = sdp->sd_sb.sb_bsize;
loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
struct gfs2_rindex buf;
int error;
@@ -924,9 +923,6 @@ static int read_rindex_entry(struct gfs2_inode *ip)
spin_unlock(&sdp->sd_rindex_spin);
if (!error) {
glock_set_object(rgd->rd_gl, rgd);
- rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK;
- rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr +
- rgd->rd_length) * bsize) - 1;
return 0;
}
@@ -2209,20 +2205,17 @@ static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd,
/**
* gfs2_rgrp_dump - print out an rgrp
* @seq: The iterator
- * @gl: The glock in question
+ * @rgd: The rgrp in question
* @fs_id_buf: pointer to file system id (if requested)
*
*/
-void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl,
+void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
const char *fs_id_buf)
{
- struct gfs2_rgrpd *rgd = gl->gl_object;
struct gfs2_blkreserv *trs;
const struct rb_node *n;
- if (rgd == NULL)
- return;
gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
fs_id_buf,
(unsigned long long)rgd->rd_addr, rgd->rd_flags,
@@ -2253,7 +2246,7 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
(unsigned long long)rgd->rd_addr);
fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
- gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf);
+ gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
rgd->rd_flags |= GFS2_RDF_ERROR;
}
@@ -2445,8 +2438,8 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
/* Directories keep their data in the metadata address space */
- if (meta || ip->i_depth)
- gfs2_meta_wipe(ip, bstart, blen);
+ if (meta || ip->i_depth || gfs2_is_jdata(ip))
+ gfs2_journal_wipe(ip, bstart, blen);
}
/**
@@ -2502,7 +2495,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
gfs2_statfs_change(sdp, 0, +1, -1);
trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
- gfs2_meta_wipe(ip, ip->i_no_addr, 1);
+ gfs2_journal_wipe(ip, ip->i_no_addr, 1);
}
/**
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index a1d7e14fc55b..9a587ada51ed 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -67,7 +67,7 @@ extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist);
extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
-extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl,
+extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
const char *fs_id_buf);
extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
struct buffer_head *bh,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 9f4d9e7be839..b285192bd6b3 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -44,6 +44,12 @@
#include "xattr.h"
#include "lops.h"
+enum dinode_demise {
+ SHOULD_DELETE_DINODE,
+ SHOULD_NOT_DELETE_DINODE,
+ SHOULD_DEFER_EVICTION,
+};
+
/**
* gfs2_jindex_free - Clear all the journal index information
* @sdp: The GFS2 superblock
@@ -224,7 +230,7 @@ void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
}
-static void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
+void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
{
struct gfs2_statfs_change *str = buf;
@@ -702,6 +708,8 @@ restart:
if (error)
gfs2_io_error(sdp);
}
+ WARN_ON(gfs2_withdrawing(sdp));
+
/* At this point, we're through modifying the disk */
/* Release stuff */
@@ -721,7 +729,7 @@ restart:
gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
- iput(sdp->sd_sc_inode);
+ free_local_statfs_inodes(sdp);
iput(sdp->sd_qc_inode);
}
@@ -736,6 +744,7 @@ restart:
/* At this point, we're through participating in the lockspace */
gfs2_sys_fs_del(sdp);
+ free_sbd(sdp);
}
/**
@@ -1309,109 +1318,98 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode)
}
/**
- * gfs2_evict_inode - Remove an inode from cache
+ * evict_should_delete - determine whether the inode is eligible for deletion
* @inode: The inode to evict
*
- * There are three cases to consider:
- * 1. i_nlink == 0, we are final opener (and must deallocate)
- * 2. i_nlink == 0, we are not the final opener (and cannot deallocate)
- * 3. i_nlink > 0
- *
- * If the fs is read only, then we have to treat all cases as per #3
- * since we are unable to do any deallocation. The inode will be
- * deallocated by the next read/write node to attempt an allocation
- * in the same resource group
+ * This function determines whether the evicted inode is eligible to be deleted
+ * and locks the inode glock.
*
- * We have to (at the moment) hold the inodes main lock to cover
- * the gap between unlocking the shared lock on the iopen lock and
- * taking the exclusive lock. I'd rather do a shared -> exclusive
- * conversion on the iopen lock, but we can change that later. This
- * is safe, just less efficient.
+ * Returns: the fate of the dinode
*/
-
-static void gfs2_evict_inode(struct inode *inode)
+static enum dinode_demise evict_should_delete(struct inode *inode,
+ struct gfs2_holder *gh)
{
+ struct gfs2_inode *ip = GFS2_I(inode);
struct super_block *sb = inode->i_sb;
struct gfs2_sbd *sdp = sb->s_fs_info;
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder gh;
- struct address_space *metamapping;
- int error;
-
- if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) {
- clear_inode(inode);
- return;
- }
-
- if (inode->i_nlink || sb_rdonly(sb))
- goto out;
+ int ret;
if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));
- gfs2_holder_mark_uninitialized(&gh);
- goto out_delete;
+ goto should_delete;
}
if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags))
- goto out;
+ return SHOULD_DEFER_EVICTION;
/* Deletes should never happen under memory pressure anymore. */
if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
- goto out;
+ return SHOULD_DEFER_EVICTION;
/* Must not read inode block until block type has been verified */
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
- if (unlikely(error)) {
+ ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, gh);
+ if (unlikely(ret)) {
glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
- goto out;
+ return SHOULD_DEFER_EVICTION;
}
if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino))
- goto out_truncate;
- error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
- if (error)
- goto out_truncate;
+ return SHOULD_NOT_DELETE_DINODE;
+ ret = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
+ if (ret)
+ return SHOULD_NOT_DELETE_DINODE;
if (test_bit(GIF_INVALID, &ip->i_flags)) {
- error = gfs2_inode_refresh(ip);
- if (error)
- goto out_truncate;
+ ret = gfs2_inode_refresh(ip);
+ if (ret)
+ return SHOULD_NOT_DELETE_DINODE;
}
/*
* The inode may have been recreated in the meantime.
*/
if (inode->i_nlink)
- goto out_truncate;
+ return SHOULD_NOT_DELETE_DINODE;
-out_delete:
+should_delete:
if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
if (!gfs2_upgrade_iopen_glock(inode)) {
gfs2_holder_uninit(&ip->i_iopen_gh);
- goto out_truncate;
+ return SHOULD_NOT_DELETE_DINODE;
}
}
+ return SHOULD_DELETE_DINODE;
+}
+
+/**
+ * evict_unlinked_inode - delete the pieces of an unlinked evicted inode
+ * @inode: The inode to evict
+ */
+static int evict_unlinked_inode(struct inode *inode)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ int ret;
if (S_ISDIR(inode->i_mode) &&
(ip->i_diskflags & GFS2_DIF_EXHASH)) {
- error = gfs2_dir_exhash_dealloc(ip);
- if (error)
- goto out_unlock;
+ ret = gfs2_dir_exhash_dealloc(ip);
+ if (ret)
+ goto out;
}
if (ip->i_eattr) {
- error = gfs2_ea_dealloc(ip);
- if (error)
- goto out_unlock;
+ ret = gfs2_ea_dealloc(ip);
+ if (ret)
+ goto out;
}
if (!gfs2_is_stuffed(ip)) {
- error = gfs2_file_dealloc(ip);
- if (error)
- goto out_unlock;
+ ret = gfs2_file_dealloc(ip);
+ if (ret)
+ goto out;
}
/* We're about to clear the bitmap for the dinode, but as soon as we
@@ -1419,11 +1417,24 @@ out_delete:
location and try to set gl_object again. We clear gl_object here so
that subsequent inode creates don't see an old gl_object. */
glock_clear_object(ip->i_gl, ip);
- error = gfs2_dinode_dealloc(ip);
+ ret = gfs2_dinode_dealloc(ip);
gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
- goto out_unlock;
+out:
+ return ret;
+}
+
+/*
+ * evict_linked_inode - evict an inode whose dinode has not been unlinked
+ * @inode: The inode to evict
+ */
+static int evict_linked_inode(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+ struct gfs2_sbd *sdp = sb->s_fs_info;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct address_space *metamapping;
+ int ret;
-out_truncate:
gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_EVICT_INODE);
metamapping = gfs2_glock2aspace(ip->i_gl);
@@ -1434,15 +1445,63 @@ out_truncate:
write_inode_now(inode, 1);
gfs2_ail_flush(ip->i_gl, 0);
- error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
- if (error)
- goto out_unlock;
+ ret = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
+ if (ret)
+ return ret;
+
/* Needs to be done before glock release & also in a transaction */
truncate_inode_pages(&inode->i_data, 0);
truncate_inode_pages(metamapping, 0);
gfs2_trans_end(sdp);
+ return 0;
+}
+
+/**
+ * gfs2_evict_inode - Remove an inode from cache
+ * @inode: The inode to evict
+ *
+ * There are three cases to consider:
+ * 1. i_nlink == 0, we are final opener (and must deallocate)
+ * 2. i_nlink == 0, we are not the final opener (and cannot deallocate)
+ * 3. i_nlink > 0
+ *
+ * If the fs is read only, then we have to treat all cases as per #3
+ * since we are unable to do any deallocation. The inode will be
+ * deallocated by the next read/write node to attempt an allocation
+ * in the same resource group
+ *
+ * We have to (at the moment) hold the inodes main lock to cover
+ * the gap between unlocking the shared lock on the iopen lock and
+ * taking the exclusive lock. I'd rather do a shared -> exclusive
+ * conversion on the iopen lock, but we can change that later. This
+ * is safe, just less efficient.
+ */
+
+static void gfs2_evict_inode(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+ struct gfs2_sbd *sdp = sb->s_fs_info;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_holder gh;
+ int ret;
+
+ if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) {
+ clear_inode(inode);
+ return;
+ }
+
+ if (inode->i_nlink || sb_rdonly(sb))
+ goto out;
+
+ gfs2_holder_mark_uninitialized(&gh);
+ ret = evict_should_delete(inode, &gh);
+ if (ret == SHOULD_DEFER_EVICTION)
+ goto out;
+ if (ret == SHOULD_DELETE_DINODE)
+ ret = evict_unlinked_inode(inode);
+ else
+ ret = evict_linked_inode(inode);
-out_unlock:
if (gfs2_rs_active(&ip->i_res))
gfs2_rs_deltree(&ip->i_res);
@@ -1450,8 +1509,8 @@ out_unlock:
glock_clear_object(ip->i_gl, ip);
gfs2_glock_dq_uninit(&gh);
}
- if (error && error != GLR_TRYFAILED && error != -EROFS)
- fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
+ if (ret && ret != GLR_TRYFAILED && ret != -EROFS)
+ fs_warn(sdp, "gfs2_evict_inode: %d\n", ret);
out:
truncate_inode_pages_final(&inode->i_data);
if (ip->i_qadata)
@@ -1502,6 +1561,35 @@ static void gfs2_free_inode(struct inode *inode)
kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode));
}
+extern void free_local_statfs_inodes(struct gfs2_sbd *sdp)
+{
+ struct local_statfs_inode *lsi, *safe;
+
+ /* Run through the statfs inodes list to iput and free memory */
+ list_for_each_entry_safe(lsi, safe, &sdp->sd_sc_inodes_list, si_list) {
+ if (lsi->si_jid == sdp->sd_jdesc->jd_jid)
+ sdp->sd_sc_inode = NULL; /* belongs to this node */
+ if (lsi->si_sc_inode)
+ iput(lsi->si_sc_inode);
+ list_del(&lsi->si_list);
+ kfree(lsi);
+ }
+}
+
+extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
+ unsigned int index)
+{
+ struct local_statfs_inode *lsi;
+
+ /* Return the local (per node) statfs inode in the
+ * sdp->sd_sc_inodes_list corresponding to the 'index'. */
+ list_for_each_entry(lsi, &sdp->sd_sc_inodes_list, si_list) {
+ if (lsi->si_jid == index)
+ return lsi->si_sc_inode;
+ }
+ return NULL;
+}
+
const struct super_operations gfs2_super_ops = {
.alloc_inode = gfs2_alloc_inode,
.free_inode = gfs2_free_inode,
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 51900554ed81..c9fb2a654181 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -37,11 +37,16 @@ extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
s64 dinodes);
extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc,
const void *buf);
+extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc,
+ void *buf);
extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
struct buffer_head *l_bh);
extern int gfs2_statfs_sync(struct super_block *sb, int type);
extern void gfs2_freeze_func(struct work_struct *work);
+extern void free_local_statfs_inodes(struct gfs2_sbd *sdp);
+extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
+ unsigned int index);
extern void free_sbd(struct gfs2_sbd *sdp);
extern struct file_system_type gfs2_fs_type;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index d28c41bd69b0..c3e72dba7418 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -303,7 +303,7 @@ static void gfs2_sbd_release(struct kobject *kobj)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
- free_sbd(sdp);
+ complete(&sdp->sd_kobj_unregister);
}
static struct kobj_type gfs2_ktype = {
@@ -655,6 +655,7 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
sprintf(ro, "RDONLY=%d", sb_rdonly(sb));
sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
+ init_completion(&sdp->sd_kobj_unregister);
sdp->sd_kobj.kset = gfs2_kset;
error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL,
"%s", sdp->sd_table_name);
@@ -685,6 +686,7 @@ fail_tune:
fail_reg:
fs_err(sdp, "error %d adding sysfs files\n", error);
kobject_put(&sdp->sd_kobj);
+ wait_for_completion(&sdp->sd_kobj_unregister);
sb->s_fs_info = NULL;
return error;
}
@@ -695,6 +697,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
sysfs_remove_group(&sdp->sd_kobj, &tune_group);
sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
kobject_put(&sdp->sd_kobj);
+ wait_for_completion(&sdp->sd_kobj_unregister);
}
static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index e0025258107a..0b2f858d9a8c 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -56,7 +56,6 @@
{(1UL << GLF_REPLY_PENDING), "r" }, \
{(1UL << GLF_INITIAL), "I" }, \
{(1UL << GLF_FROZEN), "F" }, \
- {(1UL << GLF_QUEUED), "q" }, \
{(1UL << GLF_LRU), "L" }, \
{(1UL << GLF_OBJECT), "o" }, \
{(1UL << GLF_BLOCKING), "b" })
@@ -388,15 +387,17 @@ TRACE_EVENT(gfs2_log_blocks,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, blocks )
+ __field( int, blks_free )
),
TP_fast_assign(
__entry->dev = sdp->sd_vfs->s_dev;
__entry->blocks = blocks;
+ __entry->blks_free = atomic_read(&sdp->sd_log_blks_free);
),
- TP_printk("%u,%u log reserve %d", MAJOR(__entry->dev),
- MINOR(__entry->dev), __entry->blocks)
+ TP_printk("%u,%u log reserve %d %d", MAJOR(__entry->dev),
+ MINOR(__entry->dev), __entry->blocks, __entry->blks_free)
);
/* Writing back the AIL */
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 1cd0328cae20..0fba3bf64189 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -419,7 +419,7 @@ void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
- gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf);
+ gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
gfs2_lm(sdp,
"fatal: filesystem consistency error\n"
" RG = %llu\n"
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 6d9157efe16c..d7562981b3a0 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -205,6 +205,16 @@ static inline bool gfs2_withdrawn(struct gfs2_sbd *sdp)
test_bit(SDF_WITHDRAWING, &sdp->sd_flags);
}
+/**
+ * gfs2_withdrawing - check if a withdraw is pending
+ * @sdp: the superblock
+ */
+static inline bool gfs2_withdrawing(struct gfs2_sbd *sdp)
+{
+ return test_bit(SDF_WITHDRAWING, &sdp->sd_flags) &&
+ !test_bit(SDF_WITHDRAWN, &sdp->sd_flags);
+}
+
#define gfs2_tune_get(sdp, field) \
gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)