29 files changed, 1286 insertions, 748 deletions
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 90c6a8faaecb..3ed2b088dcfd 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -3,7 +3,9 @@ config GFS2_FS
 	depends on (64BIT || LBDAF)
 	select FS_POSIX_ACL
 	select CRC32
+	select LIBCRC32C
 	select QUOTACTL
+	select FS_IOMAP
 	help
 	  A cluster filesystem.
 
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index 86128202384f..41b2aa4bc3bf 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ccflags-y := -I$(src)
 obj-$(CONFIG_GFS2_FS) += gfs2.o
 gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 9d5eecb123de..776717f1eeea 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -141,6 +141,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 
 	ret = __gfs2_set_acl(inode, acl, type);
 	if (!ret && mode != inode->i_mode) {
+		inode->i_ctime = current_time(inode);
 		inode->i_mode = mode;
 		mark_inode_dirty(inode);
 	}
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 68ed06962537..f58716567972 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -39,18 +39,21 @@
 
 
 static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
-				   unsigned int from, unsigned int to)
+				   unsigned int from, unsigned int len)
 {
 	struct buffer_head *head = page_buffers(page);
 	unsigned int bsize = head->b_size;
 	struct buffer_head *bh;
+	unsigned int to = from + len;
 	unsigned int start, end;
 
 	for (bh = head, start = 0; bh != head || !start;
 	     bh = bh->b_this_page, start = end) {
 		end = start + bsize;
-		if (end <= from || start >= to)
+		if (end <= from)
 			continue;
+		if (start >= to)
+			break;
 		if (gfs2_is_jdata(ip))
 			set_buffer_uptodate(bh);
 		gfs2_trans_add_data(ip->i_gl, bh);
@@ -189,7 +192,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
 			create_empty_buffers(page, inode->i_sb->s_blocksize,
 					     BIT(BH_Dirty)|BIT(BH_Uptodate));
 		}
-		gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
+		gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize);
 	}
 	return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc);
 }
@@ -255,7 +258,6 @@ static int gfs2_writepages(struct address_space *mapping,
  * @wbc: The writeback control
  * @pvec: The vector of pages
  * @nr_pages: The number of pages to write
- * @end: End position
  * @done_index: Page index
  *
  * Returns: non-zero if loop should terminate, zero otherwise
@@ -264,7 +266,7 @@ static int gfs2_writepages(struct address_space *mapping,
 static int gfs2_write_jdata_pagevec(struct address_space *mapping,
 				    struct writeback_control *wbc,
 				    struct pagevec *pvec,
-				    int nr_pages, pgoff_t end,
+				    int nr_pages,
 				    pgoff_t *done_index)
 {
 	struct inode *inode = mapping->host;
@@ -280,22 +282,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping,
 	for(i = 0; i < nr_pages; i++) {
 		struct page *page = pvec->pages[i];
 
-		/*
-		 * At this point, the page may be truncated or
-		 * invalidated (changing page->mapping to NULL), or
-		 * even swizzled back from swapper_space to tmpfs file
-		 * mapping. However, page->index will not change
-		 * because we have a reference on the page.
-		 */
-		if (page->index > end) {
-			/*
-			 * can't be range_cyclic (1st pass) because
-			 * end == -1 in that case.
-			 */
-			ret = 1;
-			break;
-		}
-
 		*done_index = page->index;
 
 		lock_page(page);
@@ -387,7 +373,7 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
 	int range_whole = 0;
 	int tag;
 
-	pagevec_init(&pvec, 0);
+	pagevec_init(&pvec);
 	if (wbc->range_cyclic) {
 		writeback_index = mapping->writeback_index; /* prev offset */
 		index = writeback_index;
@@ -413,12 +399,12 @@ retry:
 		tag_pages_for_writeback(mapping, index, end);
 	done_index = index;
 	while (!done && (index <= end)) {
-		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
-			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
+				tag);
 		if (nr_pages == 0)
 			break;
 
-		ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end, &done_index);
+		ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, &done_index);
 		if (ret)
 			done = 1;
 		if (ret > 0)
@@ -462,7 +448,8 @@ static int gfs2_jdata_writepages(struct address_space *mapping,
 
 	ret = gfs2_write_cache_jdata(mapping, wbc);
 	if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) {
-		gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH);
+		gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
+			       GFS2_LFC_JDATA_WPAGES);
 		ret = gfs2_write_cache_jdata(mapping, wbc);
 	}
 	return ret;
@@ -499,8 +486,8 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
 		return error;
 
 	kaddr = kmap_atomic(page);
-	if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
-		dsize = (dibh->b_size - sizeof(struct gfs2_dinode));
+	if (dsize > gfs2_max_stuffed_size(ip))
+		dsize = gfs2_max_stuffed_size(ip);
 	memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
 	memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
 	kunmap_atomic(kaddr);
@@ -517,10 +504,9 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
  * @file: The file to read a page for
  * @page: The page to read
  *
- * This is the core of gfs2's readpage. Its used by the internal file
- * reading code as in that case we already hold the glock. Also its
+ * This is the core of gfs2's readpage. It's used by the internal file
+ * reading code as in that case we already hold the glock. Also it's
  * called by gfs2_readpage() once the required lock has been granted.
- *
  */
 
 static int __gfs2_readpage(void *file, struct page *page)
@@ -741,7 +727,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 
 	if (gfs2_is_stuffed(ip)) {
 		error = 0;
-		if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
+		if (pos + len > gfs2_max_stuffed_size(ip)) {
 			error = gfs2_unstuff_dinode(ip, page);
 			if (error == 0)
 				goto prepare_write;
@@ -848,7 +834,8 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
 	void *kaddr;
 	unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
 
-	BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
+	BUG_ON(pos + len > gfs2_max_stuffed_size(ip));
+
 	kaddr = kmap_atomic(page);
 	memcpy(buf + pos, kaddr + pos, copied);
 	flush_dcache_page(page);
@@ -906,8 +893,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
 	struct buffer_head *dibh;
-	unsigned int from = pos & (PAGE_SIZE - 1);
-	unsigned int to = from + len;
 	int ret;
 	struct gfs2_trans *tr = current->journal_info;
 	BUG_ON(!tr);
@@ -925,7 +910,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 		return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
 
 	if (!gfs2_is_writeback(ip))
-		gfs2_page_add_databufs(ip, page, from, to);
+		gfs2_page_add_databufs(ip, page, pos & ~PAGE_MASK, len);
 
 	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
 	if (tr->tr_num_buf_new)
@@ -955,13 +940,13 @@ failed:
 }
 
 /**
- * gfs2_set_page_dirty - Page dirtying function
+ * jdata_set_page_dirty - Page dirtying function
  * @page: The page to dirty
  *
  * Returns: 1 if it dirtyed the page, or 0 otherwise
  */
  
-static int gfs2_set_page_dirty(struct page *page)
+static int jdata_set_page_dirty(struct page *page)
 {
 	SetPageChecked(page);
 	return __set_page_dirty_buffers(page);
@@ -1229,7 +1214,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
 	.readpages = gfs2_readpages,
 	.write_begin = gfs2_write_begin,
 	.write_end = gfs2_write_end,
-	.set_page_dirty = gfs2_set_page_dirty,
+	.set_page_dirty = __set_page_dirty_buffers,
 	.bmap = gfs2_bmap,
 	.invalidatepage = gfs2_invalidatepage,
 	.releasepage = gfs2_releasepage,
@@ -1246,7 +1231,7 @@ static const struct address_space_operations gfs2_jdata_aops = {
 	.readpages = gfs2_readpages,
 	.write_begin = gfs2_write_begin,
 	.write_end = gfs2_write_end,
-	.set_page_dirty = gfs2_set_page_dirty,
+	.set_page_dirty = jdata_set_page_dirty,
 	.bmap = gfs2_bmap,
 	.invalidatepage = gfs2_invalidatepage,
 	.releasepage = gfs2_releasepage,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 3dd0cceefa43..685c305cbeb6 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -13,6 +13,7 @@
 #include <linux/blkdev.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
+#include <linux/iomap.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -36,6 +37,8 @@
 struct metapath {
 	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
 	__u16 mp_list[GFS2_MAX_META_HEIGHT];
+	int mp_fheight; /* find_metapath height */
+	int mp_aheight; /* actual height (lookup height) */
 };
 
 /**
@@ -66,8 +69,8 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
 		void *kaddr = kmap(page);
 		u64 dsize = i_size_read(inode);
  
-		if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
-			dsize = dibh->b_size - sizeof(struct gfs2_dinode);
+		if (dsize > gfs2_max_stuffed_size(ip))
+			dsize = gfs2_max_stuffed_size(ip);
 
 		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
 		memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
@@ -235,9 +238,9 @@ static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
 {
 	unsigned int i;
 
+	mp->mp_fheight = height;
 	for (i = height; i--;)
 		mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
-
 }
 
 static inline unsigned int metapath_branch_start(const struct metapath *mp)
@@ -248,7 +251,7 @@ static inline unsigned int metapath_branch_start(const struct metapath *mp)
 }
 
 /**
- * metaptr1 - Return the first possible metadata pointer in a metaath buffer
+ * metaptr1 - Return the first possible metadata pointer in a metapath buffer
  * @height: The metadata height (0 = dinode)
  * @mp: The metapath
  */
@@ -276,14 +279,13 @@ static inline __be64 *metapointer(unsigned int height, const struct metapath *mp
 	return p + mp->mp_list[height];
 }
 
-static void gfs2_metapath_ra(struct gfs2_glock *gl,
-			     const struct buffer_head *bh, const __be64 *pos)
+static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
 {
-	struct buffer_head *rabh;
-	const __be64 *endp = (const __be64 *)(bh->b_data + bh->b_size);
 	const __be64 *t;
 
-	for (t = pos; t < endp; t++) {
+	for (t = start; t < end; t++) {
+		struct buffer_head *rabh;
+
 		if (!*t)
 			continue;
 
@@ -302,21 +304,22 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl,
 	}
 }
 
-/**
- * lookup_mp_height - helper function for lookup_metapath
- * @ip: the inode
- * @mp: the metapath
- * @h: the height which needs looking up
- */
-static int lookup_mp_height(struct gfs2_inode *ip, struct metapath *mp, int h)
+static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
+			     unsigned int x, unsigned int h)
 {
-	__be64 *ptr = metapointer(h, mp);
-	u64 dblock = be64_to_cpu(*ptr);
+	for (; x < h; x++) {
+		__be64 *ptr = metapointer(x, mp);
+		u64 dblock = be64_to_cpu(*ptr);
+		int ret;
 
-	if (!dblock)
-		return h + 1;
-
-	return gfs2_meta_indirect_buffer(ip, h + 1, dblock, &mp->mp_bh[h + 1]);
+		if (!dblock)
+			break;
+		ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
+		if (ret)
+			return ret;
+	}
+	mp->mp_aheight = x + 1;
+	return 0;
 }
 
 /**
@@ -333,22 +336,12 @@ static int lookup_mp_height(struct gfs2_inode *ip, struct metapath *mp, int h)
  * at which it found the unallocated block. Blocks which are found are
  * added to the mp->mp_bh[] list.
  *
- * Returns: error or height of metadata tree
+ * Returns: error
  */
 
 static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
 {
-	unsigned int end_of_metadata = ip->i_height - 1;
-	unsigned int x;
-	int ret;
-
-	for (x = 0; x < end_of_metadata; x++) {
-		ret = lookup_mp_height(ip, mp, x);
-		if (ret)
-			return ret;
-	}
-
-	return ip->i_height;
+	return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
 }
 
 /**
@@ -359,25 +352,25 @@ static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
  *
  * Similar to lookup_metapath, but does lookups for a range of heights
  *
- * Returns: error or height of metadata tree
+ * Returns: error or the number of buffers filled
  */
 
 static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
 {
-	unsigned int start_h = h - 1;
+	unsigned int x = 0;
 	int ret;
 
 	if (h) {
 		/* find the first buffer we need to look up. */
-		while (start_h > 0 && mp->mp_bh[start_h] == NULL)
-			start_h--;
-		for (; start_h < h; start_h++) {
-			ret = lookup_mp_height(ip, mp, start_h);
-			if (ret)
-				return ret;
+		for (x = h - 1; x > 0; x--) {
+			if (mp->mp_bh[x])
+				break;
 		}
 	}
-	return ip->i_height;
+	ret = __fillup_metapath(ip, mp, x, h);
+	if (ret)
+		return ret;
+	return mp->mp_aheight - x - 1;
 }
 
 static inline void release_metapath(struct metapath *mp)
@@ -468,22 +461,16 @@ enum alloc_state {
 	/* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
 };
 
-static inline unsigned int hptrs(struct gfs2_sbd *sdp, const unsigned int hgt)
-{
-	if (hgt)
-		return sdp->sd_inptrs;
-	return sdp->sd_diptrs;
-}
-
 /**
  * gfs2_bmap_alloc - Build a metadata tree of the requested height
  * @inode: The GFS2 inode
  * @lblock: The logical starting block of the extent
  * @bh_map: This is used to return the mapping details
- * @mp: The metapath
- * @sheight: The starting height (i.e. whats already mapped)
- * @height: The height to build to
+ * @flags: The iomap flags of the request
+ * @mp: The metapath, with proper height information calculated
  * @maxlen: The max number of data blocks to alloc
+ * @iomap: The iomap structure; the address and length of the newly
+ *         allocated extent are returned in iomap->addr and iomap->length
  *
  * In this routine we may have to alloc:
  *   i) Indirect blocks to grow the metadata tree height
@@ -499,63 +486,61 @@ static inline unsigned int hptrs(struct gfs2_sbd *sdp, const unsigned int hgt)
  * Returns: errno on error
  */
 
-static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
-			   struct buffer_head *bh_map, struct metapath *mp,
-			   const unsigned int sheight,
-			   const unsigned int height,
-			   const size_t maxlen)
+static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
+			    unsigned flags, struct metapath *mp)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct super_block *sb = sdp->sd_vfs;
 	struct buffer_head *dibh = mp->mp_bh[0];
-	u64 bn, dblock = 0;
+	u64 bn;
 	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
 	unsigned dblks = 0;
 	unsigned ptrs_per_blk;
-	const unsigned end_of_metadata = height - 1;
-	int ret;
-	int eob = 0;
+	const unsigned end_of_metadata = mp->mp_fheight - 1;
 	enum alloc_state state;
 	__be64 *ptr;
 	__be64 zero_bn = 0;
+	size_t maxlen = iomap->length >> inode->i_blkbits;
 
-	BUG_ON(sheight < 1);
+	BUG_ON(mp->mp_aheight < 1);
 	BUG_ON(dibh == NULL);
 
 	gfs2_trans_add_meta(ip->i_gl, dibh);
 
-	if (height == sheight) {
+	if (mp->mp_fheight == mp->mp_aheight) {
 		struct buffer_head *bh;
+		int eob;
+
 		/* Bottom indirect block exists, find unalloced extent size */
 		ptr = metapointer(end_of_metadata, mp);
 		bh = mp->mp_bh[end_of_metadata];
-		dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen,
-					   &eob);
+		dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
+					   maxlen, &eob);
 		BUG_ON(dblks < 1);
 		state = ALLOC_DATA;
 	} else {
 		/* Need to allocate indirect blocks */
-		ptrs_per_blk = height > 1 ? sdp->sd_inptrs : sdp->sd_diptrs;
+		ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
+			sdp->sd_diptrs;
 		dblks = min(maxlen, (size_t)(ptrs_per_blk -
 					     mp->mp_list[end_of_metadata]));
-		if (height == ip->i_height) {
+		if (mp->mp_fheight == ip->i_height) {
 			/* Writing into existing tree, extend tree down */
-			iblks = height - sheight;
+			iblks = mp->mp_fheight - mp->mp_aheight;
 			state = ALLOC_GROW_DEPTH;
 		} else {
 			/* Building up tree height */
 			state = ALLOC_GROW_HEIGHT;
-			iblks = height - ip->i_height;
+			iblks = mp->mp_fheight - ip->i_height;
 			branch_start = metapath_branch_start(mp);
-			iblks += (height - branch_start);
+			iblks += (mp->mp_fheight - branch_start);
 		}
 	}
 
 	/* start of the second part of the function (state machine) */
 
 	blks = dblks + iblks;
-	i = sheight;
+	i = mp->mp_aheight;
 	do {
 		int error;
 		n = blks - alloced;
@@ -573,9 +558,10 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 						 sizeof(struct gfs2_dinode));
 				zero_bn = *ptr;
 			}
-			for (; i - 1 < height - ip->i_height && n > 0; i++, n--)
+			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
+			     i++, n--)
 				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
-			if (i - 1 == height - ip->i_height) {
+			if (i - 1 == mp->mp_fheight - ip->i_height) {
 				i--;
 				gfs2_buffer_copy_tail(mp->mp_bh[i],
 						sizeof(struct gfs2_meta_header),
@@ -587,7 +573,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 					sizeof(struct gfs2_meta_header));
 				*ptr = zero_bn;
 				state = ALLOC_GROW_DEPTH;
-				for(i = branch_start; i < height; i++) {
+				for(i = branch_start; i < mp->mp_fheight; i++) {
 					if (mp->mp_bh[i] == NULL)
 						break;
 					brelse(mp->mp_bh[i]);
@@ -599,12 +585,12 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 				break;
 		/* Branching from existing tree */
 		case ALLOC_GROW_DEPTH:
-			if (i > 1 && i < height)
+			if (i > 1 && i < mp->mp_fheight)
 				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
-			for (; i < height && n > 0; i++, n--)
+			for (; i < mp->mp_fheight && n > 0; i++, n--)
 				gfs2_indirect_init(mp, ip->i_gl, i,
 						   mp->mp_list[i-1], bn++);
-			if (i == height)
+			if (i == mp->mp_fheight)
 				state = ALLOC_DATA;
 			if (n == 0)
 				break;
@@ -615,119 +601,265 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
 			dblks = n;
 			ptr = metapointer(end_of_metadata, mp);
-			dblock = bn;
+			iomap->addr = bn << inode->i_blkbits;
+			iomap->flags |= IOMAP_F_NEW;
 			while (n-- > 0)
 				*ptr++ = cpu_to_be64(bn++);
-			if (buffer_zeronew(bh_map)) {
-				ret = sb_issue_zeroout(sb, dblock, dblks,
-						       GFP_NOFS);
-				if (ret) {
-					fs_err(sdp,
-					       "Failed to zero data buffers\n");
-					clear_buffer_zeronew(bh_map);
-				}
-			}
 			break;
 		}
-	} while ((state != ALLOC_DATA) || !dblock);
+	} while (iomap->addr == IOMAP_NULL_ADDR);
 
-	ip->i_height = height;
+	iomap->length = (u64)dblks << inode->i_blkbits;
+	ip->i_height = mp->mp_fheight;
 	gfs2_add_inode_blocks(&ip->i_inode, alloced);
 	gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
-	map_bh(bh_map, inode->i_sb, dblock);
-	bh_map->b_size = dblks << inode->i_blkbits;
-	set_buffer_new(bh_map);
 	return 0;
 }
 
 /**
- * gfs2_block_map - Map a block from an inode to a disk block
+ * hole_size - figure out the size of a hole
  * @inode: The inode
- * @lblock: The logical block number
- * @bh_map: The bh to be mapped
- * @create: True if its ok to alloc blocks to satify the request
+ * @lblock: The logical starting block number
+ * @mp: The metapath
  *
- * Sets buffer_mapped() if successful, sets buffer_boundary() if a
- * read of metadata will be required before the next block can be
- * mapped. Sets buffer_new() if new blocks were allocated.
+ * Returns: The hole size in bytes
  *
- * Returns: errno
  */
+static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct metapath mp_eof;
+	u64 factor = 1;
+	int hgt;
+	u64 holesz = 0;
+	const __be64 *first, *end, *ptr;
+	const struct buffer_head *bh;
+	u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits;
+	int zeroptrs;
+	bool done = false;
+
+	/* Get another metapath, to the very last byte */
+	find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height);
+	for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) {
+		bh = mp->mp_bh[hgt];
+		if (bh) {
+			zeroptrs = 0;
+			first = metapointer(hgt, mp);
+			end = (const __be64 *)(bh->b_data + bh->b_size);
+
+			for (ptr = first; ptr < end; ptr++) {
+				if (*ptr) {
+					done = true;
+					break;
+				} else {
+					zeroptrs++;
+				}
+			}
+		} else {
+			zeroptrs = sdp->sd_inptrs;
+		}
+		if (factor * zeroptrs >= lblock_stop - lblock + 1) {
+			holesz = lblock_stop - lblock + 1;
+			break;
+		}
+		holesz += factor * zeroptrs;
 
-int gfs2_block_map(struct inode *inode, sector_t lblock,
-		   struct buffer_head *bh_map, int create)
+		factor *= sdp->sd_inptrs;
+		if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
+			(mp->mp_list[hgt - 1])++;
+	}
+	return holesz << inode->i_blkbits;
+}
+
+static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+
+	iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
+		      sizeof(struct gfs2_dinode);
+	iomap->offset = 0;
+	iomap->length = i_size_read(inode);
+	iomap->type = IOMAP_MAPPED;
+	iomap->flags = IOMAP_F_DATA_INLINE;
+}
+
+/**
+ * gfs2_iomap_begin - Map blocks from an inode to disk blocks
+ * @inode: The inode
+ * @pos: Starting position in bytes
+ * @length: Length to map, in bytes
+ * @flags: iomap flags
+ * @iomap: The iomap structure
+ *
+ * Returns: errno
+ */
+int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
+		     unsigned flags, struct iomap *iomap)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	unsigned int bsize = sdp->sd_sb.sb_bsize;
-	const size_t maxlen = bh_map->b_size >> inode->i_blkbits;
+	struct metapath mp = { .mp_aheight = 1, };
+	unsigned int factor = sdp->sd_sb.sb_bsize;
 	const u64 *arr = sdp->sd_heightsize;
 	__be64 *ptr;
-	u64 size;
-	struct metapath mp;
-	int ret;
+	sector_t lblock;
+	sector_t lend;
+	int ret = 0;
 	int eob;
 	unsigned int len;
 	struct buffer_head *bh;
 	u8 height;
 
-	BUG_ON(maxlen == 0);
+	trace_gfs2_iomap_start(ip, pos, length, flags);
+	if (!length) {
+		ret = -EINVAL;
+		goto out;
+	}
 
-	memset(&mp, 0, sizeof(mp));
-	bmap_lock(ip, create);
-	clear_buffer_mapped(bh_map);
-	clear_buffer_new(bh_map);
-	clear_buffer_boundary(bh_map);
-	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
+	if (gfs2_is_stuffed(ip)) {
+		if (flags & IOMAP_REPORT) {
+			gfs2_stuffed_iomap(inode, iomap);
+			if (pos >= iomap->length)
+				ret = -ENOENT;
+			goto out;
+		}
+		BUG_ON(!(flags & IOMAP_WRITE));
+	}
+
+	lblock = pos >> inode->i_blkbits;
+	lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits;
+
+	iomap->offset = lblock << inode->i_blkbits;
+	iomap->addr = IOMAP_NULL_ADDR;
+	iomap->type = IOMAP_HOLE;
+	iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
+	iomap->flags = IOMAP_F_MERGED;
+	bmap_lock(ip, flags & IOMAP_WRITE);
+
+	/*
+	 * Directory data blocks have a struct gfs2_meta_header header, so the
+	 * remaining size is smaller than the filesystem block size.  Logical
+	 * block numbers for directories are in units of this remaining size!
+	 */
 	if (gfs2_is_dir(ip)) {
-		bsize = sdp->sd_jbsize;
+		factor = sdp->sd_jbsize;
 		arr = sdp->sd_jheightsize;
 	}
 
 	ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
 	if (ret)
-		goto out;
+		goto out_release;
 
 	height = ip->i_height;
-	size = (lblock + 1) * bsize;
-	while (size > arr[height])
+	while ((lblock + 1) * factor > arr[height])
 		height++;
 	find_metapath(sdp, lblock, &mp, height);
-	ret = 1;
 	if (height > ip->i_height || gfs2_is_stuffed(ip))
 		goto do_alloc;
+
 	ret = lookup_metapath(ip, &mp);
-	if (ret < 0)
-		goto out;
-	if (ret != ip->i_height)
+	if (ret)
+		goto out_release;
+
+	if (mp.mp_aheight != ip->i_height)
 		goto do_alloc;
+
 	ptr = metapointer(ip->i_height - 1, &mp);
 	if (*ptr == 0)
 		goto do_alloc;
-	map_bh(bh_map, inode->i_sb, be64_to_cpu(*ptr));
+
+	iomap->type = IOMAP_MAPPED;
+	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
+
 	bh = mp.mp_bh[ip->i_height - 1];
-	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, &eob);
-	bh_map->b_size = (len << inode->i_blkbits);
+	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
 	if (eob)
-		set_buffer_boundary(bh_map);
-	ret = 0;
-out:
+		iomap->flags |= IOMAP_F_BOUNDARY;
+	iomap->length = (u64)len << inode->i_blkbits;
+
+out_release:
 	release_metapath(&mp);
-	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
-	bmap_unlock(ip, create);
+	bmap_unlock(ip, flags & IOMAP_WRITE);
+out:
+	trace_gfs2_iomap_end(ip, iomap, ret);
 	return ret;
 
 do_alloc:
-	/* All allocations are done here, firstly check create flag */
-	if (!create) {
-		BUG_ON(gfs2_is_stuffed(ip));
-		ret = 0;
+	if (flags & IOMAP_WRITE) {
+		ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+	} else if (flags & IOMAP_REPORT) {
+		loff_t size = i_size_read(inode);
+		if (pos >= size)
+			ret = -ENOENT;
+		else if (height <= ip->i_height)
+			iomap->length = hole_size(inode, lblock, &mp);
+		else
+			iomap->length = size - pos;
+	}
+	goto out_release;
+}
+
+/**
+ * gfs2_block_map - Map one or more blocks of an inode to a disk block
+ * @inode: The inode
+ * @lblock: The logical block number
+ * @bh_map: The bh to be mapped
+ * @create: True if it's ok to alloc blocks to satisfy the request
+ *
+ * The size of the requested mapping is defined in bh_map->b_size.
+ *
+ * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
+ * when @lblock is not mapped.  Sets buffer_mapped(bh_map) and
+ * bh_map->b_size to indicate the size of the mapping when @lblock and
+ * successive blocks are mapped, up to the requested size.
+ *
+ * Sets buffer_boundary() if a read of metadata will be required
+ * before the next block can be mapped. Sets buffer_new() if new
+ * blocks were allocated.
+ *
+ * Returns: errno
+ */
+
+int gfs2_block_map(struct inode *inode, sector_t lblock,
+		   struct buffer_head *bh_map, int create)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct iomap iomap;
+	int ret, flags = 0;
+
+	clear_buffer_mapped(bh_map);
+	clear_buffer_new(bh_map);
+	clear_buffer_boundary(bh_map);
+	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
+
+	if (create)
+		flags |= IOMAP_WRITE;
+	ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
+			       bh_map->b_size, flags, &iomap);
+	if (ret) {
+		if (!create && ret == -ENOENT) {
+			/* Return unmapped buffer beyond the end of file.  */
+			ret = 0;
+		}
 		goto out;
 	}
 
-	/* At this point ret is the tree depth of already allocated blocks */
-	ret = gfs2_bmap_alloc(inode, lblock, bh_map, &mp, ret, height, maxlen);
-	goto out;
+	if (iomap.length > bh_map->b_size) {
+		iomap.length = bh_map->b_size;
+		iomap.flags &= ~IOMAP_F_BOUNDARY;
+	}
+	if (iomap.addr != IOMAP_NULL_ADDR)
+		map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
+	bh_map->b_size = iomap.length;
+	if (iomap.flags & IOMAP_F_BOUNDARY)
+		set_buffer_boundary(bh_map);
+	if (iomap.flags & IOMAP_F_NEW)
+		set_buffer_new(bh_map);
+
+out:
+	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
+	return ret;
 }
 
 /*
@@ -755,17 +887,18 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
 }
 
 /**
- * gfs2_block_truncate_page - Deal with zeroing out data for truncate
+ * gfs2_block_zero_range - Deal with zeroing out data
  *
  * This is partly borrowed from ext3.
  */
-static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
+static int gfs2_block_zero_range(struct inode *inode, loff_t from,
+				 unsigned int length)
 {
-	struct inode *inode = mapping->host;
+	struct address_space *mapping = inode->i_mapping;
 	struct gfs2_inode *ip = GFS2_I(inode);
 	unsigned long index = from >> PAGE_SHIFT;
 	unsigned offset = from & (PAGE_SIZE-1);
-	unsigned blocksize, iblock, length, pos;
+	unsigned blocksize, iblock, pos;
 	struct buffer_head *bh;
 	struct page *page;
 	int err;
@@ -775,7 +908,6 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
 		return 0;
 
 	blocksize = inode->i_sb->s_blocksize;
-	length = blocksize - (offset & (blocksize - 1));
 	iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
 
 	if (!page_has_buffers(page))
@@ -845,11 +977,24 @@ static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize
 	int error;
 
 	while (oldsize != newsize) {
+		struct gfs2_trans *tr;
+		unsigned int offs;
+
 		chunk = oldsize - newsize;
 		if (chunk > max_chunk)
 			chunk = max_chunk;
+
+		offs = oldsize & ~PAGE_MASK;
+		if (offs && chunk > PAGE_SIZE)
+			chunk = offs + ((chunk - offs) & PAGE_MASK);
+
 		truncate_pagecache(inode, oldsize - chunk);
 		oldsize -= chunk;
+
+		tr = current->journal_info;
+		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
+			continue;
+
 		gfs2_trans_end(sdp);
 		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
 		if (error)
@@ -859,13 +1004,13 @@ static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize
 	return 0;
 }
 
-static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
+static int trunc_start(struct inode *inode, u64 newsize)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct address_space *mapping = inode->i_mapping;
-	struct buffer_head *dibh;
+	struct buffer_head *dibh = NULL;
 	int journaled = gfs2_is_jdata(ip);
+	u64 oldsize = inode->i_size;
 	int error;
 
 	if (journaled)
@@ -884,10 +1029,13 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
 	if (gfs2_is_stuffed(ip)) {
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
 	} else {
-		if (newsize & (u64)(sdp->sd_sb.sb_bsize - 1)) {
-			error = gfs2_block_truncate_page(mapping, newsize);
+		unsigned int blocksize = i_blocksize(inode);
+		unsigned int offs = newsize & (blocksize - 1);
+		if (offs) {
+			error = gfs2_block_zero_range(inode, newsize,
+						      blocksize - offs);
 			if (error)
-				goto out_brelse;
+				goto out;
 		}
 		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
 	}
@@ -901,15 +1049,10 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
 	else
 		truncate_pagecache(inode, newsize);
 
-	if (error) {
-		brelse(dibh);
-		return error;
-	}
-
-out_brelse:
-	brelse(dibh);
 out:
-	gfs2_trans_end(sdp);
+	brelse(dibh);
+	if (current->journal_info)
+		gfs2_trans_end(sdp);
 	return error;
 }
 
@@ -917,10 +1060,11 @@ out:
  * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
  * @ip: inode
  * @rg_gh: holder of resource group glock
- * @mp: current metapath fully populated with buffers
+ * @bh: buffer head to sweep
+ * @start: starting point in bh
+ * @end: end point in bh
+ * @meta: true if bh points to metadata (rather than data)
  * @btotal: place to keep count of total blocks freed
- * @hgt: height we're processing
- * @first: true if this is the first call to this function for this height
  *
  * We sweep a metadata buffer (provided by the metapath) for blocks we need to
  * free, and free them all. However, we do it one rgrp at a time. If this
@@ -935,47 +1079,46 @@ out:
  *          *btotal has the total number of blocks freed
  */
 static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
-			      const struct metapath *mp, u32 *btotal, int hgt,
-			      bool preserve1)
+			      struct buffer_head *bh, __be64 *start, __be64 *end,
+			      bool meta, u32 *btotal)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *rgd;
 	struct gfs2_trans *tr;
-	struct buffer_head *bh = mp->mp_bh[hgt];
-	__be64 *top, *bottom, *p;
+	__be64 *p;
 	int blks_outside_rgrp;
 	u64 bn, bstart, isize_blks;
 	s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
-	int meta = ((hgt != ip->i_height - 1) ? 1 : 0);
 	int ret = 0;
 	bool buf_in_tr = false; /* buffer was added to transaction */
 
-	if (gfs2_metatype_check(sdp, bh,
-				(hgt ? GFS2_METATYPE_IN : GFS2_METATYPE_DI)))
-		return -EIO;
-
 more_rgrps:
+	rgd = NULL;
+	if (gfs2_holder_initialized(rd_gh)) {
+		rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
+		gfs2_assert_withdraw(sdp,
+			     gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
+	}
 	blks_outside_rgrp = 0;
 	bstart = 0;
 	blen = 0;
-	top = metapointer(hgt, mp); /* first ptr from metapath */
-	/* If we're keeping some data at the truncation point, we've got to
-	   preserve the metadata tree by adding 1 to the starting metapath. */
-	if (preserve1)
-		top++;
-
-	bottom = (__be64 *)(bh->b_data + bh->b_size);
 
-	for (p = top; p < bottom; p++) {
+	for (p = start; p < end; p++) {
 		if (!*p)
 			continue;
 		bn = be64_to_cpu(*p);
-		if (gfs2_holder_initialized(rd_gh)) {
-			rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
-			gfs2_assert_withdraw(sdp,
-				     gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
+
+		if (rgd) {
+			if (!rgrp_contains_block(rgd, bn)) {
+				blks_outside_rgrp++;
+				continue;
+			}
 		} else {
-			rgd = gfs2_blk2rgrpd(sdp, bn, false);
+			rgd = gfs2_blk2rgrpd(sdp, bn, true);
+			if (unlikely(!rgd)) {
+				ret = -EIO;
+				goto out;
+			}
 			ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
 						 0, rd_gh);
 			if (ret)
@@ -987,11 +1130,6 @@ more_rgrps:
 				gfs2_rs_deltree(&ip->i_res);
 		}
 
-		if (!rgrp_contains_block(rgd, bn)) {
-			blks_outside_rgrp++;
-			continue;
-		}
-
 		/* The size of our transactions will be unknown until we
 		   actually process all the metadata blocks that relate to
 		   the rgrp. So we estimate. We know it can't be more than
@@ -1010,7 +1148,7 @@ more_rgrps:
 				jblocks_rqsted += isize_blks;
 			revokes = jblocks_rqsted;
 			if (meta)
-				revokes += hptrs(sdp, hgt);
+				revokes += end - start;
 			else if (ip->i_depth)
 				revokes += sdp->sd_inptrs;
 			ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
@@ -1068,7 +1206,11 @@ out_unlock:
 					    outside the rgrp we just processed,
 					    do it all over again. */
 		if (current->journal_info) {
-			struct buffer_head *dibh = mp->mp_bh[0];
+			struct buffer_head *dibh;
+
+			ret = gfs2_meta_inode_buffer(ip, &dibh);
+			if (ret)
+				goto out;
 
 			/* Every transaction boundary, we rewrite the dinode
 			   to keep its di_blocks current in case of failure. */
@@ -1076,6 +1218,7 @@ out_unlock:
 				current_time(&ip->i_inode);
 			gfs2_trans_add_meta(ip->i_gl, dibh);
 			gfs2_dinode_out(ip, dibh->b_data);
+			brelse(dibh);
 			up_write(&ip->i_rw_mutex);
 			gfs2_trans_end(sdp);
 		}
@@ -1087,38 +1230,48 @@ out:
 	return ret;
 }
 
+/* Do @mp and @list hold identical indices for the first @h heights? */
+static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
+{
+	/* memcmp() yields 0 exactly when the two prefixes match */
+	return !memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0]));
+}
+
 /**
  * find_nonnull_ptr - find a non-null pointer given a metapath and height
- * assumes the metapath is valid (with buffers) out to height h
  * @mp: starting metapath
  * @h: desired height to search
  *
+ * Assumes the metapath is valid (with buffers) out to height h.
  * Returns: true if a non-null pointer was found in the metapath buffer
  *          false if all remaining pointers are NULL in the buffer
  */
 static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
-			     unsigned int h)
+			     unsigned int h,
+			     __u16 *end_list, unsigned int end_aligned)
 {
-	__be64 *ptr;
-	unsigned int ptrs = hptrs(sdp, h) - 1;
+	struct buffer_head *bh = mp->mp_bh[h];
+	__be64 *first, *ptr, *end;
+
+	first = metaptr1(h, mp);
+	ptr = first + mp->mp_list[h];
+	end = (__be64 *)(bh->b_data + bh->b_size);
+	if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
+		bool keep_end = h < end_aligned;
+		end = first + end_list[h] + keep_end;
+	}
 
-	while (true) {
-		ptr = metapointer(h, mp);
+	while (ptr < end) {
 		if (*ptr) { /* if we have a non-null pointer */
-			/* Now zero the metapath after the current height. */
+			mp->mp_list[h] = ptr - first;
 			h++;
 			if (h < GFS2_MAX_META_HEIGHT)
-				memset(&mp->mp_list[h], 0,
-				       (GFS2_MAX_META_HEIGHT - h) *
-				       sizeof(mp->mp_list[0]));
+				mp->mp_list[h] = 0;
 			return true;
 		}
-
-		if (mp->mp_list[h] < ptrs)
-			mp->mp_list[h]++;
-		else
-			return false; /* no more pointers in this buffer */
+		ptr++;
 	}
+	return false;
 }
 
 enum dealloc_states {
@@ -1128,49 +1281,134 @@ enum dealloc_states {
 	DEALLOC_DONE = 3,       /* process complete */
 };
 
-static bool mp_eq_to_hgt(struct metapath *mp, __u16 *nbof, unsigned int h)
+static inline void
+metapointer_range(struct metapath *mp, int height,
+		  __u16 *start_list, unsigned int start_aligned,
+		  __u16 *end_list, unsigned int end_aligned,
+		  __be64 **start, __be64 **end)
 {
-	if (memcmp(mp->mp_list, nbof, h * sizeof(mp->mp_list[0])))
-		return false;
-	return true;
+	struct buffer_head *bh = mp->mp_bh[height];
+	__be64 *first;
+
+	first = metaptr1(height, mp);
+	*start = first;
+	if (mp_eq_to_hgt(mp, start_list, height)) {
+		bool keep_start = height < start_aligned;
+		*start = first + start_list[height] + keep_start;
+	}
+	*end = (__be64 *)(bh->b_data + bh->b_size);
+	if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
+		bool keep_end = height < end_aligned;
+		*end = first + end_list[height] + keep_end;
+	}
+}
+
+/* Report whether mp->mp_list[@height] has reached the limit for @height. */
+static inline bool walk_done(struct gfs2_sbd *sdp,
+			     struct metapath *mp, int height,
+			     __u16 *end_list, unsigned int end_aligned)
+{
+	__u16 limit;
+
+	if (!end_list)	/* no end point given: use sd_inptrs/sd_diptrs */
+		limit = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
+	else if (mp_eq_to_hgt(mp, end_list, height))
+		limit = end_list[height] + (height < end_aligned);
+	else
+		return false;	/* not on the end path down to @height */
+	return mp->mp_list[height] >= limit;
+}
 
 /**
- * trunc_dealloc - truncate a file down to a desired size
+ * punch_hole - deallocate blocks in a file
  * @ip: inode to truncate
- * @newsize: The desired size of the file
+ * @offset: the start of the hole
+ * @length: the size of the hole (or 0 for truncate)
+ *
+ * Punch a hole into a file or truncate a file at a given position.  This
+ * function operates in whole blocks (@offset and @length are rounded
+ * accordingly); partially filled blocks must be cleared otherwise.
  *
- * This function truncates a file to newsize. It works from the
- * bottom up, and from the right to the left. In other words, it strips off
- * the highest layer (data) before stripping any of the metadata. Doing it
- * this way is best in case the operation is interrupted by power failure, etc.
- * The dinode is rewritten in every transaction to guarantee integrity.
+ * This function works from the bottom up, and from the right to the left. In
+ * other words, it strips off the highest layer (data) before stripping any of
+ * the metadata. Doing it this way is best in case the operation is interrupted
+ * by power failure, etc.  The dinode is rewritten in every transaction to
+ * guarantee integrity.
  */
-static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
+static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct metapath mp;
+	u64 maxsize = sdp->sd_heightsize[ip->i_height];
+	struct metapath mp = {};
 	struct buffer_head *dibh, *bh;
 	struct gfs2_holder rd_gh;
-	u64 lblock;
-	__u16 nbof[GFS2_MAX_META_HEIGHT]; /* new beginning of truncation */
+	unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
+	u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
+	__u16 start_list[GFS2_MAX_META_HEIGHT];
+	__u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
+	unsigned int start_aligned, uninitialized_var(end_aligned);
 	unsigned int strip_h = ip->i_height - 1;
 	u32 btotal = 0;
 	int ret, state;
 	int mp_h; /* metapath buffers are read in to this height */
-	sector_t last_ra = 0;
 	u64 prev_bnr = 0;
-	bool preserve1; /* need to preserve the first meta pointer? */
+	__be64 *start, *end;
 
-	if (!newsize)
-		lblock = 0;
-	else
-		lblock = (newsize - 1) >> sdp->sd_sb.sb_bsize_shift;
+	if (offset >= maxsize) {
+		/*
+		 * The starting point lies beyond the allocated meta-data;
+		 * there are no blocks to deallocate.
+		 */
+		return 0;
+	}
+
+	/*
+	 * The start position of the hole is defined by lblock, start_list, and
+	 * start_aligned.  The end position of the hole is defined by lend,
+	 * end_list, and end_aligned.
+	 *
+	 * start_aligned and end_aligned define down to which height the start
+	 * and end positions are aligned to the metadata tree (i.e., the
+	 * position is a multiple of the metadata granularity at the height
+	 * above).  This determines at which heights additional meta pointers
+	 * need to be preserved for the remaining data.
+	 */
+
+	if (length) {
+		u64 end_offset = offset + length;
+		u64 lend;
+
+		/*
+		 * Clip the end at the maximum file size for the given height:
+		 * that's how far the metadata goes; files bigger than that
+		 * will have additional layers of indirection.
+		 */
+		if (end_offset > maxsize)
+			end_offset = maxsize;
+		lend = end_offset >> bsize_shift;
+
+		if (lblock >= lend)
+			return 0;
+
+		find_metapath(sdp, lend, &mp, ip->i_height);
+		end_list = __end_list;
+		memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
+
+		for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
+			if (end_list[mp_h])
+				break;
+		}
+		end_aligned = mp_h;
+	}
 
-	memset(&mp, 0, sizeof(mp));
 	find_metapath(sdp, lblock, &mp, ip->i_height);
+	memcpy(start_list, mp.mp_list, sizeof(start_list));
 
-	memcpy(&nbof, &mp.mp_list, sizeof(nbof));
+	for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
+		if (start_list[mp_h])
+			break;
+	}
+	start_aligned = mp_h;
 
 	ret = gfs2_meta_inode_buffer(ip, &dibh);
 	if (ret)
@@ -1178,7 +1416,17 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
 
 	mp.mp_bh[0] = dibh;
 	ret = lookup_metapath(ip, &mp);
-	if (ret == ip->i_height)
+	if (ret)
+		goto out_metapath;
+
+	/* issue read-ahead on metadata */
+	for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
+		metapointer_range(&mp, mp_h, start_list, start_aligned,
+				  end_list, end_aligned, &start, &end);
+		gfs2_metapath_ra(ip->i_gl, start, end);
+	}
+
+	if (mp.mp_aheight == ip->i_height)
 		state = DEALLOC_MP_FULL; /* We have a complete metapath */
 	else
 		state = DEALLOC_FILL_MP; /* deal with partial metapath */
@@ -1199,20 +1447,6 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
 		/* Truncate a full metapath at the given strip height.
 		 * Note that strip_h == mp_h in order to be in this state. */
 		case DEALLOC_MP_FULL:
-			if (mp_h > 0) { /* issue read-ahead on metadata */
-				__be64 *top;
-
-				bh = mp.mp_bh[mp_h - 1];
-				if (bh->b_blocknr != last_ra) {
-					last_ra = bh->b_blocknr;
-					top = metaptr1(mp_h - 1, &mp);
-					gfs2_metapath_ra(ip->i_gl, bh, top);
-				}
-			}
-			/* If we're truncating to a non-zero size and the mp is
-			   at the beginning of file for the strip height, we
-			   need to preserve the first metadata pointer. */
-			preserve1 = (newsize && mp_eq_to_hgt(&mp, nbof, mp_h));
 			bh = mp.mp_bh[mp_h];
 			gfs2_assert_withdraw(sdp, bh);
 			if (gfs2_assert_withdraw(sdp,
@@ -1224,8 +1458,28 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
 				       prev_bnr, ip->i_height, strip_h, mp_h);
 			}
 			prev_bnr = bh->b_blocknr;
-			ret = sweep_bh_for_rgrps(ip, &rd_gh, &mp, &btotal,
-						 mp_h, preserve1);
+
+			if (gfs2_metatype_check(sdp, bh,
+						(mp_h ? GFS2_METATYPE_IN :
+							GFS2_METATYPE_DI))) {
+				ret = -EIO;
+				goto out;
+			}
+
+			/*
+			 * Below, passing end_aligned as 0 gives us the
+			 * metapointer range excluding the end point: the end
+			 * point is the first metapath we must not deallocate!
+			 */
+
+			metapointer_range(&mp, mp_h, start_list, start_aligned,
+					  end_list, 0 /* end_aligned */,
+					  &start, &end);
+			ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
+						 start, end,
+						 mp_h != ip->i_height - 1,
+						 &btotal);
+
 			/* If we hit an error or just swept dinode buffer,
 			   just exit. */
 			if (ret || !mp_h) {
@@ -1249,20 +1503,20 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
 			   stripping the previous level of metadata. */
 			if (mp_h == 0) {
 				strip_h--;
-				memcpy(&mp.mp_list, &nbof, sizeof(nbof));
+				memcpy(mp.mp_list, start_list, sizeof(start_list));
 				mp_h = strip_h;
 				state = DEALLOC_FILL_MP;
 				break;
 			}
 			mp.mp_list[mp_h] = 0;
 			mp_h--; /* search one metadata height down */
-			if (mp.mp_list[mp_h] >= hptrs(sdp, mp_h) - 1)
-				break; /* loop around in the same state */
 			mp.mp_list[mp_h]++;
+			if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
+				break;
 			/* Here we've found a part of the metapath that is not
 			 * allocated. We need to search at that height for the
 			 * next non-null pointer. */
-			if (find_nonnull_ptr(sdp, &mp, mp_h)) {
+			if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
 				state = DEALLOC_FILL_MP;
 				mp_h++;
 			}
@@ -1277,18 +1531,29 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
 			if (ret < 0)
 				goto out;
 
+			/* issue read-ahead on metadata */
+			if (mp.mp_aheight > 1) {
+				for (; ret > 1; ret--) {
+					metapointer_range(&mp, mp.mp_aheight - ret,
+							  start_list, start_aligned,
+							  end_list, end_aligned,
+							  &start, &end);
+					gfs2_metapath_ra(ip->i_gl, start, end);
+				}
+			}
+
 			/* If buffers found for the entire strip height */
-			if ((ret == ip->i_height) && (mp_h == strip_h)) {
+			if (mp.mp_aheight - 1 == strip_h) {
 				state = DEALLOC_MP_FULL;
 				break;
 			}
-			if (ret < ip->i_height) /* We have a partial height */
-				mp_h = ret - 1;
+			if (mp.mp_aheight < ip->i_height) /* We have a partial height */
+				mp_h = mp.mp_aheight - 1;
 
 			/* If we find a non-null block pointer, crawl a bit
 			   higher up in the metapath and try again, otherwise
 			   we need to look lower for a new starting point. */
-			if (find_nonnull_ptr(sdp, &mp, mp_h))
+			if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
 				mp_h++;
 			else
 				state = DEALLOC_MP_LOWER;
@@ -1366,7 +1631,6 @@ out:
 /**
  * do_shrink - make a file smaller
  * @inode: the inode
- * @oldsize: the current inode size
  * @newsize: the size to make the file
  *
  * Called with an exclusive lock on @inode. The @size must
@@ -1375,18 +1639,18 @@ out:
  * Returns: errno
  */
 
-static int do_shrink(struct inode *inode, u64 oldsize, u64 newsize)
+static int do_shrink(struct inode *inode, u64 newsize)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	int error;
 
-	error = trunc_start(inode, oldsize, newsize);
+	error = trunc_start(inode, newsize);
 	if (error < 0)
 		return error;
 	if (gfs2_is_stuffed(ip))
 		return 0;
 
-	error = trunc_dealloc(ip, newsize);
+	error = punch_hole(ip, newsize, 0);
 	if (error == 0)
 		error = trunc_end(ip);
 
@@ -1395,10 +1659,9 @@ static int do_shrink(struct inode *inode, u64 oldsize, u64 newsize)
 
 void gfs2_trim_blocks(struct inode *inode)
 {
-	u64 size = inode->i_size;
 	int ret;
 
-	ret = do_shrink(inode, size, size);
+	ret = do_shrink(inode, inode->i_size);
 	WARN_ON(ret != 0);
 }
 
@@ -1431,8 +1694,7 @@ static int do_grow(struct inode *inode, u64 size)
 	int error;
 	int unstuff = 0;
 
-	if (gfs2_is_stuffed(ip) &&
-	    (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
+	if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
 		error = gfs2_quota_lock_check(ip, &ap);
 		if (error)
 			return error;
@@ -1492,7 +1754,6 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	int ret;
-	u64 oldsize;
 
 	BUG_ON(!S_ISREG(inode->i_mode));
 
@@ -1506,13 +1767,12 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
 	if (ret)
 		goto out;
 
-	oldsize = inode->i_size;
-	if (newsize >= oldsize) {
+	if (newsize >= inode->i_size) {
 		ret = do_grow(inode, newsize);
 		goto out;
 	}
 
-	ret = do_shrink(inode, oldsize, newsize);
+	ret = do_shrink(inode, newsize);
 out:
 	gfs2_rsqa_delete(ip, NULL);
 	return ret;
@@ -1521,7 +1781,7 @@ out:
 int gfs2_truncatei_resume(struct gfs2_inode *ip)
 {
 	int error;
-	error = trunc_dealloc(ip, i_size_read(&ip->i_inode));
+	error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
 	if (!error)
 		error = trunc_end(ip);
 	return error;
@@ -1529,7 +1789,7 @@ int gfs2_truncatei_resume(struct gfs2_inode *ip)
 
 int gfs2_file_dealloc(struct gfs2_inode *ip)
 {
-	return trunc_dealloc(ip, 0);
+	return punch_hole(ip, 0, 0);
 }
 
 /**
@@ -1669,8 +1929,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
 		return 0;
 
 	if (gfs2_is_stuffed(ip)) {
-		if (offset + len >
-		    sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
+		if (offset + len > gfs2_max_stuffed_size(ip))
 			return 1;
 		return 0;
 	}
@@ -1697,3 +1956,123 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
 	return 0;
 }
 
+/* Zero [offset, offset + length), clipped to i_size, in the dinode buffer. */
+static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	loff_t size = inode->i_size;
+	struct buffer_head *dibh;
+	int error;
+
+	if (offset >= size)
+		return 0;
+	if (offset + length > size)
+		length = size - offset;
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error)
+		return error;
+	gfs2_trans_add_meta(ip->i_gl, dibh);
+	memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0, length);
+	brelse(dibh);
+	return 0;
+}
+
+/* Truncate the page cache for [offset, offset + length) chunk by chunk,
+ * cycling the transaction after every chunk that has touched it.
+ * Returns 0, or the error from gfs2_trans_begin(). */
+static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
+					 loff_t length)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
+	int error;
+
+	while (length) {
+		struct gfs2_trans *tr;
+		loff_t chunk = length;
+		unsigned int offs = offset & ~PAGE_MASK;
+
+		if (chunk > max_chunk)
+			chunk = max_chunk;
+		if (offs && chunk > PAGE_SIZE)
+			chunk = offs + ((chunk - offs) & PAGE_MASK);
+		/* lend is the last byte to truncate (inclusive) */
+		truncate_pagecache_range(inode, offset, offset + chunk - 1);
+		offset += chunk;
+		length -= chunk;
+
+		tr = current->journal_info;
+		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
+			continue;	/* transaction untouched: keep using it */
+
+		gfs2_trans_end(sdp);
+		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
+		if (error)
+			return error;
+	}
+	return 0;
+}
+
+/* Zero the partial blocks at the edges of [offset, offset + length), drop the
+ * page cache for the range, then (unless stuffed) call punch_hole(). */
+int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
+{
+	struct inode *inode = file_inode(file);
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	int error;
+
+	if (gfs2_is_jdata(ip))
+		error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
+					 GFS2_JTRUNC_REVOKES);
+	else
+		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
+	if (error)
+		return error;
+
+	if (gfs2_is_stuffed(ip)) {
+		error = stuffed_zero_range(inode, offset, length);
+		if (error)
+			goto out;
+	} else {
+		unsigned int start_off, end_off, blocksize;
+
+		blocksize = i_blocksize(inode);
+		start_off = offset & (blocksize - 1);
+		end_off = (offset + length) & (blocksize - 1);
+		if (start_off) {
+			unsigned int len = length;
+			if (length > blocksize - start_off)
+				len = blocksize - start_off;
+			error = gfs2_block_zero_range(inode, offset, len);
+			if (error)
+				goto out;
+			if (start_off + length < blocksize)
+				end_off = 0;	/* hole ends in this block */
+		}
+		if (end_off) {
+			error = gfs2_block_zero_range(inode,
+				offset + length - end_off, end_off);
+			if (error)
+				goto out;
+		}
+	}
+
+	if (gfs2_is_jdata(ip)) {
+		BUG_ON(!current->journal_info);
+		error = gfs2_journaled_truncate_range(inode, offset, length);
+		if (error)	/* transaction restart failed: free nothing */
+			goto out;
+	} else
+		truncate_pagecache_range(inode, offset, offset + length - 1);
+	file_update_time(file);
+	mark_inode_dirty(inode);
+	if (current->journal_info)
+		gfs2_trans_end(sdp);
+	if (!gfs2_is_stuffed(ip))
+		error = punch_hole(ip, offset, length);
+out:
+	if (current->journal_info)
+		gfs2_trans_end(sdp);
+	return error;
+}
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index 81ded5e2aaa2..c3402fe00653 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -10,6 +10,8 @@
 #ifndef __BMAP_DOT_H__
 #define __BMAP_DOT_H__
 
+#include <linux/iomap.h>
+
 #include "inode.h"
 
 struct inode;
@@ -47,6 +49,8 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
 extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
 extern int gfs2_block_map(struct inode *inode, sector_t lblock,
 			  struct buffer_head *bh, int create);
+extern int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
+			    unsigned flags, struct iomap *iomap);
 extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new,
 			   u64 *dblock, unsigned *extlen);
 extern int gfs2_setattr_size(struct inode *inode, u64 size);
@@ -57,5 +61,6 @@ extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
 				     unsigned int len);
 extern int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd);
 extern void gfs2_free_journal_extents(struct gfs2_jdesc *jd);
+extern int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length);
 
 #endif /* __BMAP_DOT_H__ */
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 06a0d1947c77..d9fb0ad6cc30 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -170,8 +170,7 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
 	if (!size)
 		return 0;
 
-	if (gfs2_is_stuffed(ip) &&
-	    offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
+	if (gfs2_is_stuffed(ip) && offset + size <= gfs2_max_stuffed_size(ip))
 		return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset,
 					      size);
 
@@ -1941,7 +1940,6 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 {
 	struct buffer_head *bh;
 	struct gfs2_dirent *dent;
-	int error;
 
 	dent = gfs2_dirent_search(&dip->i_inode, filename, gfs2_dirent_find, &bh);
 	if (!dent) {
@@ -1954,18 +1952,10 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 	gfs2_trans_add_meta(dip->i_gl, bh);
 	gfs2_inum_out(nip, dent);
 	dent->de_type = cpu_to_be16(new_type);
-
-	if (dip->i_diskflags & GFS2_DIF_EXHASH) {
-		brelse(bh);
-		error = gfs2_meta_inode_buffer(dip, &bh);
-		if (error)
-			return error;
-		gfs2_trans_add_meta(dip->i_gl, bh);
-	}
+	brelse(bh);
 
 	dip->i_inode.i_mtime = dip->i_inode.i_ctime = current_time(&dip->i_inode);
-	gfs2_dinode_out(dip, bh->b_data);
-	brelse(bh);
+	mark_inode_dirty_sync(&dip->i_inode);
 	return 0;
 }
 
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 33a0cb5701a3..4b71f021a9e2 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -60,9 +60,7 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int whence)
 	loff_t error;
 
 	switch (whence) {
-	case SEEK_END: /* These reference inode->i_size */
-	case SEEK_DATA:
-	case SEEK_HOLE:
+	case SEEK_END:
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
 					   &i_gh);
 		if (!error) {
@@ -70,8 +68,21 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int whence)
 			gfs2_glock_dq_uninit(&i_gh);
 		}
 		break;
+
+	case SEEK_DATA:
+		error = gfs2_seek_data(file, offset);
+		break;
+
+	case SEEK_HOLE:
+		error = gfs2_seek_hole(file, offset);
+		break;
+
 	case SEEK_CUR:
 	case SEEK_SET:
+		/*
+		 * These don't reference inode->i_size and don't depend on the
+		 * block mapping, so we don't need the glock.
+		 */
 		error = generic_file_llseek(file, offset, whence);
 		break;
 	default:
@@ -108,45 +119,22 @@ static int gfs2_readdir(struct file *file, struct dir_context *ctx)
 }
 
 /**
- * fsflags_cvt
- * @table: A table of 32 u32 flags
- * @val: a 32 bit value to convert
- *
- * This function can be used to convert between fsflags values and
- * GFS2's own flags values.
+ * fsflag_gfs2flag
  *
- * Returns: the converted flags
+ * The FS_JOURNAL_DATA_FL flag maps to GFS2_DIF_INHERIT_JDATA for directories,
+ * and to GFS2_DIF_JDATA for non-directories.
  */
-static u32 fsflags_cvt(const u32 *table, u32 val)
-{
-	u32 res = 0;
-	while(val) {
-		if (val & 1)
-			res |= *table;
-		table++;
-		val >>= 1;
-	}
-	return res;
-}
-
-static const u32 fsflags_to_gfs2[32] = {
-	[3] = GFS2_DIF_SYNC,
-	[4] = GFS2_DIF_IMMUTABLE,
-	[5] = GFS2_DIF_APPENDONLY,
-	[7] = GFS2_DIF_NOATIME,
-	[12] = GFS2_DIF_EXHASH,
-	[14] = GFS2_DIF_INHERIT_JDATA,
-	[17] = GFS2_DIF_TOPDIR,
-};
-
-static const u32 gfs2_to_fsflags[32] = {
-	[gfs2fl_Sync] = FS_SYNC_FL,
-	[gfs2fl_Immutable] = FS_IMMUTABLE_FL,
-	[gfs2fl_AppendOnly] = FS_APPEND_FL,
-	[gfs2fl_NoAtime] = FS_NOATIME_FL,
-	[gfs2fl_ExHash] = FS_INDEX_FL,
-	[gfs2fl_TopLevel] = FS_TOPDIR_FL,
-	[gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
+static struct {
+	u32 fsflag;
+	u32 gfsflag;
+} fsflag_gfs2flag[] = {
+	{FS_SYNC_FL, GFS2_DIF_SYNC},
+	{FS_IMMUTABLE_FL, GFS2_DIF_IMMUTABLE},
+	{FS_APPEND_FL, GFS2_DIF_APPENDONLY},
+	{FS_NOATIME_FL, GFS2_DIF_NOATIME},
+	{FS_INDEX_FL, GFS2_DIF_EXHASH},
+	{FS_TOPDIR_FL, GFS2_DIF_TOPDIR},
+	{FS_JOURNAL_DATA_FL, GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA},
 };
 
 static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
@@ -154,17 +142,23 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
 	struct inode *inode = file_inode(filp);
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder gh;
-	int error;
-	u32 fsflags;
+	int i, error;
+	u32 gfsflags, fsflags = 0;
 
 	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
 	error = gfs2_glock_nq(&gh);
 	if (error)
 		goto out_uninit;
 
-	fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_diskflags);
-	if (!S_ISDIR(inode->i_mode) && ip->i_diskflags & GFS2_DIF_JDATA)
-		fsflags |= FS_JOURNAL_DATA_FL;
+	gfsflags = ip->i_diskflags;
+	if (S_ISDIR(inode->i_mode))
+		gfsflags &= ~GFS2_DIF_JDATA;
+	else
+		gfsflags &= ~GFS2_DIF_INHERIT_JDATA;
+	for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++)
+		if (gfsflags & fsflag_gfs2flag[i].gfsflag)
+			fsflags |= fsflag_gfs2flag[i].fsflag;
+
 	if (put_user(fsflags, ptr))
 		error = -EFAULT;
 
@@ -199,7 +193,6 @@ void gfs2_set_inode_flags(struct inode *inode)
 			     GFS2_DIF_APPENDONLY|		\
 			     GFS2_DIF_NOATIME|			\
 			     GFS2_DIF_SYNC|			\
-			     GFS2_DIF_SYSTEM|			\
 			     GFS2_DIF_TOPDIR|			\
 			     GFS2_DIF_INHERIT_JDATA)
 
@@ -238,10 +231,6 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
 	if ((new_flags ^ flags) == 0)
 		goto out;
 
-	error = -EINVAL;
-	if ((new_flags ^ flags) & ~GFS2_FLAGS_USER_SET)
-		goto out;
-
 	error = -EPERM;
 	if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE))
 		goto out;
@@ -256,14 +245,18 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
 			goto out;
 	}
 	if ((flags ^ new_flags) & GFS2_DIF_JDATA) {
-		if (flags & GFS2_DIF_JDATA)
-			gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH);
+		if (new_flags & GFS2_DIF_JDATA)
+			gfs2_log_flush(sdp, ip->i_gl,
+				       GFS2_LOG_HEAD_FLUSH_NORMAL |
+				       GFS2_LFC_SET_FLAGS);
 		error = filemap_fdatawrite(inode->i_mapping);
 		if (error)
 			goto out;
 		error = filemap_fdatawait(inode->i_mapping);
 		if (error)
 			goto out;
+		if (new_flags & GFS2_DIF_JDATA)
+			gfs2_ordered_del_inode(ip);
 	}
 	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
 	if (error)
@@ -271,6 +264,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
 	error = gfs2_meta_inode_buffer(ip, &bh);
 	if (error)
 		goto out_trans_end;
+	inode->i_ctime = current_time(inode);
 	gfs2_trans_add_meta(ip->i_gl, bh);
 	ip->i_diskflags = new_flags;
 	gfs2_dinode_out(ip, bh->b_data);
@@ -289,19 +283,33 @@ out_drop_write:
 static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
 {
 	struct inode *inode = file_inode(filp);
-	u32 fsflags, gfsflags;
+	u32 fsflags, gfsflags = 0;
+	u32 mask;
+	int i;
 
 	if (get_user(fsflags, ptr))
 		return -EFAULT;
 
-	gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags);
-	if (!S_ISDIR(inode->i_mode)) {
-		gfsflags &= ~GFS2_DIF_TOPDIR;
-		if (gfsflags & GFS2_DIF_INHERIT_JDATA)
-			gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA);
-		return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_SYSTEM);
+	for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++) {
+		if (fsflags & fsflag_gfs2flag[i].fsflag) {
+			fsflags &= ~fsflag_gfs2flag[i].fsflag;
+			gfsflags |= fsflag_gfs2flag[i].gfsflag;
+		}
+	}
+	if (fsflags || gfsflags & ~GFS2_FLAGS_USER_SET)
+		return -EINVAL;
+
+	mask = GFS2_FLAGS_USER_SET;
+	if (S_ISDIR(inode->i_mode)) {
+		mask &= ~GFS2_DIF_JDATA;
+	} else {
+		/* The GFS2_DIF_TOPDIR flag is only valid for directories. */
+		if (gfsflags & GFS2_DIF_TOPDIR)
+			return -EINVAL;
+		mask &= ~(GFS2_DIF_TOPDIR | GFS2_DIF_INHERIT_JDATA);
 	}
-	return do_gfs2_set_flags(filp, gfsflags, ~(GFS2_DIF_SYSTEM | GFS2_DIF_JDATA));
+
+	return do_gfs2_set_flags(filp, gfsflags, mask);
 }
 
 static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
@@ -721,11 +729,12 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 			   int mode)
 {
+	struct super_block *sb = inode->i_sb;
 	struct gfs2_inode *ip = GFS2_I(inode);
+	loff_t end = offset + len;
 	struct buffer_head *dibh;
+	struct iomap iomap;
 	int error;
-	unsigned int nr_blks;
-	sector_t lblock = offset >> inode->i_blkbits;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (unlikely(error))
@@ -739,21 +748,19 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 			goto out;
 	}
 
-	while (len) {
-		struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
-		bh_map.b_size = len;
-		set_buffer_zeronew(&bh_map);
-
-		error = gfs2_block_map(inode, lblock, &bh_map, 1);
-		if (unlikely(error))
+	while (offset < end) {
+		error = gfs2_iomap_begin(inode, offset, end - offset,
+					 IOMAP_WRITE, &iomap);
+		if (error)
 			goto out;
-		len -= bh_map.b_size;
-		nr_blks = bh_map.b_size >> inode->i_blkbits;
-		lblock += nr_blks;
-		if (!buffer_new(&bh_map))
+		offset = iomap.offset + iomap.length;
+		if (iomap.type != IOMAP_HOLE)
 			continue;
-		if (unlikely(!buffer_zeronew(&bh_map))) {
-			error = -EIO;
+		error = sb_issue_zeroout(sb, iomap.addr >> inode->i_blkbits,
+					 iomap.length >> inode->i_blkbits,
+					 GFP_NOFS);
+		if (error) {
+			fs_err(GFS2_SB(inode), "Failed to zero data buffers\n");
 			goto out;
 		}
 	}
@@ -801,7 +808,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_alloc_parms ap = { .aflags = 0, };
 	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
-	loff_t bytes, max_bytes, max_blks = UINT_MAX;
+	loff_t bytes, max_bytes, max_blks;
 	int error;
 	const loff_t pos = offset;
 	const loff_t count = len;
@@ -853,7 +860,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
 			return error;
 		/* ap.allowed tells us how many blocks quota will allow
 		 * us to write. Check if this reduces max_blks */
-		if (ap.allowed && ap.allowed < max_blks)
+		max_blks = UINT_MAX;
+		if (ap.allowed)
 			max_blks = ap.allowed;
 
 		error = gfs2_inplace_reserve(ip, &ap);
@@ -918,7 +926,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
 	struct gfs2_holder gh;
 	int ret;
 
-	if (mode & ~FALLOC_FL_KEEP_SIZE)
+	if (mode & ~(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE))
 		return -EOPNOTSUPP;
 	/* fallocate is needed by gfs2_grow to reserve space in the rindex */
 	if (gfs2_is_jdata(ip) && inode != sdp->sd_rindex)
@@ -942,13 +950,18 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
 	if (ret)
 		goto out_unlock;
 
-	ret = gfs2_rsqa_alloc(ip);
-	if (ret)
-		goto out_putw;
+	if (mode & FALLOC_FL_PUNCH_HOLE) {
+		ret = __gfs2_punch_hole(file, offset, len);
+	} else {
+		ret = gfs2_rsqa_alloc(ip);
+		if (ret)
+			goto out_putw;
 
-	ret = __gfs2_fallocate(file, mode, offset, len);
-	if (ret)
-		gfs2_rs_deltree(&ip->i_res);
+		ret = __gfs2_fallocate(file, mode, offset, len);
+
+		if (ret)
+			gfs2_rs_deltree(&ip->i_res);
+	}
 
 out_putw:
 	put_write_access(inode);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 11066d8647d2..82fb5583445c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1549,16 +1549,13 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
 	rhashtable_walk_enter(&gl_hash_table, &iter);
 
 	do {
-		gl = ERR_PTR(rhashtable_walk_start(&iter));
-		if (IS_ERR(gl))
-			goto walk_stop;
+		rhashtable_walk_start(&iter);
 
 		while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
 			if (gl->gl_name.ln_sbd == sdp &&
 			    lockref_get_not_dead(&gl->gl_lockref))
 				examiner(gl);
 
-walk_stop:
 		rhashtable_walk_stop(&iter);
 	} while (cond_resched(), gl == ERR_PTR(-EAGAIN));
 
@@ -1924,19 +1921,29 @@ void gfs2_glock_exit(void)
 	destroy_workqueue(gfs2_delete_workqueue);
 }
 
-static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
+static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n)
 {
-	while ((gi->gl = rhashtable_walk_next(&gi->hti))) {
-		if (IS_ERR(gi->gl)) {
-			if (PTR_ERR(gi->gl) == -EAGAIN)
-				continue;
-			gi->gl = NULL;
-			return;
+	if (n == 0)
+		gi->gl = rhashtable_walk_peek(&gi->hti);
+	else {
+		gi->gl = rhashtable_walk_next(&gi->hti);
+		n--;
+	}
+	for (;;) {
+		if (IS_ERR_OR_NULL(gi->gl)) {
+			if (!gi->gl)
+				return;
+			if (PTR_ERR(gi->gl) != -EAGAIN) {
+				gi->gl = NULL;
+				return;
+			}
+			n = 0;
+		} else if (gi->sdp == gi->gl->gl_name.ln_sbd &&
+			   !__lockref_is_dead(&gi->gl->gl_lockref)) {
+			if (!n--)
+				break;
 		}
-		/* Skip entries for other sb and dead entries */
-		if (gi->sdp == gi->gl->gl_name.ln_sbd &&
-		    !__lockref_is_dead(&gi->gl->gl_lockref))
-			return;
+		gi->gl = rhashtable_walk_next(&gi->hti);
 	}
 }
 
@@ -1944,18 +1951,24 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(RCU)
 {
 	struct gfs2_glock_iter *gi = seq->private;
-	loff_t n = *pos;
+	loff_t n;
 
-	rhashtable_walk_enter(&gl_hash_table, &gi->hti);
-	if (rhashtable_walk_start(&gi->hti) != 0)
-		return NULL;
+	/*
+	 * We can either stay where we are, skip to the next hash table
+	 * entry, or start from the beginning.
+	 */
+	if (*pos < gi->last_pos) {
+		rhashtable_walk_exit(&gi->hti);
+		rhashtable_walk_enter(&gl_hash_table, &gi->hti);
+		n = *pos + 1;
+	} else {
+		n = *pos - gi->last_pos;
+	}
 
-	do {
-		gfs2_glock_iter_next(gi);
-	} while (gi->gl && n--);
+	rhashtable_walk_start(&gi->hti);
 
+	gfs2_glock_iter_next(gi, n);
 	gi->last_pos = *pos;
-
 	return gi->gl;
 }
 
@@ -1966,8 +1979,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
 
 	(*pos)++;
 	gi->last_pos = *pos;
-	gfs2_glock_iter_next(gi);
-
+	gfs2_glock_iter_next(gi, 1);
 	return gi->gl;
 }
 
@@ -1978,7 +1990,6 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
 
 	gi->gl = NULL;
 	rhashtable_walk_stop(&gi->hti);
-	rhashtable_walk_exit(&gi->hti);
 }
 
 static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
@@ -2044,7 +2055,13 @@ static int __gfs2_glocks_open(struct inode *inode, struct file *file,
 		seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
 		if (seq->buf)
 			seq->size = GFS2_SEQ_GOODSIZE;
+		/*
+		 * Initially, we are "before" the first hash table entry; the
+		 * first call to rhashtable_walk_next gets us the first entry.
+		 */
+		gi->last_pos = -1;
 		gi->gl = NULL;
+		rhashtable_walk_enter(&gl_hash_table, &gi->hti);
 	}
 	return ret;
 }
@@ -2060,6 +2077,7 @@ static int gfs2_glocks_release(struct inode *inode, struct file *file)
 	struct gfs2_glock_iter *gi = seq->private;
 
 	gi->gl = NULL;
+	rhashtable_walk_exit(&gi->hti);
 	return seq_release_private(inode, file);
 }
 
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index cdd1c5f06f45..d8782a7a1e7d 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -107,7 +107,8 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
 	__gfs2_ail_flush(gl, 0, tr.tr_revokes);
 
 	gfs2_trans_end(sdp);
-	gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
+	gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
+		       GFS2_LFC_AIL_EMPTY_GL);
 }
 
 void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
@@ -128,7 +129,8 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
 		return;
 	__gfs2_ail_flush(gl, fsync, max_revokes);
 	gfs2_trans_end(sdp);
-	gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
+	gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
+		       GFS2_LFC_AIL_FLUSH);
 }
 
 /**
@@ -157,7 +159,8 @@ static void rgrp_go_sync(struct gfs2_glock *gl)
 		return;
 	GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
 
-	gfs2_log_flush(sdp, gl, NORMAL_FLUSH);
+	gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
+		       GFS2_LFC_RGRP_GO_SYNC);
 	filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
 	error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
 	mapping_set_error(mapping, error);
@@ -252,7 +255,8 @@ static void inode_go_sync(struct gfs2_glock *gl)
 
 	GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
 
-	gfs2_log_flush(gl->gl_name.ln_sbd, gl, NORMAL_FLUSH);
+	gfs2_log_flush(gl->gl_name.ln_sbd, gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
+		       GFS2_LFC_INODE_GO_SYNC);
 	filemap_fdatawrite(metamapping);
 	if (isreg) {
 		struct address_space *mapping = ip->i_inode.i_mapping;
@@ -303,7 +307,9 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
 	}
 
 	if (ip == GFS2_I(gl->gl_name.ln_sbd->sd_rindex)) {
-		gfs2_log_flush(gl->gl_name.ln_sbd, NULL, NORMAL_FLUSH);
+		gfs2_log_flush(gl->gl_name.ln_sbd, NULL,
+			       GFS2_LOG_HEAD_FLUSH_NORMAL |
+			       GFS2_LFC_INODE_GO_INVAL);
 		gl->gl_name.ln_sbd->sd_rindex_uptodate = 0;
 	}
 	if (ip && S_ISREG(ip->i_inode.i_mode))
@@ -495,7 +501,8 @@ static void freeze_go_sync(struct gfs2_glock *gl)
 			gfs2_assert_withdraw(sdp, 0);
 		}
 		queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work);
-		gfs2_log_flush(sdp, NULL, FREEZE_FLUSH);
+		gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
+			       GFS2_LFC_FREEZE_GO_SYNC);
 	}
 }
 
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 6e18e9793ec4..1b6b1e3f5caf 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -44,7 +44,6 @@ struct gfs2_log_header_host {
 	u32 lh_flags;		/* GFS2_LOG_HEAD_... */
 	u32 lh_tail;		/* Block number of log tail */
 	u32 lh_blkno;
-	u32 lh_hash;
 };
 
 /*
@@ -131,15 +130,12 @@ static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1,
 enum gfs2_state_bits {
 	BH_Pinned = BH_PrivateStart,
 	BH_Escaped = BH_PrivateStart + 1,
-	BH_Zeronew = BH_PrivateStart + 2,
 };
 
 BUFFER_FNS(Pinned, pinned)
 TAS_BUFFER_FNS(Pinned, pinned)
 BUFFER_FNS(Escaped, escaped)
 TAS_BUFFER_FNS(Escaped, escaped)
-BUFFER_FNS(Zeronew, zeronew)
-TAS_BUFFER_FNS(Zeronew, zeronew)
 
 struct gfs2_bufdata {
 	struct buffer_head *bd_bh;
@@ -861,5 +857,10 @@ static inline void gfs2_sbstats_inc(const struct gfs2_glock *gl, int which)
 
 extern struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl);
 
+static inline unsigned gfs2_max_stuffed_size(const struct gfs2_inode *ip)
+{
+	return GFS2_SB(&ip->i_inode)->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
+}
+
 #endif /* __INCORE_DOT_H__ */
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 863749e29bf9..8700eb815638 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -18,7 +18,7 @@
 #include <linux/posix_acl.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
-#include <linux/fiemap.h>
+#include <linux/iomap.h>
 #include <linux/security.h>
 #include <linux/uaccess.h>
 
@@ -189,7 +189,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 
 		gfs2_set_iop(inode);
 
-		inode->i_atime.tv_sec = 0;
+		/* Lowest possible timestamp; will be overwritten in gfs2_dinode_in. */
+		inode->i_atime.tv_sec = 1LL << (8 * sizeof(inode->i_atime.tv_sec) - 1);
 		inode->i_atime.tv_nsec = 0;
 
 		unlock_new_inode(inode);
@@ -1151,12 +1152,11 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
 
 	error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0);
 	if (error)
-		goto out_end_trans;
+		goto out_gunlock;
 
 	error = gfs2_unlink_inode(dip, dentry);
-
-out_end_trans:
 	gfs2_trans_end(sdp);
+
 out_gunlock:
 	gfs2_glock_dq(ghs + 2);
 out_rgrp:
@@ -1183,11 +1183,10 @@ out_inodes:
 static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
 			const char *symname)
 {
-	struct gfs2_sbd *sdp = GFS2_SB(dir);
 	unsigned int size;
 
 	size = strlen(symname);
-	if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
+	if (size >= gfs2_max_stuffed_size(GFS2_I(dir)))
 		return -ENAMETOOLONG;
 
 	return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0, NULL);
@@ -1204,8 +1203,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
 
 static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
-	struct gfs2_sbd *sdp = GFS2_SB(dir);
-	unsigned dsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
+	unsigned dsize = gfs2_max_stuffed_size(GFS2_I(dir));
 	return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0, NULL);
 }
 
@@ -1328,19 +1326,11 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
 static int update_moved_ino(struct gfs2_inode *ip, struct gfs2_inode *ndip,
 			    int dir_rename)
 {
-	int error;
-	struct buffer_head *dibh;
-
 	if (dir_rename)
 		return gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
 
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (error)
-		return error;
 	ip->i_inode.i_ctime = current_time(&ip->i_inode);
-	gfs2_trans_add_meta(ip->i_gl, dibh);
-	gfs2_dinode_out(ip, dibh->b_data);
-	brelse(dibh);
+	mark_inode_dirty_sync(&ip->i_inode);
 	return 0;
 }
 
@@ -1986,6 +1976,7 @@ static int gfs2_getattr(const struct path *path, struct kstat *stat,
 	struct inode *inode = d_inode(path->dentry);
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder gh;
+	u32 gfsflags;
 	int error;
 
 	gfs2_holder_mark_uninitialized(&gh);
@@ -1995,13 +1986,30 @@ static int gfs2_getattr(const struct path *path, struct kstat *stat,
 			return error;
 	}
 
+	gfsflags = ip->i_diskflags;
+	if (gfsflags & GFS2_DIF_APPENDONLY)
+		stat->attributes |= STATX_ATTR_APPEND;
+	if (gfsflags & GFS2_DIF_IMMUTABLE)
+		stat->attributes |= STATX_ATTR_IMMUTABLE;
+
+	stat->attributes_mask |= (STATX_ATTR_APPEND |
+				  STATX_ATTR_COMPRESSED |
+				  STATX_ATTR_ENCRYPTED |
+				  STATX_ATTR_IMMUTABLE |
+				  STATX_ATTR_NODUMP);
+
 	generic_fillattr(inode, stat);
+
 	if (gfs2_holder_initialized(&gh))
 		gfs2_glock_dq_uninit(&gh);
 
 	return 0;
 }
 
+const struct iomap_ops gfs2_iomap_ops = {
+	.iomap_begin = gfs2_iomap_begin,
+};
+
 static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		       u64 start, u64 len)
 {
@@ -2009,41 +2017,59 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	struct gfs2_holder gh;
 	int ret;
 
-	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
-	if (ret)
-		return ret;
-
-	inode_lock(inode);
+	inode_lock_shared(inode);
 
 	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
 	if (ret)
 		goto out;
 
-	if (gfs2_is_stuffed(ip)) {
-		u64 phys = ip->i_no_addr << inode->i_blkbits;
-		u64 size = i_size_read(inode);
-		u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
-			    FIEMAP_EXTENT_DATA_INLINE;
-		phys += sizeof(struct gfs2_dinode);
-		phys += start;
-		if (start + len > size)
-			len = size - start;
-		if (start < size)
-			ret = fiemap_fill_next_extent(fieinfo, start, phys,
-						      len, flags);
-		if (ret == 1)
-			ret = 0;
-	} else {
-		ret = __generic_block_fiemap(inode, fieinfo, start, len,
-					     gfs2_block_map);
-	}
+	ret = iomap_fiemap(inode, fieinfo, start, len, &gfs2_iomap_ops);
 
 	gfs2_glock_dq_uninit(&gh);
+
 out:
-	inode_unlock(inode);
+	inode_unlock_shared(inode);
 	return ret;
 }
 
+/* Seek to data via iomap_seek_data() while holding a shared glock. */
+loff_t gfs2_seek_data(struct file *file, loff_t offset)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	loff_t ret;
+
+	inode_lock_shared(inode);
+	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+	if (!ret) {
+		ret = iomap_seek_data(inode, offset, &gfs2_iomap_ops);
+		gfs2_glock_dq_uninit(&gh); /* release only what was acquired */
+	}
+	inode_unlock_shared(inode);
+
+	return ret < 0 ? ret : vfs_setpos(file, ret, inode->i_sb->s_maxbytes);
+}
+
+/* Seek to a hole via iomap_seek_hole() while holding a shared glock. */
+loff_t gfs2_seek_hole(struct file *file, loff_t offset)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	loff_t ret;
+
+	inode_lock_shared(inode);
+	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+	if (!ret) {
+		ret = iomap_seek_hole(inode, offset, &gfs2_iomap_ops);
+		gfs2_glock_dq_uninit(&gh); /* release only what was acquired */
+	}
+	inode_unlock_shared(inode);
+
+	return ret < 0 ? ret : vfs_setpos(file, ret, inode->i_sb->s_maxbytes);
+}
+
 const struct inode_operations gfs2_file_iops = {
 	.permission = gfs2_permission,
 	.setattr = gfs2_setattr,
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index aace8ce34a18..b5b6341a4f5c 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -109,6 +109,8 @@ extern int gfs2_setattr_simple(struct inode *inode, struct iattr *attr);
 extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
 extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
 extern int gfs2_open_common(struct inode *inode, struct file *file);
+extern loff_t gfs2_seek_data(struct file *file, loff_t offset);
+extern loff_t gfs2_seek_hole(struct file *file, loff_t offset);
 
 extern const struct inode_operations gfs2_file_iops;
 extern const struct inode_operations gfs2_dir_iops;
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 65f33a0ac190..006c6164f759 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -1091,7 +1091,7 @@ static void gdlm_recover_slot(void *arg, struct dlm_slot *slot)
 
 	spin_lock(&ls->ls_recover_spin);
 	if (ls->ls_recover_size < jid + 1) {
-		fs_err(sdp, "recover_slot jid %d gen %u short size %d",
+		fs_err(sdp, "recover_slot jid %d gen %u short size %d\n",
 		       jid, ls->ls_recover_block, ls->ls_recover_size);
 		spin_unlock(&ls->ls_recover_spin);
 		return;
@@ -1153,7 +1153,7 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
 		return;
 	}
 	if (ls->ls_recover_size < jid + 1) {
-		fs_err(sdp, "recovery_result jid %d short size %d",
+		fs_err(sdp, "recovery_result jid %d short size %d\n",
 		       jid, ls->ls_recover_size);
 		spin_unlock(&ls->ls_recover_spin);
 		return;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f72c44231406..0248835625f1 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -14,6 +14,7 @@
 #include <linux/buffer_head.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
+#include <linux/crc32c.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
@@ -72,7 +73,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
  *
  */
 
-void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
+static void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
 {
 	bd->bd_tr = NULL;
 	list_del_init(&bd->bd_ail_st_list);
@@ -538,9 +539,12 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
 	list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
 	while (!list_empty(&sdp->sd_log_le_ordered)) {
 		ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
-		list_move(&ip->i_ordered, &written);
-		if (ip->i_inode.i_mapping->nrpages == 0)
+		if (ip->i_inode.i_mapping->nrpages == 0) {
+			test_and_clear_bit(GIF_ORDERED, &ip->i_flags);
+			list_del(&ip->i_ordered);
 			continue;
+		}
+		list_move(&ip->i_ordered, &written);
 		spin_unlock(&sdp->sd_ordered_lock);
 		filemap_fdatawrite(ip->i_inode.i_mapping);
 		spin_lock(&sdp->sd_ordered_lock);
@@ -648,49 +652,102 @@ out_of_blocks:
 }
 
 /**
- * log_write_header - Get and initialize a journal header buffer
+ * gfs2_write_log_header - Write a journal log header buffer at sd_log_flush_head
  * @sdp: The GFS2 superblock
+ * @jd: journal descriptor of the journal to which we are writing
+ * @seq: sequence number
+ * @tail: tail of the log
+ * @flags: log header flags GFS2_LOG_HEAD_*
+ * @op_flags: flags to pass to the bio
  *
  * Returns: the initialized log buffer descriptor
  */
 
-static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
+void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
+			   u64 seq, u32 tail, u32 flags, int op_flags)
 {
 	struct gfs2_log_header *lh;
-	unsigned int tail;
-	u32 hash;
-	int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
+	u32 hash, crc;
 	struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
-	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
+	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
+	struct timespec64 tv;
+	struct super_block *sb = sdp->sd_vfs;
+	u64 addr;
+
 	lh = page_address(page);
 	clear_page(lh);
 
-	gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));
-
-	tail = current_tail(sdp);
-
 	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
 	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
 	lh->lh_header.__pad0 = cpu_to_be64(0);
 	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
 	lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
-	lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
+	lh->lh_sequence = cpu_to_be64(seq);
 	lh->lh_flags = cpu_to_be32(flags);
 	lh->lh_tail = cpu_to_be32(tail);
 	lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
-	hash = gfs2_disk_hash(page_address(page), sizeof(struct gfs2_log_header));
+	hash = ~crc32(~0, lh, LH_V1_SIZE);
 	lh->lh_hash = cpu_to_be32(hash);
 
+	tv = current_kernel_time64();
+	lh->lh_nsec = cpu_to_be32(tv.tv_nsec);
+	lh->lh_sec = cpu_to_be64(tv.tv_sec);
+	addr = gfs2_log_bmap(sdp);
+	lh->lh_addr = cpu_to_be64(addr);
+	lh->lh_jinode = cpu_to_be64(GFS2_I(jd->jd_inode)->i_no_addr);
+
+	/* We may only write local statfs, quota, etc., when writing to our
+	   own journal. The values are left 0 when recovering a journal
+	   different from our own. */
+	if (!(flags & GFS2_LOG_HEAD_RECOVERY)) {
+		lh->lh_statfs_addr =
+			cpu_to_be64(GFS2_I(sdp->sd_sc_inode)->i_no_addr);
+		lh->lh_quota_addr =
+			cpu_to_be64(GFS2_I(sdp->sd_qc_inode)->i_no_addr);
+
+		spin_lock(&sdp->sd_statfs_spin);
+		lh->lh_local_total = cpu_to_be64(l_sc->sc_total);
+		lh->lh_local_free = cpu_to_be64(l_sc->sc_free);
+		lh->lh_local_dinodes = cpu_to_be64(l_sc->sc_dinodes);
+		spin_unlock(&sdp->sd_statfs_spin);
+	}
+
+	BUILD_BUG_ON(offsetof(struct gfs2_log_header, lh_crc) != LH_V1_SIZE);
+
+	crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
+		     sb->s_blocksize - LH_V1_SIZE - 4);
+	lh->lh_crc = cpu_to_be32(crc);
+
+	gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr);
+	gfs2_log_flush_bio(sdp, REQ_OP_WRITE, op_flags);
+	log_flush_wait(sdp);
+}
+
+/**
+ * log_write_header - Write a log header at the current flush head
+ * @sdp: The GFS2 superblock
+ * @flags: The log header flags, including log header origin
+ *
+ * Writes the header through gfs2_write_log_header(); there is no return value.
+ */
+
+static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
+{
+	unsigned int tail;
+	int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
+	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
+
+	gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));
+	tail = current_tail(sdp);
+
 	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
 		gfs2_ordered_wait(sdp);
 		log_flush_wait(sdp);
 		op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
 	}
-
 	sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
-	gfs2_log_write_page(sdp, page);
-	gfs2_log_flush_bio(sdp, REQ_OP_WRITE, op_flags);
-	log_flush_wait(sdp);
+	gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++, tail,
+			      flags, op_flags);
 
 	if (sdp->sd_log_tail != tail)
 		log_pull_tail(sdp, tail);
@@ -700,11 +757,11 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
  * gfs2_log_flush - flush incore transaction(s)
  * @sdp: the filesystem
  * @gl: The glock structure to flush.  If NULL, flush the whole incore log
+ * @flags: The log header flags: GFS2_LOG_HEAD_FLUSH_* and debug flags
  *
  */
 
-void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
-		    enum gfs2_flush_type type)
+void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
 {
 	struct gfs2_trans *tr;
 	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
@@ -716,9 +773,9 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
 		up_write(&sdp->sd_log_flush_lock);
 		return;
 	}
-	trace_gfs2_log_flush(sdp, 1);
+	trace_gfs2_log_flush(sdp, 1, flags);
 
-	if (type == SHUTDOWN_FLUSH)
+	if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
 		clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 
 	sdp->sd_log_flush_head = sdp->sd_log_head;
@@ -743,11 +800,11 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
 
 	if (sdp->sd_log_head != sdp->sd_log_flush_head) {
 		log_flush_wait(sdp);
-		log_write_header(sdp, 0);
+		log_write_header(sdp, flags);
 	} else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
 		atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
 		trace_gfs2_log_blocks(sdp, -1);
-		log_write_header(sdp, 0);
+		log_write_header(sdp, flags);
 	}
 	lops_after_commit(sdp, tr);
 
@@ -764,7 +821,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
 	spin_unlock(&sdp->sd_ail_lock);
 	gfs2_log_unlock(sdp);
 
-	if (type != NORMAL_FLUSH) {
+	if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) {
 		if (!sdp->sd_log_idle) {
 			for (;;) {
 				gfs2_ail1_start(sdp);
@@ -774,16 +831,17 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
 			}
 			atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
 			trace_gfs2_log_blocks(sdp, -1);
-			log_write_header(sdp, 0);
+			log_write_header(sdp, flags);
 			sdp->sd_log_head = sdp->sd_log_flush_head;
 		}
-		if (type == SHUTDOWN_FLUSH || type == FREEZE_FLUSH)
+		if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
+			     GFS2_LOG_HEAD_FLUSH_FREEZE))
 			gfs2_log_shutdown(sdp);
-		if (type == FREEZE_FLUSH)
+		if (flags & GFS2_LOG_HEAD_FLUSH_FREEZE)
 			atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
 	}
 
-	trace_gfs2_log_flush(sdp, 0);
+	trace_gfs2_log_flush(sdp, 0, flags);
 	up_write(&sdp->sd_log_flush_lock);
 
 	kfree(tr);
@@ -879,7 +937,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
 
 	sdp->sd_log_flush_head = sdp->sd_log_head;
 
-	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT);
+	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT | GFS2_LFC_SHUTDOWN);
 
 	gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
 	gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
@@ -935,7 +993,8 @@ int gfs2_logd(void *data)
 		did_flush = false;
 		if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
 			gfs2_ail1_empty(sdp);
-			gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
+			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
+				       GFS2_LFC_LOGD_JFLUSH_REQD);
 			did_flush = true;
 		}
 
@@ -943,7 +1002,8 @@ int gfs2_logd(void *data)
 			gfs2_ail1_start(sdp);
 			gfs2_ail1_wait(sdp);
 			gfs2_ail1_empty(sdp);
-			gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
+			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
+				       GFS2_LFC_LOGD_AIL_FLUSH_REQD);
 			did_flush = true;
 		}
 
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 9499a6049212..1862e310a067 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -65,16 +65,11 @@ extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
 
 extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
 extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
-enum gfs2_flush_type {
-	NORMAL_FLUSH = 0,
-	SYNC_FLUSH,
-	SHUTDOWN_FLUSH,
-	FREEZE_FLUSH
-};
+extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
+				  u64 seq, u32 tail, u32 flags, int op_flags);
 extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
-			   enum gfs2_flush_type type);
+			   u32 flags); /* GFS2_LOG_HEAD_FLUSH_* | GFS2_LFC_* */
 extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
-extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
 extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
 
 extern void gfs2_log_shutdown(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index c8ff7b7954f0..4d6567990baf 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -18,6 +18,7 @@
 #include <linux/fs.h>
 #include <linux/list_sort.h>
 
+#include "dir.h"
 #include "gfs2.h"
 #include "incore.h"
 #include "inode.h"
@@ -138,7 +139,7 @@ static void gfs2_log_incr_head(struct gfs2_sbd *sdp)
 		sdp->sd_log_flush_head = 0;
 }
 
-static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
+u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
 {
 	unsigned int lbn = sdp->sd_log_flush_head;
 	struct gfs2_journal_extent *je;
@@ -161,7 +162,7 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
  * @bvec: The bio_vec
  * @error: The i/o status
  *
- * This finds the relavent buffers and unlocks then and sets the
+ * This finds the relevant buffers and unlocks them and sets the
  * error flag according to the status of the i/o request. This is
  * used when the log is writing data which has an in-place version
  * that is pinned in the pagecache.
@@ -306,23 +307,22 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno)
 	return gfs2_log_alloc_bio(sdp, blkno);
 }
 
-
 /**
  * gfs2_log_write - write to log
  * @sdp: the filesystem
  * @page: the page to write
  * @size: the size of the data to write
  * @offset: the offset within the page 
+ * @blkno: block number of the log entry
  *
  * Try and add the page segment to the current bio. If that fails,
  * submit the current bio to the device and create a new one, and
  * then add the page segment to that.
  */
 
-static void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
-			   unsigned size, unsigned offset)
+void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
+		    unsigned size, unsigned offset, u64 blkno)
 {
-	u64 blkno = gfs2_log_bmap(sdp);
 	struct bio *bio;
 	int ret;
 
@@ -348,7 +348,8 @@ static void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
 
 static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
 {
-	gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh));
+	gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh),
+		       gfs2_log_bmap(sdp));
 }
 
 /**
@@ -365,7 +366,8 @@ static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
 void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
 {
 	struct super_block *sb = sdp->sd_vfs;
-	gfs2_log_write(sdp, page, sb->s_blocksize, 0);
+	gfs2_log_write(sdp, page, sb->s_blocksize, 0,
+		       gfs2_log_bmap(sdp));
 }
 
 static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index e529f536c117..e4949394f054 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -26,6 +26,9 @@ extern const struct gfs2_log_operations gfs2_revoke_lops;
 extern const struct gfs2_log_operations gfs2_databuf_lops;
 
 extern const struct gfs2_log_operations *gfs2_log_ops[];
+extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp);
+extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
+			   unsigned size, unsigned offset, u64 blkno);
 extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
 extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int op, int op_flags);
 extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 0a89e6f7a314..2d55e2c3333c 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -93,7 +93,7 @@ static int __init init_gfs2_fs(void)
 
 	error = gfs2_glock_init();
 	if (error)
-		goto fail;
+		goto fail_glock;
 
 	error = -ENOMEM;
 	gfs2_glock_cachep = kmem_cache_create("gfs2_glock",
@@ -101,7 +101,7 @@ static int __init init_gfs2_fs(void)
 					      0, 0,
 					      gfs2_init_glock_once);
 	if (!gfs2_glock_cachep)
-		goto fail;
+		goto fail_cachep1;
 
 	gfs2_glock_aspace_cachep = kmem_cache_create("gfs2_glock(aspace)",
 					sizeof(struct gfs2_glock) +
@@ -109,7 +109,7 @@ static int __init init_gfs2_fs(void)
 					0, 0, gfs2_init_gl_aspace_once);
 
 	if (!gfs2_glock_aspace_cachep)
-		goto fail;
+		goto fail_cachep2;
 
 	gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
 					      sizeof(struct gfs2_inode),
@@ -118,107 +118,105 @@ static int __init init_gfs2_fs(void)
 						  SLAB_ACCOUNT,
 					      gfs2_init_inode_once);
 	if (!gfs2_inode_cachep)
-		goto fail;
+		goto fail_cachep3;
 
 	gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata",
 						sizeof(struct gfs2_bufdata),
 					        0, 0, NULL);
 	if (!gfs2_bufdata_cachep)
-		goto fail;
+		goto fail_cachep4;
 
 	gfs2_rgrpd_cachep = kmem_cache_create("gfs2_rgrpd",
 					      sizeof(struct gfs2_rgrpd),
 					      0, 0, NULL);
 	if (!gfs2_rgrpd_cachep)
-		goto fail;
+		goto fail_cachep5;
 
 	gfs2_quotad_cachep = kmem_cache_create("gfs2_quotad",
 					       sizeof(struct gfs2_quota_data),
 					       0, 0, NULL);
 	if (!gfs2_quotad_cachep)
-		goto fail;
+		goto fail_cachep6;
 
 	gfs2_qadata_cachep = kmem_cache_create("gfs2_qadata",
 					       sizeof(struct gfs2_qadata),
 					       0, 0, NULL);
 	if (!gfs2_qadata_cachep)
-		goto fail;
+		goto fail_cachep7;
 
 	error = register_shrinker(&gfs2_qd_shrinker);
 	if (error)
-		goto fail;
+		goto fail_shrinker;
 
 	error = register_filesystem(&gfs2_fs_type);
 	if (error)
-		goto fail;
+		goto fail_fs1;
 
 	error = register_filesystem(&gfs2meta_fs_type);
 	if (error)
-		goto fail_unregister;
+		goto fail_fs2;
 
 	error = -ENOMEM;
 	gfs_recovery_wq = alloc_workqueue("gfs_recovery",
 					  WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
 	if (!gfs_recovery_wq)
-		goto fail_wq;
+		goto fail_wq1;
 
 	gfs2_control_wq = alloc_workqueue("gfs2_control",
 					  WQ_UNBOUND | WQ_FREEZABLE, 0);
 	if (!gfs2_control_wq)
-		goto fail_recovery;
+		goto fail_wq2;
 
 	gfs2_freeze_wq = alloc_workqueue("freeze_workqueue", 0, 0);
 
 	if (!gfs2_freeze_wq)
-		goto fail_control;
+		goto fail_wq3;
 
 	gfs2_page_pool = mempool_create_page_pool(64, 0);
 	if (!gfs2_page_pool)
-		goto fail_freeze;
+		goto fail_mempool;
 
-	gfs2_register_debugfs();
+	error = gfs2_register_debugfs();
+	if (error)
+		goto fail_debugfs;
 
 	pr_info("GFS2 installed\n");
 
 	return 0;
 
-fail_freeze:
+fail_debugfs:
+	mempool_destroy(gfs2_page_pool);
+fail_mempool:
 	destroy_workqueue(gfs2_freeze_wq);
-fail_control:
+fail_wq3:
 	destroy_workqueue(gfs2_control_wq);
-fail_recovery:
+fail_wq2:
 	destroy_workqueue(gfs_recovery_wq);
-fail_wq:
+fail_wq1:
 	unregister_filesystem(&gfs2meta_fs_type);
-fail_unregister:
+fail_fs2:
 	unregister_filesystem(&gfs2_fs_type);
-fail:
-	list_lru_destroy(&gfs2_qd_lru);
-fail_lru:
+fail_fs1:
 	unregister_shrinker(&gfs2_qd_shrinker);
+fail_shrinker:
+	kmem_cache_destroy(gfs2_qadata_cachep);
+fail_cachep7:
+	kmem_cache_destroy(gfs2_quotad_cachep);
+fail_cachep6:
+	kmem_cache_destroy(gfs2_rgrpd_cachep);
+fail_cachep5:
+	kmem_cache_destroy(gfs2_bufdata_cachep);
+fail_cachep4:
+	kmem_cache_destroy(gfs2_inode_cachep);
+fail_cachep3:
+	kmem_cache_destroy(gfs2_glock_aspace_cachep);
+fail_cachep2:
+	kmem_cache_destroy(gfs2_glock_cachep);
+fail_cachep1:
 	gfs2_glock_exit();
-
-	if (gfs2_qadata_cachep)
-		kmem_cache_destroy(gfs2_qadata_cachep);
-
-	if (gfs2_quotad_cachep)
-		kmem_cache_destroy(gfs2_quotad_cachep);
-
-	if (gfs2_rgrpd_cachep)
-		kmem_cache_destroy(gfs2_rgrpd_cachep);
-
-	if (gfs2_bufdata_cachep)
-		kmem_cache_destroy(gfs2_bufdata_cachep);
-
-	if (gfs2_inode_cachep)
-		kmem_cache_destroy(gfs2_inode_cachep);
-
-	if (gfs2_glock_aspace_cachep)
-		kmem_cache_destroy(gfs2_glock_aspace_cachep);
-
-	if (gfs2_glock_cachep)
-		kmem_cache_destroy(gfs2_glock_cachep);
-
+fail_glock:
+	list_lru_destroy(&gfs2_qd_lru);
+fail_lru:
 	gfs2_sys_uninit();
 	return error;
 }
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index a3711f543405..e6a0a8a89ea7 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1065,15 +1065,15 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
 	sdp->sd_args = *args;
 
 	if (sdp->sd_args.ar_spectator) {
-                sb->s_flags |= MS_RDONLY;
+                sb->s_flags |= SB_RDONLY;
 		set_bit(SDF_RORECOVERY, &sdp->sd_flags);
 	}
 	if (sdp->sd_args.ar_posix_acl)
-		sb->s_flags |= MS_POSIXACL;
+		sb->s_flags |= SB_POSIXACL;
 	if (sdp->sd_args.ar_nobarrier)
 		set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
 
-	sb->s_flags |= MS_NOSEC;
+	sb->s_flags |= SB_NOSEC;
 	sb->s_magic = GFS2_MAGIC;
 	sb->s_op = &gfs2_super_ops;
 	sb->s_d_op = &gfs2_dops;
@@ -1257,7 +1257,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
 	struct gfs2_args args;
 	struct gfs2_sbd *sdp;
 
-	if (!(flags & MS_RDONLY))
+	if (!(flags & SB_RDONLY))
 		mode |= FMODE_WRITE;
 
 	bdev = blkdev_get_by_path(dev_name, mode, fs_type);
@@ -1313,15 +1313,15 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
 
 	if (s->s_root) {
 		error = -EBUSY;
-		if ((flags ^ s->s_flags) & MS_RDONLY)
+		if ((flags ^ s->s_flags) & SB_RDONLY)
 			goto error_super;
 	} else {
 		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
 		sb_set_blocksize(s, block_size(bdev));
-		error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0);
+		error = fill_super(s, &args, flags & SB_SILENT ? 1 : 0);
 		if (error)
 			goto error_super;
-		s->s_flags |= MS_ACTIVE;
+		s->s_flags |= SB_ACTIVE;
 		bdev->bd_super = s;
 	}
 
@@ -1365,7 +1365,7 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
 		pr_warn("gfs2 mount does not exist\n");
 		return ERR_CAST(s);
 	}
-	if ((flags ^ s->s_flags) & MS_RDONLY) {
+	if ((flags ^ s->s_flags) & SB_RDONLY) {
 		deactivate_locked_super(s);
 		return ERR_PTR(-EBUSY);
 	}
@@ -1382,7 +1382,7 @@ static void gfs2_kill_sb(struct super_block *sb)
 		return;
 	}
 
-	gfs2_log_flush(sdp, NULL, SYNC_FLUSH);
+	gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SYNC | GFS2_LFC_KILL_SB);
 	dput(sdp->sd_root_dir);
 	dput(sdp->sd_master_dir);
 	sdp->sd_root_dir = NULL;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e700fb162664..7a98abd340ee 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -955,7 +955,8 @@ out:
 		gfs2_glock_dq_uninit(&ghs[qx]);
 	inode_unlock(&ip->i_inode);
 	kfree(ghs);
-	gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, NORMAL_FLUSH);
+	gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl,
+		       GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_DO_SYNC);
 	return error;
 }
 
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 5e47c935a515..836f29480be6 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -45,6 +45,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	int ret;
+
+	ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
 	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
 		return 0;
 	ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 9395a3db1a60..d8b622c375ab 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -14,12 +14,15 @@
 #include <linux/buffer_head.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
+#include <linux/crc32c.h>
+#include <linux/ktime.h>
 
 #include "gfs2.h"
 #include "incore.h"
 #include "bmap.h"
 #include "glock.h"
 #include "glops.h"
+#include "log.h"
 #include "lops.h"
 #include "meta_io.h"
 #include "recovery.h"
@@ -117,22 +120,6 @@ void gfs2_revoke_clean(struct gfs2_jdesc *jd)
 	}
 }
 
-static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
-{
-	const struct gfs2_log_header *str = buf;
-
-	if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
-	    str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH))
-		return 1;
-
-	lh->lh_sequence = be64_to_cpu(str->lh_sequence);
-	lh->lh_flags = be32_to_cpu(str->lh_flags);
-	lh->lh_tail = be32_to_cpu(str->lh_tail);
-	lh->lh_blkno = be32_to_cpu(str->lh_blkno);
-	lh->lh_hash = be32_to_cpu(str->lh_hash);
-	return 0;
-}
-
 /**
  * get_log_header - read the log header for a given segment
  * @jd: the journal
@@ -150,29 +137,37 @@ static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
 static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 			  struct gfs2_log_header_host *head)
 {
+	struct gfs2_log_header *lh;
 	struct buffer_head *bh;
-	struct gfs2_log_header_host uninitialized_var(lh);
-	const u32 nothing = 0;
-	u32 hash;
+	u32 hash, crc;
 	int error;
 
 	error = gfs2_replay_read_block(jd, blk, &bh);
 	if (error)
 		return error;
+	lh = (void *)bh->b_data;
 
-	hash = crc32_le((u32)~0, bh->b_data, sizeof(struct gfs2_log_header) -
-					     sizeof(u32));
-	hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
-	hash ^= (u32)~0;
-	error = gfs2_log_header_in(&lh, bh->b_data);
-	brelse(bh);
+	hash = crc32(~0, lh, LH_V1_SIZE - 4);
+	hash = ~crc32_le_shift(hash, 4);  /* assume lh_hash is zero */
 
-	if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
-		return 1;
+	crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
+		     bh->b_size - LH_V1_SIZE - 4);
 
-	*head = lh;
+	error = lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
+		lh->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH) ||
+		be32_to_cpu(lh->lh_blkno) != blk ||
+		be32_to_cpu(lh->lh_hash) != hash ||
+		(lh->lh_crc != 0 && be32_to_cpu(lh->lh_crc) != crc);
 
-	return 0;
+	/* Copy the fields out before brelse(): lh points into bh->b_data */
+	if (!error) {
+		head->lh_sequence = be64_to_cpu(lh->lh_sequence);
+		head->lh_flags = be32_to_cpu(lh->lh_flags);
+		head->lh_tail = be32_to_cpu(lh->lh_tail);
+		head->lh_blkno = be32_to_cpu(lh->lh_blkno);
+	}
+	brelse(bh);
+	return error;
 }
 
 /**
@@ -370,62 +365,22 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
 
 /**
  * clean_journal - mark a dirty journal as being clean
- * @sdp: the filesystem
  * @jd: the journal
- * @gl: the journal's glock
  * @head: the head journal to start from
  *
  * Returns: errno
  */
 
-static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
+static void clean_journal(struct gfs2_jdesc *jd,
+			  struct gfs2_log_header_host *head)
 {
-	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
-	unsigned int lblock;
-	struct gfs2_log_header *lh;
-	u32 hash;
-	struct buffer_head *bh;
-	int error;
-	struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
 
-	lblock = head->lh_blkno;
-	gfs2_replay_incr_blk(jd, &lblock);
-	bh_map.b_size = 1 << ip->i_inode.i_blkbits;
-	error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0);
-	if (error)
-		return error;
-	if (!bh_map.b_blocknr) {
-		gfs2_consist_inode(ip);
-		return -EIO;
-	}
-
-	bh = sb_getblk(sdp->sd_vfs, bh_map.b_blocknr);
-	lock_buffer(bh);
-	memset(bh->b_data, 0, bh->b_size);
-	set_buffer_uptodate(bh);
-	clear_buffer_dirty(bh);
-	unlock_buffer(bh);
-
-	lh = (struct gfs2_log_header *)bh->b_data;
-	memset(lh, 0, sizeof(struct gfs2_log_header));
-	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
-	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
-	lh->lh_header.__pad0 = cpu_to_be64(0);
-	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
-	lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
-	lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
-	lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
-	lh->lh_blkno = cpu_to_be32(lblock);
-	hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
-	lh->lh_hash = cpu_to_be32(hash);
-
-	set_buffer_dirty(bh);
-	if (sync_dirty_buffer(bh))
-		gfs2_io_error_bh(sdp, bh);
-	brelse(bh);
-
-	return error;
+	sdp->sd_log_flush_head = head->lh_blkno;
+	gfs2_replay_incr_blk(jd, &sdp->sd_log_flush_head);
+	gfs2_write_log_header(sdp, jd, head->lh_sequence + 1, 0,
+			      GFS2_LOG_HEAD_UNMOUNT | GFS2_LOG_HEAD_RECOVERY,
+			      REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC);
 }
 
 
@@ -455,12 +410,13 @@ void gfs2_recover_func(struct work_struct *work)
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 	struct gfs2_log_header_host head;
 	struct gfs2_holder j_gh, ji_gh, thaw_gh;
-	unsigned long t;
+	ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep;
 	int ro = 0;
 	unsigned int pass;
 	int error;
 	int jlocked = 0;
 
+	t_start = ktime_get();
 	if (sdp->sd_args.ar_spectator ||
 	    (jd->jd_jid != sdp->sd_lockstruct.ls_jid)) {
 		fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
@@ -492,6 +448,7 @@ void gfs2_recover_func(struct work_struct *work)
 		fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
 	}
 
+	t_jlck = ktime_get();
 	fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
 
 	error = gfs2_jdesc_check(jd);
@@ -501,13 +458,12 @@ void gfs2_recover_func(struct work_struct *work)
 	error = gfs2_find_jhead(jd, &head);
 	if (error)
 		goto fail_gunlock_ji;
+	t_jhd = ktime_get();
 
 	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
 		fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
 			jd->jd_jid);
 
-		t = jiffies;
-
 		/* Acquire a shared hold on the freeze lock */
 
 		error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
@@ -541,6 +497,7 @@ void gfs2_recover_func(struct work_struct *work)
 			goto fail_gunlock_thaw;
 		}
 
+		t_tlck = ktime_get();
 		fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
 
 		for (pass = 0; pass < 2; pass++) {
@@ -552,14 +509,17 @@ void gfs2_recover_func(struct work_struct *work)
 				goto fail_gunlock_thaw;
 		}
 
-		error = clean_journal(jd, &head);
-		if (error)
-			goto fail_gunlock_thaw;
+		clean_journal(jd, &head);
 
 		gfs2_glock_dq_uninit(&thaw_gh);
-		t = DIV_ROUND_UP(jiffies - t, HZ);
-		fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
-			jd->jd_jid, t);
+		t_rep = ktime_get();
+		fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, "
+			"jhead:%lldms, tlck:%lldms, replay:%lldms]\n",
+			jd->jd_jid, ktime_ms_delta(t_rep, t_start),
+			ktime_ms_delta(t_jlck, t_start),
+			ktime_ms_delta(t_jhd, t_jlck),
+			ktime_ms_delta(t_tlck, t_jhd),
+			ktime_ms_delta(t_rep, t_tlck));
 	}
 
 	gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 95b2a57ded33..8b683917a27e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -34,6 +34,7 @@
 #include "log.h"
 #include "inode.h"
 #include "trace_gfs2.h"
+#include "dir.h"
 
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
@@ -489,6 +490,13 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
  * @blk: The data block number
  * @exact: True if this needs to be an exact match
  *
+ * The @exact argument should be set to true by most callers. The exception
+ * is when we need to match blocks which are not represented by the rgrp
+ * bitmap, but which are part of the rgrp (i.e. padding blocks) which are
+ * there for alignment purposes. Another way of looking at it is that @exact
+ * matches only valid data/metadata blocks, but with @exact false, it will
+ * match any block within the extent of the rgrp.
+ *
  * Returns: The resource group, or NULL if not found
  */
 
@@ -1040,17 +1048,30 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
 	rgd->rd_free = be32_to_cpu(str->rg_free);
 	rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
 	rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
+	/* rd_data0, rd_data and rd_bitbytes already set from rindex */
 }
 
 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
 {
+	struct gfs2_rgrpd *next = gfs2_rgrpd_get_next(rgd);
 	struct gfs2_rgrp *str = buf;
+	u32 crc;
 
 	str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
 	str->rg_free = cpu_to_be32(rgd->rd_free);
 	str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
-	str->__pad = cpu_to_be32(0);
+	if (next == NULL)
+		str->rg_skip = 0;
+	else if (next->rd_addr > rgd->rd_addr)
+		str->rg_skip = cpu_to_be32(next->rd_addr - rgd->rd_addr);
 	str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
+	str->rg_data0 = cpu_to_be64(rgd->rd_data0);
+	str->rg_data = cpu_to_be32(rgd->rd_data);
+	str->rg_bitbytes = cpu_to_be32(rgd->rd_bitbytes);
 	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
+	/* Checksum last so that it covers the zeroed rg_reserved area */
+	str->rg_crc = 0;
+	crc = gfs2_disk_hash(buf, sizeof(struct gfs2_rgrp));
+	str->rg_crc = cpu_to_be32(crc);
 }
 
@@ -1318,7 +1339,7 @@ start_new_extent:
 
 fail:
 	if (sdp->sd_args.ar_discard)
-		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
+		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem\n", rv);
 	sdp->sd_args.ar_discard = 0;
 	return -EIO;
 }
@@ -2072,7 +2093,8 @@ next_rgrp:
 		}
 		/* Flushing the log may release space */
 		if (loops == 2)
-			gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
+			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
+				       GFS2_LFC_INPLACE_RESERVE);
 	}
 
 	return -ENOSPC;
@@ -2453,12 +2475,12 @@ void gfs2_unlink_di(struct inode *inode)
 	update_rgrp_lvb_unlinked(rgd, 1);
 }
 
-static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
+void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	struct gfs2_rgrpd *tmp_rgd;
 
-	tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
+	tmp_rgd = rgblk_free(sdp, ip->i_no_addr, 1, GFS2_BLKST_FREE);
 	if (!tmp_rgd)
 		return;
 	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
@@ -2474,12 +2496,6 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
 	update_rgrp_lvb_unlinked(rgd, -1);
 
 	gfs2_statfs_change(sdp, 0, +1, -1);
-}
-
-
-void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
-{
-	gfs2_free_uninit_di(rgd, ip->i_no_addr);
 	trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
 	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 8e54f2e3a304..cf5c7f3080d2 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -754,17 +754,26 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
 	struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
 	struct backing_dev_info *bdi = inode_to_bdi(metamapping->host);
 	int ret = 0;
+	bool flush_all = (wbc->sync_mode == WB_SYNC_ALL || gfs2_is_jdata(ip));
 
-	if (wbc->sync_mode == WB_SYNC_ALL)
-		gfs2_log_flush(GFS2_SB(inode), ip->i_gl, NORMAL_FLUSH);
+	if (flush_all)
+		gfs2_log_flush(GFS2_SB(inode), ip->i_gl,
+			       GFS2_LOG_HEAD_FLUSH_NORMAL |
+			       GFS2_LFC_WRITE_INODE);
 	if (bdi->wb.dirty_exceeded)
 		gfs2_ail1_flush(sdp, wbc);
 	else
 		filemap_fdatawrite(metamapping);
-	if (wbc->sync_mode == WB_SYNC_ALL)
+	if (flush_all)
 		ret = filemap_fdatawait(metamapping);
 	if (ret)
 		mark_inode_dirty_sync(inode);
+	else {
+		spin_lock(&inode->i_lock);
+		if (!(inode->i_state & I_DIRTY))
+			gfs2_ordered_del_inode(ip);
+		spin_unlock(&inode->i_lock);
+	}
 	return ret;
 }
 
@@ -791,7 +800,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
 	int need_endtrans = 0;
 	int ret;
 
-	if (!(flags & (I_DIRTY_DATASYNC|I_DIRTY_SYNC)))
+	if (!(flags & I_DIRTY_INODE))
 		return;
 	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
 		return;
@@ -852,7 +861,8 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 	gfs2_quota_sync(sdp->sd_vfs, 0);
 	gfs2_statfs_sync(sdp->sd_vfs, 0);
 
-	gfs2_log_flush(sdp, NULL, SHUTDOWN_FLUSH);
+	gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
+		       GFS2_LFC_MAKE_FS_RO);
 	wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0);
 	gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks);
 
@@ -945,7 +955,8 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
 
 	gfs2_quota_sync(sb, -1);
 	if (wait)
-		gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
+		gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
+			       GFS2_LFC_SYNC_FS);
 	return sdp->sd_log_error;
 }
 
@@ -1255,10 +1266,10 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
 		return -EINVAL;
 
 	if (sdp->sd_args.ar_spectator)
-		*flags |= MS_RDONLY;
+		*flags |= SB_RDONLY;
 
-	if ((sb->s_flags ^ *flags) & MS_RDONLY) {
-		if (*flags & MS_RDONLY)
+	if ((sb->s_flags ^ *flags) & SB_RDONLY) {
+		if (*flags & SB_RDONLY)
 			error = gfs2_make_fs_ro(sdp);
 		else
 			error = gfs2_make_fs_rw(sdp);
@@ -1268,9 +1279,9 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
 
 	sdp->sd_args = args;
 	if (sdp->sd_args.ar_posix_acl)
-		sb->s_flags |= MS_POSIXACL;
+		sb->s_flags |= SB_POSIXACL;
 	else
-		sb->s_flags &= ~MS_POSIXACL;
+		sb->s_flags &= ~SB_POSIXACL;
 	if (sdp->sd_args.ar_nobarrier)
 		set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
 	else
@@ -1649,7 +1660,8 @@ alloc_failed:
 	goto out_unlock;
 
 out_truncate:
-	gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH);
+	gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
+		       GFS2_LFC_EVICT_INODE);
 	metamapping = gfs2_glock2aspace(ip->i_gl);
 	if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) {
 		filemap_fdatawrite(metamapping);
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 9eb9d0a1abd9..c191fa58a1df 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -112,7 +112,7 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
 	}
 
 	if (error) {
-		fs_warn(sdp, "freeze %d error %d", n, error);
+		fs_warn(sdp, "freeze %d error %d\n", n, error);
 		return error;
 	}
 
@@ -679,7 +679,7 @@ fail_tune:
 	sysfs_remove_group(&sdp->sd_kobj, &tune_group);
 fail_reg:
 	free_percpu(sdp->sd_lkstats);
-	fs_err(sdp, "error %d adding sysfs files", error);
+	fs_err(sdp, "error %d adding sysfs files\n", error);
 	if (sysfs_frees_sdp)
 		kobject_put(&sdp->sd_kobj);
 	else
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 49ac55da4e33..cb10b95efe0f 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM gfs2
 
@@ -12,6 +13,7 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/writeback.h>
 #include <linux/ktime.h>
+#include <linux/iomap.h>
 #include "incore.h"
 #include "glock.h"
 #include "rgrp.h"
@@ -351,26 +353,29 @@ TRACE_EVENT(gfs2_pin,
 /* Flushing the log */
 TRACE_EVENT(gfs2_log_flush,
 
-	TP_PROTO(const struct gfs2_sbd *sdp, int start),
+	TP_PROTO(const struct gfs2_sbd *sdp, int start, u32 flags),
 
-	TP_ARGS(sdp, start),
+	TP_ARGS(sdp, start, flags),
 
 	TP_STRUCT__entry(
 		__field(        dev_t,  dev                     )
 		__field(	int,	start			)
 		__field(	u64,	log_seq			)
+		__field(	u32,	flags			)
 	),
 
 	TP_fast_assign(
 		__entry->dev            = sdp->sd_vfs->s_dev;
 		__entry->start		= start;
 		__entry->log_seq	= sdp->sd_log_sequence;
+		__entry->flags		= flags;
 	),
 
-	TP_printk("%u,%u log flush %s %llu",
+	TP_printk("%u,%u log flush %s %llu %llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->start ? "start" : "end",
-		  (unsigned long long)__entry->log_seq)
+		  (unsigned long long)__entry->log_seq,
+		  (unsigned long long)__entry->flags)
 );
 
 /* Reserving/releasing blocks in the log */
@@ -469,6 +474,75 @@ TRACE_EVENT(gfs2_bmap,
 		  __entry->errno)
 );
 
+TRACE_EVENT(gfs2_iomap_start,
+
+	TP_PROTO(const struct gfs2_inode *ip, loff_t pos, ssize_t length,
+		 u16 flags),
+
+	TP_ARGS(ip, pos, length, flags),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	inum			)
+		__field(	loff_t, pos			)
+		__field(	ssize_t, length			)
+		__field(	u16,	flags			)
+	),
+
+	TP_fast_assign(
+		__entry->dev            = ip->i_gl->gl_name.ln_sbd->sd_vfs->s_dev;
+		__entry->inum		= ip->i_no_addr;
+		__entry->pos		= pos;
+		__entry->length		= length;
+		__entry->flags		= flags;
+	),
+
+	TP_printk("%u,%u bmap %llu iomap start %llu/%lu flags:%08x",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long long)__entry->inum,
+		  (unsigned long long)__entry->pos,
+		  (unsigned long)__entry->length, (u16)__entry->flags)
+);
+
+TRACE_EVENT(gfs2_iomap_end,
+
+	TP_PROTO(const struct gfs2_inode *ip, struct iomap *iomap, int ret),
+
+	TP_ARGS(ip, iomap, ret),
+
+	TP_STRUCT__entry(
+		__field(        dev_t,  dev                     )
+		__field(	u64,	inum			)
+		__field(	loff_t, offset			)
+		__field(	ssize_t, length			)
+		__field(	sector_t, pblock		)
+		__field(	u16,	flags			)
+		__field(	u16,	type			)
+		__field(	int,	ret			)
+	),
+
+	TP_fast_assign(
+		__entry->dev            = ip->i_gl->gl_name.ln_sbd->sd_vfs->s_dev;
+		__entry->inum		= ip->i_no_addr;
+		__entry->offset		= iomap->offset;
+		__entry->length		= iomap->length;
+		__entry->pblock		= iomap->addr == IOMAP_NULL_ADDR ? 0 :
+					 (iomap->addr >> ip->i_inode.i_blkbits);
+		__entry->flags		= iomap->flags;
+		__entry->type		= iomap->type;
+		__entry->ret		= ret;
+	),
+
+	TP_printk("%u,%u bmap %llu iomap end %llu/%lu to %llu ty:%d flags:%08x rc:%d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long long)__entry->inum,
+		  (unsigned long long)__entry->offset,
+		  (unsigned long)__entry->length,
+		  (unsigned long long)__entry->pblock,
+		  (u16)__entry->type,
+		  (u16)__entry->flags, __entry->ret)
+);
+
 /* Keep track of blocks as they are allocated/freed */
 TRACE_EVENT(gfs2_block_alloc,
 
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index affef3c066e0..c75cacaa349b 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -92,7 +92,6 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
 	s64 nbuf;
 	int alloced = test_bit(TR_ALLOCED, &tr->tr_flags);
 
-	BUG_ON(!tr);
 	current->journal_info = NULL;
 
 	if (!test_bit(TR_TOUCHED, &tr->tr_flags)) {
@@ -117,8 +116,9 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
 		kfree(tr);
 	up_read(&sdp->sd_log_flush_lock);
 
-	if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
-		gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
+	if (sdp->sd_vfs->s_flags & SB_SYNCHRONOUS)
+		gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
+			       GFS2_LFC_TRANS_END);
 	if (alloced)
 		sb_end_intwrite(sdp->sd_vfs);
 }
@@ -145,7 +145,7 @@ static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
  *
  * This is used in two distinct cases:
  * i) In ordered write mode
- *    We put the data buffer on a list so that we can ensure that its
+ *    We put the data buffer on a list so that we can ensure that it's
  *    synced to disk at the right time
  * ii) In journaled data mode
  *    We need to journal the data block in the same way as metadata in
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index ea09e41dbb49..f2bce1e0f6fb 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -231,7 +231,6 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *rgd;
 	struct gfs2_holder rg_gh;
-	struct buffer_head *dibh;
 	__be64 *dataptrs;
 	u64 bn = 0;
 	u64 bstart = 0;
@@ -308,13 +307,8 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 		ea->ea_num_ptrs = 0;
 	}
 
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (!error) {
-		ip->i_inode.i_ctime = current_time(&ip->i_inode);
-		gfs2_trans_add_meta(ip->i_gl, dibh);
-		gfs2_dinode_out(ip, dibh->b_data);
-		brelse(dibh);
-	}
+	ip->i_inode.i_ctime = current_time(&ip->i_inode);
+	__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
 
 	gfs2_trans_end(sdp);
 
@@ -616,7 +610,6 @@ static int gfs2_xattr_get(const struct xattr_handler *handler,
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder gh;
-	bool need_unlock = false;
 	int ret;
 
 	/* During lookup, SELinux calls this function with the glock locked. */
@@ -625,10 +618,11 @@ static int gfs2_xattr_get(const struct xattr_handler *handler,
 		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
 		if (ret)
 			return ret;
-		need_unlock = true;
+	} else {
+		gfs2_holder_mark_uninitialized(&gh);
 	}
 	ret = __gfs2_xattr_get(inode, name, buffer, size, handler->flags);
-	if (need_unlock)
+	if (gfs2_holder_initialized(&gh))
 		gfs2_glock_dq_uninit(&gh);
 	return ret;
 }
@@ -749,7 +743,6 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 			     ea_skeleton_call_t skeleton_call, void *private)
 {
 	struct gfs2_alloc_parms ap = { .target = blks };
-	struct buffer_head *dibh;
 	int error;
 
 	error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
@@ -774,13 +767,8 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 	if (error)
 		goto out_end_trans;
 
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (!error) {
-		ip->i_inode.i_ctime = current_time(&ip->i_inode);
-		gfs2_trans_add_meta(ip->i_gl, dibh);
-		gfs2_dinode_out(ip, dibh->b_data);
-		brelse(dibh);
-	}
+	ip->i_inode.i_ctime = current_time(&ip->i_inode);
+	__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
 
 out_end_trans:
 	gfs2_trans_end(GFS2_SB(&ip->i_inode));
@@ -891,7 +879,6 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
 				 struct gfs2_ea_header *ea, struct ea_set *es)
 {
 	struct gfs2_ea_request *er = es->es_er;
-	struct buffer_head *dibh;
 	int error;
 
 	error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + 2 * RES_EATTR, 0);
@@ -908,14 +895,9 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
 	if (es->es_el)
 		ea_set_remove_stuffed(ip, es->es_el);
 
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (error)
-		goto out;
 	ip->i_inode.i_ctime = current_time(&ip->i_inode);
-	gfs2_trans_add_meta(ip->i_gl, dibh);
-	gfs2_dinode_out(ip, dibh->b_data);
-	brelse(dibh);
-out:
+	__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
+
 	gfs2_trans_end(GFS2_SB(&ip->i_inode));
 	return error;
 }
@@ -1111,7 +1093,6 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
 {
 	struct gfs2_ea_header *ea = el->el_ea;
 	struct gfs2_ea_header *prev = el->el_prev;
-	struct buffer_head *dibh;
 	int error;
 
 	error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0);
@@ -1132,13 +1113,8 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
 		ea->ea_type = GFS2_EATYPE_UNUSED;
 	}
 
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (!error) {
-		ip->i_inode.i_ctime = current_time(&ip->i_inode);
-		gfs2_trans_add_meta(ip->i_gl, dibh);
-		gfs2_dinode_out(ip, dibh->b_data);
-		brelse(dibh);
-	}
+	ip->i_inode.i_ctime = current_time(&ip->i_inode);
+	__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
 
 	gfs2_trans_end(GFS2_SB(&ip->i_inode));
 
@@ -1268,11 +1244,20 @@ static int gfs2_xattr_set(const struct xattr_handler *handler,
 	if (ret)
 		return ret;
 
-	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-	if (ret)
-		return ret;
+	/* May be called from gfs2_setattr with the glock locked. */
+
+	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
+		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+		if (ret)
+			return ret;
+	} else {
+		if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE))
+			return -EIO;
+		gfs2_holder_mark_uninitialized(&gh);
+	}
 	ret = __gfs2_xattr_set(inode, name, value, size, flags, handler->flags);
-	gfs2_glock_dq_uninit(&gh);
+	if (gfs2_holder_initialized(&gh))
+		gfs2_glock_dq_uninit(&gh);
 	return ret;
 }