From 06734e3c95a34e4d71342f0583f1bb88c61ed9b7 Mon Sep 17 00:00:00 2001 From: Keyur Patel Date: Mon, 29 Jun 2020 14:44:35 -0700 Subject: xfs: Couple of typo fixes in comments ./xfs/libxfs/xfs_inode_buf.c:56: unnecssary ==> unnecessary ./xfs/libxfs/xfs_inode_buf.c:59: behavour ==> behaviour ./xfs/libxfs/xfs_inode_buf.c:206: unitialized ==> uninitialized Signed-off-by: Keyur Patel Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_inode_buf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/xfs/libxfs/xfs_inode_buf.c') diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 6f84ea85fdd8..5c93e8e6de74 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -53,10 +53,10 @@ xfs_inobp_check( * If the readahead buffer is invalid, we need to mark it with an error and * clear the DONE status of the buffer so that a followup read will re-read it * from disk. We don't report the error otherwise to avoid warnings during log - * recovery and we don't get unnecssary panics on debug kernels. We use EIO here + * recovery and we don't get unnecessary panics on debug kernels. We use EIO here * because all we want to do is say readahead failed; there is no-one to report * the error to, so this will distinguish it from a non-ra verifier failure. - * Changes to this readahead error behavour also need to be reflected in + * Changes to this readahead error behaviour also need to be reflected in * xfs_dquot_buf_readahead_verify(). */ static void @@ -203,7 +203,7 @@ xfs_inode_from_disk( /* * First get the permanent information that is needed to allocate an * inode. If the inode is unused, mode is zero and we shouldn't mess - * with the unitialized part of it. + * with the uninitialized part of it. */ to->di_flushiter = be16_to_cpu(from->di_flushiter); inode->i_generation = be32_to_cpu(from->di_gen); -- cgit From 298f7bec503f30bd98242ec02df6abe13b31a677 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 29 Jun 2020 14:49:15 -0700 Subject: xfs: pin inode backing buffer to the inode log item When we dirty an inode, we are going to have to write it disk at some point in the near future. This requires the inode cluster backing buffer to be present in memory. Unfortunately, under severe memory pressure we can reclaim the inode backing buffer while the inode is dirty in memory, resulting in stalling the AIL pushing because it has to do a read-modify-write cycle on the cluster buffer. When we have no memory available, the read of the cluster buffer blocks the AIL pushing process, and this causes all sorts of issues for memory reclaim as it requires inode writeback to make forwards progress. Allocating a cluster buffer causes more memory pressure, and results in more cluster buffers to be reclaimed, resulting in more RMW cycles to be done in the AIL context and everything then backs up on AIL progress. Only the synchronous inode cluster writeback in the the inode reclaim code provides some level of forwards progress guarantees that prevent OOM-killer rampages in this situation. Fix this by pinning the inode backing buffer to the inode log item when the inode is first dirtied (i.e. in xfs_trans_log_inode()). This may mean the first modification of an inode that has been held in cache for a long time may block on a cluster buffer read, but we can do that in transaction context and block safely until the buffer has been allocated and read. Once we have the cluster buffer, the inode log item takes a reference to it, pinning it in memory, and attaches it to the log item for future reference. This means we can always grab the cluster buffer from the inode log item when we need it. When the inode is finally cleaned and removed from the AIL, we can drop the reference the inode log item holds on the cluster buffer. Once all inodes on the cluster buffer are clean, the cluster buffer will be unpinned and it will be available for memory reclaim to reclaim again. This avoids the issues with needing to do RMW cycles in the AIL pushing context, and hence allows complete non-blocking inode flushing to be performed by the AIL pushing context. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_inode_buf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/xfs/libxfs/xfs_inode_buf.c') diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 5c93e8e6de74..b4a6c091571e 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -176,7 +176,8 @@ xfs_imap_to_bp( } *bpp = bp; - *dipp = xfs_buf_offset(bp, imap->im_boffset); + if (dipp) + *dipp = xfs_buf_offset(bp, imap->im_boffset); return 0; } -- cgit From e2705b0304778916db87831217ec642e34d9d9fa Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 29 Jun 2020 14:49:20 -0700 Subject: xfs: remove xfs_inobp_check() This debug code is called on every xfs_iflush() call, which then checks every inode in the buffer for non-zero unlinked list field. Hence it checks every inode in the cluster buffer every time a single inode on that cluster it flushed. This is resulting in: - 38.91% 5.33% [kernel] [k] xfs_iflush - 17.70% xfs_iflush - 9.93% xfs_inobp_check 4.36% xfs_buf_offset 10% of the CPU time spent flushing inodes is repeatedly checking unlinked fields in the buffer. We don't need to do this. The other place we call xfs_inobp_check() is xfs_iunlink_update_dinode(), and this is after we've done this assert for the agino we are about to write into that inode: ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino)); which means we've already checked that the agino we are about to write is not 0 on debug kernels. The inode buffer verifiers do everything else we need, so let's just remove this debug code. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_inode_buf.c | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'fs/xfs/libxfs/xfs_inode_buf.c') diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index b4a6c091571e..8d5dd08eab75 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -20,30 +20,6 @@ #include -/* - * Check that none of the inode's in the buffer have a next - * unlinked field of 0. - */ -#if defined(DEBUG) -void -xfs_inobp_check( - xfs_mount_t *mp, - xfs_buf_t *bp) -{ - int i; - xfs_dinode_t *dip; - - for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) { - dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize); - if (!dip->di_next_unlinked) { - xfs_alert(mp, - "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.", - i, (long long)bp->b_bn); - } - } -} -#endif - /* * If we are doing readahead on an inode buffer, we might be in log recovery * reading an inode allocation buffer that hasn't yet been replayed, and hence -- cgit