From a4454cd69c66bf3e3bbda352b049732f836fc6b2 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jul 2022 11:36:40 +1000 Subject: xfs: factor the xfs_iunlink functions Prep work that separates the locking that protects the unlinked list from the actual operations being performed. This also helps document the fact they are performing list insert and remove operations. No functional code change. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_inode.c | 108 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 66 insertions(+), 42 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 482e1ee2d669..69bca88fc8ed 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2115,39 +2115,20 @@ out: return error; } -/* - * This is called when the inode's link count has gone to 0 or we are creating - * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0. - * - * We place the on-disk inode on a list in the AGI. It will be pulled from this - * list when the inode is freed. - */ -STATIC int -xfs_iunlink( +static int +xfs_iunlink_insert_inode( struct xfs_trans *tp, + struct xfs_perag *pag, + struct xfs_buf *agibp, struct xfs_inode *ip) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_perag *pag; - struct xfs_agi *agi; - struct xfs_buf *agibp; + struct xfs_agi *agi = agibp->b_addr; xfs_agino_t next_agino; xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; int error; - ASSERT(VFS_I(ip)->i_nlink == 0); - ASSERT(VFS_I(ip)->i_mode != 0); - trace_xfs_iunlink(ip); - - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - - /* Get the agi buffer first. It ensures lock ordering on the list. */ - error = xfs_read_agi(pag, tp, &agibp); - if (error) - goto out; - agi = agibp->b_addr; - /* * Get the index into the agi hash table for the list this inode will * go on. Make sure the pointer isn't garbage and that this inode @@ -2157,8 +2138,7 @@ xfs_iunlink( if (next_agino == agino || !xfs_verify_agino_or_null(pag, next_agino)) { xfs_buf_mark_corrupt(agibp); - error = -EFSCORRUPTED; - goto out; + return -EFSCORRUPTED; } if (next_agino != NULLAGINO) { @@ -2171,7 +2151,7 @@ xfs_iunlink( error = xfs_iunlink_update_inode(tp, ip, pag, next_agino, &old_agino); if (error) - goto out; + return error; ASSERT(old_agino == NULLAGINO); /* @@ -2180,11 +2160,42 @@ xfs_iunlink( */ error = xfs_iunlink_add_backref(pag, agino, next_agino); if (error) - goto out; + return error; } /* Point the head of the list to point to this inode. */ - error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino); + return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino); +} + +/* + * This is called when the inode's link count has gone to 0 or we are creating + * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0. + * + * We place the on-disk inode on a list in the AGI. It will be pulled from this + * list when the inode is freed. + */ +STATIC int +xfs_iunlink( + struct xfs_trans *tp, + struct xfs_inode *ip) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_perag *pag; + struct xfs_buf *agibp; + int error; + + ASSERT(VFS_I(ip)->i_nlink == 0); + ASSERT(VFS_I(ip)->i_mode != 0); + trace_xfs_iunlink(ip); + + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); + + /* Get the agi buffer first. It ensures lock ordering on the list. */ + error = xfs_read_agi(pag, tp, &agibp); + if (error) + goto out; + + error = xfs_iunlink_insert_inode(tp, pag, agibp, ip); out: xfs_perag_put(pag); return error; @@ -2305,18 +2316,15 @@ xfs_iunlink_map_prev( return 0; } -/* - * Pull the on-disk inode from the AGI unlinked list. - */ -STATIC int -xfs_iunlink_remove( +static int +xfs_iunlink_remove_inode( struct xfs_trans *tp, struct xfs_perag *pag, + struct xfs_buf *agibp, struct xfs_inode *ip) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_agi *agi; - struct xfs_buf *agibp; + struct xfs_agi *agi = agibp->b_addr; struct xfs_buf *last_ibp; struct xfs_dinode *last_dip = NULL; xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); @@ -2327,12 +2335,6 @@ xfs_iunlink_remove( trace_xfs_iunlink_remove(ip); - /* Get the agi buffer first. It ensures lock ordering on the list. */ - error = xfs_read_agi(pag, tp, &agibp); - if (error) - return error; - agi = agibp->b_addr; - /* * Get the index into the agi hash table for the list this inode will * go on. Make sure the head pointer isn't garbage. @@ -2397,6 +2399,28 @@ xfs_iunlink_remove( next_agino); } +/* + * Pull the on-disk inode from the AGI unlinked list. + */ +STATIC int +xfs_iunlink_remove( + struct xfs_trans *tp, + struct xfs_perag *pag, + struct xfs_inode *ip) +{ + struct xfs_buf *agibp; + int error; + + trace_xfs_iunlink_remove(ip); + + /* Get the agi buffer first. It ensures lock ordering on the list. */ + error = xfs_read_agi(pag, tp, &agibp); + if (error) + return error; + + return xfs_iunlink_remove_inode(tp, pag, agibp, ip); +} + /* * Look up the inode number specified and if it is not already marked XFS_ISTALE * mark it stale. We should only find clean inodes in this lookup that aren't -- cgit From 4fcc94d653270fcc7800dbaf3b11f78cb462b293 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jul 2022 11:38:54 +1000 Subject: xfs: track the iunlink list pointer in the xfs_inode Having direct access to the i_next_unlinked pointer in unlinked inodes greatly simplifies the processing of inodes on the unlinked list. We no longer need to look up the inode buffer just to find next inode in the list if the xfs_inode is in memory. These improvements will be realised over upcoming patches as other dependencies on the inode buffer for unlinked list processing are removed. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 69bca88fc8ed..4055fb4aa968 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2084,7 +2084,8 @@ xfs_iunlink_update_inode( /* Make sure the old pointer isn't garbage. */ old_value = be32_to_cpu(dip->di_next_unlinked); - if (!xfs_verify_agino_or_null(pag, old_value)) { + if (old_value != ip->i_next_unlinked || + !xfs_verify_agino_or_null(pag, old_value)) { xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, sizeof(*dip), __this_address); error = -EFSCORRUPTED; @@ -2153,6 +2154,7 @@ xfs_iunlink_insert_inode( if (error) return error; ASSERT(old_agino == NULLAGINO); + ip->i_next_unlinked = next_agino; /* * agino has been unlinked, add a backref from the next inode @@ -2354,6 +2356,7 @@ xfs_iunlink_remove_inode( error = xfs_iunlink_update_inode(tp, ip, pag, NULLAGINO, &next_agino); if (error) return error; + ip->i_next_unlinked = NULLAGINO; /* * If there was a backref pointing from the next inode back to this -- cgit From a83d5a8b1d946264e24299d6697bb03fe5198668 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jul 2022 11:43:09 +1000 Subject: xfs: introduce xfs_iunlink_lookup When an inode is on an unlinked list during normal operation, it is guaranteed to be pinned in memory as it is either referenced by the current unlink operation or it has a open file descriptor that references it and has it pinned in memory. Hence to look up an inode on the unlinked list, we can do a direct inode cache lookup and always expect the lookup to succeed. Add a function to do this lookup based on the agino that we use to link the chain of unlinked inodes together so we can begin the conversion the unlinked list manipulations to use in-memory inodes rather than inode cluster buffers and remove the backref cache. Use this lookup function to replace the on-disk inode buffer walk when removing inodes from the unlinked list with an in-core inode unlinked list walk. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_inode.c | 161 ++++++++++++++++++++++------------------------------- 1 file changed, 66 insertions(+), 95 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4055fb4aa968..7153e3cc2627 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1992,6 +1992,35 @@ xfs_iunlink_destroy( ASSERT(freed_anything == false || xfs_is_shutdown(pag->pag_mount)); } +/* + * Find an inode on the unlinked list. This does not take references to the + * inode as we have existence guarantees by holding the AGI buffer lock and that + * only unlinked, referenced inodes can be on the unlinked inode list. If we + * don't find the inode in cache, then let the caller handle the situation. + */ +static struct xfs_inode * +xfs_iunlink_lookup( + struct xfs_perag *pag, + xfs_agino_t agino) +{ + struct xfs_inode *ip; + + rcu_read_lock(); + ip = radix_tree_lookup(&pag->pag_ici_root, agino); + + /* + * Inode not in memory or in RCU freeing limbo should not happen. + * Warn about this and let the caller handle the failure. + */ + if (WARN_ON_ONCE(!ip || !ip->i_ino)) { + rcu_read_unlock(); + return NULL; + } + ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM)); + rcu_read_unlock(); + return ip; +} + /* * Point the AGI unlinked bucket at an inode and log the results. The caller * is responsible for validating the old value. @@ -2097,7 +2126,8 @@ xfs_iunlink_update_inode( * current pointer is the same as the new value, unless we're * terminating the list. */ - *old_next_agino = old_value; + if (old_next_agino) + *old_next_agino = old_value; if (old_value == next_agino) { if (next_agino != NULLAGINO) { xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, @@ -2203,38 +2233,6 @@ out: return error; } -/* Return the imap, dinode pointer, and buffer for an inode. */ -STATIC int -xfs_iunlink_map_ino( - struct xfs_trans *tp, - xfs_agnumber_t agno, - xfs_agino_t agino, - struct xfs_imap *imap, - struct xfs_dinode **dipp, - struct xfs_buf **bpp) -{ - struct xfs_mount *mp = tp->t_mountp; - int error; - - imap->im_blkno = 0; - error = xfs_imap(mp, tp, XFS_AGINO_TO_INO(mp, agno, agino), imap, 0); - if (error) { - xfs_warn(mp, "%s: xfs_imap returned error %d.", - __func__, error); - return error; - } - - error = xfs_imap_to_bp(mp, tp, imap, bpp); - if (error) { - xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.", - __func__, error); - return error; - } - - *dipp = xfs_buf_offset(*bpp, imap->im_boffset); - return 0; -} - /* * Walk the unlinked chain from @head_agino until we find the inode that * points to @target_agino. Return the inode number, map, dinode pointer, @@ -2245,77 +2243,49 @@ xfs_iunlink_map_ino( * * Do not call this function if @target_agino is the head of the list. */ -STATIC int -xfs_iunlink_map_prev( - struct xfs_trans *tp, +static int +xfs_iunlink_lookup_prev( struct xfs_perag *pag, xfs_agino_t head_agino, xfs_agino_t target_agino, - xfs_agino_t *agino, - struct xfs_imap *imap, - struct xfs_dinode **dipp, - struct xfs_buf **bpp) + struct xfs_inode **ipp) { - struct xfs_mount *mp = tp->t_mountp; + struct xfs_inode *ip; xfs_agino_t next_agino; - int error; - - ASSERT(head_agino != target_agino); - *bpp = NULL; - /* See if our backref cache can find it faster. */ - *agino = xfs_iunlink_lookup_backref(pag, target_agino); - if (*agino != NULLAGINO) { - error = xfs_iunlink_map_ino(tp, pag->pag_agno, *agino, imap, - dipp, bpp); - if (error) - return error; + *ipp = NULL; - if (be32_to_cpu((*dipp)->di_next_unlinked) == target_agino) + next_agino = xfs_iunlink_lookup_backref(pag, target_agino); + if (next_agino != NULLAGINO) { + ip = xfs_iunlink_lookup(pag, next_agino); + if (ip && ip->i_next_unlinked == target_agino) { + *ipp = ip; return 0; - - /* - * If we get here the cache contents were corrupt, so drop the - * buffer and fall back to walking the bucket list. - */ - xfs_trans_brelse(tp, *bpp); - *bpp = NULL; - WARN_ON_ONCE(1); + } } - trace_xfs_iunlink_map_prev_fallback(mp, pag->pag_agno); - /* Otherwise, walk the entire bucket until we find it. */ next_agino = head_agino; - while (next_agino != target_agino) { - xfs_agino_t unlinked_agino; - - if (*bpp) - xfs_trans_brelse(tp, *bpp); + while (next_agino != NULLAGINO) { + ip = xfs_iunlink_lookup(pag, next_agino); + if (!ip) + return -EFSCORRUPTED; - *agino = next_agino; - error = xfs_iunlink_map_ino(tp, pag->pag_agno, next_agino, imap, - dipp, bpp); - if (error) - return error; - - unlinked_agino = be32_to_cpu((*dipp)->di_next_unlinked); /* * Make sure this pointer is valid and isn't an obvious * infinite loop. */ - if (!xfs_verify_agino(pag, unlinked_agino) || - next_agino == unlinked_agino) { - XFS_CORRUPTION_ERROR(__func__, - XFS_ERRLEVEL_LOW, mp, - *dipp, sizeof(**dipp)); - error = -EFSCORRUPTED; - return error; + if (!xfs_verify_agino(pag, ip->i_next_unlinked) || + next_agino == ip->i_next_unlinked) + return -EFSCORRUPTED; + + if (ip->i_next_unlinked == target_agino) { + *ipp = ip; + return 0; } - next_agino = unlinked_agino; + next_agino = ip->i_next_unlinked; } - - return 0; + return -EFSCORRUPTED; } static int @@ -2327,8 +2297,6 @@ xfs_iunlink_remove_inode( { struct xfs_mount *mp = tp->t_mountp; struct xfs_agi *agi = agibp->b_addr; - struct xfs_buf *last_ibp; - struct xfs_dinode *last_dip = NULL; xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); xfs_agino_t next_agino; xfs_agino_t head_agino; @@ -2356,7 +2324,6 @@ xfs_iunlink_remove_inode( error = xfs_iunlink_update_inode(tp, ip, pag, NULLAGINO, &next_agino); if (error) return error; - ip->i_next_unlinked = NULLAGINO; /* * If there was a backref pointing from the next inode back to this @@ -2372,18 +2339,21 @@ xfs_iunlink_remove_inode( } if (head_agino != agino) { - struct xfs_imap imap; - xfs_agino_t prev_agino; + struct xfs_inode *prev_ip; - /* We need to search the list for the inode being freed. */ - error = xfs_iunlink_map_prev(tp, pag, head_agino, agino, - &prev_agino, &imap, &last_dip, &last_ibp); + error = xfs_iunlink_lookup_prev(pag, head_agino, agino, + &prev_ip); if (error) return error; /* Point the previous inode on the list to the next inode. */ - xfs_iunlink_update_dinode(tp, pag, prev_agino, last_ibp, - last_dip, &imap, next_agino); + error = xfs_iunlink_update_inode(tp, prev_ip, pag, next_agino, + NULL); + if (error) + return error; + + prev_ip->i_next_unlinked = ip->i_next_unlinked; + ip->i_next_unlinked = NULLAGINO; /* * Now we deal with the backref for this inode. If this inode @@ -2398,6 +2368,7 @@ xfs_iunlink_remove_inode( } /* Point the head of the list to the next unlinked inode. */ + ip->i_next_unlinked = NULLAGINO; return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, next_agino); } -- cgit From 2fd26cc07e9f8050e29bf314cbf1bcb64dbe088c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jul 2022 11:46:43 +1000 Subject: xfs: double link the unlinked inode list Now we have forwards traversal via the incore inode in place, we now need to add back pointers to the incore inode to entirely replace the back reference cache. We use the same lookup semantics and constraints as for the forwards pointer lookups during unlinks, and so we can look up any inode in the unlinked list directly and update the list pointers, forwards or backwards, at any time. The only wrinkle in converting the unlinked list manipulations to use in-core previous pointers is that log recovery doesn't have the incore inode state built up so it can't just read in an inode and release it to finish off the unlink. Hence we need to modify the traversal in recovery to read one inode ahead before we release the inode at the head of the list. This populates the next->prev relationship sufficient to be able to replay the unlinked list and hence greatly simplify the runtime code. This recovery algorithm also requires that we actually remove inodes from the unlinked list one at a time as background inode inactivation will result in unlinked list removal racing with the building of the in-memory unlinked list state. We could serialise this by holding the AGI buffer lock when constructing the in memory state, but all that does is lockstep background processing with list building. It is much simpler to flush the inodegc immediately after releasing the inode so that it is unlinked immediately and there is no races present at all. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 344 +++++++++-------------------------------------------- 1 file changed, 58 insertions(+), 286 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7153e3cc2627..63af7680cf73 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1801,197 +1801,22 @@ out: * because we must walk that list to find the inode that points to the inode * being removed from the unlinked hash bucket list. * - * What if we modelled the unlinked list as a collection of records capturing - * "X.next_unlinked = Y" relations? If we indexed those records on Y, we'd - * have a fast way to look up unlinked list predecessors, which avoids the - * slow list walk. That's exactly what we do here (in-core) with a per-AG - * rhashtable. + * Hence we keep an in-memory double linked list to link each inode on an + * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer + * based lists would require having 64 list heads in the perag, one for each + * list. This is expensive in terms of memory (think millions of AGs) and cache + * misses on lookups. Instead, use the fact that inodes on the unlinked list + * must be referenced at the VFS level to keep them on the list and hence we + * have an existence guarantee for inodes on the unlinked list. * - * Because this is a backref cache, we ignore operational failures since the - * iunlink code can fall back to the slow bucket walk. The only errors that - * should bubble out are for obviously incorrect situations. - * - * All users of the backref cache MUST hold the AGI buffer lock to serialize - * access or have otherwise provided for concurrency control. + * Given we have an existence guarantee, we can use lockless inode cache lookups + * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode + * for the double linked unlinked list, and we don't need any extra locking to + * keep the list safe as all manipulations are done under the AGI buffer lock. + * Keeping the list up to date does not require memory allocation, just finding + * the XFS inode and updating the next/prev unlinked list aginos. */ -/* Capture a "X.next_unlinked = Y" relationship. */ -struct xfs_iunlink { - struct rhash_head iu_rhash_head; - xfs_agino_t iu_agino; /* X */ - xfs_agino_t iu_next_unlinked; /* Y */ -}; - -/* Unlinked list predecessor lookup hashtable construction */ -static int -xfs_iunlink_obj_cmpfn( - struct rhashtable_compare_arg *arg, - const void *obj) -{ - const xfs_agino_t *key = arg->key; - const struct xfs_iunlink *iu = obj; - - if (iu->iu_next_unlinked != *key) - return 1; - return 0; -} - -static const struct rhashtable_params xfs_iunlink_hash_params = { - .min_size = XFS_AGI_UNLINKED_BUCKETS, - .key_len = sizeof(xfs_agino_t), - .key_offset = offsetof(struct xfs_iunlink, - iu_next_unlinked), - .head_offset = offsetof(struct xfs_iunlink, iu_rhash_head), - .automatic_shrinking = true, - .obj_cmpfn = xfs_iunlink_obj_cmpfn, -}; - -/* - * Return X, where X.next_unlinked == @agino. Returns NULLAGINO if no such - * relation is found. - */ -static xfs_agino_t -xfs_iunlink_lookup_backref( - struct xfs_perag *pag, - xfs_agino_t agino) -{ - struct xfs_iunlink *iu; - - iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino, - xfs_iunlink_hash_params); - return iu ? iu->iu_agino : NULLAGINO; -} - -/* - * Take ownership of an iunlink cache entry and insert it into the hash table. - * If successful, the entry will be owned by the cache; if not, it is freed. - * Either way, the caller does not own @iu after this call. - */ -static int -xfs_iunlink_insert_backref( - struct xfs_perag *pag, - struct xfs_iunlink *iu) -{ - int error; - - error = rhashtable_insert_fast(&pag->pagi_unlinked_hash, - &iu->iu_rhash_head, xfs_iunlink_hash_params); - /* - * Fail loudly if there already was an entry because that's a sign of - * corruption of in-memory data. Also fail loudly if we see an error - * code we didn't anticipate from the rhashtable code. Currently we - * only anticipate ENOMEM. - */ - if (error) { - WARN(error != -ENOMEM, "iunlink cache insert error %d", error); - kmem_free(iu); - } - /* - * Absorb any runtime errors that aren't a result of corruption because - * this is a cache and we can always fall back to bucket list scanning. - */ - if (error != 0 && error != -EEXIST) - error = 0; - return error; -} - -/* Remember that @prev_agino.next_unlinked = @this_agino. */ -static int -xfs_iunlink_add_backref( - struct xfs_perag *pag, - xfs_agino_t prev_agino, - xfs_agino_t this_agino) -{ - struct xfs_iunlink *iu; - - if (XFS_TEST_ERROR(false, pag->pag_mount, XFS_ERRTAG_IUNLINK_FALLBACK)) - return 0; - - iu = kmem_zalloc(sizeof(*iu), KM_NOFS); - iu->iu_agino = prev_agino; - iu->iu_next_unlinked = this_agino; - - return xfs_iunlink_insert_backref(pag, iu); -} - -/* - * Replace X.next_unlinked = @agino with X.next_unlinked = @next_unlinked. - * If @next_unlinked is NULLAGINO, we drop the backref and exit. If there - * wasn't any such entry then we don't bother. - */ -static int -xfs_iunlink_change_backref( - struct xfs_perag *pag, - xfs_agino_t agino, - xfs_agino_t next_unlinked) -{ - struct xfs_iunlink *iu; - int error; - - /* Look up the old entry; if there wasn't one then exit. */ - iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino, - xfs_iunlink_hash_params); - if (!iu) - return 0; - - /* - * Remove the entry. This shouldn't ever return an error, but if we - * couldn't remove the old entry we don't want to add it again to the - * hash table, and if the entry disappeared on us then someone's - * violated the locking rules and we need to fail loudly. Either way - * we cannot remove the inode because internal state is or would have - * been corrupt. - */ - error = rhashtable_remove_fast(&pag->pagi_unlinked_hash, - &iu->iu_rhash_head, xfs_iunlink_hash_params); - if (error) - return error; - - /* If there is no new next entry just free our item and return. */ - if (next_unlinked == NULLAGINO) { - kmem_free(iu); - return 0; - } - - /* Update the entry and re-add it to the hash table. */ - iu->iu_next_unlinked = next_unlinked; - return xfs_iunlink_insert_backref(pag, iu); -} - -/* Set up the in-core predecessor structures. */ -int -xfs_iunlink_init( - struct xfs_perag *pag) -{ - return rhashtable_init(&pag->pagi_unlinked_hash, - &xfs_iunlink_hash_params); -} - -/* Free the in-core predecessor structures. */ -static void -xfs_iunlink_free_item( - void *ptr, - void *arg) -{ - struct xfs_iunlink *iu = ptr; - bool *freed_anything = arg; - - *freed_anything = true; - kmem_free(iu); -} - -void -xfs_iunlink_destroy( - struct xfs_perag *pag) -{ - bool freed_anything = false; - - rhashtable_free_and_destroy(&pag->pagi_unlinked_hash, - xfs_iunlink_free_item, &freed_anything); - - ASSERT(freed_anything == false || xfs_is_shutdown(pag->pag_mount)); -} - /* * Find an inode on the unlinked list. This does not take references to the * inode as we have existence guarantees by holding the AGI buffer lock and that @@ -2021,6 +1846,26 @@ xfs_iunlink_lookup( return ip; } +/* Update the prev pointer of the next agino. */ +static int +xfs_iunlink_update_backref( + struct xfs_perag *pag, + xfs_agino_t prev_agino, + xfs_agino_t next_agino) +{ + struct xfs_inode *ip; + + /* No update necessary if we are at the end of the list. */ + if (next_agino == NULLAGINO) + return 0; + + ip = xfs_iunlink_lookup(pag, next_agino); + if (!ip) + return -EFSCORRUPTED; + ip->i_prev_unlinked = prev_agino; + return 0; +} + /* * Point the AGI unlinked bucket at an inode and log the results. The caller * is responsible for validating the old value. @@ -2172,6 +2017,14 @@ xfs_iunlink_insert_inode( return -EFSCORRUPTED; } + /* + * Update the prev pointer in the next inode to point back to this + * inode. + */ + error = xfs_iunlink_update_backref(pag, agino, next_agino); + if (error) + return error; + if (next_agino != NULLAGINO) { xfs_agino_t old_agino; @@ -2185,14 +2038,6 @@ xfs_iunlink_insert_inode( return error; ASSERT(old_agino == NULLAGINO); ip->i_next_unlinked = next_agino; - - /* - * agino has been unlinked, add a backref from the next inode - * back to agino. - */ - error = xfs_iunlink_add_backref(pag, agino, next_agino); - if (error) - return error; } /* Point the head of the list to point to this inode. */ @@ -2233,61 +2078,6 @@ out: return error; } -/* - * Walk the unlinked chain from @head_agino until we find the inode that - * points to @target_agino. Return the inode number, map, dinode pointer, - * and inode cluster buffer of that inode as @agino, @imap, @dipp, and @bpp. - * - * @tp, @pag, @head_agino, and @target_agino are input parameters. - * @agino, @imap, @dipp, and @bpp are all output parameters. - * - * Do not call this function if @target_agino is the head of the list. - */ -static int -xfs_iunlink_lookup_prev( - struct xfs_perag *pag, - xfs_agino_t head_agino, - xfs_agino_t target_agino, - struct xfs_inode **ipp) -{ - struct xfs_inode *ip; - xfs_agino_t next_agino; - - *ipp = NULL; - - next_agino = xfs_iunlink_lookup_backref(pag, target_agino); - if (next_agino != NULLAGINO) { - ip = xfs_iunlink_lookup(pag, next_agino); - if (ip && ip->i_next_unlinked == target_agino) { - *ipp = ip; - return 0; - } - } - - /* Otherwise, walk the entire bucket until we find it. */ - next_agino = head_agino; - while (next_agino != NULLAGINO) { - ip = xfs_iunlink_lookup(pag, next_agino); - if (!ip) - return -EFSCORRUPTED; - - /* - * Make sure this pointer is valid and isn't an obvious - * infinite loop. - */ - if (!xfs_verify_agino(pag, ip->i_next_unlinked) || - next_agino == ip->i_next_unlinked) - return -EFSCORRUPTED; - - if (ip->i_next_unlinked == target_agino) { - *ipp = ip; - return 0; - } - next_agino = ip->i_next_unlinked; - } - return -EFSCORRUPTED; -} - static int xfs_iunlink_remove_inode( struct xfs_trans *tp, @@ -2326,51 +2116,33 @@ xfs_iunlink_remove_inode( return error; /* - * If there was a backref pointing from the next inode back to this - * one, remove it because we've removed this inode from the list. - * - * Later, if this inode was in the middle of the list we'll update - * this inode's backref to point from the next inode. + * Update the prev pointer in the next inode to point back to previous + * inode in the chain. */ - if (next_agino != NULLAGINO) { - error = xfs_iunlink_change_backref(pag, next_agino, NULLAGINO); - if (error) - return error; - } + error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked, + ip->i_next_unlinked); + if (error) + return error; if (head_agino != agino) { struct xfs_inode *prev_ip; - error = xfs_iunlink_lookup_prev(pag, head_agino, agino, - &prev_ip); - if (error) - return error; - - /* Point the previous inode on the list to the next inode. */ - error = xfs_iunlink_update_inode(tp, prev_ip, pag, next_agino, - NULL); - if (error) - return error; + prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked); + if (!prev_ip) + return -EFSCORRUPTED; + error = xfs_iunlink_update_inode(tp, prev_ip, pag, + ip->i_next_unlinked, NULL); prev_ip->i_next_unlinked = ip->i_next_unlinked; - ip->i_next_unlinked = NULLAGINO; - - /* - * Now we deal with the backref for this inode. If this inode - * pointed at a real inode, change the backref that pointed to - * us to point to our old next. If this inode was the end of - * the list, delete the backref that pointed to us. Note that - * change_backref takes care of deleting the backref if - * next_agino is NULLAGINO. - */ - return xfs_iunlink_change_backref(agibp->b_pag, agino, - next_agino); + } else { + /* Point the head of the list to the next unlinked inode. */ + error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, + ip->i_next_unlinked); } - /* Point the head of the list to the next unlinked inode. */ ip->i_next_unlinked = NULLAGINO; - return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, - next_agino); + ip->i_prev_unlinked = NULLAGINO; + return error; } /* -- cgit From 5301f87013145a874cda4ae008b6fcc2b810a721 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jul 2022 11:46:46 +1000 Subject: xfs: clean up xfs_iunlink_update_inode() We no longer need to have this function return the previous next agino value from the on-disk inode as we have it in the in-core inode now. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_inode.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 63af7680cf73..550574b63b9a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1935,13 +1935,12 @@ xfs_iunlink_update_dinode( } /* Set an in-core inode's unlinked pointer and return the old value. */ -STATIC int +static int xfs_iunlink_update_inode( struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_perag *pag, - xfs_agino_t next_agino, - xfs_agino_t *old_next_agino) + xfs_agino_t next_agino) { struct xfs_mount *mp = tp->t_mountp; struct xfs_dinode *dip; @@ -1971,8 +1970,6 @@ xfs_iunlink_update_inode( * current pointer is the same as the new value, unless we're * terminating the list. */ - if (old_next_agino) - *old_next_agino = old_value; if (old_value == next_agino) { if (next_agino != NULLAGINO) { xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, @@ -2026,17 +2023,13 @@ xfs_iunlink_insert_inode( return error; if (next_agino != NULLAGINO) { - xfs_agino_t old_agino; - /* * There is already another inode in the bucket, so point this * inode to the current head of the list. */ - error = xfs_iunlink_update_inode(tp, ip, pag, next_agino, - &old_agino); + error = xfs_iunlink_update_inode(tp, ip, pag, next_agino); if (error) return error; - ASSERT(old_agino == NULLAGINO); ip->i_next_unlinked = next_agino; } @@ -2088,7 +2081,6 @@ xfs_iunlink_remove_inode( struct xfs_mount *mp = tp->t_mountp; struct xfs_agi *agi = agibp->b_addr; xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); - xfs_agino_t next_agino; xfs_agino_t head_agino; short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; int error; @@ -2111,7 +2103,7 @@ xfs_iunlink_remove_inode( * the old pointer value so that we can update whatever was previous * to us in the list to point to whatever was next in the list. */ - error = xfs_iunlink_update_inode(tp, ip, pag, NULLAGINO, &next_agino); + error = xfs_iunlink_update_inode(tp, ip, pag, NULLAGINO); if (error) return error; @@ -2132,7 +2124,7 @@ xfs_iunlink_remove_inode( return -EFSCORRUPTED; error = xfs_iunlink_update_inode(tp, prev_ip, pag, - ip->i_next_unlinked, NULL); + ip->i_next_unlinked); prev_ip->i_next_unlinked = ip->i_next_unlinked; } else { /* Point the head of the list to the next unlinked inode. */ -- cgit From 062efdb0803adac3fad039d681789c5e01818bef Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jul 2022 11:46:59 +1000 Subject: xfs: combine iunlink inode update functions Combine the logging of the inode unlink list update into the calling function that looks up the buffer we end up logging. These do not need to be separate functions as they are both short, simple operations and there's only a single call path through them. This new function will end up being the core of the iunlink log item processing... Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_inode.c | 52 ++++++++++++++++------------------------------------ 1 file changed, 16 insertions(+), 36 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 550574b63b9a..515878399dda 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1905,38 +1905,9 @@ xfs_iunlink_update_bucket( return 0; } -/* Set an on-disk inode's next_unlinked pointer. */ -STATIC void -xfs_iunlink_update_dinode( - struct xfs_trans *tp, - struct xfs_perag *pag, - xfs_agino_t agino, - struct xfs_buf *ibp, - struct xfs_dinode *dip, - struct xfs_imap *imap, - xfs_agino_t next_agino) -{ - struct xfs_mount *mp = tp->t_mountp; - int offset; - - ASSERT(xfs_verify_agino_or_null(pag, next_agino)); - - trace_xfs_iunlink_update_dinode(mp, pag->pag_agno, agino, - be32_to_cpu(dip->di_next_unlinked), next_agino); - - dip->di_next_unlinked = cpu_to_be32(next_agino); - offset = imap->im_boffset + - offsetof(struct xfs_dinode, di_next_unlinked); - - /* need to recalc the inode CRC if appropriate */ - xfs_dinode_calc_crc(mp, dip); - xfs_trans_inode_buf(tp, ibp); - xfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1); -} - /* Set an in-core inode's unlinked pointer and return the old value. */ static int -xfs_iunlink_update_inode( +xfs_iunlink_log_inode( struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_perag *pag, @@ -1946,6 +1917,7 @@ xfs_iunlink_update_inode( struct xfs_dinode *dip; struct xfs_buf *ibp; xfs_agino_t old_value; + int offset; int error; ASSERT(xfs_verify_agino_or_null(pag, next_agino)); @@ -1979,9 +1951,17 @@ xfs_iunlink_update_inode( goto out; } - /* Ok, update the new pointer. */ - xfs_iunlink_update_dinode(tp, pag, XFS_INO_TO_AGINO(mp, ip->i_ino), - ibp, dip, &ip->i_imap, next_agino); + trace_xfs_iunlink_update_dinode(mp, pag->pag_agno, + XFS_INO_TO_AGINO(mp, ip->i_ino), + be32_to_cpu(dip->di_next_unlinked), next_agino); + + dip->di_next_unlinked = cpu_to_be32(next_agino); + offset = ip->i_imap.im_boffset + + offsetof(struct xfs_dinode, di_next_unlinked); + + xfs_dinode_calc_crc(mp, dip); + xfs_trans_inode_buf(tp, ibp); + xfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1); return 0; out: xfs_trans_brelse(tp, ibp); @@ -2027,7 +2007,7 @@ xfs_iunlink_insert_inode( * There is already another inode in the bucket, so point this * inode to the current head of the list. */ - error = xfs_iunlink_update_inode(tp, ip, pag, next_agino); + error = xfs_iunlink_log_inode(tp, ip, pag, next_agino); if (error) return error; ip->i_next_unlinked = next_agino; @@ -2103,7 +2083,7 @@ xfs_iunlink_remove_inode( * the old pointer value so that we can update whatever was previous * to us in the list to point to whatever was next in the list. */ - error = xfs_iunlink_update_inode(tp, ip, pag, NULLAGINO); + error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO); if (error) return error; @@ -2123,7 +2103,7 @@ xfs_iunlink_remove_inode( if (!prev_ip) return -EFSCORRUPTED; - error = xfs_iunlink_update_inode(tp, prev_ip, pag, + error = xfs_iunlink_log_inode(tp, prev_ip, pag, ip->i_next_unlinked); prev_ip->i_next_unlinked = ip->i_next_unlinked; } else { -- cgit From 784eb7d8dd4163b82a19b914f76b2834a58a3e4c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jul 2022 11:47:42 +1000 Subject: xfs: add in-memory iunlink log item Now that we have a clean operation to update the di_next_unlinked field of inode cluster buffers, we can easily defer this operation to transaction commit time so we can order the inode cluster buffer locking consistently. To do this, we introduce a new in-memory log item to track the unlinked list item modification that we are going to make. This follows the same observations as the in-memory double linked list used to track unlinked inodes in that the inodes on the list are pinned in memory and cannot go away, and hence we can simply reference them for the duration of the transaction without needing to take active references or pin them or look them up. This allows us to pass the xfs_inode to the transaction commit code along with the modification to be made, and then order the logged modifications via the ->iop_sort and ->iop_precommit operations for the new log item type. As this is an in-memory log item, it doesn't have formatting, CIL or AIL operational hooks - it exists purely to run the inode unlink modifications and is then removed from the transaction item list and freed once the precommit operation has run. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 64 +----------------------------------------------------- 1 file changed, 1 insertion(+), 63 deletions(-) (limited to 'fs/xfs/xfs_inode.c') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 515878399dda..ace537a17d0d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -20,6 +20,7 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_inode_item.h" +#include "xfs_iunlink_item.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" @@ -1905,69 +1906,6 @@ xfs_iunlink_update_bucket( return 0; } -/* Set an in-core inode's unlinked pointer and return the old value. */ -static int -xfs_iunlink_log_inode( - struct xfs_trans *tp, - struct xfs_inode *ip, - struct xfs_perag *pag, - xfs_agino_t next_agino) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_dinode *dip; - struct xfs_buf *ibp; - xfs_agino_t old_value; - int offset; - int error; - - ASSERT(xfs_verify_agino_or_null(pag, next_agino)); - - error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &ibp); - if (error) - return error; - dip = xfs_buf_offset(ibp, ip->i_imap.im_boffset); - - /* Make sure the old pointer isn't garbage. */ - old_value = be32_to_cpu(dip->di_next_unlinked); - if (old_value != ip->i_next_unlinked || - !xfs_verify_agino_or_null(pag, old_value)) { - xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, - sizeof(*dip), __this_address); - error = -EFSCORRUPTED; - goto out; - } - - /* - * Since we're updating a linked list, we should never find that the - * current pointer is the same as the new value, unless we're - * terminating the list. - */ - if (old_value == next_agino) { - if (next_agino != NULLAGINO) { - xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, - dip, sizeof(*dip), __this_address); - error = -EFSCORRUPTED; - } - goto out; - } - - trace_xfs_iunlink_update_dinode(mp, pag->pag_agno, - XFS_INO_TO_AGINO(mp, ip->i_ino), - be32_to_cpu(dip->di_next_unlinked), next_agino); - - dip->di_next_unlinked = cpu_to_be32(next_agino); - offset = ip->i_imap.im_boffset + - offsetof(struct xfs_dinode, di_next_unlinked); - - xfs_dinode_calc_crc(mp, dip); - xfs_trans_inode_buf(tp, ibp); - xfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1); - return 0; -out: - xfs_trans_brelse(tp, ibp); - return error; -} - static int xfs_iunlink_insert_inode( struct xfs_trans *tp, -- cgit