diff options
Diffstat (limited to 'fs/xfs')
| -rw-r--r-- | fs/xfs/libxfs/xfs_rtgroup.h | 6 |
| -rw-r--r-- | fs/xfs/xfs_zone_alloc.c | 8 |
| -rw-r--r-- | fs/xfs/xfs_zone_gc.c | 27 |
3 files changed, 41 insertions, 0 deletions
diff --git a/fs/xfs/libxfs/xfs_rtgroup.h b/fs/xfs/libxfs/xfs_rtgroup.h index d36a6ae0abe5..d4fcf591e63d 100644 --- a/fs/xfs/libxfs/xfs_rtgroup.h +++ b/fs/xfs/libxfs/xfs_rtgroup.h @@ -50,6 +50,12 @@ struct xfs_rtgroup { uint8_t *rtg_rsum_cache; struct xfs_open_zone *rtg_open_zone; }; + + /* + * Count of outstanding GC operations for zoned XFS. Any RTG with a + * non-zero rtg_gccount will not be picked as new GC victim. + */ + atomic_t rtg_gccount; }; /* diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 23cdab4515bb..040402240807 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -246,6 +246,14 @@ xfs_zoned_map_extent( * If a data write raced with this GC write, keep the existing data in * the data fork, mark our newly written GC extent as reclaimable, then * move on to the next extent. + * + * Note that this can also happen when racing with operations that do + * not actually invalidate the data, but just move it to a different + * inode (XFS_IOC_EXCHANGE_RANGE), or to a different offset inside the + * inode (FALLOC_FL_COLLAPSE_RANGE / FALLOC_FL_INSERT_RANGE). If the + * data was just moved around, GC fails to free the zone, but the zone + * becomes a GC candidate again as soon as all previous GC I/O has + * finished and these blocks will be moved out eventually. 
*/ if (old_startblock != NULLFSBLOCK && old_startblock != data.br_startblock) diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 109877d9a6bf..4ade54445532 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -114,6 +114,8 @@ struct xfs_gc_bio { /* Open Zone being written to */ struct xfs_open_zone *oz; + struct xfs_rtgroup *victim_rtg; + /* Bio used for reads and writes, including the bvec used by it */ struct bio_vec bv; struct bio bio; /* must be last */ @@ -264,6 +266,7 @@ xfs_zone_gc_iter_init( iter->rec_count = 0; iter->rec_idx = 0; iter->victim_rtg = victim_rtg; + atomic_inc(&victim_rtg->rtg_gccount); } /* @@ -362,6 +365,7 @@ xfs_zone_gc_query( return 0; done: + atomic_dec(&iter->victim_rtg->rtg_gccount); xfs_rtgroup_rele(iter->victim_rtg); iter->victim_rtg = NULL; return 0; @@ -451,6 +455,20 @@ xfs_zone_gc_pick_victim_from( if (!rtg) continue; + /* + * If the zone is already undergoing GC, don't pick it again. + * + * This prevents us from picking one of the zones for which we + * already submitted GC I/O, but for which the remapping hasn't + * concluded yet. This won't cause data corruption, but + * increases write amplification and slows down GC, so this is + * a bad thing. 
+ */ + if (atomic_read(&rtg->rtg_gccount)) { + xfs_rtgroup_rele(rtg); + continue; + } + /* skip zones that are just waiting for a reset */ if (rtg_rmap(rtg)->i_used_blocks == 0 || rtg_rmap(rtg)->i_used_blocks >= victim_used) { @@ -688,6 +706,9 @@ xfs_zone_gc_start_chunk( chunk->scratch = &data->scratch[data->scratch_idx]; chunk->data = data; chunk->oz = oz; + chunk->victim_rtg = iter->victim_rtg; + atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref); + atomic_inc(&chunk->victim_rtg->rtg_gccount); bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock); bio->bi_end_io = xfs_zone_gc_end_io; @@ -710,6 +731,8 @@ static void xfs_zone_gc_free_chunk( struct xfs_gc_bio *chunk) { + atomic_dec(&chunk->victim_rtg->rtg_gccount); + xfs_rtgroup_rele(chunk->victim_rtg); list_del(&chunk->entry); xfs_open_zone_put(chunk->oz); xfs_irele(chunk->ip); @@ -770,6 +793,10 @@ xfs_zone_gc_split_write( split_chunk->oz = chunk->oz; atomic_inc(&chunk->oz->oz_ref); + split_chunk->victim_rtg = chunk->victim_rtg; + atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref); + atomic_inc(&chunk->victim_rtg->rtg_gccount); + chunk->offset += split_len; chunk->len -= split_len; chunk->old_startblock += XFS_B_TO_FSB(data->mp, split_len); |
