diff options
Diffstat (limited to 'fs/xfs/xfs_zone_alloc.c')
-rw-r--r-- | fs/xfs/xfs_zone_alloc.c | 163 |
1 file changed, 138 insertions, 25 deletions
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 52af234936a2..33f7eee521a8 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -24,6 +24,7 @@ #include "xfs_zone_priv.h" #include "xfs_zones.h" #include "xfs_trace.h" +#include "xfs_mru_cache.h" void xfs_open_zone_put( @@ -433,7 +434,7 @@ xfs_init_open_zone( spin_lock_init(&oz->oz_alloc_lock); atomic_set(&oz->oz_ref, 1); oz->oz_rtg = rtg; - oz->oz_write_pointer = write_pointer; + oz->oz_allocated = write_pointer; oz->oz_written = write_pointer; oz->oz_write_hint = write_hint; oz->oz_is_gc = is_gc; @@ -568,7 +569,7 @@ xfs_try_use_zone( struct xfs_open_zone *oz, bool lowspace) { - if (oz->oz_write_pointer == rtg_blocks(oz->oz_rtg)) + if (oz->oz_allocated == rtg_blocks(oz->oz_rtg)) return false; if (!lowspace && !xfs_good_hint_match(oz, file_hint)) return false; @@ -653,13 +654,6 @@ static inline bool xfs_zoned_pack_tight(struct xfs_inode *ip) !(ip->i_diflags & XFS_DIFLAG_APPEND); } -/* - * Pick a new zone for writes. - * - * If we aren't using up our budget of open zones just open a new one from the - * freelist. Else try to find one that matches the expected data lifetime. If - * we don't find one that is good pick any zone that is available. - */ static struct xfs_open_zone * xfs_select_zone_nowait( struct xfs_mount *mp, @@ -687,7 +681,8 @@ xfs_select_zone_nowait( goto out_unlock; /* - * See if we can open a new zone and use that. + * See if we can open a new zone and use that so that data for different + * files is mixed as little as possible. 
*/ oz = xfs_try_open_zone(mp, write_hint); if (oz) @@ -726,7 +721,7 @@ xfs_select_zone( for (;;) { prepare_to_wait(&zi->zi_zone_wait, &wait, TASK_UNINTERRUPTIBLE); oz = xfs_select_zone_nowait(mp, write_hint, pack_tight); - if (oz) + if (oz || xfs_is_shutdown(mp)) break; schedule(); } @@ -743,25 +738,25 @@ xfs_zone_alloc_blocks( { struct xfs_rtgroup *rtg = oz->oz_rtg; struct xfs_mount *mp = rtg_mount(rtg); - xfs_rgblock_t rgbno; + xfs_rgblock_t allocated; spin_lock(&oz->oz_alloc_lock); count_fsb = min3(count_fsb, XFS_MAX_BMBT_EXTLEN, - (xfs_filblks_t)rtg_blocks(rtg) - oz->oz_write_pointer); + (xfs_filblks_t)rtg_blocks(rtg) - oz->oz_allocated); if (!count_fsb) { spin_unlock(&oz->oz_alloc_lock); return 0; } - rgbno = oz->oz_write_pointer; - oz->oz_write_pointer += count_fsb; + allocated = oz->oz_allocated; + oz->oz_allocated += count_fsb; spin_unlock(&oz->oz_alloc_lock); - trace_xfs_zone_alloc_blocks(oz, rgbno, count_fsb); + trace_xfs_zone_alloc_blocks(oz, allocated, count_fsb); *sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0); *is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *sector); if (!*is_seq) - *sector += XFS_FSB_TO_BB(mp, rgbno); + *sector += XFS_FSB_TO_BB(mp, allocated); return XFS_FSB_TO_B(mp, count_fsb); } @@ -776,6 +771,100 @@ xfs_mark_rtg_boundary( ioend->io_flags |= IOMAP_IOEND_BOUNDARY; } +/* + * Cache the last zone written to for an inode so that it is considered first + * for subsequent writes. 
+ */ +struct xfs_zone_cache_item { + struct xfs_mru_cache_elem mru; + struct xfs_open_zone *oz; +}; + +static inline struct xfs_zone_cache_item * +xfs_zone_cache_item(struct xfs_mru_cache_elem *mru) +{ + return container_of(mru, struct xfs_zone_cache_item, mru); +} + +static void +xfs_zone_cache_free_func( + void *data, + struct xfs_mru_cache_elem *mru) +{ + struct xfs_zone_cache_item *item = xfs_zone_cache_item(mru); + + xfs_open_zone_put(item->oz); + kfree(item); +} + +/* + * Check if we have a cached last open zone available for the inode and + * if yes return a reference to it. + */ +static struct xfs_open_zone * +xfs_cached_zone( + struct xfs_mount *mp, + struct xfs_inode *ip) +{ + struct xfs_mru_cache_elem *mru; + struct xfs_open_zone *oz; + + mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino); + if (!mru) + return NULL; + oz = xfs_zone_cache_item(mru)->oz; + if (oz) { + /* + * GC only steals open zones at mount time, so no GC zones + * should end up in the cache. + */ + ASSERT(!oz->oz_is_gc); + ASSERT(atomic_read(&oz->oz_ref) > 0); + atomic_inc(&oz->oz_ref); + } + xfs_mru_cache_done(mp->m_zone_cache); + return oz; +} + +/* + * Update the last used zone cache for a given inode. + * + * The caller must have a reference on the open zone. + */ +static void +xfs_zone_cache_create_association( + struct xfs_inode *ip, + struct xfs_open_zone *oz) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_zone_cache_item *item = NULL; + struct xfs_mru_cache_elem *mru; + + ASSERT(atomic_read(&oz->oz_ref) > 0); + atomic_inc(&oz->oz_ref); + + mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino); + if (mru) { + /* + * If we have an association already, update it to point to the + * new zone. 
+ */ + item = xfs_zone_cache_item(mru); + xfs_open_zone_put(item->oz); + item->oz = oz; + xfs_mru_cache_done(mp->m_zone_cache); + return; + } + + item = kmalloc(sizeof(*item), GFP_KERNEL); + if (!item) { + xfs_open_zone_put(oz); + return; + } + item->oz = oz; + xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru); +} + static void xfs_submit_zoned_bio( struct iomap_ioend *ioend, @@ -819,11 +908,16 @@ xfs_zone_alloc_and_submit( */ if (!*oz && ioend->io_offset) *oz = xfs_last_used_zone(ioend); + if (!*oz) + *oz = xfs_cached_zone(mp, ip); + if (!*oz) { select_zone: *oz = xfs_select_zone(mp, write_hint, pack_tight); if (!*oz) goto out_error; + + xfs_zone_cache_create_association(ip, *oz); } alloc_len = xfs_zone_alloc_blocks(*oz, XFS_B_TO_FSB(mp, ioend->io_size), @@ -883,7 +977,7 @@ xfs_zone_rgbno_is_valid( lockdep_assert_held(&rtg_rmap(rtg)->i_lock); if (rtg->rtg_open_zone) - return rgbno < rtg->rtg_open_zone->oz_write_pointer; + return rgbno < rtg->rtg_open_zone->oz_allocated; return !xa_get_mark(&rtg_mount(rtg)->m_groups[XG_TYPE_RTG].xa, rtg_rgno(rtg), XFS_RTG_FREE); } @@ -917,7 +1011,7 @@ xfs_init_zone( { struct xfs_mount *mp = rtg_mount(rtg); struct xfs_zone_info *zi = mp->m_zone_info; - uint64_t used = rtg_rmap(rtg)->i_used_blocks; + uint32_t used = rtg_rmap(rtg)->i_used_blocks; xfs_rgblock_t write_pointer, highest_rgbno; int error; @@ -1014,24 +1108,27 @@ xfs_get_zone_info_cb( } /* - * Calculate the max open zone limit based on the of number of - * backing zones available + * Calculate the max open zone limit based on the of number of backing zones + * available. */ static inline uint32_t xfs_max_open_zones( struct xfs_mount *mp) { unsigned int max_open, max_open_data_zones; + /* - * We need two zones for every open data zone, - * one in reserve as we don't reclaim open zones. One data zone - * and its spare is included in XFS_MIN_ZONES. + * We need two zones for every open data zone, one in reserve as we + * don't reclaim open zones. 
One data zone and its spare is included + * in XFS_MIN_ZONES to support at least one user data writer. */ max_open_data_zones = (mp->m_sb.sb_rgcount - XFS_MIN_ZONES) / 2 + 1; max_open = max_open_data_zones + XFS_OPEN_GC_ZONES; /* - * Cap the max open limit to 1/4 of available space + * Cap the max open limit to 1/4 of available space. Without this we'd + * run out of easy reclaim targets too quickly and storage devices don't + * handle huge numbers of concurrent write streams overly well. */ max_open = min(max_open, mp->m_sb.sb_rgcount / 4); @@ -1201,9 +1298,24 @@ xfs_mount_zones( xfs_set_freecounter(mp, XC_FREE_RTEXTENTS, iz.available + iz.reclaimable); + /* + * The user may configure GC to free up a percentage of unused blocks. + * By default this is 0. GC will always trigger at the minimum level + * for keeping max_open_zones available for data placement. + */ + mp->m_zonegc_low_space = 0; + error = xfs_zone_gc_mount(mp); if (error) goto out_free_zone_info; + + /* + * Set up a mru cache to track inode to open zone for data placement + * purposes. The magic values for group count and life time is the + * same as the defaults for file streams, which seems sane enough. + */ + xfs_mru_cache_create(&mp->m_zone_cache, mp, + 5000, 10, xfs_zone_cache_free_func); return 0; out_free_zone_info: @@ -1217,4 +1329,5 @@ xfs_unmount_zones( { xfs_zone_gc_unmount(mp); xfs_free_zone_info(mp->m_zone_info); + xfs_mru_cache_destroy(mp->m_zone_cache); } |