Diffstat (limited to 'fs/xfs/xfs_zone_alloc.c')
 fs/xfs/xfs_zone_alloc.c | 163
 1 file changed, 138 insertions(+), 25 deletions(-)
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index 52af234936a2..33f7eee521a8 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -24,6 +24,7 @@
#include "xfs_zone_priv.h"
#include "xfs_zones.h"
#include "xfs_trace.h"
+#include "xfs_mru_cache.h"
void
xfs_open_zone_put(
@@ -433,7 +434,7 @@ xfs_init_open_zone(
spin_lock_init(&oz->oz_alloc_lock);
atomic_set(&oz->oz_ref, 1);
oz->oz_rtg = rtg;
- oz->oz_write_pointer = write_pointer;
+ oz->oz_allocated = write_pointer;
oz->oz_written = write_pointer;
oz->oz_write_hint = write_hint;
oz->oz_is_gc = is_gc;
@@ -568,7 +569,7 @@ xfs_try_use_zone(
struct xfs_open_zone *oz,
bool lowspace)
{
- if (oz->oz_write_pointer == rtg_blocks(oz->oz_rtg))
+ if (oz->oz_allocated == rtg_blocks(oz->oz_rtg))
return false;
if (!lowspace && !xfs_good_hint_match(oz, file_hint))
return false;
@@ -653,13 +654,6 @@ static inline bool xfs_zoned_pack_tight(struct xfs_inode *ip)
!(ip->i_diflags & XFS_DIFLAG_APPEND);
}
-/*
- * Pick a new zone for writes.
- *
- * If we aren't using up our budget of open zones just open a new one from the
- * freelist. Else try to find one that matches the expected data lifetime. If
- * we don't find one that is good pick any zone that is available.
- */
static struct xfs_open_zone *
xfs_select_zone_nowait(
struct xfs_mount *mp,
@@ -687,7 +681,8 @@ xfs_select_zone_nowait(
goto out_unlock;
/*
- * See if we can open a new zone and use that.
+ * See if we can open a new zone and use that so that data for different
+ * files is mixed as little as possible.
*/
oz = xfs_try_open_zone(mp, write_hint);
if (oz)
@@ -726,7 +721,7 @@ xfs_select_zone(
for (;;) {
prepare_to_wait(&zi->zi_zone_wait, &wait, TASK_UNINTERRUPTIBLE);
oz = xfs_select_zone_nowait(mp, write_hint, pack_tight);
- if (oz)
+ if (oz || xfs_is_shutdown(mp))
break;
schedule();
}
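For context, a hedged sketch of the full waiter as it presumably reads after this hunk; the DEFINE_WAIT()/finish_wait() lines fall outside the diff context and are assumed here. The new xfs_is_shutdown() check lets the loop break out (with oz still NULL) on a shut-down filesystem that will never open another zone, instead of sleeping in TASK_UNINTERRUPTIBLE forever:

	/* Assumed surrounding code; only the marked line is from this hunk. */
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&zi->zi_zone_wait, &wait, TASK_UNINTERRUPTIBLE);
		oz = xfs_select_zone_nowait(mp, write_hint, pack_tight);
		if (oz || xfs_is_shutdown(mp))	/* from this hunk */
			break;
		schedule();	/* woken when a zone is opened or freed */
	}
	finish_wait(&zi->zi_zone_wait, &wait);
	return oz;	/* callers must handle a NULL return on shutdown */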
@@ -743,25 +738,25 @@ xfs_zone_alloc_blocks(
{
struct xfs_rtgroup *rtg = oz->oz_rtg;
struct xfs_mount *mp = rtg_mount(rtg);
- xfs_rgblock_t rgbno;
+ xfs_rgblock_t allocated;
spin_lock(&oz->oz_alloc_lock);
count_fsb = min3(count_fsb, XFS_MAX_BMBT_EXTLEN,
- (xfs_filblks_t)rtg_blocks(rtg) - oz->oz_write_pointer);
+ (xfs_filblks_t)rtg_blocks(rtg) - oz->oz_allocated);
if (!count_fsb) {
spin_unlock(&oz->oz_alloc_lock);
return 0;
}
- rgbno = oz->oz_write_pointer;
- oz->oz_write_pointer += count_fsb;
+ allocated = oz->oz_allocated;
+ oz->oz_allocated += count_fsb;
spin_unlock(&oz->oz_alloc_lock);
- trace_xfs_zone_alloc_blocks(oz, rgbno, count_fsb);
+ trace_xfs_zone_alloc_blocks(oz, allocated, count_fsb);
*sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0);
*is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *sector);
if (!*is_seq)
- *sector += XFS_FSB_TO_BB(mp, rgbno);
+ *sector += XFS_FSB_TO_BB(mp, allocated);
return XFS_FSB_TO_B(mp, count_fsb);
}
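xfs_zone_alloc_blocks() is a bump allocator guarded by a spinlock: oz_allocated only ever moves forward, clamped to the zone size, and a given block is never handed out twice. A minimal userspace C sketch of the same pattern, with hypothetical names, purely for illustration:

	#include <pthread.h>
	#include <stdint.h>

	/* Hypothetical userspace analogue of the oz_allocated bump allocator. */
	struct zone {
		pthread_mutex_t	lock;		/* plays the role of oz_alloc_lock */
		uint32_t	allocated;	/* plays the role of oz_allocated */
		uint32_t	capacity;	/* plays the role of rtg_blocks(rtg) */
	};

	/* Returns the first allocated block; *count is clamped and may become 0. */
	static uint32_t zone_alloc(struct zone *z, uint32_t *count)
	{
		uint32_t start;

		pthread_mutex_lock(&z->lock);
		if (*count > z->capacity - z->allocated)
			*count = z->capacity - z->allocated;
		start = z->allocated;
		z->allocated += *count;		/* never moves backwards */
		pthread_mutex_unlock(&z->lock);
		return start;
	}

The tail of the real function then translates the allocation into a device sector: on conventional zones the allocated offset is added to the zone's start sector, while sequential zones are written at the device write pointer, so only the zone start is reported and *is_seq tells the caller which case applies.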
@@ -776,6 +771,100 @@ xfs_mark_rtg_boundary(
ioend->io_flags |= IOMAP_IOEND_BOUNDARY;
}
+/*
+ * Cache the last zone written to for an inode so that it is considered first
+ * for subsequent writes.
+ */
+struct xfs_zone_cache_item {
+ struct xfs_mru_cache_elem mru;
+ struct xfs_open_zone *oz;
+};
+
+static inline struct xfs_zone_cache_item *
+xfs_zone_cache_item(struct xfs_mru_cache_elem *mru)
+{
+ return container_of(mru, struct xfs_zone_cache_item, mru);
+}
+
+static void
+xfs_zone_cache_free_func(
+ void *data,
+ struct xfs_mru_cache_elem *mru)
+{
+ struct xfs_zone_cache_item *item = xfs_zone_cache_item(mru);
+
+ xfs_open_zone_put(item->oz);
+ kfree(item);
+}
+
+/*
+ * Check if we have a cached last open zone available for the inode and,
+ * if so, return a reference to it.
+ */
+static struct xfs_open_zone *
+xfs_cached_zone(
+ struct xfs_mount *mp,
+ struct xfs_inode *ip)
+{
+ struct xfs_mru_cache_elem *mru;
+ struct xfs_open_zone *oz;
+
+ mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
+ if (!mru)
+ return NULL;
+ oz = xfs_zone_cache_item(mru)->oz;
+ if (oz) {
+ /*
+ * GC only steals open zones at mount time, so no GC zones
+ * should end up in the cache.
+ */
+ ASSERT(!oz->oz_is_gc);
+ ASSERT(atomic_read(&oz->oz_ref) > 0);
+ atomic_inc(&oz->oz_ref);
+ }
+ xfs_mru_cache_done(mp->m_zone_cache);
+ return oz;
+}
+
+/*
+ * Update the last used zone cache for a given inode.
+ *
+ * The caller must have a reference on the open zone.
+ */
+static void
+xfs_zone_cache_create_association(
+ struct xfs_inode *ip,
+ struct xfs_open_zone *oz)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_zone_cache_item *item = NULL;
+ struct xfs_mru_cache_elem *mru;
+
+ ASSERT(atomic_read(&oz->oz_ref) > 0);
+ atomic_inc(&oz->oz_ref);
+
+ mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
+ if (mru) {
+ /*
+ * If we have an association already, update it to point to the
+ * new zone.
+ */
+ item = xfs_zone_cache_item(mru);
+ xfs_open_zone_put(item->oz);
+ item->oz = oz;
+ xfs_mru_cache_done(mp->m_zone_cache);
+ return;
+ }
+
+ item = kmalloc(sizeof(*item), GFP_KERNEL);
+ if (!item) {
+ xfs_open_zone_put(oz);
+ return;
+ }
+ item->oz = oz;
+ xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru);
+}
+
static void
xfs_submit_zoned_bio(
struct iomap_ioend *ioend,
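A subtlety both helpers above depend on: xfs_mru_cache_lookup() returns with the cache internally locked, so the caller bumps its own reference on the payload and only then unlocks via xfs_mru_cache_done(). The reference accounting balances because every cached association holds one oz_ref, which xfs_zone_cache_free_func() drops when the element expires or the cache is torn down. A condensed sketch of the lookup protocol (hedged; the locking rule comes from the xfs_mru_cache design, not from this hunk):

	mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
	if (mru) {
		oz = xfs_zone_cache_item(mru)->oz;
		atomic_inc(&oz->oz_ref);		/* take our own reference... */
		xfs_mru_cache_done(mp->m_zone_cache);	/* ...before unlocking the cache */
	}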
@@ -819,11 +908,16 @@ xfs_zone_alloc_and_submit(
*/
if (!*oz && ioend->io_offset)
*oz = xfs_last_used_zone(ioend);
+ if (!*oz)
+ *oz = xfs_cached_zone(mp, ip);
+
if (!*oz) {
select_zone:
*oz = xfs_select_zone(mp, write_hint, pack_tight);
if (!*oz)
goto out_error;
+
+ xfs_zone_cache_create_association(ip, *oz);
}
alloc_len = xfs_zone_alloc_blocks(*oz, XFS_B_TO_FSB(mp, ioend->io_size),
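Taken together, the placement policy for an ioend now tries three sources in order, falling through on NULL. A condensed sketch of the ladder as it reads after this hunk (the goto label is flattened here for readability):

	/* 1: the zone holding the data immediately before this write's offset */
	if (!*oz && ioend->io_offset)
		*oz = xfs_last_used_zone(ioend);
	/* 2: the zone this inode last wrote to, from the MRU cache */
	if (!*oz)
		*oz = xfs_cached_zone(mp, ip);
	/* 3: pick a fresh zone and remember it for the inode's next write */
	if (!*oz) {
		*oz = xfs_select_zone(mp, write_hint, pack_tight);
		if (*oz)
			xfs_zone_cache_create_association(ip, *oz);
	}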
@@ -883,7 +977,7 @@ xfs_zone_rgbno_is_valid(
lockdep_assert_held(&rtg_rmap(rtg)->i_lock);
if (rtg->rtg_open_zone)
- return rgbno < rtg->rtg_open_zone->oz_write_pointer;
+ return rgbno < rtg->rtg_open_zone->oz_allocated;
return !xa_get_mark(&rtg_mount(rtg)->m_groups[XG_TYPE_RTG].xa,
rtg_rgno(rtg), XFS_RTG_FREE);
}
@@ -917,7 +1011,7 @@ xfs_init_zone(
{
struct xfs_mount *mp = rtg_mount(rtg);
struct xfs_zone_info *zi = mp->m_zone_info;
- uint64_t used = rtg_rmap(rtg)->i_used_blocks;
+ uint32_t used = rtg_rmap(rtg)->i_used_blocks;
xfs_rgblock_t write_pointer, highest_rgbno;
int error;
@@ -1014,24 +1108,27 @@ xfs_get_zone_info_cb(
}
/*
- * Calculate the max open zone limit based on the of number of
- * backing zones available
+ * Calculate the max open zone limit based on the number of backing zones
+ * available.
*/
static inline uint32_t
xfs_max_open_zones(
struct xfs_mount *mp)
{
unsigned int max_open, max_open_data_zones;
+
/*
- * We need two zones for every open data zone,
- * one in reserve as we don't reclaim open zones. One data zone
- * and its spare is included in XFS_MIN_ZONES.
+ * We need two zones for every open data zone, one in reserve as we
+ * don't reclaim open zones. One data zone and its spare are included
+ * in XFS_MIN_ZONES to support at least one user data writer.
*/
max_open_data_zones = (mp->m_sb.sb_rgcount - XFS_MIN_ZONES) / 2 + 1;
max_open = max_open_data_zones + XFS_OPEN_GC_ZONES;
/*
- * Cap the max open limit to 1/4 of available space
+ * Cap the max open limit to 1/4 of available space. Without this we'd
+ * run out of easy reclaim targets too quickly and storage devices don't
+ * handle huge numbers of concurrent write streams overly well.
*/
max_open = min(max_open, mp->m_sb.sb_rgcount / 4);
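A worked example with hypothetical numbers, since XFS_MIN_ZONES and XFS_OPEN_GC_ZONES are defined elsewhere: assume sb_rgcount = 100, XFS_MIN_ZONES = 4 and XFS_OPEN_GC_ZONES = 1. Then:

	max_open_data_zones = (100 - 4) / 2 + 1 = 49
	max_open            = 49 + 1            = 50
	max_open            = min(50, 100 / 4)  = 25

so on devices with few zones it is the 1/4-of-space cap, not the pairing rule, that limits the number of concurrent write streams.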
@@ -1201,9 +1298,24 @@ xfs_mount_zones(
xfs_set_freecounter(mp, XC_FREE_RTEXTENTS,
iz.available + iz.reclaimable);
+ /*
+ * The user may configure GC to free up a percentage of unused blocks.
+ * By default this is 0. GC will always trigger at the minimum level
+ * needed to keep max_open_zones available for data placement.
+ */
+ mp->m_zonegc_low_space = 0;
+
error = xfs_zone_gc_mount(mp);
if (error)
goto out_free_zone_info;
+
+ /*
+ * Set up an MRU cache to track the inode to open zone mapping for data
+ * placement purposes. The magic values for group count and lifetime are
+ * the same as the defaults for file streams, which seems sane enough.
+ */
+ xfs_mru_cache_create(&mp->m_zone_cache, mp,
+ 5000, 10, xfs_zone_cache_free_func);
return 0;
out_free_zone_info:
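Assuming the usual xfs_mru_cache_create() parameter order of (cache, private data, element lifetime in ms, reap group count, free function), as used by the XFS file streams allocator, the call above amounts to:

	xfs_mru_cache_create(&mp->m_zone_cache,	/* cache to set up */
			mp,			/* passed back to the free func */
			5000,			/* association lifetime: ~5s */
			10,			/* reaping granularity: 10 groups */
			xfs_zone_cache_free_func); /* drops the cached oz_ref */

so an inode's zone association quietly ages out roughly five seconds after its last write, at which point the free function releases the open zone reference the cache was pinning.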
@@ -1217,4 +1329,5 @@ xfs_unmount_zones(
{
xfs_zone_gc_unmount(mp);
xfs_free_zone_info(mp->m_zone_info);
+ xfs_mru_cache_destroy(mp->m_zone_cache);
}