diff options
Diffstat (limited to 'fs/xfs/xfs_discard.c')
-rw-r--r-- | fs/xfs/xfs_discard.c | 655 |
1 files changed, 568 insertions, 87 deletions
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 268bb734dc0a..94d0873bcd62 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -20,6 +20,8 @@ #include "xfs_log.h" #include "xfs_ag.h" #include "xfs_health.h" +#include "xfs_rtbitmap.h" +#include "xfs_rtgroup.h" /* * Notes on an efficient, low latency fstrim algorithm @@ -71,6 +73,8 @@ * extent search so that it overlaps in flight discard IO. */ +#define XFS_DISCARD_MAX_EXAMINE (100) + struct workqueue_struct *xfs_discard_wq; static void @@ -80,13 +84,13 @@ xfs_discard_endio_work( struct xfs_busy_extents *extents = container_of(work, struct xfs_busy_extents, endio_work); - xfs_extent_busy_clear(extents->mount, &extents->extent_list, false); + xfs_extent_busy_clear(&extents->extent_list, false); kfree(extents->owner); } /* * Queue up the actual completion to a thread to avoid IRQ-safe locking for - * pagb_lock. + * eb_lock. */ static void xfs_discard_endio( @@ -99,6 +103,24 @@ xfs_discard_endio( bio_put(bio); } +static inline struct block_device * +xfs_group_bdev( + const struct xfs_group *xg) +{ + struct xfs_mount *mp = xg->xg_mount; + + switch (xg->xg_type) { + case XG_TYPE_AG: + return mp->m_ddev_targp->bt_bdev; + case XG_TYPE_RTG: + return mp->m_rtdev_targp->bt_bdev; + default: + ASSERT(0); + break; + } + return NULL; +} + /* * Walk the discard list and issue discards on all the busy extents in the * list. We plug and chain the bios so that we only need a single completion @@ -116,11 +138,11 @@ xfs_discard_extents( blk_start_plug(&plug); list_for_each_entry(busyp, &extents->extent_list, list) { - trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, - busyp->length); + trace_xfs_discard_extent(busyp->group, busyp->bno, + busyp->length); - error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, - XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), + error = __blkdev_issue_discard(xfs_group_bdev(busyp->group), + xfs_gbno_to_daddr(busyp->group, busyp->bno), XFS_FSB_TO_BB(mp, busyp->length), GFP_KERNEL, &bio); if (error && error != -EOPNOTSUPP) { @@ -145,24 +167,35 @@ xfs_discard_extents( return error; } +/* + * Care must be taken setting up the trim cursor as the perags may not have been + * initialised when the cursor is initialised. e.g. a clean mount which hasn't + * read in AGFs and the first operation run on the mounted fs is a trim. This + * can result in perag fields that aren't initialised until + * xfs_trim_gather_extents() calls xfs_alloc_read_agf() to lock down the AG for + * the free space search. + */ +struct xfs_trim_cur { + xfs_agblock_t start; + xfs_extlen_t count; + xfs_agblock_t end; + xfs_extlen_t minlen; + bool by_bno; +}; static int xfs_trim_gather_extents( struct xfs_perag *pag, - xfs_daddr_t start, - xfs_daddr_t end, - xfs_daddr_t minlen, - struct xfs_alloc_rec_incore *tcur, - struct xfs_busy_extents *extents, - uint64_t *blocks_trimmed) + struct xfs_trim_cur *tcur, + struct xfs_busy_extents *extents) { - struct xfs_mount *mp = pag->pag_mount; + struct xfs_mount *mp = pag_mount(pag); struct xfs_trans *tp; struct xfs_btree_cur *cur; struct xfs_buf *agbp; int error; int i; - int batch = 100; + int batch = XFS_DISCARD_MAX_EXAMINE; /* * Force out the log. This means any transactions that might have freed @@ -179,21 +212,34 @@ xfs_trim_gather_extents( if (error) goto out_trans_cancel; - cur = xfs_cntbt_init_cursor(mp, tp, agbp, pag); - /* - * Look up the extent length requested in the AGF and start with it. + * First time through tcur->count will not have been initialised as + * pag->pagf_longest is not guaranteed to be valid before we read + * the AGF buffer above. */ - if (tcur->ar_startblock == NULLAGBLOCK) - error = xfs_alloc_lookup_ge(cur, 0, tcur->ar_blockcount, &i); - else - error = xfs_alloc_lookup_le(cur, tcur->ar_startblock, - tcur->ar_blockcount, &i); + if (!tcur->count) + tcur->count = pag->pagf_longest; + + if (tcur->by_bno) { + /* sub-AG discard request always starts at tcur->start */ + cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag); + error = xfs_alloc_lookup_le(cur, tcur->start, 0, &i); + if (!error && !i) + error = xfs_alloc_lookup_ge(cur, tcur->start, 0, &i); + } else if (tcur->start == 0) { + /* first time through a by-len starts with max length */ + cur = xfs_cntbt_init_cursor(mp, tp, agbp, pag); + error = xfs_alloc_lookup_ge(cur, 0, tcur->count, &i); + } else { + /* nth time through a by-len starts where we left off */ + cur = xfs_cntbt_init_cursor(mp, tp, agbp, pag); + error = xfs_alloc_lookup_le(cur, tcur->start, tcur->count, &i); + } if (error) goto out_del_cursor; if (i == 0) { /* nothing of that length left in the AG, we are done */ - tcur->ar_blockcount = 0; + tcur->count = 0; goto out_del_cursor; } @@ -204,8 +250,6 @@ xfs_trim_gather_extents( while (i) { xfs_agblock_t fbno; xfs_extlen_t flen; - xfs_daddr_t dbno; - xfs_extlen_t dlen; error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); if (error) @@ -221,52 +265,62 @@ xfs_trim_gather_extents( * Update the cursor to point at this extent so we * restart the next batch from this extent. */ - tcur->ar_startblock = fbno; - tcur->ar_blockcount = flen; - break; - } - - /* - * use daddr format for all range/len calculations as that is - * the format the range/len variables are supplied in by - * userspace. - */ - dbno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, fbno); - dlen = XFS_FSB_TO_BB(mp, flen); - - /* - * Too small? Give up. - */ - if (dlen < minlen) { - trace_xfs_discard_toosmall(mp, pag->pag_agno, fbno, flen); - tcur->ar_blockcount = 0; + tcur->start = fbno; + tcur->count = flen; break; } /* * If the extent is entirely outside of the range we are - * supposed to discard skip it. Do not bother to trim - * down partially overlapping ranges for now. + * supposed to skip it. Do not bother to trim down partially + * overlapping ranges for now. */ - if (dbno + dlen < start || dbno > end) { - trace_xfs_discard_exclude(mp, pag->pag_agno, fbno, flen); + if (fbno + flen < tcur->start) { + trace_xfs_discard_exclude(pag_group(pag), fbno, flen); + goto next_extent; + } + if (fbno > tcur->end) { + trace_xfs_discard_exclude(pag_group(pag), fbno, flen); + if (tcur->by_bno) { + tcur->count = 0; + break; + } goto next_extent; } + /* Trim the extent returned to the range we want. */ + if (fbno < tcur->start) { + flen -= tcur->start - fbno; + fbno = tcur->start; + } + if (fbno + flen > tcur->end + 1) + flen = tcur->end - fbno + 1; + + /* Too small? Give up. */ + if (flen < tcur->minlen) { + trace_xfs_discard_toosmall(pag_group(pag), fbno, flen); + if (tcur->by_bno) + goto next_extent; + tcur->count = 0; + break; + } + /* * If any blocks in the range are still busy, skip the * discard and try again the next time. */ - if (xfs_extent_busy_search(mp, pag, fbno, flen)) { - trace_xfs_discard_busy(mp, pag->pag_agno, fbno, flen); + if (xfs_extent_busy_search(pag_group(pag), fbno, flen)) { + trace_xfs_discard_busy(pag_group(pag), fbno, flen); goto next_extent; } - xfs_extent_busy_insert_discard(pag, fbno, flen, + xfs_extent_busy_insert_discard(pag_group(pag), fbno, flen, &extents->extent_list); - *blocks_trimmed += flen; next_extent: - error = xfs_btree_decrement(cur, 0, &i); + if (tcur->by_bno) + error = xfs_btree_increment(cur, 0, &i); + else + error = xfs_btree_decrement(cur, 0, &i); if (error) break; @@ -276,7 +330,7 @@ next_extent: * is no more extents to search. */ if (i == 0) - tcur->ar_blockcount = 0; + tcur->count = 0; } /* @@ -284,7 +338,7 @@ next_extent: * we aren't going to issue a discard on them any more. */ if (error) - xfs_extent_busy_clear(mp, &extents->extent_list, false); + xfs_extent_busy_clear(&extents->extent_list, false); out_del_cursor: xfs_btree_del_cursor(cur, error); out_trans_cancel: @@ -304,19 +358,22 @@ xfs_trim_should_stop(void) * we found in the last batch as the key to start the next. */ static int -xfs_trim_extents( +xfs_trim_perag_extents( struct xfs_perag *pag, - xfs_daddr_t start, - xfs_daddr_t end, - xfs_daddr_t minlen, - uint64_t *blocks_trimmed) + xfs_agblock_t start, + xfs_agblock_t end, + xfs_extlen_t minlen) { - struct xfs_alloc_rec_incore tcur = { - .ar_blockcount = pag->pagf_longest, - .ar_startblock = NULLAGBLOCK, + struct xfs_trim_cur tcur = { + .start = start, + .end = end, + .minlen = minlen, }; int error = 0; + if (start != 0 || end != pag_group(pag)->xg_block_count) + tcur.by_bno = true; + do { struct xfs_busy_extents *extents; @@ -326,12 +383,10 @@ xfs_trim_extents( break; } - extents->mount = pag->pag_mount; extents->owner = extents; INIT_LIST_HEAD(&extents->extent_list); - error = xfs_trim_gather_extents(pag, start, end, minlen, - &tcur, extents, blocks_trimmed); + error = xfs_trim_gather_extents(pag, &tcur, extents); if (error) { kfree(extents); break; @@ -347,18 +402,434 @@ xfs_trim_extents( * list after this function call, as it may have been freed by * the time control returns to us. */ - error = xfs_discard_extents(pag->pag_mount, extents); + error = xfs_discard_extents(pag_mount(pag), extents); if (error) break; if (xfs_trim_should_stop()) break; - } while (tcur.ar_blockcount != 0); + } while (tcur.count != 0); + + return error; + +} + +static int +xfs_trim_datadev_extents( + struct xfs_mount *mp, + xfs_daddr_t start, + xfs_daddr_t end, + xfs_extlen_t minlen) +{ + xfs_agnumber_t start_agno, end_agno; + xfs_agblock_t start_agbno, end_agbno; + struct xfs_perag *pag = NULL; + xfs_daddr_t ddev_end; + int last_error = 0, error; + + ddev_end = min_t(xfs_daddr_t, end, + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1); + + start_agno = xfs_daddr_to_agno(mp, start); + start_agbno = xfs_daddr_to_agbno(mp, start); + end_agno = xfs_daddr_to_agno(mp, ddev_end); + end_agbno = xfs_daddr_to_agbno(mp, ddev_end); + + while ((pag = xfs_perag_next_range(mp, pag, start_agno, end_agno))) { + xfs_agblock_t agend = pag_group(pag)->xg_block_count; + + if (pag_agno(pag) == end_agno) + agend = end_agbno; + error = xfs_trim_perag_extents(pag, start_agbno, agend, minlen); + if (error) + last_error = error; + + if (xfs_trim_should_stop()) { + xfs_perag_rele(pag); + break; + } + start_agbno = 0; + } + + return last_error; +} + +#ifdef CONFIG_XFS_RT +struct xfs_trim_rtdev { + /* list of rt extents to free */ + struct list_head extent_list; + + /* minimum length that caller allows us to trim */ + xfs_rtblock_t minlen_fsb; + + /* restart point for the rtbitmap walk */ + xfs_rtxnum_t restart_rtx; + + /* stopping point for the current rtbitmap walk */ + xfs_rtxnum_t stop_rtx; +}; + +struct xfs_rtx_busy { + struct list_head list; + xfs_rtblock_t bno; + xfs_rtblock_t length; +}; + +static void +xfs_discard_free_rtdev_extents( + struct xfs_trim_rtdev *tr) +{ + struct xfs_rtx_busy *busyp, *n; + + list_for_each_entry_safe(busyp, n, &tr->extent_list, list) { + list_del_init(&busyp->list); + kfree(busyp); + } +} + +/* + * Walk the discard list and issue discards on all the busy extents in the + * list. We plug and chain the bios so that we only need a single completion + * call to clear all the busy extents once the discards are complete. + */ +static int +xfs_discard_rtdev_extents( + struct xfs_mount *mp, + struct xfs_trim_rtdev *tr) +{ + struct block_device *bdev = mp->m_rtdev_targp->bt_bdev; + struct xfs_rtx_busy *busyp; + struct bio *bio = NULL; + struct blk_plug plug; + xfs_rtblock_t start = NULLRTBLOCK, length = 0; + int error = 0; + + blk_start_plug(&plug); + list_for_each_entry(busyp, &tr->extent_list, list) { + if (start == NULLRTBLOCK) + start = busyp->bno; + length += busyp->length; + + trace_xfs_discard_rtextent(mp, busyp->bno, busyp->length); + + error = __blkdev_issue_discard(bdev, + xfs_rtb_to_daddr(mp, busyp->bno), + XFS_FSB_TO_BB(mp, busyp->length), + GFP_NOFS, &bio); + if (error) + break; + } + xfs_discard_free_rtdev_extents(tr); + + if (bio) { + error = submit_bio_wait(bio); + if (error == -EOPNOTSUPP) + error = 0; + if (error) + xfs_info(mp, + "discard failed for rtextent [0x%llx,%llu], error %d", + (unsigned long long)start, + (unsigned long long)length, + error); + bio_put(bio); + } + blk_finish_plug(&plug); + + return error; +} + +static int +xfs_trim_gather_rtextent( + struct xfs_rtgroup *rtg, + struct xfs_trans *tp, + const struct xfs_rtalloc_rec *rec, + void *priv) +{ + struct xfs_trim_rtdev *tr = priv; + struct xfs_rtx_busy *busyp; + xfs_rtblock_t rbno, rlen; + + if (rec->ar_startext > tr->stop_rtx) { + /* + * If we've scanned a large number of rtbitmap blocks, update + * the cursor to point at this extent so we restart the next + * batch from this extent. + */ + tr->restart_rtx = rec->ar_startext; + return -ECANCELED; + } + + rbno = xfs_rtx_to_rtb(rtg, rec->ar_startext); + rlen = xfs_rtbxlen_to_blen(rtg_mount(rtg), rec->ar_extcount); + + /* Ignore too small. */ + if (rlen < tr->minlen_fsb) { + trace_xfs_discard_rttoosmall(rtg_mount(rtg), rbno, rlen); + return 0; + } + + busyp = kzalloc(sizeof(struct xfs_rtx_busy), GFP_KERNEL); + if (!busyp) + return -ENOMEM; + + busyp->bno = rbno; + busyp->length = rlen; + INIT_LIST_HEAD(&busyp->list); + list_add_tail(&busyp->list, &tr->extent_list); + + tr->restart_rtx = rec->ar_startext + rec->ar_extcount; + return 0; +} + +/* Trim extents on an !rtgroups realtime device */ +static int +xfs_trim_rtextents( + struct xfs_rtgroup *rtg, + xfs_rtxnum_t low, + xfs_rtxnum_t high, + xfs_daddr_t minlen) +{ + struct xfs_mount *mp = rtg_mount(rtg); + struct xfs_trim_rtdev tr = { + .minlen_fsb = XFS_BB_TO_FSB(mp, minlen), + .extent_list = LIST_HEAD_INIT(tr.extent_list), + }; + struct xfs_trans *tp; + int error; + + error = xfs_trans_alloc_empty(mp, &tp); + if (error) + return error; + + /* + * Walk the free ranges between low and high. The query_range function + * trims the extents returned. + */ + do { + tr.stop_rtx = low + xfs_rtbitmap_rtx_per_rbmblock(mp); + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED); + error = xfs_rtalloc_query_range(rtg, tp, low, high, + xfs_trim_gather_rtextent, &tr); + + if (error == -ECANCELED) + error = 0; + if (error) { + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED); + xfs_discard_free_rtdev_extents(&tr); + break; + } + + if (list_empty(&tr.extent_list)) { + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED); + break; + } + + error = xfs_discard_rtdev_extents(mp, &tr); + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED); + if (error) + break; + + low = tr.restart_rtx; + } while (!xfs_trim_should_stop() && low <= high); + + xfs_trans_cancel(tp); + return error; +} + +struct xfs_trim_rtgroup { + /* list of rtgroup extents to free */ + struct xfs_busy_extents *extents; + + /* minimum length that caller allows us to trim */ + xfs_rtblock_t minlen_fsb; + + /* restart point for the rtbitmap walk */ + xfs_rtxnum_t restart_rtx; + + /* number of extents to examine before stopping to issue discard ios */ + int batch; + + /* number of extents queued for discard */ + int queued; +}; + +static int +xfs_trim_gather_rtgroup_extent( + struct xfs_rtgroup *rtg, + struct xfs_trans *tp, + const struct xfs_rtalloc_rec *rec, + void *priv) +{ + struct xfs_trim_rtgroup *tr = priv; + xfs_rgblock_t rgbno; + xfs_extlen_t len; + + if (--tr->batch <= 0) { + /* + * If we've checked a large number of extents, update the + * cursor to point at this extent so we restart the next batch + * from this extent. + */ + tr->restart_rtx = rec->ar_startext; + return -ECANCELED; + } + + rgbno = xfs_rtx_to_rgbno(rtg, rec->ar_startext); + len = xfs_rtxlen_to_extlen(rtg_mount(rtg), rec->ar_extcount); + + /* Ignore too small. */ + if (len < tr->minlen_fsb) { + trace_xfs_discard_toosmall(rtg_group(rtg), rgbno, len); + return 0; + } + + /* + * If any blocks in the range are still busy, skip the discard and try + * again the next time. + */ + if (xfs_extent_busy_search(rtg_group(rtg), rgbno, len)) { + trace_xfs_discard_busy(rtg_group(rtg), rgbno, len); + return 0; + } + + xfs_extent_busy_insert_discard(rtg_group(rtg), rgbno, len, + &tr->extents->extent_list); + + tr->queued++; + tr->restart_rtx = rec->ar_startext + rec->ar_extcount; + return 0; +} + +/* Trim extents in this rtgroup using the busy extent machinery. */ +static int +xfs_trim_rtgroup_extents( + struct xfs_rtgroup *rtg, + xfs_rtxnum_t low, + xfs_rtxnum_t high, + xfs_daddr_t minlen) +{ + struct xfs_mount *mp = rtg_mount(rtg); + struct xfs_trim_rtgroup tr = { + .minlen_fsb = XFS_BB_TO_FSB(mp, minlen), + }; + struct xfs_trans *tp; + int error; + + error = xfs_trans_alloc_empty(mp, &tp); + if (error) + return error; + + /* + * Walk the free ranges between low and high. The query_range function + * trims the extents returned. + */ + do { + tr.extents = kzalloc(sizeof(*tr.extents), GFP_KERNEL); + if (!tr.extents) { + error = -ENOMEM; + break; + } + + tr.queued = 0; + tr.batch = XFS_DISCARD_MAX_EXAMINE; + tr.extents->owner = tr.extents; + INIT_LIST_HEAD(&tr.extents->extent_list); + + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED); + error = xfs_rtalloc_query_range(rtg, tp, low, high, + xfs_trim_gather_rtgroup_extent, &tr); + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED); + if (error == -ECANCELED) + error = 0; + if (error) { + kfree(tr.extents); + break; + } + + if (!tr.queued) + break; + + /* + * We hand the extent list to the discard function here so the + * discarded extents can be removed from the busy extent list. + * This allows the discards to run asynchronously with + * gathering the next round of extents to discard. + * + * However, we must ensure that we do not reference the extent + * list after this function call, as it may have been freed by + * the time control returns to us. + */ + error = xfs_discard_extents(rtg_mount(rtg), tr.extents); + if (error) + break; + + low = tr.restart_rtx; + } while (!xfs_trim_should_stop() && low <= high); + xfs_trans_cancel(tp); return error; +} +static int +xfs_trim_rtdev_extents( + struct xfs_mount *mp, + xfs_daddr_t start, + xfs_daddr_t end, + xfs_daddr_t minlen) +{ + xfs_rtblock_t start_rtbno, end_rtbno; + xfs_rtxnum_t start_rtx, end_rtx; + xfs_rgnumber_t start_rgno, end_rgno; + xfs_daddr_t daddr_offset; + int last_error = 0, error; + struct xfs_rtgroup *rtg = NULL; + + /* Shift the start and end downwards to match the rt device. */ + daddr_offset = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); + if (start > daddr_offset) + start -= daddr_offset; + else + start = 0; + start_rtbno = xfs_daddr_to_rtb(mp, start); + start_rtx = xfs_rtb_to_rtx(mp, start_rtbno); + start_rgno = xfs_rtb_to_rgno(mp, start_rtbno); + + if (end <= daddr_offset) + return 0; + else + end -= daddr_offset; + end_rtbno = xfs_daddr_to_rtb(mp, end); + end_rtx = xfs_rtb_to_rtx(mp, end_rtbno + mp->m_sb.sb_rextsize - 1); + end_rgno = xfs_rtb_to_rgno(mp, end_rtbno); + + while ((rtg = xfs_rtgroup_next_range(mp, rtg, start_rgno, end_rgno))) { + xfs_rtxnum_t rtg_end = rtg->rtg_extents; + + if (rtg_rgno(rtg) == end_rgno) + rtg_end = min(rtg_end, end_rtx); + + if (xfs_has_rtgroups(mp)) + error = xfs_trim_rtgroup_extents(rtg, start_rtx, + rtg_end, minlen); + else + error = xfs_trim_rtextents(rtg, start_rtx, rtg_end, + minlen); + if (error) + last_error = error; + + if (xfs_trim_should_stop()) { + xfs_rtgroup_rele(rtg); + break; + } + start_rtx = 0; + } + + return last_error; } +#else +# define xfs_trim_rtdev_extents(...) (-EOPNOTSUPP) +#endif /* CONFIG_XFS_RT */ /* * trim a range of the filesystem. @@ -368,26 +839,37 @@ xfs_trim_extents( * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format * is a linear address range. Hence we need to use DADDR based conversions and * comparisons for determining the correct offset and regions to trim. + * + * The realtime device is mapped into the FITRIM "address space" immediately + * after the data device. */ int xfs_ioc_trim( struct xfs_mount *mp, struct fstrim_range __user *urange) { - struct xfs_perag *pag; unsigned int granularity = bdev_discard_granularity(mp->m_ddev_targp->bt_bdev); + struct block_device *rt_bdev = NULL; struct fstrim_range range; - xfs_daddr_t start, end, minlen; - xfs_agnumber_t agno; - uint64_t blocks_trimmed = 0; + xfs_daddr_t start, end; + xfs_extlen_t minlen; + xfs_rfsblock_t max_blocks; int error, last_error = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev)) + + if (mp->m_rtdev_targp && !xfs_has_zoned(mp) && + bdev_max_discard_sectors(mp->m_rtdev_targp->bt_bdev)) + rt_bdev = mp->m_rtdev_targp->bt_bdev; + if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev) && !rt_bdev) return -EOPNOTSUPP; + if (rt_bdev) + granularity = max(granularity, + bdev_discard_granularity(rt_bdev)); + /* * We haven't recovered the log, so we cannot use our bnobt-guided * storage zapping commands. @@ -399,7 +881,8 @@ xfs_ioc_trim( return -EFAULT; range.minlen = max_t(u64, granularity, range.minlen); - minlen = BTOBB(range.minlen); + minlen = XFS_B_TO_FSB(mp, range.minlen); + /* * Truncating down the len isn't actually quite correct, but using * BBTOB would mean we trivially get overflows for values @@ -407,7 +890,8 @@ xfs_ioc_trim( * used by the fstrim application. In the end it really doesn't * matter as trimming blocks is an advisory interface. */ - if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || + max_blocks = mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks; + if (range.start >= XFS_FSB_TO_B(mp, max_blocks) || range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) || range.len < mp->m_sb.sb_blocksize) return -EINVAL; @@ -415,26 +899,23 @@ xfs_ioc_trim( start = BTOBB(range.start); end = start + BTOBBT(range.len) - 1; - if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1) - end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1; - - agno = xfs_daddr_to_agno(mp, start); - for_each_perag_range(mp, agno, xfs_daddr_to_agno(mp, end), pag) { - error = xfs_trim_extents(pag, start, end, minlen, - &blocks_trimmed); + if (bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev)) { + error = xfs_trim_datadev_extents(mp, start, end, minlen); if (error) last_error = error; + } - if (xfs_trim_should_stop()) { - xfs_perag_rele(pag); - break; - } + if (rt_bdev && !xfs_trim_should_stop()) { + error = xfs_trim_rtdev_extents(mp, start, end, minlen); + if (error) + last_error = error; } if (last_error) return last_error; - range.len = XFS_FSB_TO_B(mp, blocks_trimmed); + range.len = min_t(unsigned long long, range.len, + XFS_FSB_TO_B(mp, max_blocks) - range.start); if (copy_to_user(urange, &range, sizeof(range))) return -EFAULT; return 0; |