summaryrefslogtreecommitdiff
path: root/drivers/lightnvm/pblk-read.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-16 19:08:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-16 19:08:15 -0700
commit1718de78e6235c04ecb7f87a6875fdf90aafe382 (patch)
treea8b5c2f89bd2c71bd5b1dc47a0fa46446ba2cd0f /drivers/lightnvm/pblk-read.c
parent815d469d8c9a3360ee0a8b7857dd95352a6c7bde (diff)
parent7a102d9044e720ac887c0cd82b6d5cad236f6d71 (diff)
Merge tag 'for-5.2/block-post-20190516' of git://git.kernel.dk/linux-block
Pull more block updates from Jens Axboe: "This is mainly some late lightnvm changes that came in just before the merge window, as well as fixes that have been queued up since the initial pull request was frozen. This contains: - lightnvm changes, fixing race conditions, improving memory utilization, and improving pblk compatibility (Chansol, Igor, Marcin) - NVMe pull request with minor fixes all over the map (via Christoph) - remove redundant error print in sata_rcar (Geert) - struct_size() cleanup (Jackie) - dasd CONFIG_LBADF warning fix (Ming) - brd cond_resched() improvement (Mikulas)" * tag 'for-5.2/block-post-20190516' of git://git.kernel.dk/linux-block: (41 commits) block/bio-integrity: use struct_size() in kmalloc() nvme: validate cntlid during controller initialisation nvme: change locking for the per-subsystem controller list nvme: trace all async notice events nvme: fix typos in nvme status code values nvme-fabrics: remove unused argument nvme-multipath: avoid crash on invalid subsystem cntlid enumeration nvme-fc: use separate work queue to avoid warning nvme-rdma: remove redundant reference between ib_device and tagset nvme-pci: mark expected switch fall-through nvme-pci: add known admin effects to augument admin effects log page nvme-pci: init shadow doorbell after each reset brd: add cond_resched to brd_free_pages sata_rcar: Remove ata_host_alloc() error printing s390/dasd: fix build warning in dasd_eckd_build_cp_raw lightnvm: pblk: use nvm_rq_to_ppa_list() lightnvm: pblk: simplify partial read path lightnvm: do not remove instance under global lock lightnvm: track inflight target creations lightnvm: pblk: recover only written metadata ...
Diffstat (limited to 'drivers/lightnvm/pblk-read.c')
-rw-r--r--drivers/lightnvm/pblk-read.c394
1 files changed, 108 insertions, 286 deletions
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 0b7d5fb4548d..d98ea392fe33 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -26,8 +26,7 @@
* issued.
*/
static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
- sector_t lba, struct ppa_addr ppa,
- int bio_iter, bool advanced_bio)
+ sector_t lba, struct ppa_addr ppa)
{
#ifdef CONFIG_NVM_PBLK_DEBUG
/* Callers must ensure that the ppa points to a cache address */
@@ -35,73 +34,75 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
BUG_ON(!pblk_addr_in_cache(ppa));
#endif
- return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa,
- bio_iter, advanced_bio);
+ return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa);
}
-static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
+static int pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
struct bio *bio, sector_t blba,
- unsigned long *read_bitmap)
+ bool *from_cache)
{
void *meta_list = rqd->meta_list;
- struct ppa_addr ppas[NVM_MAX_VLBA];
- int nr_secs = rqd->nr_ppas;
- bool advanced_bio = false;
- int i, j = 0;
+ int nr_secs, i;
- pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs);
+retry:
+ nr_secs = pblk_lookup_l2p_seq(pblk, rqd->ppa_list, blba, rqd->nr_ppas,
+ from_cache);
+
+ if (!*from_cache)
+ goto end;
for (i = 0; i < nr_secs; i++) {
- struct ppa_addr p = ppas[i];
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
sector_t lba = blba + i;
-retry:
- if (pblk_ppa_empty(p)) {
+ if (pblk_ppa_empty(rqd->ppa_list[i])) {
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
- WARN_ON(test_and_set_bit(i, read_bitmap));
meta->lba = addr_empty;
-
- if (unlikely(!advanced_bio)) {
- bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE);
- advanced_bio = true;
+ } else if (pblk_addr_in_cache(rqd->ppa_list[i])) {
+ /*
+ * Try to read from write buffer. The address is later
+ * checked on the write buffer to prevent retrieving
+ * overwritten data.
+ */
+ if (!pblk_read_from_cache(pblk, bio, lba,
+ rqd->ppa_list[i])) {
+ if (i == 0) {
+ /*
+ * We haven't called bio_advance()
+ * yet, so we can just retry.
+ */
+ goto retry;
+ } else {
+ /*
+ * We have already called bio_advance(),
+ * so we cannot retry and we need
+ * to exit this function in order
+ * to allow the caller to handle the bio
+ * splitting at the current sector
+ * position.
+ */
+ nr_secs = i;
+ goto end;
+ }
}
-
- goto next;
- }
-
- /* Try to read from write buffer. The address is later checked
- * on the write buffer to prevent retrieving overwritten data.
- */
- if (pblk_addr_in_cache(p)) {
- if (!pblk_read_from_cache(pblk, bio, lba, p, i,
- advanced_bio)) {
- pblk_lookup_l2p_seq(pblk, &p, lba, 1);
- goto retry;
- }
- WARN_ON(test_and_set_bit(i, read_bitmap));
meta->lba = cpu_to_le64(lba);
- advanced_bio = true;
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_inc(&pblk->cache_reads);
#endif
- } else {
- /* Read from media non-cached sectors */
- rqd->ppa_list[j++] = p;
}
-
-next:
- if (advanced_bio)
- bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
+ bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
}
+end:
if (pblk_io_aligned(pblk, nr_secs))
rqd->is_seq = 1;
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_add(nr_secs, &pblk->inflight_reads);
#endif
+
+ return nr_secs;
}
@@ -175,12 +176,12 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n");
}
-static void pblk_end_user_read(struct bio *bio)
+static void pblk_end_user_read(struct bio *bio, int error)
{
-#ifdef CONFIG_NVM_PBLK_DEBUG
- WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n");
-#endif
- bio_endio(bio);
+ if (error && error != NVM_RSP_WARN_HIGHECC)
+ bio_io_error(bio);
+ else
+ bio_endio(bio);
}
static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
@@ -197,9 +198,7 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
pblk_log_read_err(pblk, rqd);
pblk_read_check_seq(pblk, rqd, r_ctx->lba);
-
- if (int_bio)
- bio_put(int_bio);
+ bio_put(int_bio);
if (put_line)
pblk_rq_to_line_put(pblk, rqd);
@@ -219,188 +218,17 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
struct bio *bio = (struct bio *)r_ctx->private;
- pblk_end_user_read(bio);
+ pblk_end_user_read(bio, rqd->error);
__pblk_end_io_read(pblk, rqd, true);
}
-static void pblk_end_partial_read(struct nvm_rq *rqd)
-{
- struct pblk *pblk = rqd->private;
- struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
- struct pblk_pr_ctx *pr_ctx = r_ctx->private;
- struct pblk_sec_meta *meta;
- struct bio *new_bio = rqd->bio;
- struct bio *bio = pr_ctx->orig_bio;
- void *meta_list = rqd->meta_list;
- unsigned long *read_bitmap = pr_ctx->bitmap;
- struct bvec_iter orig_iter = BVEC_ITER_ALL_INIT;
- struct bvec_iter new_iter = BVEC_ITER_ALL_INIT;
- int nr_secs = pr_ctx->orig_nr_secs;
- int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
- void *src_p, *dst_p;
- int bit, i;
-
- if (unlikely(nr_holes == 1)) {
- struct ppa_addr ppa;
-
- ppa = rqd->ppa_addr;
- rqd->ppa_list = pr_ctx->ppa_ptr;
- rqd->dma_ppa_list = pr_ctx->dma_ppa_list;
- rqd->ppa_list[0] = ppa;
- }
-
- for (i = 0; i < nr_secs; i++) {
- meta = pblk_get_meta(pblk, meta_list, i);
- pr_ctx->lba_list_media[i] = le64_to_cpu(meta->lba);
- meta->lba = cpu_to_le64(pr_ctx->lba_list_mem[i]);
- }
-
- /* Fill the holes in the original bio */
- i = 0;
- for (bit = 0; bit < nr_secs; bit++) {
- if (!test_bit(bit, read_bitmap)) {
- struct bio_vec dst_bv, src_bv;
- struct pblk_line *line;
-
- line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
- kref_put(&line->ref, pblk_line_put);
-
- meta = pblk_get_meta(pblk, meta_list, bit);
- meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]);
-
- dst_bv = bio_iter_iovec(bio, orig_iter);
- src_bv = bio_iter_iovec(new_bio, new_iter);
-
- src_p = kmap_atomic(src_bv.bv_page);
- dst_p = kmap_atomic(dst_bv.bv_page);
-
- memcpy(dst_p + dst_bv.bv_offset,
- src_p + src_bv.bv_offset,
- PBLK_EXPOSED_PAGE_SIZE);
-
- kunmap_atomic(src_p);
- kunmap_atomic(dst_p);
-
- flush_dcache_page(dst_bv.bv_page);
- mempool_free(src_bv.bv_page, &pblk->page_bio_pool);
-
- bio_advance_iter(new_bio, &new_iter,
- PBLK_EXPOSED_PAGE_SIZE);
- i++;
- }
- bio_advance_iter(bio, &orig_iter, PBLK_EXPOSED_PAGE_SIZE);
- }
-
- bio_put(new_bio);
- kfree(pr_ctx);
-
- /* restore original request */
- rqd->bio = NULL;
- rqd->nr_ppas = nr_secs;
-
- bio_endio(bio);
- __pblk_end_io_read(pblk, rqd, false);
-}
-
-static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
- unsigned int bio_init_idx,
- unsigned long *read_bitmap,
- int nr_holes)
-{
- void *meta_list = rqd->meta_list;
- struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
- struct pblk_pr_ctx *pr_ctx;
- struct bio *new_bio, *bio = r_ctx->private;
- int nr_secs = rqd->nr_ppas;
- int i;
-
- new_bio = bio_alloc(GFP_KERNEL, nr_holes);
-
- if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
- goto fail_bio_put;
-
- if (nr_holes != new_bio->bi_vcnt) {
- WARN_ONCE(1, "pblk: malformed bio\n");
- goto fail_free_pages;
- }
-
- pr_ctx = kzalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL);
- if (!pr_ctx)
- goto fail_free_pages;
-
- for (i = 0; i < nr_secs; i++) {
- struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
-
- pr_ctx->lba_list_mem[i] = le64_to_cpu(meta->lba);
- }
-
- new_bio->bi_iter.bi_sector = 0; /* internal bio */
- bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
-
- rqd->bio = new_bio;
- rqd->nr_ppas = nr_holes;
-
- pr_ctx->orig_bio = bio;
- bitmap_copy(pr_ctx->bitmap, read_bitmap, NVM_MAX_VLBA);
- pr_ctx->bio_init_idx = bio_init_idx;
- pr_ctx->orig_nr_secs = nr_secs;
- r_ctx->private = pr_ctx;
-
- if (unlikely(nr_holes == 1)) {
- pr_ctx->ppa_ptr = rqd->ppa_list;
- pr_ctx->dma_ppa_list = rqd->dma_ppa_list;
- rqd->ppa_addr = rqd->ppa_list[0];
- }
- return 0;
-
-fail_free_pages:
- pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt);
-fail_bio_put:
- bio_put(new_bio);
-
- return -ENOMEM;
-}
-
-static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
- unsigned int bio_init_idx,
- unsigned long *read_bitmap, int nr_secs)
-{
- int nr_holes;
- int ret;
-
- nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
-
- if (pblk_setup_partial_read(pblk, rqd, bio_init_idx, read_bitmap,
- nr_holes))
- return NVM_IO_ERR;
-
- rqd->end_io = pblk_end_partial_read;
-
- ret = pblk_submit_io(pblk, rqd);
- if (ret) {
- bio_put(rqd->bio);
- pblk_err(pblk, "partial read IO submission failed\n");
- goto err;
- }
-
- return NVM_IO_OK;
-
-err:
- pblk_err(pblk, "failed to perform partial read\n");
-
- /* Free allocated pages in new bio */
- pblk_bio_free_pages(pblk, rqd->bio, 0, rqd->bio->bi_vcnt);
- __pblk_end_io_read(pblk, rqd, false);
- return NVM_IO_ERR;
-}
-
static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
- sector_t lba, unsigned long *read_bitmap)
+ sector_t lba, bool *from_cache)
{
struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0);
struct ppa_addr ppa;
- pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
+ pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_inc(&pblk->inflight_reads);
@@ -410,7 +238,6 @@ retry:
if (pblk_ppa_empty(ppa)) {
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
- WARN_ON(test_and_set_bit(0, read_bitmap));
meta->lba = addr_empty;
return;
}
@@ -419,12 +246,11 @@ retry:
* write buffer to prevent retrieving overwritten data.
*/
if (pblk_addr_in_cache(ppa)) {
- if (!pblk_read_from_cache(pblk, bio, lba, ppa, 0, 1)) {
- pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
+ if (!pblk_read_from_cache(pblk, bio, lba, ppa)) {
+ pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
goto retry;
}
- WARN_ON(test_and_set_bit(0, read_bitmap));
meta->lba = cpu_to_le64(lba);
#ifdef CONFIG_NVM_PBLK_DEBUG
@@ -435,95 +261,92 @@ retry:
}
}
-int pblk_submit_read(struct pblk *pblk, struct bio *bio)
+void pblk_submit_read(struct pblk *pblk, struct bio *bio)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct request_queue *q = dev->q;
sector_t blba = pblk_get_lba(bio);
unsigned int nr_secs = pblk_get_secs(bio);
+ bool from_cache;
struct pblk_g_ctx *r_ctx;
struct nvm_rq *rqd;
- unsigned int bio_init_idx;
- DECLARE_BITMAP(read_bitmap, NVM_MAX_VLBA);
- int ret = NVM_IO_ERR;
+ struct bio *int_bio, *split_bio;
generic_start_io_acct(q, REQ_OP_READ, bio_sectors(bio),
&pblk->disk->part0);
- bitmap_zero(read_bitmap, nr_secs);
-
rqd = pblk_alloc_rqd(pblk, PBLK_READ);
rqd->opcode = NVM_OP_PREAD;
rqd->nr_ppas = nr_secs;
- rqd->bio = NULL; /* cloned bio if needed */
rqd->private = pblk;
rqd->end_io = pblk_end_io_read;
r_ctx = nvm_rq_to_pdu(rqd);
r_ctx->start_time = jiffies;
r_ctx->lba = blba;
- r_ctx->private = bio; /* original bio */
- /* Save the index for this bio's start. This is needed in case
- * we need to fill a partial read.
- */
- bio_init_idx = pblk_get_bi_idx(bio);
+ if (pblk_alloc_rqd_meta(pblk, rqd)) {
+ bio_io_error(bio);
+ pblk_free_rqd(pblk, rqd, PBLK_READ);
+ return;
+ }
- if (pblk_alloc_rqd_meta(pblk, rqd))
- goto fail_rqd_free;
+ /* Clone read bio to deal internally with:
+ * -read errors when reading from drive
+ * -bio_advance() calls during cache reads
+ */
+ int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
if (nr_secs > 1)
- pblk_read_ppalist_rq(pblk, rqd, bio, blba, read_bitmap);
+ nr_secs = pblk_read_ppalist_rq(pblk, rqd, int_bio, blba,
+ &from_cache);
else
- pblk_read_rq(pblk, rqd, bio, blba, read_bitmap);
+ pblk_read_rq(pblk, rqd, int_bio, blba, &from_cache);
- if (bitmap_full(read_bitmap, nr_secs)) {
+split_retry:
+ r_ctx->private = bio; /* original bio */
+ rqd->bio = int_bio; /* internal bio */
+
+ if (from_cache && nr_secs == rqd->nr_ppas) {
+ /* All data was read from cache, we can complete the IO. */
+ pblk_end_user_read(bio, 0);
atomic_inc(&pblk->inflight_io);
__pblk_end_io_read(pblk, rqd, false);
- return NVM_IO_DONE;
- }
-
- /* All sectors are to be read from the device */
- if (bitmap_empty(read_bitmap, rqd->nr_ppas)) {
- struct bio *int_bio = NULL;
+ } else if (nr_secs != rqd->nr_ppas) {
+ /* The read bio request could be partially filled by the write
+ * buffer, but there are some holes that need to be read from
+ * the drive. In order to handle this, we use the block layer
+ * mechanism to split this request into smaller ones and chain
+ * them together.
+ */
+ split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL,
+ &pblk_bio_set);
+ bio_chain(split_bio, bio);
+ generic_make_request(bio);
+
+ /* New bio contains first N sectors of the previous one, so
+ * we can continue to use existing rqd, but we need to shrink
+ * the number of PPAs in it. The new bio is also guaranteed to
+ * contain only data from cache or only data from drive, never
+ * a mix of them.
+ */
+ bio = split_bio;
+ rqd->nr_ppas = nr_secs;
+ if (rqd->nr_ppas == 1)
+ rqd->ppa_addr = rqd->ppa_list[0];
- /* Clone read bio to deal with read errors internally */
+ /* Recreate int_bio - existing might have some needed internal
+ * fields modified already.
+ */
+ bio_put(int_bio);
int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
- if (!int_bio) {
- pblk_err(pblk, "could not clone read bio\n");
- goto fail_end_io;
- }
-
- rqd->bio = int_bio;
-
- if (pblk_submit_io(pblk, rqd)) {
- pblk_err(pblk, "read IO submission failed\n");
- ret = NVM_IO_ERR;
- goto fail_end_io;
- }
-
- return NVM_IO_OK;
+ goto split_retry;
+ } else if (pblk_submit_io(pblk, rqd)) {
+ /* Submitting IO to drive failed, let's report an error */
+ rqd->error = -ENODEV;
+ pblk_end_io_read(rqd);
}
-
- /* The read bio request could be partially filled by the write buffer,
- * but there are some holes that need to be read from the drive.
- */
- ret = pblk_partial_read_bio(pblk, rqd, bio_init_idx, read_bitmap,
- nr_secs);
- if (ret)
- goto fail_meta_free;
-
- return NVM_IO_OK;
-
-fail_meta_free:
- nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
-fail_rqd_free:
- pblk_free_rqd(pblk, rqd, PBLK_READ);
- return ret;
-fail_end_io:
- __pblk_end_io_read(pblk, rqd, false);
- return ret;
}
static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
@@ -568,7 +391,7 @@ static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
goto out;
/* logic error: lba out-of-bounds */
- if (lba >= pblk->rl.nr_secs) {
+ if (lba >= pblk->capacity) {
WARN(1, "pblk: read lba out of bounds\n");
goto out;
}
@@ -642,7 +465,6 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
if (pblk_submit_io_sync(pblk, &rqd)) {
ret = -EIO;
- pblk_err(pblk, "GC read request failed\n");
goto err_free_bio;
}