From 3d0e63754fa47d65edff172c1156f44b6fca5ca1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 6 Aug 2018 16:32:16 -0700 Subject: drbd: Convert from ahash to shash In preparing to remove all stack VLA usage from the kernel[1], this removes the discouraged use of AHASH_REQUEST_ON_STACK in favor of the smaller SHASH_DESC_ON_STACK by converting from ahash-wrapped-shash to direct shash. By removing a layer of indirection this both improves performance and reduces stack usage. The stack allocation will be made a fixed size in a later patch to the crypto subsystem. The bulk of the lines in this change are simple s/ahash/shash/, but the main logic differences are in drbd_csum_ee() and drbd_csum_bio(), which externalizes the page walking with k(un)map_atomic() instead of using scattergather. [1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com Acked-by: Lars Ellenberg Signed-off-by: Kees Cook Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 13 ++++---- drivers/block/drbd/drbd_main.c | 14 ++++----- drivers/block/drbd/drbd_nl.c | 39 ++++++++--------------- drivers/block/drbd/drbd_receiver.c | 35 +++++++++++---------- drivers/block/drbd/drbd_worker.c | 63 +++++++++++++++++++------------------- 5 files changed, 76 insertions(+), 88 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e35a234b0a8f..8a4c1328e6f9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -724,10 +724,10 @@ struct drbd_connection { struct list_head transfer_log; /* all requests not yet fully processed */ struct crypto_shash *cram_hmac_tfm; - struct crypto_ahash *integrity_tfm; /* checksums we compute, updates protected by connection->data->mutex */ - struct crypto_ahash *peer_integrity_tfm; /* checksums we verify, only accessed from receiver thread */ - struct crypto_ahash *csums_tfm; - struct crypto_ahash *verify_tfm; + struct crypto_shash *integrity_tfm; /* checksums we compute, updates protected by connection->data->mutex */ + struct crypto_shash *peer_integrity_tfm; /* checksums we verify, only accessed from receiver thread */ + struct crypto_shash *csums_tfm; + struct crypto_shash *verify_tfm; void *int_dig_in; void *int_dig_vv; @@ -1531,8 +1531,9 @@ static inline void ov_out_of_sync_print(struct drbd_device *device) } -extern void drbd_csum_bio(struct crypto_ahash *, struct bio *, void *); -extern void drbd_csum_ee(struct crypto_ahash *, struct drbd_peer_request *, void *); +extern void drbd_csum_bio(struct crypto_shash *, struct bio *, void *); +extern void drbd_csum_ee(struct crypto_shash *, struct drbd_peer_request *, + void *); /* worker callbacks */ extern int w_e_end_data_req(struct drbd_work *, int); extern int w_e_end_rsdata_req(struct drbd_work *, int); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index ef8212a4b73e..15a9ffce9012 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1377,7 +1377,7 @@ void drbd_send_ack_dp(struct drbd_peer_device *peer_device, enum drbd_packet cmd struct p_data *dp, int data_size) { if (peer_device->connection->peer_integrity_tfm) - data_size -= crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm); + data_size -= crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm); _drbd_send_ack(peer_device, cmd, dp->sector, cpu_to_be32(data_size), dp->block_id); } @@ -1690,7 +1690,7 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request * sock = 
&peer_device->connection->data; p = drbd_prepare_command(peer_device, sock); digest_size = peer_device->connection->integrity_tfm ? - crypto_ahash_digestsize(peer_device->connection->integrity_tfm) : 0; + crypto_shash_digestsize(peer_device->connection->integrity_tfm) : 0; if (!p) return -EIO; @@ -1796,7 +1796,7 @@ int drbd_send_block(struct drbd_peer_device *peer_device, enum drbd_packet cmd, p = drbd_prepare_command(peer_device, sock); digest_size = peer_device->connection->integrity_tfm ? - crypto_ahash_digestsize(peer_device->connection->integrity_tfm) : 0; + crypto_shash_digestsize(peer_device->connection->integrity_tfm) : 0; if (!p) return -EIO; @@ -2557,11 +2557,11 @@ void conn_free_crypto(struct drbd_connection *connection) { drbd_free_sock(connection); - crypto_free_ahash(connection->csums_tfm); - crypto_free_ahash(connection->verify_tfm); + crypto_free_shash(connection->csums_tfm); + crypto_free_shash(connection->verify_tfm); crypto_free_shash(connection->cram_hmac_tfm); - crypto_free_ahash(connection->integrity_tfm); - crypto_free_ahash(connection->peer_integrity_tfm); + crypto_free_shash(connection->integrity_tfm); + crypto_free_shash(connection->peer_integrity_tfm); kfree(connection->int_dig_in); kfree(connection->int_dig_vv); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index b4f02768ba47..d15703b1ffe8 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2303,10 +2303,10 @@ check_net_options(struct drbd_connection *connection, struct net_conf *new_net_c } struct crypto { - struct crypto_ahash *verify_tfm; - struct crypto_ahash *csums_tfm; + struct crypto_shash *verify_tfm; + struct crypto_shash *csums_tfm; struct crypto_shash *cram_hmac_tfm; - struct crypto_ahash *integrity_tfm; + struct crypto_shash *integrity_tfm; }; static int @@ -2324,36 +2324,21 @@ alloc_shash(struct crypto_shash **tfm, char *tfm_name, int err_alg) return NO_ERROR; } -static int -alloc_ahash(struct crypto_ahash **tfm, char *tfm_name, int err_alg) -{ - if (!tfm_name[0]) - return NO_ERROR; - - *tfm = crypto_alloc_ahash(tfm_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(*tfm)) { - *tfm = NULL; - return err_alg; - } - - return NO_ERROR; -} - static enum drbd_ret_code alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf) { char hmac_name[CRYPTO_MAX_ALG_NAME]; enum drbd_ret_code rv; - rv = alloc_ahash(&crypto->csums_tfm, new_net_conf->csums_alg, + rv = alloc_shash(&crypto->csums_tfm, new_net_conf->csums_alg, ERR_CSUMS_ALG); if (rv != NO_ERROR) return rv; - rv = alloc_ahash(&crypto->verify_tfm, new_net_conf->verify_alg, + rv = alloc_shash(&crypto->verify_tfm, new_net_conf->verify_alg, ERR_VERIFY_ALG); if (rv != NO_ERROR) return rv; - rv = alloc_ahash(&crypto->integrity_tfm, new_net_conf->integrity_alg, + rv = alloc_shash(&crypto->integrity_tfm, new_net_conf->integrity_alg, ERR_INTEGRITY_ALG); if (rv != NO_ERROR) return rv; @@ -2371,9 +2356,9 @@ alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf) static void free_crypto(struct crypto *crypto) { crypto_free_shash(crypto->cram_hmac_tfm); - crypto_free_ahash(crypto->integrity_tfm); - crypto_free_ahash(crypto->csums_tfm); - crypto_free_ahash(crypto->verify_tfm); + crypto_free_shash(crypto->integrity_tfm); + crypto_free_shash(crypto->csums_tfm); + crypto_free_shash(crypto->verify_tfm); } int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) @@ -2450,17 +2435,17 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(connection->net_conf, 
new_net_conf); if (!rsr) { - crypto_free_ahash(connection->csums_tfm); + crypto_free_shash(connection->csums_tfm); connection->csums_tfm = crypto.csums_tfm; crypto.csums_tfm = NULL; } if (!ovr) { - crypto_free_ahash(connection->verify_tfm); + crypto_free_shash(connection->verify_tfm); connection->verify_tfm = crypto.verify_tfm; crypto.verify_tfm = NULL; } - crypto_free_ahash(connection->integrity_tfm); + crypto_free_shash(connection->integrity_tfm); connection->integrity_tfm = crypto.integrity_tfm; if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100) /* Do this without trying to take connection->data.mutex again. */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 75f6b47169e6..fc67fd853375 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1732,7 +1732,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf } /* quick wrapper in case payload size != request_size (write same) */ -static void drbd_csum_ee_size(struct crypto_ahash *h, +static void drbd_csum_ee_size(struct crypto_shash *h, struct drbd_peer_request *r, void *d, unsigned int payload_size) { @@ -1769,7 +1769,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, digest_size = 0; if (!trim && peer_device->connection->peer_integrity_tfm) { - digest_size = crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm); + digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm); /* * FIXME: Receive the incoming digest into the receive buffer * here, together with its struct p_data? @@ -1905,7 +1905,7 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req digest_size = 0; if (peer_device->connection->peer_integrity_tfm) { - digest_size = crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm); + digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm); err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size); if (err) return err; @@ -3542,7 +3542,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in int p_proto, p_discard_my_data, p_two_primaries, cf; struct net_conf *nc, *old_net_conf, *new_net_conf = NULL; char integrity_alg[SHARED_SECRET_MAX] = ""; - struct crypto_ahash *peer_integrity_tfm = NULL; + struct crypto_shash *peer_integrity_tfm = NULL; void *int_dig_in = NULL, *int_dig_vv = NULL; p_proto = be32_to_cpu(p->protocol); @@ -3623,7 +3623,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in * change. 
*/ - peer_integrity_tfm = crypto_alloc_ahash(integrity_alg, 0, CRYPTO_ALG_ASYNC); + peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(peer_integrity_tfm)) { peer_integrity_tfm = NULL; drbd_err(connection, "peer data-integrity-alg %s not supported\n", @@ -3631,7 +3631,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in goto disconnect; } - hash_size = crypto_ahash_digestsize(peer_integrity_tfm); + hash_size = crypto_shash_digestsize(peer_integrity_tfm); int_dig_in = kmalloc(hash_size, GFP_KERNEL); int_dig_vv = kmalloc(hash_size, GFP_KERNEL); if (!(int_dig_in && int_dig_vv)) { @@ -3661,7 +3661,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in mutex_unlock(&connection->resource->conf_update); mutex_unlock(&connection->data.mutex); - crypto_free_ahash(connection->peer_integrity_tfm); + crypto_free_shash(connection->peer_integrity_tfm); kfree(connection->int_dig_in); kfree(connection->int_dig_vv); connection->peer_integrity_tfm = peer_integrity_tfm; @@ -3679,7 +3679,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in disconnect_rcu_unlock: rcu_read_unlock(); disconnect: - crypto_free_ahash(peer_integrity_tfm); + crypto_free_shash(peer_integrity_tfm); kfree(int_dig_in); kfree(int_dig_vv); conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); @@ -3691,15 +3691,16 @@ disconnect: * return: NULL (alg name was "") * ERR_PTR(error) if something goes wrong * or the crypto hash ptr, if it worked out ok. */ -static struct crypto_ahash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device, +static struct crypto_shash *drbd_crypto_alloc_digest_safe( + const struct drbd_device *device, const char *alg, const char *name) { - struct crypto_ahash *tfm; + struct crypto_shash *tfm; if (!alg[0]) return NULL; - tfm = crypto_alloc_ahash(alg, 0, CRYPTO_ALG_ASYNC); + tfm = crypto_alloc_shash(alg, 0, 0); if (IS_ERR(tfm)) { drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n", alg, name, PTR_ERR(tfm)); @@ -3752,8 +3753,8 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i struct drbd_device *device; struct p_rs_param_95 *p; unsigned int header_size, data_size, exp_max_sz; - struct crypto_ahash *verify_tfm = NULL; - struct crypto_ahash *csums_tfm = NULL; + struct crypto_shash *verify_tfm = NULL; + struct crypto_shash *csums_tfm = NULL; struct net_conf *old_net_conf, *new_net_conf = NULL; struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL; const int apv = connection->agreed_pro_version; @@ -3900,14 +3901,14 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i if (verify_tfm) { strcpy(new_net_conf->verify_alg, p->verify_alg); new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1; - crypto_free_ahash(peer_device->connection->verify_tfm); + crypto_free_shash(peer_device->connection->verify_tfm); peer_device->connection->verify_tfm = verify_tfm; drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg); } if (csums_tfm) { strcpy(new_net_conf->csums_alg, p->csums_alg); new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1; - crypto_free_ahash(peer_device->connection->csums_tfm); + crypto_free_shash(peer_device->connection->csums_tfm); peer_device->connection->csums_tfm = csums_tfm; drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg); } @@ -3951,9 +3952,9 @@ disconnect: mutex_unlock(&connection->resource->conf_update); /* just for completeness: actually not 
needed, * as this is not reached if csums_tfm was ok. */ - crypto_free_ahash(csums_tfm); + crypto_free_shash(csums_tfm); /* but free the verify_tfm again, if csums_tfm did not work out */ - crypto_free_ahash(verify_tfm); + crypto_free_shash(verify_tfm); conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); return -EIO; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index b8f77e83d456..c7ef48efe871 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -295,60 +295,61 @@ void drbd_request_endio(struct bio *bio) complete_master_bio(device, &m); } -void drbd_csum_ee(struct crypto_ahash *tfm, struct drbd_peer_request *peer_req, void *digest) +void drbd_csum_ee(struct crypto_shash *tfm, struct drbd_peer_request *peer_req, void *digest) { - AHASH_REQUEST_ON_STACK(req, tfm); - struct scatterlist sg; + SHASH_DESC_ON_STACK(desc, tfm); struct page *page = peer_req->pages; struct page *tmp; unsigned len; + void *src; - ahash_request_set_tfm(req, tfm); - ahash_request_set_callback(req, 0, NULL, NULL); + desc->tfm = tfm; + desc->flags = 0; - sg_init_table(&sg, 1); - crypto_ahash_init(req); + crypto_shash_init(desc); + src = kmap_atomic(page); while ((tmp = page_chain_next(page))) { /* all but the last page will be fully used */ - sg_set_page(&sg, page, PAGE_SIZE, 0); - ahash_request_set_crypt(req, &sg, NULL, sg.length); - crypto_ahash_update(req); + crypto_shash_update(desc, src, PAGE_SIZE); + kunmap_atomic(src); page = tmp; + src = kmap_atomic(page); } /* and now the last, possibly only partially used page */ len = peer_req->i.size & (PAGE_SIZE - 1); - sg_set_page(&sg, page, len ?: PAGE_SIZE, 0); - ahash_request_set_crypt(req, &sg, digest, sg.length); - crypto_ahash_finup(req); - ahash_request_zero(req); + crypto_shash_update(desc, src, len ?: PAGE_SIZE); + kunmap_atomic(src); + + crypto_shash_final(desc, digest); + shash_desc_zero(desc); } -void drbd_csum_bio(struct crypto_ahash *tfm, struct bio *bio, void *digest) +void drbd_csum_bio(struct crypto_shash *tfm, struct bio *bio, void *digest) { - AHASH_REQUEST_ON_STACK(req, tfm); - struct scatterlist sg; + SHASH_DESC_ON_STACK(desc, tfm); struct bio_vec bvec; struct bvec_iter iter; - ahash_request_set_tfm(req, tfm); - ahash_request_set_callback(req, 0, NULL, NULL); + desc->tfm = tfm; + desc->flags = 0; - sg_init_table(&sg, 1); - crypto_ahash_init(req); + crypto_shash_init(desc); bio_for_each_segment(bvec, bio, iter) { - sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); - ahash_request_set_crypt(req, &sg, NULL, sg.length); - crypto_ahash_update(req); + u8 *src; + + src = kmap_atomic(bvec.bv_page); + crypto_shash_update(desc, src + bvec.bv_offset, bvec.bv_len); + kunmap_atomic(src); + /* REQ_OP_WRITE_SAME has only one segment, * checksum the payload only once. 
*/ if (bio_op(bio) == REQ_OP_WRITE_SAME) break; } - ahash_request_set_crypt(req, NULL, digest, 0); - crypto_ahash_final(req); - ahash_request_zero(req); + crypto_shash_final(desc, digest); + shash_desc_zero(desc); } /* MAYBE merge common code with w_e_end_ov_req */ @@ -367,7 +368,7 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0)) goto out; - digest_size = crypto_ahash_digestsize(peer_device->connection->csums_tfm); + digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { sector_t sector = peer_req->i.sector; @@ -1205,7 +1206,7 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) * a real fix would be much more involved, * introducing more locking mechanisms */ if (peer_device->connection->csums_tfm) { - digest_size = crypto_ahash_digestsize(peer_device->connection->csums_tfm); + digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm); D_ASSERT(device, digest_size == di->digest_size); digest = kmalloc(digest_size, GFP_NOIO); } @@ -1255,7 +1256,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) if (unlikely(cancel)) goto out; - digest_size = crypto_ahash_digestsize(peer_device->connection->verify_tfm); + digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (!digest) { err = 1; /* terminate the connection in case the allocation failed */ @@ -1327,7 +1328,7 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) di = peer_req->digest; if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - digest_size = crypto_ahash_digestsize(peer_device->connection->verify_tfm); + digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest); -- cgit From 7759eb23fd9808a2e4498cf36a798ed65cde78ae Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 5 Sep 2018 15:45:54 -0600 Subject: block: remove bio_rewind_iter() It has been pointed out that bio_rewind_iter() is a very bad API[1]: 1) bio size may not be restored after rewinding 2) it has caused bogus changes, such as 5151842b9d8732 (block: reset bi_iter.bi_done after splitting bio) 3) rewinding makes things complicated wrt. bio splitting 4) it requires unnecessary updating of .bi_done in the fast path [1] https://marc.info/?t=153549924200005&r=1&w=2 So this patch takes Kent's suggestion to restore a bio to its original state by saving its iterator (struct bvec_iter) in bio_integrity_prep(), given that bio_rewind_iter() is now only used by the bio integrity code.
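To make the save/restore idea concrete, here is a standalone C sketch (illustrative only; the struct and helper below are invented stand-ins, not the kernel's types): a cursor that is advanced by plain value arithmetic can be captured with a single struct copy before I/O starts, which is why saving bio->bi_iter beats reconstructing the position with a rewind.

#include <stdio.h>

/* Toy stand-in for a bvec_iter-style cursor (fields invented for the sketch). */
struct iter {
	unsigned int sector;	/* current position, in 512-byte sectors */
	unsigned int size;	/* bytes remaining */
};

static void advance(struct iter *it, unsigned int bytes)
{
	it->sector += bytes >> 9;
	it->size -= bytes;
}

int main(void)
{
	struct iter it = { .sector = 0, .size = 4096 };
	struct iter saved = it;		/* snapshot before I/O: one struct copy */

	advance(&it, 4096);		/* completion advances the cursor */

	it = saved;			/* "rewind" is just restoring the copy */
	printf("sector=%u size=%u\n", it.sector, it.size);
	return 0;
}

Because the snapshot preserves the full iterator, there is no .bi_done bookkeeping to maintain in the fast path and no rewind step that can fail.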
Cc: Dmitry Monakhov Cc: Hannes Reinecke Suggested-by: Kent Overstreet Acked-by: Kent Overstreet Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/bio-integrity.c | 12 ++++-------- block/bio.c | 1 - include/linux/bio.h | 22 ++++------------------ include/linux/bvec.h | 3 --- 4 files changed, 8 insertions(+), 30 deletions(-) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 67b5fb861a51..290af497997b 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -306,6 +306,8 @@ bool bio_integrity_prep(struct bio *bio) if (bio_data_dir(bio) == WRITE) { bio_integrity_process(bio, &bio->bi_iter, bi->profile->generate_fn); + } else { + bip->bio_iter = bio->bi_iter; } return true; @@ -331,20 +333,14 @@ static void bio_integrity_verify_fn(struct work_struct *work) container_of(work, struct bio_integrity_payload, bip_work); struct bio *bio = bip->bip_bio; struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); - struct bvec_iter iter = bio->bi_iter; /* * At the moment verify is called bio's iterator was advanced * during split and completion, we need to rewind iterator to * it's original position. */ - if (bio_rewind_iter(bio, &iter, iter.bi_done)) { - bio->bi_status = bio_integrity_process(bio, &iter, - bi->profile->verify_fn); - } else { - bio->bi_status = BLK_STS_IOERR; - } - + bio->bi_status = bio_integrity_process(bio, &bip->bio_iter, + bi->profile->verify_fn); bio_integrity_free(bio); bio_endio(bio); } diff --git a/block/bio.c b/block/bio.c index 8c680a776171..f685e762809d 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1807,7 +1807,6 @@ struct bio *bio_split(struct bio *bio, int sectors, bio_integrity_trim(split); bio_advance(bio, split->bi_iter.bi_size); - bio->bi_iter.bi_done = 0; if (bio_flagged(bio, BIO_TRACE_COMPLETION)) bio_set_flag(split, BIO_TRACE_COMPLETION); diff --git a/include/linux/bio.h b/include/linux/bio.h index 51371740d2a8..14b4fa266357 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -170,27 +170,11 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, { iter->bi_sector += bytes >> 9; - if (bio_no_advance_iter(bio)) { + if (bio_no_advance_iter(bio)) iter->bi_size -= bytes; - iter->bi_done += bytes; - } else { + else bvec_iter_advance(bio->bi_io_vec, iter, bytes); /* TODO: It is reasonable to complete bio with error here. 
*/ - } -} - -static inline bool bio_rewind_iter(struct bio *bio, struct bvec_iter *iter, - unsigned int bytes) -{ - iter->bi_sector -= bytes >> 9; - - if (bio_no_advance_iter(bio)) { - iter->bi_size += bytes; - iter->bi_done -= bytes; - return true; - } - - return bvec_iter_rewind(bio->bi_io_vec, iter, bytes); } #define __bio_for_each_segment(bvl, bio, iter, start) \ @@ -353,6 +337,8 @@ struct bio_integrity_payload { unsigned short bip_max_vcnt; /* integrity bio_vec slots */ unsigned short bip_flags; /* control flags */ + struct bvec_iter bio_iter; /* for rewinding parent bio */ + struct work_struct bip_work; /* I/O completion */ struct bio_vec *bip_vec; diff --git a/include/linux/bvec.h b/include/linux/bvec.h index fe7a22dd133b..02c73c6aa805 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -40,8 +40,6 @@ struct bvec_iter { unsigned int bi_idx; /* current index into bvl_vec */ - unsigned int bi_done; /* number of bytes completed */ - unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ }; @@ -85,7 +83,6 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, bytes -= len; iter->bi_size -= len; iter->bi_bvec_done += len; - iter->bi_done += len; if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) { iter->bi_bvec_done = 0; -- cgit From 902d53914f6492b85fcdc35643b28f952dd8d6cf Mon Sep 17 00:00:00 2001 From: jun qian Date: Fri, 7 Sep 2018 10:27:20 -0700 Subject: block: umem: replace spin_lock_bh with spin_lock in tasklet callback As you are already in a tasklet, it is unnecessary to call spin_lock_bh. Signed-off-by: jun qian Signed-off-by: Jens Axboe --- drivers/block/umem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 5c7fb8cc4149..9094ca60949c 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -421,7 +421,7 @@ static void process_page(unsigned long data) struct cardinfo *card = (struct cardinfo *)data; unsigned int dma_status = card->dma_status; - spin_lock_bh(&card->lock); + spin_lock(&card->lock); if (card->Active < 0) goto out_unlock; page = &card->mm_pages[card->Active]; @@ -496,7 +496,7 @@ static void process_page(unsigned long data) mm_start_io(card); } out_unlock: - spin_unlock_bh(&card->lock); + spin_unlock(&card->lock); while (return_bio) { struct bio *bio = return_bio; -- cgit From 798ef9e70110d0245797526c930beec7fded7b15 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 11 Sep 2018 14:50:40 -0700 Subject: rsxx: Remove unnecessary parentheses Clang warns when more than one set of parentheses is used for a single conditional statement: drivers/block/rsxx/cregs.c:279:15: warning: equality comparison with extraneous parentheses [-Wparentheses-equality] if ((cmd->op == CREG_OP_READ)) { ~~~~~~~~^~~~~~~~~~~~~~~ drivers/block/rsxx/cregs.c:279:15: note: remove extraneous parentheses around the comparison to silence this warning if ((cmd->op == CREG_OP_READ)) { ~ ^ ~ drivers/block/rsxx/cregs.c:279:15: note: use '=' to turn this equality comparison into an assignment if ((cmd->op == CREG_OP_READ)) { ^~ = 1 warning generated. 
Reported-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Jens Axboe --- drivers/block/rsxx/cregs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c index c148e83e4ed7..d9a8758682c9 100644 --- a/drivers/block/rsxx/cregs.c +++ b/drivers/block/rsxx/cregs.c @@ -276,7 +276,7 @@ static void creg_cmd_done(struct work_struct *work) st = -EIO; } - if ((cmd->op == CREG_OP_READ)) { + if (cmd->op == CREG_OP_READ) { unsigned int cnt8 = ioread32(card->regmap + CREG_CNT); /* Paranoid Sanity Checks */ -- cgit From f8c0d7b16fd9b97036306c6af4094d51e9f72278 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 14 Sep 2018 01:35:11 +0000 Subject: blk-iolatency: remove set but not used variables 'changed' and 'blkiolat' Fixes gcc '-Wunused-but-set-variable' warning: block/blk-iolatency.c: In function 'scale_change': block/blk-iolatency.c:301:7: warning: variable 'changed' set but not used [-Wunused-but-set-variable] block/blk-iolatency.c: In function 'iolatency_set_limit': block/blk-iolatency.c:765:24: warning: variable 'blkiolat' set but not used [-Wunused-but-set-variable] Signed-off-by: YueHaibing Signed-off-by: Jens Axboe --- block/blk-iolatency.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 19923f8a029d..a6f21527e6c7 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -298,7 +298,6 @@ static void scale_change(struct iolatency_grp *iolat, bool up) unsigned long qd = blk_queue_depth(iolat->blkiolat->rqos.q); unsigned long scale = scale_amount(qd, up); unsigned long old = iolat->rq_depth.max_depth; - bool changed = false; if (old > qd) old = qd; @@ -308,7 +307,6 @@ static void scale_change(struct iolatency_grp *iolat, bool up) return; if (old < qd) { - changed = true; old += scale; old = min(old, qd); iolat->rq_depth.max_depth = old; @@ -316,7 +314,6 @@ static void scale_change(struct iolatency_grp *iolat, bool up) } } else if (old > 1) { old >>= 1; - changed = true; iolat->rq_depth.max_depth = max(old, 1UL); } } @@ -761,7 +758,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, { struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct blkcg_gq *blkg; - struct blk_iolatency *blkiolat; struct blkg_conf_ctx ctx; struct iolatency_grp *iolat; char *p, *tok; @@ -774,7 +770,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, return ret; iolat = blkg_to_lat(ctx.blkg); - blkiolat = iolat->blkiolat; p = ctx.body; ret = -EINVAL; -- cgit From cbeb869a3d1110450186b738199963c5e68c2a71 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Fri, 14 Sep 2018 16:23:07 +0200 Subject: block, bfq: correctly charge and reset entity service in all cases BFQ schedules entities (which represent either per-process queues or groups of queues) as a function of their timestamps. In particular, as a function of their (virtual) finish times. The finish time of an entity is computed as a function of the budget assigned to the entity, assuming, tentatively, that the entity, once in service, will receive an amount of service equal to its budget. Then, when the entity is expired because it finishes being served, this finish time is updated as a function of the actual service received by the entity. This allows the entity to be correctly charged with only the service received, and then to be correctly re-scheduled. Yet an entity may receive service also while not being the entity in service (in the scheduling environment of its parent entity), for several reasons. If the entity remains with no backlog while receiving this 'unofficial' service, then it is expired. Also on such an expiration, the finish time of the entity should be updated to account for only the service actually received by the entity. Unfortunately, such an update is not performed for an entity expiring without being the entity in service. In a similar vein, the service counter of the entity in service is reset when the entity is expired, so that it is ready for the next service cycle. This reset should also be performed when an entity is expired because it remains empty after receiving service while not being the entity in service. But in that case the reset is not performed. This commit performs the above update of the finish time, and the reset of the service received, also for an entity expiring while not being the entity in service. Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-wf2q.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index ae52bff43ce4..ff7c2d470bb8 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1181,10 +1181,17 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) st = bfq_entity_service_tree(entity); is_in_service = entity == sd->in_service_entity; - if (is_in_service) { - bfq_calc_finish(entity, entity->service); + bfq_calc_finish(entity, entity->service); + + if (is_in_service) sd->in_service_entity = NULL; - } + else + /* + * Non in-service entity: nobody will take care of + * resetting its service counter on expiration. Do it + * now. + */ + entity->service = 0; if (entity->tree == &st->active) bfq_active_extract(st, entity); -- cgit From d0edc2473be9d70f999282e1ca7863ad6ae704dc Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Fri, 14 Sep 2018 16:23:08 +0200 Subject: block, bfq: inject other-queue I/O into seeky idle queues on NCQ flash The Achilles' heel of BFQ is its failure to reach high throughput with sync random I/O on flash storage with internal queueing, in case the processes doing I/O have differentiated weights. The cause of this failure is as follows. If at least two processes do sync I/O, and have a different weight from each other, then BFQ plugs I/O dispatching every time one of these processes, while it is being served, remains temporarily without pending I/O requests. This plugging is necessary to guarantee that every process enjoys a bandwidth proportional to its weight; but it empties the internal queue(s) of the drive. And this kills throughput with random I/O. So, if some processes have differentiated weights and do both sync and random I/O, the end result is a throughput collapse. This commit tries to counter this problem by injecting the service of other processes, in a controlled way, while the process in service happens to have no I/O. This injection is performed only if the medium is non-rotational and performs internal queueing, and the process in service does random I/O (service injection might be beneficial for sequential I/O too; we'll work on that). As an example of the benefits of this commit, on a PLEXTOR PX-256M5S SSD, and with five processes having differentiated weights and doing sync random 4KB I/O, this commit makes the throughput with bfq grow by 400%, from 25 to 100MB/s.
This higher throughput is 10MB/s lower than that reached with none. As some less random I/O is added to the mix, the throughput becomes equal to or higher than that with none. This commit is a very first attempt to recover throughput without losing control, and certainly has many limitations. One is, e.g., that the processes whose service is injected are not chosen so as to distribute the extra bandwidth they receive in accordance to their weights. Thus there might be loss of weighted fairness in some cases. Anyway, this loss concerns extra service, which would not have been received at all without this commit. Other limitations and issues will probably show up with usage. Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++----- block/bfq-iosched.h | 26 ++++++++++++++++++++ 2 files changed, 88 insertions(+), 6 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 653100fb719e..d94838bcc135 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -3182,6 +3182,13 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); } +static bool bfq_bfqq_injectable(struct bfq_queue *bfqq) +{ + return BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 && + blk_queue_nonrot(bfqq->bfqd->queue) && + bfqq->bfqd->hw_tag; +} + /** * bfq_bfqq_expire - expire a queue. * @bfqd: device owning the queue. @@ -3291,6 +3298,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, if (ref == 1) /* bfqq is gone, no more actions on it */ return; + bfqq->injected_service = 0; + /* mark bfqq as waiting a request only if a bic still points to it */ if (!bfq_bfqq_busy(bfqq) && reason != BFQQE_BUDGET_TIMEOUT && @@ -3629,6 +3638,30 @@ static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_better_to_idle(bfqq); } +static struct bfq_queue *bfq_choose_bfqq_for_injection(struct bfq_data *bfqd) +{ + struct bfq_queue *bfqq; + + /* + * A linear search; but, with a high probability, very few + * steps are needed to find a candidate queue, i.e., a queue + * with enough budget left for its next request. In fact: + * - BFQ dynamically updates the budget of every queue so as + * to accommodate the expected backlog of the queue; + * - if a queue gets all its requests dispatched as injected + * service, then the queue is removed from the active list + * (and re-added only if it gets new requests, but with + * enough budget for its new backlog). + */ + list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) + if (!RB_EMPTY_ROOT(&bfqq->sort_list) && + bfq_serv_to_charge(bfqq->next_rq, bfqq) <= + bfq_bfqq_budget_left(bfqq)) + return bfqq; + + return NULL; +} + /* * Select a queue for service. If we have a current queue in service, * check whether to continue servicing it, or retrieve and set a new one. @@ -3710,10 +3743,19 @@ check_queue: * No requests pending. However, if the in-service queue is idling * for a new request, or has requests waiting for a completion and * may idle after their completion, then keep it anyway. + * + * Yet, to boost throughput, inject service from other queues if + * possible. 
*/ if (bfq_bfqq_wait_request(bfqq) || (bfqq->dispatched != 0 && bfq_better_to_idle(bfqq))) { - bfqq = NULL; + if (bfq_bfqq_injectable(bfqq) && + bfqq->injected_service * bfqq->inject_coeff < + bfqq->entity.service * 10) + bfqq = bfq_choose_bfqq_for_injection(bfqd); + else + bfqq = NULL; + goto keep_queue; } @@ -3803,6 +3845,14 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd, bfq_dispatch_remove(bfqd->queue, rq); + if (bfqq != bfqd->in_service_queue) { + if (likely(bfqd->in_service_queue)) + bfqd->in_service_queue->injected_service += + bfq_serv_to_charge(rq, bfqq); + + goto return_rq; + } + /* * If weight raising has to terminate for bfqq, then next * function causes an immediate update of bfqq's weight, @@ -3821,13 +3871,12 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd, * belongs to CLASS_IDLE and other queues are waiting for * service. */ - if (bfqd->busy_queues > 1 && bfq_class_idle(bfqq)) - goto expire; - - return rq; + if (!(bfqd->busy_queues > 1 && bfq_class_idle(bfqq))) + goto return_rq; -expire: bfq_bfqq_expire(bfqd, bfqq, false, BFQQE_BUDGET_EXHAUSTED); + +return_rq: return rq; } @@ -4232,6 +4281,13 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_mark_bfqq_has_short_ttime(bfqq); bfq_mark_bfqq_sync(bfqq); bfq_mark_bfqq_just_created(bfqq); + /* + * Aggressively inject a lot of service: up to 90%. + * This coefficient remains constant during bfqq life, + * but this behavior might be changed, after enough + * testing and tuning. + */ + bfqq->inject_coeff = 1; } else bfq_clear_bfqq_sync(bfqq); diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index a8a2e5aca4d4..37d627afdc2e 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -351,6 +351,32 @@ struct bfq_queue { unsigned long split_time; /* time of last split */ unsigned long first_IO_time; /* time of first I/O for this queue */ + + /* max service rate measured so far */ + u32 max_service_rate; + /* + * Ratio between the service received by bfqq while it is in + * service, and the cumulative service (of requests of other + * queues) that may be injected while bfqq is empty but still + * in service. To increase precision, the coefficient is + * measured in tenths of unit. Here are some example of (1) + * ratios, (2) resulting percentages of service injected + * w.r.t. to the total service dispatched while bfqq is in + * service, and (3) corresponding values of the coefficient: + * 1 (50%) -> 10 + * 2 (33%) -> 20 + * 10 (9%) -> 100 + * 9.9 (9%) -> 99 + * 1.5 (40%) -> 15 + * 0.5 (66%) -> 5 + * 0.1 (90%) -> 1 + * + * So, if the coefficient is lower than 10, then + * injected service is more than bfqq service. + */ + unsigned int inject_coeff; + /* amount of service injected in current service slot */ + unsigned int injected_service; }; /** -- cgit From c8765de0adfcaaf4ffb2d951e07444f00ffa9453 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Fri, 14 Sep 2018 16:23:09 +0200 Subject: block, bfq: do not plug I/O if all queues are weight-raised To reduce latency for interactive and soft real-time applications, bfq privileges the bfq_queues containing the I/O of these applications. These privileged queues, referred to as weight-raised queues, get a much higher share of the device throughput w.r.t. non-privileged queues. To preserve this higher share, the I/O of any non-weight-raised queue must be plugged whenever a sync weight-raised queue, while being served, remains temporarily empty.
To attain this goal, bfq simply plugs any I/O (from any queue) if a sync weight-raised queue remains empty while in service. Unfortunately, this plugging typically lowers throughput with random I/O, on devices with internal queueing (because it reduces the filling level of the internal queues of the device). This commit addresses this issue by restricting the cases where plugging is performed: if a sync weight-raised queue remains empty while in service, then I/O plugging is performed only if some of the active bfq_queues are *not* weight-raised (which is actually the only circumstance where plugging is needed to preserve the higher share of the throughput of weight-raised queues). This restriction proved able to boost throughput in a great many use cases that need only maximum throughput. Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index d94838bcc135..c0b1db3afb81 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -3580,7 +3580,12 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * whether bfqq is being weight-raised, because * bfq_symmetric_scenario() does not take into account also * weight-raised queues (see comments on - * bfq_weights_tree_add()). + * bfq_weights_tree_add()). In particular, if bfqq is being + * weight-raised, it is important to idle only if there are + * other, non-weight-raised queues that may steal throughput + * to bfqq. Actually, we should be even more precise, and + * differentiate between interactive weight raising and + * soft real-time weight raising. * * As a side note, it is worth considering that the above * device-idling countermeasures may however fail in the @@ -3592,7 +3597,8 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * to let requests be served in the desired order until all * the requests already queued in the device have been served. */ - asymmetric_scenario = bfqq->wr_coeff > 1 || + asymmetric_scenario = (bfqq->wr_coeff > 1 && + bfqd->wr_busy_queues < bfqd->busy_queues) || !bfq_symmetric_scenario(bfqd); /* -- cgit From 576ed9135489c723fb39b97c4e2c73428d06dd78 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Sep 2018 08:28:21 +0200 Subject: block: use bio_add_page in bio_iov_iter_get_pages Replace a nasty hack with a different nasty hack to prepare for multipage bio_vecs. By moving the temporary page array as far up as possible in the space allocated for the bio_vec array we can iterate forward over it and thus use bio_add_page. Using bio_add_page means we'll be able to merge physically contiguous pages once support for multipage bio_vecs is merged.
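The layout trick is easier to see in isolation. Below is a standalone sketch under invented names (struct vec stands in for bio_vec; the kernel enforces the equivalent size invariant with BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2)): because each vec occupies at least two pointers' worth of storage, a page array parked at the tail of the vec array can be consumed left to right without the forward fill ever overtaking it.

#include <stdio.h>

struct page { int id; };			/* toy page for the demo */
struct vec { struct page *pg; unsigned int len, off; };	/* bio_vec stand-in */

#define PTRS_PER_VEC (sizeof(struct vec) / sizeof(struct page *))

int main(void)
{
	struct vec vecs[4] = { { 0 } };
	struct page p[3] = { { 0 }, { 1 }, { 2 } };

	/* Park the temporary page array as far up in the vec storage as
	 * possible, as the patch does. */
	struct page **pages = (struct page **)vecs;
	pages += 4 * (PTRS_PER_VEC - 1);

	for (int i = 0; i < 3; i++)	/* stands in for iov_iter_get_pages() */
		pages[i] = &p[i];

	for (int i = 0; i < 3; i++) {
		/* page i is read before vec i is written, and the tail
		 * offset guarantees pages j > i are still intact */
		struct page *pg = pages[i];
		vecs[i] = (struct vec){ .pg = pg, .len = 4096 };
	}

	for (int i = 0; i < 3; i++)
		printf("vec %d -> page %d\n", i, vecs[i].pg->id);
	return 0;
}

In the kernel the forward fill goes through bio_add_page instead of a plain store, which is the whole point of the change.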
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/block/bio.c b/block/bio.c index f685e762809d..81c05ee51d6c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -827,6 +827,8 @@ int bio_add_page(struct bio *bio, struct page *page, } EXPORT_SYMBOL(bio_add_page); +#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) + /** * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio * @bio: bio to add pages to @@ -839,38 +841,35 @@ EXPORT_SYMBOL(bio_add_page); */ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) { - unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx; + unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; + unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; struct page **pages = (struct page **)bv; + ssize_t size, left; + unsigned len, i; size_t offset; - ssize_t size; + + /* + * Move page array up in the allocated memory for the bio vecs as far as + * possible so that we can start filling biovecs from the beginning + * without overwriting the temporary page array. + */ + BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); + pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); if (unlikely(size <= 0)) return size ? size : -EFAULT; - idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE; - /* - * Deep magic below: We need to walk the pinned pages backwards - * because we are abusing the space allocated for the bio_vecs - * for the page array. Because the bio_vecs are larger than the - * page pointers by definition this will always work. But it also - * means we can't use bio_add_page, so any changes to it's semantics - * need to be reflected here as well. - */ - bio->bi_iter.bi_size += size; - bio->bi_vcnt += nr_pages; + for (left = size, i = 0; left > 0; left -= len, i++) { + struct page *page = pages[i]; - while (idx--) { - bv[idx].bv_page = pages[idx]; - bv[idx].bv_len = PAGE_SIZE; - bv[idx].bv_offset = 0; + len = min_t(size_t, PAGE_SIZE - offset, left); + if (WARN_ON_ONCE(bio_add_page(bio, page, len, offset) != len)) + return -EINVAL; + offset = 0; } - bv[0].bv_offset += offset; - bv[0].bv_len -= offset; - bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size; - iov_iter_advance(iter, size); return 0; } -- cgit From 9ff01255a01c3d6ffc8670b358b3ac567d5646fc Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 21 Aug 2018 05:21:15 +0800 Subject: Blk-throttle: update to use rbtree with leftmost node cached As rbtree has native support of caching leftmost node, i.e. rb_root_cached, no need to do the caching by ourselves. Signed-off-by: Liu Bo Signed-off-by: Jens Axboe --- block/blk-throttle.c | 41 +++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 01d0620a4e4a..db1a3a2ae006 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -84,8 +84,7 @@ struct throtl_service_queue { * RB tree of active children throtl_grp's, which are sorted by * their ->disptime. 
*/ - struct rb_root pending_tree; /* RB tree of active tgs */ - struct rb_node *first_pending; /* first node in the tree */ + struct rb_root_cached pending_tree; /* RB tree of active tgs */ unsigned int nr_pending; /* # queued in the tree */ unsigned long first_pending_disptime; /* disptime of the first tg */ struct timer_list pending_timer; /* fires on first_pending_disptime */ @@ -475,7 +474,7 @@ static void throtl_service_queue_init(struct throtl_service_queue *sq) { INIT_LIST_HEAD(&sq->queued[0]); INIT_LIST_HEAD(&sq->queued[1]); - sq->pending_tree = RB_ROOT; + sq->pending_tree = RB_ROOT_CACHED; timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0); } @@ -616,31 +615,23 @@ static void throtl_pd_free(struct blkg_policy_data *pd) static struct throtl_grp * throtl_rb_first(struct throtl_service_queue *parent_sq) { + struct rb_node *n; /* Service tree is empty */ if (!parent_sq->nr_pending) return NULL; - if (!parent_sq->first_pending) - parent_sq->first_pending = rb_first(&parent_sq->pending_tree); - - if (parent_sq->first_pending) - return rb_entry_tg(parent_sq->first_pending); - - return NULL; -} - -static void rb_erase_init(struct rb_node *n, struct rb_root *root) -{ - rb_erase(n, root); - RB_CLEAR_NODE(n); + n = rb_first_cached(&parent_sq->pending_tree); + WARN_ON_ONCE(!n); + if (!n) + return NULL; + return rb_entry_tg(n); } static void throtl_rb_erase(struct rb_node *n, struct throtl_service_queue *parent_sq) { - if (parent_sq->first_pending == n) - parent_sq->first_pending = NULL; - rb_erase_init(n, &parent_sq->pending_tree); + rb_erase_cached(n, &parent_sq->pending_tree); + RB_CLEAR_NODE(n); --parent_sq->nr_pending; } @@ -658,11 +649,11 @@ static void update_min_dispatch_time(struct throtl_service_queue *parent_sq) static void tg_service_queue_add(struct throtl_grp *tg) { struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq; - struct rb_node **node = &parent_sq->pending_tree.rb_node; + struct rb_node **node = &parent_sq->pending_tree.rb_root.rb_node; struct rb_node *parent = NULL; struct throtl_grp *__tg; unsigned long key = tg->disptime; - int left = 1; + bool leftmost = true; while (*node != NULL) { parent = *node; @@ -672,15 +663,13 @@ static void tg_service_queue_add(struct throtl_grp *tg) node = &parent->rb_left; else { node = &parent->rb_right; - left = 0; + leftmost = false; } } - if (left) - parent_sq->first_pending = &tg->rb_node; - rb_link_node(&tg->rb_node, parent, node); - rb_insert_color(&tg->rb_node, &parent_sq->pending_tree); + rb_insert_color_cached(&tg->rb_node, &parent_sq->pending_tree, + leftmost); } static void __throtl_enqueue_tg(struct throtl_grp *tg) -- cgit From 27e6fa996c534c32702aa4d32db0ffa383acd050 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Tue, 11 Sep 2018 14:41:26 -0400 Subject: blkcg: fix ref count issue with bio_blkcg using task_css The accessor function bio_blkcg either returns the blkcg associated with the bio or finds one in the current context. This can cause an issue when trying to associate a bio with a blkcg. Particularly, it's the third case that is problematic: return css_to_blkcg(task_css(current, io_cgrp_id)); As the above may race against task migration and the cgroup exiting, it is not always ok to take a reference on the blkcg returned from bio_blkcg. This patch adds association ahead of calling bio_blkcg rather than after. This makes association a required and explicit step along the code paths for calling bio_blkcg. 
blk_get_rl is modified as well to get a reference to the blkcg it may use and blk_put_rl will always put the reference back. Association is also moved above the bio_blkcg call to ensure it will not return NULL in blk-iolatency. BFQ and CFQ utilize this flaw, but due to the complexity, I do not want to address this in this series. I've created a private version of the function with notes not to use it describing the flaw. Hopefully soon, that code can be cleaned up. Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 4 +- block/bfq-iosched.c | 2 +- block/bio.c | 10 ++++- block/blk-iolatency.c | 2 +- block/cfq-iosched.c | 4 +- include/linux/blk-cgroup.h | 101 +++++++++++++++++++++++++++++++++++++++++---- 6 files changed, 107 insertions(+), 16 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 9fe5952d117d..d9a7916ff0ab 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) uint64_t serial_nr; rcu_read_lock(); - serial_nr = bio_blkcg(bio)->css.serial_nr; + serial_nr = __bio_blkcg(bio)->css.serial_nr; /* * Check whether blkcg has changed. The condition may trigger @@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) goto out; - bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio)); + bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio)); /* * Update blkg_path for bfq_log_* functions. We cache this * path, and update it here, for the following diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index c0b1db3afb81..1a1b80dfd69d 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4359,7 +4359,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, rcu_read_lock(); - bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio)); + bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio)); if (!bfqg) { bfqq = &bfqd->oom_bfqq; goto out; diff --git a/block/bio.c b/block/bio.c index 81c05ee51d6c..083f1c9cde0a 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1988,13 +1988,19 @@ int bio_associate_blkcg_from_page(struct bio *bio, struct page *page) * * This function takes an extra reference of @blkcg_css which will be put * when @bio is released. The caller must own @bio and is responsible for - * synchronizing calls to this function. + * synchronizing calls to this function. If @blkcg_css is NULL, a call to + * blkcg_get_css finds the current css from the kthread or task. 
*/ int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) { if (unlikely(bio->bi_css)) return -EBUSY; - css_get(blkcg_css); + + if (blkcg_css) + css_get(blkcg_css); + else + blkcg_css = blkcg_get_css(); + bio->bi_css = blkcg_css; return 0; } diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index a6f21527e6c7..82450c37f2aa 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -401,8 +401,8 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio, return; rcu_read_lock(); + bio_associate_blkcg(bio, NULL); blkcg = bio_blkcg(bio); - bio_associate_blkcg(bio, &blkcg->css); blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { if (!lock) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 2eb87444b157..d219e9a1af65 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3753,7 +3753,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) uint64_t serial_nr; rcu_read_lock(); - serial_nr = bio_blkcg(bio)->css.serial_nr; + serial_nr = __bio_blkcg(bio)->css.serial_nr; rcu_read_unlock(); /* @@ -3818,7 +3818,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, struct cfq_group *cfqg; rcu_read_lock(); - cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio)); + cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio)); if (!cfqg) { cfqq = &cfqd->oom_cfqq; goto out; diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 6d766a19f2bb..24067a1f8b36 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -230,22 +230,100 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); +/** + * blkcg_css - find the current css + * + * Find the css associated with either the kthread or the current task. + * This may return a dying css, so it is up to the caller to use tryget logic + * to confirm it is alive and well. + */ +static inline struct cgroup_subsys_state *blkcg_css(void) +{ + struct cgroup_subsys_state *css; + + css = kthread_blkcg(); + if (css) + return css; + return task_css(current, io_cgrp_id); +} + +/** + * blkcg_get_css - find and get a reference to the css + * + * Find the css associated with either the kthread or the current task. + * This takes a reference on the blkcg which will need to be managed by the + * caller. + */ +static inline struct cgroup_subsys_state *blkcg_get_css(void) +{ + struct cgroup_subsys_state *css; + + rcu_read_lock(); + + css = kthread_blkcg(); + if (css) { + css_get(css); + } else { + /* + * This is a bit complicated. It is possible task_css is seeing + * an old css pointer here. This is caused by the current + * thread migrating away from this cgroup and this cgroup dying. + * css_tryget() will fail when trying to take a ref on a cgroup + * that's ref count has hit 0. + * + * Therefore, if it does fail, this means current must have + * been swapped away already and this is waiting for it to + * propagate on the polling cpu. Hence the use of cpu_relax(). + */ + while (true) { + css = task_css(current, io_cgrp_id); + if (likely(css_tryget(css))) + break; + cpu_relax(); + } + } + + rcu_read_unlock(); + + return css; +} static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; } -static inline struct blkcg *bio_blkcg(struct bio *bio) +/** + * __bio_blkcg - internal version of bio_blkcg for bfq and cfq + * + * DO NOT USE. 
+ * There is a flaw using this version of the function. In particular, this was + * used in a broken paradigm where association was called on the given css. It + * is possible though that the returned css from task_css() is in the process + * of dying due to migration of the current task. So it is improper to assume + * *_get() is going to succeed. Both BFQ and CFQ rely on this logic and will + * take additional work to handle more gracefully. + */ +static inline struct blkcg *__bio_blkcg(struct bio *bio) { - struct cgroup_subsys_state *css; + if (bio && bio->bi_css) + return css_to_blkcg(bio->bi_css); + return css_to_blkcg(blkcg_css()); +} +/** + * bio_blkcg - grab the blkcg associated with a bio + * @bio: target bio + * + * This returns the blkcg associated with a bio, NULL if not associated. + * Callers are expected to either handle NULL or know association has been + * done prior to calling this. + */ +static inline struct blkcg *bio_blkcg(struct bio *bio) +{ if (bio && bio->bi_css) return css_to_blkcg(bio->bi_css); - css = kthread_blkcg(); - if (css) - return css_to_blkcg(css); - return css_to_blkcg(task_css(current, io_cgrp_id)); + return NULL; } static inline bool blk_cgroup_congested(void) @@ -534,6 +612,10 @@ static inline struct request_list *blk_get_rl(struct request_queue *q, rcu_read_lock(); blkcg = bio_blkcg(bio); + if (blkcg) + css_get(&blkcg->css); + else + blkcg = css_to_blkcg(blkcg_get_css()); /* bypass blkg lookup and use @q->root_rl directly for root */ if (blkcg == &blkcg_root) @@ -565,6 +647,8 @@ root_rl: */ static inline void blk_put_rl(struct request_list *rl) { + /* an additional ref is always taken for rl */ + css_put(&rl->blkg->blkcg->css); if (rl->blkg->blkcg != &blkcg_root) blkg_put(rl->blkg); } @@ -805,10 +889,10 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, bool throtl = false; rcu_read_lock(); - blkcg = bio_blkcg(bio); /* associate blkcg if bio hasn't attached one */ - bio_associate_blkcg(bio, &blkcg->css); + bio_associate_blkcg(bio, NULL); + blkcg = bio_blkcg(bio); blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { @@ -930,6 +1014,7 @@ static inline int blkcg_activate_policy(struct request_queue *q, static inline void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol) { } +static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, -- cgit From 49f4c2dc2b5066e9211101c59cc0828e81d41614 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Tue, 11 Sep 2018 14:41:27 -0400 Subject: blkcg: update blkg_lookup_create to do locking To know when to create a blkg, the general pattern is to do a blkg_lookup and if that fails, lock and then do a lookup again and if that fails finally create. It doesn't make much sense for everyone who wants to do creation to write this themselves. This changes blkg_lookup_create to do locking and implement this pattern. The old blkg_lookup_create is renamed to __blkg_lookup_create. If a call site wants to do its own error handling or already owns the queue lock, they can use __blkg_lookup_create. This will be used in upcoming patches. 
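The pattern being centralized is ordinary double-checked creation. Here is a generic standalone sketch, with invented names and a pthread mutex in place of the kernel's q->queue_lock (a model of the pattern, not the blkcg API):

#include <pthread.h>
#include <stdlib.h>

struct obj { int key; };

struct table {
	pthread_mutex_t lock;
	struct obj *slot;		/* toy single-entry "table" */
};

static struct obj *lookup(struct table *t, int key)
{
	struct obj *o = t->slot;	/* optimistic, lockless fast path */
	return (o && o->key == key) ? o : NULL;
}

static struct obj *create(struct table *t, int key)
{
	struct obj *o = malloc(sizeof(*o));
	if (o) {
		o->key = key;
		t->slot = o;
	}
	return o;
}

static struct obj *find_or_create(struct table *t, int key)
{
	struct obj *o = lookup(t, key);

	if (!o) {
		pthread_mutex_lock(&t->lock);
		o = lookup(t, key);	/* re-check: another thread may have
					 * created it while we took the lock */
		if (!o)
			o = create(t, key);
		pthread_mutex_unlock(&t->lock);
	}
	return o;
}

Call sites that already hold the lock, or that want their own error handling, keep using the double-underscore variant directly, exactly as __blkg_lookup_create remains available here.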
Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Reviewed-by: Liu Bo Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 31 ++++++++++++++++++++++++++++--- block/blk-iolatency.c | 2 +- include/linux/blk-cgroup.h | 4 +++- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index c19f9078da1e..cd0d97bed83d 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -259,7 +259,7 @@ err_free_blkg: } /** - * blkg_lookup_create - lookup blkg, try to create one if not there + * __blkg_lookup_create - lookup blkg, try to create one if not there * @blkcg: blkcg of interest * @q: request_queue of interest * @@ -272,8 +272,8 @@ err_free_blkg: * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not * dead and bypassing, returns ERR_PTR(-EBUSY). */ -struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q) +struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q) { struct blkcg_gq *blkg; @@ -310,6 +310,31 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, } } +/** + * blkg_lookup_create - find or create a blkg + * @blkcg: target block cgroup + * @q: target request_queue + * + * This looks up or creates the blkg representing the unique pair + * of the blkcg and the request_queue. + */ +struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q) +{ + struct blkcg_gq *blkg = blkg_lookup(blkcg, q); + unsigned long flags; + + if (unlikely(!blkg)) { + spin_lock_irqsave(q->queue_lock, flags); + + blkg = __blkg_lookup_create(blkcg, q); + + spin_unlock_irqrestore(q->queue_lock, flags); + } + + return blkg; +} + static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 82450c37f2aa..ffde3ab9f84c 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -407,7 +407,7 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio, if (unlikely(!blkg)) { if (!lock) spin_lock_irq(q->queue_lock); - blkg = blkg_lookup_create(blkcg, q); + blkg = __blkg_lookup_create(blkcg, q); if (IS_ERR(blkg)) blkg = NULL; if (!lock) diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 24067a1f8b36..cc0f238530f6 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -184,6 +184,8 @@ extern struct cgroup_subsys_state * const blkcg_root_css; struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, struct request_queue *q, bool update_hint); +struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q); struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q); int blkcg_init_queue(struct request_queue *q); @@ -897,7 +899,7 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { spin_lock_irq(q->queue_lock); - blkg = blkg_lookup_create(blkcg, q); + blkg = __blkg_lookup_create(blkcg, q); if (IS_ERR(blkg)) blkg = NULL; spin_unlock_irq(q->queue_lock); -- cgit From 07b05bcc3213ac9f8c28c9d835b4bf3d5798cc60 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Tue, 11 Sep 2018 14:41:28 -0400 Subject: blkcg: convert blkg_lookup_create to find closest blkg There are several scenarios where blkg_lookup_create can fail. Examples include the blkcg dying, request_queue is dying, or simply being OOM. 
Most callers handle such failures by simply falling back to q->root_blkg. This patch implements the notion of the closest blkg: during blkg_lookup_create, if creation fails, return the closest blkg found or, failing that, q->root_blkg. blkg_try_get_closest is introduced and used during association so a bio is always attached to a blkg. Acked-by: Tejun Heo Signed-off-by: Dennis Zhou Signed-off-by: Jens Axboe --- block/bio.c | 17 ++++++++++------- block/blk-cgroup.c | 25 +++++++++++++++++-------- include/linux/blk-cgroup.h | 14 ++++++++++++++ 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/block/bio.c b/block/bio.c index 083f1c9cde0a..bfd41e8b53a8 100644 --- a/block/bio.c +++ b/block/bio.c @@ -2007,21 +2007,24 @@ int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) EXPORT_SYMBOL_GPL(bio_associate_blkcg); /** - * bio_associate_blkg - associate a bio with the specified blkg + * bio_associate_blkg - associate a bio with the a blkg * @bio: target bio * @blkg: the blkg to associate * - * Associate @bio with the blkg specified by @blkg. This is the queue specific - * blkcg information associated with the @bio, a reference will be taken on the - * @blkg and will be freed when the bio is freed. + * This tries to associate @bio with the specified blkg. Association failure + * is handled by walking up the blkg tree. Therefore, the blkg associated can + * be anything between @blkg and the root_blkg. This situation only happens + * when a cgroup is dying and then the remaining bios will spill to the closest + * alive blkg. + * + * A reference will be taken on the @blkg and will be released when @bio is + * freed. */ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) { if (unlikely(bio->bi_blkg)) return -EBUSY; - if (!blkg_try_get(blkg)) - return -ENODEV; - bio->bi_blkg = blkg; + bio->bi_blkg = blkg_try_get_closest(blkg); return 0; } diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index cd0d97bed83d..e9e3a955f61a 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -268,9 +268,8 @@ err_free_blkg: * that all non-root blkg's have access to the parent blkg. This function * should be called under RCU read lock and @q->queue_lock. * - * Returns pointer to the looked up or created blkg on success, ERR_PTR() - * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not - * dead and bypassing, returns ERR_PTR(-EBUSY). + * Returns the blkg or the closest blkg if blkg_create fails as it walks + * down from root. */ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q) @@ -285,7 +284,7 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, * we shouldn't allow anything to go through for a bypassing queue. */ if (unlikely(blk_queue_bypass(q))) - return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY); + return q->root_blkg; blkg = __blkg_lookup(blkcg, q, true); if (blkg) @@ -293,19 +292,29 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, /* * Create blkgs walking down from blkcg_root to @blkcg, so that all - * non-root blkgs have access to their parents. + * non-root blkgs have access to their parents. Returns the closest + * blkg to the intended blkg should blkg_create() fail. 
*/ while (true) { struct blkcg *pos = blkcg; struct blkcg *parent = blkcg_parent(blkcg); - - while (parent && !__blkg_lookup(parent, q, false)) { + struct blkcg_gq *ret_blkg = q->root_blkg; + + while (parent) { + blkg = __blkg_lookup(parent, q, false); + if (blkg) { + /* remember closest blkg */ + ret_blkg = blkg; + break; + } pos = parent; parent = blkcg_parent(parent); } blkg = blkg_create(pos, q, NULL); - if (pos == blkcg || IS_ERR(blkg)) + if (IS_ERR(blkg)) + return ret_blkg; + if (pos == blkcg) return blkg; } } diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index cc0f238530f6..1fbff1bbb651 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -549,6 +549,20 @@ static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) return NULL; } +/** + * blkg_try_get_closest - try and get a blkg ref on the closet blkg + * @blkg: blkg to get + * + * This walks up the blkg tree to find the closest non-dying blkg and returns + * the blkg that it did association with as it may not be the passed in blkg. + */ +static inline struct blkcg_gq *blkg_try_get_closest(struct blkcg_gq *blkg) +{ + while (!atomic_inc_not_zero(&blkg->refcnt)) + blkg = blkg->parent; + + return blkg; +} void __blkg_release_rcu(struct rcu_head *rcu); -- cgit From a7b39b4e961c4e2b3ed837803a7441a65c90ce33 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Tue, 11 Sep 2018 14:41:29 -0400 Subject: blkcg: always associate a bio with a blkg Previously, blkg's were only assigned as needed by blk-iolatency and blk-throttle. bio->css was also always being associated while blkg was being looked up and then thrown away in blkcg_bio_issue_check. This patch begins the cleanup of bio->css and bio->bi_blkg by always associating a blkg in blkcg_bio_issue_check. This tries to create the blkg, but if it is not possible, falls back to using the root_blkg of the request_queue. Therefore, a bio will always be associated with a blkg. The duplicate association logic is removed from blk-throttle and blk-iolatency. Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/bio.c | 38 ++++++++++++++++++++++++++++++++++++++ block/blk-iolatency.c | 24 ++---------------------- block/blk-throttle.c | 5 +---- include/linux/bio.h | 3 +++ include/linux/blk-cgroup.h | 16 ++-------------- 5 files changed, 46 insertions(+), 40 deletions(-) diff --git a/block/bio.c b/block/bio.c index bfd41e8b53a8..748d7132f172 100644 --- a/block/bio.c +++ b/block/bio.c @@ -2028,6 +2028,41 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) return 0; } +/** + * bio_associate_create_blkg - associate a bio with a blkg from q + * @q: request_queue where bio is going + * @bio: target bio + * + * Associate @bio with the blkg found from the bio's css and the request_queue. + * If one is not found, bio_lookup_blkg creates the blkg. 
+ */ +int bio_associate_create_blkg(struct request_queue *q, struct bio *bio) +{ + struct blkcg *blkcg; + struct blkcg_gq *blkg; + int ret = 0; + + /* someone has already associated this bio with a blkg */ + if (bio->bi_blkg) + return ret; + + rcu_read_lock(); + + bio_associate_blkcg(bio, NULL); + blkcg = bio_blkcg(bio); + + if (!blkcg->css.parent) { + ret = bio_associate_blkg(bio, q->root_blkg); + } else { + blkg = blkg_lookup_create(blkcg, q); + + ret = bio_associate_blkg(bio, blkg); + } + + rcu_read_unlock(); + return ret; +} + /** * bio_disassociate_task - undo bio_associate_current() * @bio: target bio @@ -2057,6 +2092,9 @@ void bio_clone_blkcg_association(struct bio *dst, struct bio *src) { if (src->bi_css) WARN_ON(bio_associate_blkcg(dst, src->bi_css)); + + if (src->bi_blkg) + bio_associate_blkg(dst, src->bi_blkg); } EXPORT_SYMBOL_GPL(bio_clone_blkcg_association); #endif /* CONFIG_BLK_CGROUP */ diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index ffde3ab9f84c..7337fbc7f850 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -392,34 +392,14 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock) { struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); - struct blkcg *blkcg; - struct blkcg_gq *blkg; - struct request_queue *q = rqos->q; + struct blkcg_gq *blkg = bio->bi_blkg; bool issue_as_root = bio_issue_as_root_blkg(bio); if (!blk_iolatency_enabled(blkiolat)) return; - rcu_read_lock(); - bio_associate_blkcg(bio, NULL); - blkcg = bio_blkcg(bio); - blkg = blkg_lookup(blkcg, q); - if (unlikely(!blkg)) { - if (!lock) - spin_lock_irq(q->queue_lock); - blkg = __blkg_lookup_create(blkcg, q); - if (IS_ERR(blkg)) - blkg = NULL; - if (!lock) - spin_unlock_irq(q->queue_lock); - } - if (!blkg) - goto out; - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); - bio_associate_blkg(bio, blkg); -out: - rcu_read_unlock(); + while (blkg && blkg->parent) { struct iolatency_grp *iolat = blkg_to_lat(blkg); if (!iolat) { diff --git a/block/blk-throttle.c b/block/blk-throttle.c index db1a3a2ae006..e62ae502891b 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2118,9 +2118,6 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio) { #ifdef CONFIG_BLK_DEV_THROTTLING_LOW - /* fallback to root_blkg if we fail to get a blkg ref */ - if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV)) - bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg); bio_issue_init(&bio->bi_issue, bio_sectors(bio)); #endif } @@ -2129,7 +2126,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, struct bio *bio) { struct throtl_qnode *qn = NULL; - struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg); + struct throtl_grp *tg = blkg_to_tg(blkg); struct throtl_service_queue *sq; bool rw = bio_data_dir(bio); bool throttled = false; diff --git a/include/linux/bio.h b/include/linux/bio.h index 14b4fa266357..829cd0bb407d 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -542,11 +542,14 @@ static inline int bio_associate_blkcg_from_page(struct bio *bio, #ifdef CONFIG_BLK_CGROUP int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); +int bio_associate_create_blkg(struct request_queue *q, struct bio *bio); void bio_disassociate_task(struct bio *bio); void bio_clone_blkcg_association(struct bio *dst, struct bio *src); #else /* 
CONFIG_BLK_CGROUP */ static inline int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) { return 0; } +static inline int bio_associate_create_blkg(struct request_queue *q, + struct bio *bio) { return 0; } static inline void bio_disassociate_task(struct bio *bio) { } static inline void bio_clone_blkcg_association(struct bio *dst, struct bio *src) { } diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 1fbff1bbb651..6e33ad1d92b4 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -900,29 +900,17 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { - struct blkcg *blkcg; struct blkcg_gq *blkg; bool throtl = false; rcu_read_lock(); - /* associate blkcg if bio hasn't attached one */ - bio_associate_blkcg(bio, NULL); - blkcg = bio_blkcg(bio); - - blkg = blkg_lookup(blkcg, q); - if (unlikely(!blkg)) { - spin_lock_irq(q->queue_lock); - blkg = __blkg_lookup_create(blkcg, q); - if (IS_ERR(blkg)) - blkg = NULL; - spin_unlock_irq(q->queue_lock); - } + bio_associate_create_blkg(q, bio); + blkg = bio->bi_blkg; throtl = blk_throtl_bio(q, blkg, bio); if (!throtl) { - blkg = blkg ?: q->root_blkg; /* * If the bio is flagged with BIO_QUEUE_ENTERED it means this * is a split bio and we would have already accounted for the -- cgit From 5bf9a1f3b4efef7e463105dde8bba4d2397909c2 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Tue, 11 Sep 2018 14:41:30 -0400 Subject: blkcg: consolidate bio_issue_init to be a part of core bio_issue_init among other things initializes the timestamp for an IO. Rather than have this logic handled by policies, this consolidates it to be on the init paths (normal, clone, bounce clone). 
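Concretely, the consolidation amounts to one static inline hook plus three call sites — a minimal sketch assembled from the hunks below:

	static inline void blkcg_bio_issue_init(struct bio *bio)
	{
		/* stamp the bio with its issue time and size */
		bio_issue_init(&bio->bi_issue, bio_sectors(bio));
	}

	/*
	 * Invoked on each init path rather than by individual policies:
	 *   __bio_clone_fast()      - normal clone path
	 *   bounce_clone_bio()      - bounce clone path
	 *   blkcg_bio_issue_check() - normal submission path
	 */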
Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Reviewed-by: Liu Bo Signed-off-by: Jens Axboe --- block/bio.c | 2 ++ block/blk-iolatency.c | 2 -- block/blk-throttle.c | 8 -------- block/bounce.c | 2 ++ include/linux/blk-cgroup.h | 9 +++++++++ 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/block/bio.c b/block/bio.c index 748d7132f172..80c948da061c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -610,6 +610,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) bio->bi_io_vec = bio_src->bi_io_vec; bio_clone_blkcg_association(bio, bio_src); + + blkcg_bio_issue_init(bio); } EXPORT_SYMBOL(__bio_clone_fast); diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 7337fbc7f850..2d848b2f8b87 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -398,8 +398,6 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio, if (!blk_iolatency_enabled(blkiolat)) return; - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); - while (blkg && blkg->parent) { struct iolatency_grp *iolat = blkg_to_lat(blkg); if (!iolat) { diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e62ae502891b..4bda70e8db48 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2115,13 +2115,6 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) } #endif -static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio) -{ -#ifdef CONFIG_BLK_DEV_THROTTLING_LOW - bio_issue_init(&bio->bi_issue, bio_sectors(bio)); -#endif -} - bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, struct bio *bio) { @@ -2145,7 +2138,6 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, if (unlikely(blk_queue_bypass(q))) goto out_unlock; - blk_throtl_assoc_bio(tg, bio); blk_throtl_update_idletime(tg); sq = &tg->service_queue; diff --git a/block/bounce.c b/block/bounce.c index bc63b3a2d18c..7a08703b1204 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -259,6 +259,8 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask, bio_clone_blkcg_association(bio, bio_src); + blkcg_bio_issue_init(bio); + return bio; } diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 6e33ad1d92b4..a6b6e741a75e 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -897,6 +897,12 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg struct bio *bio) { return false; } #endif + +static inline void blkcg_bio_issue_init(struct bio *bio) +{ + bio_issue_init(&bio->bi_issue, bio_sectors(bio)); +} + static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { @@ -922,6 +928,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); } + blkcg_bio_issue_init(bio); + rcu_read_unlock(); return !throtl; } @@ -1034,6 +1042,7 @@ static inline void blk_put_rl(struct request_list *rl) { } static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } +static inline void blkcg_bio_issue_init(struct bio *bio) { } static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { return true; } -- cgit From 74b7c02a9bc124ee3df0d77880ee26db0a325516 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Tue, 11 Sep 2018 14:41:31 -0400 Subject: blkcg: associate a blkg for pages being evicted by swap A prior patch in this series added blkg association 
to bios issued by cgroups. There are two other paths that we want to attribute work back to the appropriate cgroup: swap and writeback. Here we modify the way swap tags bios to include the blkg. Writeback will be tackled in the next patch. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/bio.c | 83 +++++++++++++++++++++++++++++++++++++---------------- include/linux/bio.h | 11 +++++-- mm/page_io.c | 2 +- 3 files changed, 68 insertions(+), 28 deletions(-) diff --git a/block/bio.c b/block/bio.c index 80c948da061c..387480de6992 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1956,30 +1956,6 @@ EXPORT_SYMBOL(bioset_init_from_src); #ifdef CONFIG_BLK_CGROUP -#ifdef CONFIG_MEMCG -/** - * bio_associate_blkcg_from_page - associate a bio with the page's blkcg - * @bio: target bio - * @page: the page to lookup the blkcg from - * - * Associate @bio with the blkcg from @page's owning memcg. This works like - * every other associate function wrt references. - */ -int bio_associate_blkcg_from_page(struct bio *bio, struct page *page) -{ - struct cgroup_subsys_state *blkcg_css; - - if (unlikely(bio->bi_css)) - return -EBUSY; - if (!page->mem_cgroup) - return 0; - blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, - &io_cgrp_subsys); - bio->bi_css = blkcg_css; - return 0; -} -#endif /* CONFIG_MEMCG */ - /** * bio_associate_blkcg - associate a bio with the specified blkcg * @bio: target bio @@ -2030,6 +2006,65 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) return 0; } +static int __bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css) +{ + struct blkcg_gq *blkg; + + rcu_read_lock(); + + blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_disk->queue); + + rcu_read_unlock(); + + return bio_associate_blkg(bio, blkg); +} + +/** + * bio_associate_blkg_from_css - associate a bio with a specified css + * @bio: target bio + * @css: target css + * + * Associate @bio with the blkg found by combining the css's blkg and the + * request_queue of the @bio. This takes a reference on the css that will + * be put upon freeing of @bio. + */ +int bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css) +{ + css_get(css); + bio->bi_css = css; + return __bio_associate_blkg_from_css(bio, css); +} +EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); + +#ifdef CONFIG_MEMCG +/** + * bio_associate_blkg_from_page - associate a bio with the page's blkg + * @bio: target bio + * @page: the page to lookup the blkcg from + * + * Associate @bio with the blkg from @page's owning memcg and the respective + * request_queue. This works like every other associate function wrt + * references. + * + * Note: this must be called after bio has an associated device. 
+ */ +int bio_associate_blkg_from_page(struct bio *bio, struct page *page) +{ + struct cgroup_subsys_state *css; + + if (unlikely(bio->bi_css)) + return -EBUSY; + if (!page->mem_cgroup) + return 0; + css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); + bio->bi_css = css; + + return __bio_associate_blkg_from_css(bio, css); +} +#endif /* CONFIG_MEMCG */ + /** * bio_associate_create_blkg - associate a bio with a blkg from q * @q: request_queue where bio is going diff --git a/include/linux/bio.h b/include/linux/bio.h index 829cd0bb407d..c73a870ebc0e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -533,21 +533,26 @@ do { \ disk_devt((bio)->bi_disk) #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) -int bio_associate_blkcg_from_page(struct bio *bio, struct page *page); +int bio_associate_blkg_from_page(struct bio *bio, struct page *page); #else -static inline int bio_associate_blkcg_from_page(struct bio *bio, - struct page *page) { return 0; } +static inline int bio_associate_blkg_from_page(struct bio *bio, + struct page *page) { return 0; } #endif #ifdef CONFIG_BLK_CGROUP int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); +int bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css); int bio_associate_create_blkg(struct request_queue *q, struct bio *bio); void bio_disassociate_task(struct bio *bio); void bio_clone_blkcg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ static inline int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) { return 0; } +static inline int bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css) +{ return 0; } static inline int bio_associate_create_blkg(struct request_queue *q, struct bio *bio) { return 0; } static inline void bio_disassociate_task(struct bio *bio) { } diff --git a/mm/page_io.c b/mm/page_io.c index aafd19ec1db4..573d3663d846 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, goto out; } bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc); - bio_associate_blkcg_from_page(bio, page); + bio_associate_blkg_from_page(bio, page); count_swpout_vm_event(page); set_page_writeback(page); unlock_page(page); -- cgit From bdc2491708c47601603918a9a20acddef6e1d814 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Tue, 11 Sep 2018 14:41:32 -0400 Subject: blkcg: associate writeback bios with a blkg One of the goals of this series is to remove a separate reference to the css of the bio. This can and should be accessed via bio_blkcg. In this patch, the wbc_init_bio call is changed such that it must be called after a queue has been associated with the bio. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- Documentation/admin-guide/cgroup-v2.rst | 8 +++++--- fs/buffer.c | 10 +++++----- fs/ext4/page-io.c | 2 +- include/linux/writeback.h | 5 +++-- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 184193bcb262..caf36105a1c7 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1857,8 +1857,10 @@ following two functions. 
wbc_init_bio(@wbc, @bio) Should be called for each bio carrying writeback data and - associates the bio with the inode's owner cgroup. Can be - called anytime between bio allocation and submission. + associates the bio with the inode's owner cgroup and the + corresponding request queue. This must be called after + a queue (device) has been associated with the bio and + before submission. wbc_account_io(@wbc, @page, @bytes) Should be called for each data segment being written out. @@ -1877,7 +1879,7 @@ the configuration, the bio may be executed at a lower priority and if the writeback session is holding shared resources, e.g. a journal entry, may lead to priority inversion. There is no one easy solution for the problem. Filesystems can try to work around specific problem -cases by skipping wbc_init_bio() or using bio_associate_blkcg() +cases by skipping wbc_init_bio() or using bio_associate_create_blkg() directly. diff --git a/fs/buffer.c b/fs/buffer.c index 6f1ae3ac9789..109f55196866 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3060,11 +3060,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, */ bio = bio_alloc(GFP_NOIO, 1); - if (wbc) { - wbc_init_bio(wbc, bio); - wbc_account_io(wbc, bh->b_page, bh->b_size); - } - bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio_set_dev(bio, bh->b_bdev); bio->bi_write_hint = write_hint; @@ -3084,6 +3079,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, op_flags |= REQ_PRIO; bio_set_op_attrs(bio, op, op_flags); + if (wbc) { + wbc_init_bio(wbc, bio); + wbc_account_io(wbc, bh->b_page, bh->b_size); + } + submit_bio(bio); return 0; } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index db7590178dfc..2aa62d58d8dd 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io, bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); if (!bio) return -ENOMEM; - wbc_init_bio(io->io_wbc, bio); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio_set_dev(bio, bh->b_bdev); bio->bi_end_io = ext4_end_bio; bio->bi_private = ext4_get_io_end(io->io_end); io->io_bio = bio; io->io_next_block = bh->b_blocknr; + wbc_init_bio(io->io_wbc, bio); return 0; } diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fdfd04e348f6..738a0c24874f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -246,7 +246,8 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, * * @bio is a part of the writeback in progress controlled by @wbc. Perform * writeback specific initialization. This is used to apply the cgroup - * writeback context. + * writeback context. Must be called after the bio has been associated with + * a device. */ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { @@ -257,7 +258,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) * regular writeback instead of writing things out itself. */ if (wbc->wb) - bio_associate_blkcg(bio, wbc->wb->blkcg_css); + bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); } #else /* CONFIG_CGROUP_WRITEBACK */ -- cgit From c839e7a03f92bafd71fd145b470dcdc7f43f2d4c Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Tue, 11 Sep 2018 14:41:33 -0400 Subject: blkcg: remove bio->bi_css and instead use bio->bi_blkg Prior patches ensured that all bios are now associated with some blkg. This now makes bio->bi_css unnecessary as blkg maintains a reference to the blkcg already. 
This patch removes the field bi_css and transfers corresponding uses to access via bi_blkg. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/bio.c | 56 +++++++++------------------------------------- block/bounce.c | 2 +- drivers/block/loop.c | 5 +++-- drivers/md/raid0.c | 2 +- include/linux/bio.h | 9 +++----- include/linux/blk-cgroup.h | 8 +++---- include/linux/blk_types.h | 1 - kernel/trace/blktrace.c | 4 ++-- 8 files changed, 25 insertions(+), 62 deletions(-) diff --git a/block/bio.c b/block/bio.c index 387480de6992..71cfe3720ea7 100644 --- a/block/bio.c +++ b/block/bio.c @@ -609,7 +609,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) bio->bi_iter = bio_src->bi_iter; bio->bi_io_vec = bio_src->bi_io_vec; - bio_clone_blkcg_association(bio, bio_src); + bio_clone_blkg_association(bio, bio_src); blkcg_bio_issue_init(bio); } @@ -1956,34 +1956,6 @@ EXPORT_SYMBOL(bioset_init_from_src); #ifdef CONFIG_BLK_CGROUP -/** - * bio_associate_blkcg - associate a bio with the specified blkcg - * @bio: target bio - * @blkcg_css: css of the blkcg to associate - * - * Associate @bio with the blkcg specified by @blkcg_css. Block layer will - * treat @bio as if it were issued by a task which belongs to the blkcg. - * - * This function takes an extra reference of @blkcg_css which will be put - * when @bio is released. The caller must own @bio and is responsible for - * synchronizing calls to this function. If @blkcg_css is NULL, a call to - * blkcg_get_css finds the current css from the kthread or task. - */ -int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) -{ - if (unlikely(bio->bi_css)) - return -EBUSY; - - if (blkcg_css) - css_get(blkcg_css); - else - blkcg_css = blkcg_get_css(); - - bio->bi_css = blkcg_css; - return 0; -} -EXPORT_SYMBOL_GPL(bio_associate_blkcg); - /** * bio_associate_blkg - associate a bio with the a blkg * @bio: target bio @@ -2033,7 +2005,6 @@ int bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css) { css_get(css); - bio->bi_css = css; return __bio_associate_blkg_from_css(bio, css); } EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); @@ -2054,12 +2025,11 @@ int bio_associate_blkg_from_page(struct bio *bio, struct page *page) { struct cgroup_subsys_state *css; - if (unlikely(bio->bi_css)) + if (unlikely(bio->bi_blkg)) return -EBUSY; if (!page->mem_cgroup) return 0; css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); - bio->bi_css = css; return __bio_associate_blkg_from_css(bio, css); } @@ -2085,8 +2055,7 @@ int bio_associate_create_blkg(struct request_queue *q, struct bio *bio) rcu_read_lock(); - bio_associate_blkcg(bio, NULL); - blkcg = bio_blkcg(bio); + blkcg = css_to_blkcg(blkcg_get_css()); if (!blkcg->css.parent) { ret = bio_associate_blkg(bio, q->root_blkg); @@ -2110,30 +2079,27 @@ void bio_disassociate_task(struct bio *bio) put_io_context(bio->bi_ioc); bio->bi_ioc = NULL; } - if (bio->bi_css) { - css_put(bio->bi_css); - bio->bi_css = NULL; - } if (bio->bi_blkg) { + /* a ref is always taken on css */ + css_put(&bio_blkcg(bio)->css); blkg_put(bio->bi_blkg); bio->bi_blkg = NULL; } } /** - * bio_clone_blkcg_association - clone blkcg association from src to dst bio + * bio_clone_blkg_association - clone blkg association from src to dst bio * @dst: destination bio * @src: source bio */ -void bio_clone_blkcg_association(struct bio *dst, struct bio *src) +void bio_clone_blkg_association(struct bio *dst, struct bio *src) { - if (src->bi_css) - 
WARN_ON(bio_associate_blkcg(dst, src->bi_css)); - - if (src->bi_blkg) + if (src->bi_blkg) { + css_get(&bio_blkcg(src)->css); bio_associate_blkg(dst, src->bi_blkg); + } } -EXPORT_SYMBOL_GPL(bio_clone_blkcg_association); +EXPORT_SYMBOL_GPL(bio_clone_blkg_association); #endif /* CONFIG_BLK_CGROUP */ static void __init biovec_init_slabs(void) diff --git a/block/bounce.c b/block/bounce.c index 7a08703b1204..b30071ac4ec6 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -257,7 +257,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask, } } - bio_clone_blkcg_association(bio, bio_src); + bio_clone_blkg_association(bio, bio_src); blkcg_bio_issue_init(bio); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ea9debf59b22..abad6d15f956 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -77,6 +77,7 @@ #include #include #include +#include #include "loop.h" @@ -1760,8 +1761,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, /* always use the first bio's css */ #ifdef CONFIG_BLK_CGROUP - if (cmd->use_aio && rq->bio && rq->bio->bi_css) { - cmd->css = rq->bio->bi_css; + if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) { + cmd->css = &bio_blkcg(rq->bio)->css; css_get(cmd->css); } else #endif diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index ac1cffd2a09b..f3fb5bb8c82a 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) !discard_bio) continue; bio_chain(discard_bio, bio); - bio_clone_blkcg_association(discard_bio, bio); + bio_clone_blkg_association(discard_bio, bio); if (mddev->gendisk) trace_block_bio_remap(bdev_get_queue(rdev->bdev), discard_bio, disk_devt(mddev->gendisk), diff --git a/include/linux/bio.h b/include/linux/bio.h index c73a870ebc0e..e973876625a8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -540,24 +540,21 @@ static inline int bio_associate_blkg_from_page(struct bio *bio, #endif #ifdef CONFIG_BLK_CGROUP -int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); int bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css); int bio_associate_create_blkg(struct request_queue *q, struct bio *bio); void bio_disassociate_task(struct bio *bio); -void bio_clone_blkcg_association(struct bio *dst, struct bio *src); +void bio_clone_blkg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ -static inline int bio_associate_blkcg(struct bio *bio, - struct cgroup_subsys_state *blkcg_css) { return 0; } static inline int bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css) { return 0; } static inline int bio_associate_create_blkg(struct request_queue *q, struct bio *bio) { return 0; } static inline void bio_disassociate_task(struct bio *bio) { } -static inline void bio_clone_blkcg_association(struct bio *dst, - struct bio *src) { } +static inline void bio_clone_blkg_association(struct bio *dst, + struct bio *src) { } #endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_HIGHMEM diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index a6b6e741a75e..c41cfcc2b4d8 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -308,8 +308,8 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) */ static inline struct blkcg *__bio_blkcg(struct bio *bio) { - if (bio && bio->bi_css) - return css_to_blkcg(bio->bi_css); + if (bio && 
bio->bi_blkg) + return bio->bi_blkg->blkcg; return css_to_blkcg(blkcg_css()); } @@ -323,8 +323,8 @@ static inline struct blkcg *__bio_blkcg(struct bio *bio) */ static inline struct blkcg *bio_blkcg(struct bio *bio) { - if (bio && bio->bi_css) - return css_to_blkcg(bio->bi_css); + if (bio && bio->bi_blkg) + return bio->bi_blkg->blkcg; return NULL; } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index f6dfb30737d8..9578c7ab1eb6 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -178,7 +178,6 @@ struct bio { * release. Read comment on top of bio_associate_current(). */ struct io_context *bi_ioc; - struct cgroup_subsys_state *bi_css; struct blkcg_gq *bi_blkg; struct bio_issue bi_issue; #endif diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 2868d85f1fb1..fac0ddf8a8e2 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) return NULL; - if (!bio->bi_css) + if (!bio->bi_blkg) return NULL; - return cgroup_get_kernfs_id(bio->bi_css->cgroup); + return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup); } #else static union kernfs_node_id * -- cgit From f0fcb3ec89f37167810e660b0595d9a6155d9807 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Tue, 11 Sep 2018 14:41:34 -0400 Subject: blkcg: remove additional reference to the css The previous patch in this series removed carrying around a pointer to the css in blkg. However, the blkg association logic still relied on taking a reference on the css to ensure we wouldn't fail in getting a reference for the blkg. Here the implicit dependency on the css is removed. The association continues to rely on the tryget logic walking up the blkg tree. This streamlines the three ways that association can happen: normal, swap, and writeback. Acked-by: Tejun Heo Signed-off-by: Dennis Zhou Signed-off-by: Jens Axboe --- block/bio.c | 62 +++++++++++++++++++++++++++------------------- include/linux/blk-cgroup.h | 52 +++----------------------------------- include/linux/cgroup.h | 2 ++ kernel/cgroup/cgroup.c | 48 ++++++++++++++++++++++++++++------- 4 files changed, 81 insertions(+), 83 deletions(-) diff --git a/block/bio.c b/block/bio.c index 71cfe3720ea7..c39251e69447 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1978,18 +1978,30 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) return 0; } +/** + * __bio_associate_blkg_from_css - internal blkg association function + * + * This in the core association function that all association paths rely on. + * A blkg reference is taken which is released upon freeing of the bio. + */ static int __bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css) { + struct request_queue *q = bio->bi_disk->queue; struct blkcg_gq *blkg; + int ret; rcu_read_lock(); - blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_disk->queue); + if (!css || !css->parent) + blkg = q->root_blkg; + else + blkg = blkg_lookup_create(css_to_blkcg(css), q); - rcu_read_unlock(); + ret = bio_associate_blkg(bio, blkg); - return bio_associate_blkg(bio, blkg); + rcu_read_unlock(); + return ret; } /** @@ -1998,13 +2010,14 @@ static int __bio_associate_blkg_from_css(struct bio *bio, * @css: target css * * Associate @bio with the blkg found by combining the css's blkg and the - * request_queue of the @bio. This takes a reference on the css that will - * be put upon freeing of @bio. 
+ * request_queue of the @bio. This falls back to the queue's root_blkg if + * the association fails with the css. */ int bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css) { - css_get(css); + if (unlikely(bio->bi_blkg)) + return -EBUSY; return __bio_associate_blkg_from_css(bio, css); } EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); @@ -2016,22 +2029,29 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); * @page: the page to lookup the blkcg from * * Associate @bio with the blkg from @page's owning memcg and the respective - * request_queue. This works like every other associate function wrt - * references. + * request_queue. If cgroup_e_css returns NULL, fall back to the queue's + * root_blkg. * * Note: this must be called after bio has an associated device. */ int bio_associate_blkg_from_page(struct bio *bio, struct page *page) { struct cgroup_subsys_state *css; + int ret; if (unlikely(bio->bi_blkg)) return -EBUSY; if (!page->mem_cgroup) return 0; - css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); - return __bio_associate_blkg_from_css(bio, css); + rcu_read_lock(); + + css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); + + ret = __bio_associate_blkg_from_css(bio, css); + + rcu_read_unlock(); + return ret; } #endif /* CONFIG_MEMCG */ @@ -2041,12 +2061,12 @@ int bio_associate_blkg_from_page(struct bio *bio, struct page *page) * @bio: target bio * * Associate @bio with the blkg found from the bio's css and the request_queue. - * If one is not found, bio_lookup_blkg creates the blkg. + * If one is not found, bio_lookup_blkg creates the blkg. This falls back to + * the queue's root_blkg if association fails. */ int bio_associate_create_blkg(struct request_queue *q, struct bio *bio) { - struct blkcg *blkcg; - struct blkcg_gq *blkg; + struct cgroup_subsys_state *css; int ret = 0; /* someone has already associated this bio with a blkg */ @@ -2055,15 +2075,9 @@ int bio_associate_create_blkg(struct request_queue *q, struct bio *bio) rcu_read_lock(); - blkcg = css_to_blkcg(blkcg_get_css()); + css = blkcg_css(); - if (!blkcg->css.parent) { - ret = bio_associate_blkg(bio, q->root_blkg); - } else { - blkg = blkg_lookup_create(blkcg, q); - - ret = bio_associate_blkg(bio, blkg); - } + ret = __bio_associate_blkg_from_css(bio, css); rcu_read_unlock(); return ret; @@ -2080,8 +2094,6 @@ void bio_disassociate_task(struct bio *bio) bio->bi_ioc = NULL; } if (bio->bi_blkg) { - /* a ref is always taken on css */ - css_put(&bio_blkcg(bio)->css); blkg_put(bio->bi_blkg); bio->bi_blkg = NULL; } @@ -2094,10 +2106,8 @@ void bio_disassociate_task(struct bio *bio) */ void bio_clone_blkg_association(struct bio *dst, struct bio *src) { - if (src->bi_blkg) { - css_get(&bio_blkcg(src)->css); + if (src->bi_blkg) bio_associate_blkg(dst, src->bi_blkg); - } } EXPORT_SYMBOL_GPL(bio_clone_blkg_association); #endif /* CONFIG_BLK_CGROUP */ diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index c41cfcc2b4d8..2951ea3541b1 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -249,47 +249,6 @@ static inline struct cgroup_subsys_state *blkcg_css(void) return task_css(current, io_cgrp_id); } -/** - * blkcg_get_css - find and get a reference to the css - * - * Find the css associated with either the kthread or the current task. - * This takes a reference on the blkcg which will need to be managed by the - * caller. 
- */ -static inline struct cgroup_subsys_state *blkcg_get_css(void) -{ - struct cgroup_subsys_state *css; - - rcu_read_lock(); - - css = kthread_blkcg(); - if (css) { - css_get(css); - } else { - /* - * This is a bit complicated. It is possible task_css is seeing - * an old css pointer here. This is caused by the current - * thread migrating away from this cgroup and this cgroup dying. - * css_tryget() will fail when trying to take a ref on a cgroup - * that's ref count has hit 0. - * - * Therefore, if it does fail, this means current must have - * been swapped away already and this is waiting for it to - * propagate on the polling cpu. Hence the use of cpu_relax(). - */ - while (true) { - css = task_css(current, io_cgrp_id); - if (likely(css_tryget(css))) - break; - cpu_relax(); - } - } - - rcu_read_unlock(); - - return css; -} - static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; @@ -628,10 +587,8 @@ static inline struct request_list *blk_get_rl(struct request_queue *q, rcu_read_lock(); blkcg = bio_blkcg(bio); - if (blkcg) - css_get(&blkcg->css); - else - blkcg = css_to_blkcg(blkcg_get_css()); + if (!blkcg) + blkcg = css_to_blkcg(blkcg_css()); /* bypass blkg lookup and use @q->root_rl directly for root */ if (blkcg == &blkcg_root) @@ -646,7 +603,8 @@ static inline struct request_list *blk_get_rl(struct request_queue *q, if (unlikely(!blkg)) goto root_rl; - blkg_get(blkg); + if (!blkg_try_get(blkg)) + goto root_rl; rcu_read_unlock(); return &blkg->rl; root_rl: @@ -663,8 +621,6 @@ root_rl: */ static inline void blk_put_rl(struct request_list *rl) { - /* an additional ref is always taken for rl */ - css_put(&rl->blkg->blkcg->css); if (rl->blkg->blkcg != &blkcg_root) blkg_put(rl->blkg); } diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 32c553556bbd..b8bcbdeb2eac 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -93,6 +93,8 @@ extern struct css_set init_css_set; bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); +struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup, + struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, struct cgroup_subsys *ss); struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index aae10baf1902..48fb22e49467 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -492,7 +492,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, } /** - * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem + * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss * @cgrp: the cgroup of interest * @ss: the subsystem of interest (%NULL returns @cgrp->self) * @@ -501,8 +501,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, * enabled. If @ss is associated with the hierarchy @cgrp is on, this * function is guaranteed to return non-NULL css. 
*/ -static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, - struct cgroup_subsys *ss) +static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp, + struct cgroup_subsys *ss) { lockdep_assert_held(&cgroup_mutex); @@ -522,6 +522,35 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, return cgroup_css(cgrp, ss); } +/** + * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem + * @cgrp: the cgroup of interest + * @ss: the subsystem of interest + * + * Find and get the effective css of @cgrp for @ss. The effective css is + * defined as the matching css of the nearest ancestor including self which + * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on, + * the root css is returned, so this function always returns a valid css. + * + * The returned css is not guaranteed to be online, and therefore it is the + * callers responsiblity to tryget a reference for it. + */ +struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, + struct cgroup_subsys *ss) +{ + struct cgroup_subsys_state *css; + + do { + css = cgroup_css(cgrp, ss); + + if (css) + return css; + cgrp = cgroup_parent(cgrp); + } while (cgrp); + + return init_css_set.subsys[ss->id]; +} + /** * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem * @cgrp: the cgroup of interest @@ -604,10 +633,11 @@ EXPORT_SYMBOL_GPL(of_css); * * Should be called under cgroup_[tree_]mutex. */ -#define for_each_e_css(css, ssid, cgrp) \ - for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ - if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \ - ; \ +#define for_each_e_css(css, ssid, cgrp) \ + for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ + if (!((css) = cgroup_e_css_by_mask(cgrp, \ + cgroup_subsys[(ssid)]))) \ + ; \ else /** @@ -1006,7 +1036,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset, * @ss is in this hierarchy, so we want the * effective css from @cgrp. */ - template[i] = cgroup_e_css(cgrp, ss); + template[i] = cgroup_e_css_by_mask(cgrp, ss); } else { /* * @ss is not in this hierarchy, so we don't want @@ -3019,7 +3049,7 @@ static int cgroup_apply_control(struct cgroup *cgrp) return ret; /* - * At this point, cgroup_e_css() results reflect the new csses + * At this point, cgroup_e_css_by_mask() results reflect the new csses * making the following cgroup_update_dfl_csses() properly update * css associations of all tasks in the subtree. */ -- cgit From e2b0989954ae7c80609f77e7ce203bea6d2c54e1 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Tue, 11 Sep 2018 14:41:35 -0400 Subject: blkcg: cleanup and make blk_get_rl use blkg_lookup_create blk_get_rl is responsible for identifying which request_list a request should be allocated to. Try get logic was added earlier, but semantically the logic was not changed. This patch makes better use of the bio already having a reference to the blkg in the hot path. The cold path uses a better fallback of blkg_lookup_create rather than just blkg_lookup and then falling back to the q->root_rl. If lookup_create fails with anything but -ENODEV, it falls back to q->root_rl. A clarifying comment is added to explain why q->root_rl is used rather than the root blkg's rl. 
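The reworked helper reduces to the decision flow below — a condensed sketch of the blk-cgroup.h hunk that follows, not the verbatim kernel code: the function name here is hypothetical, and the rcu_read_lock()/rcu_read_unlock() bracketing plus the clarifying comment are elided for brevity:

	static inline struct request_list *blk_get_rl_sketch(struct request_queue *q,
							     struct bio *bio)
	{
		struct blkcg *blkcg;
		struct blkcg_gq *blkg;

		/* hot path: the bio already holds a blkg reference */
		if (bio && bio->bi_blkg) {
			blkcg = bio->bi_blkg->blkcg;
			if (blkcg == &blkcg_root)	/* root blkcg uses q->root_rl */
				return &q->root_rl;
			blkg_get(bio->bi_blkg);
			return &bio->bi_blkg->rl;
		}

		/* cold path: resolve the blkcg, then look up or create the blkg */
		blkcg = css_to_blkcg(blkcg_css());
		if (blkcg == &blkcg_root)
			return &q->root_rl;

		blkg = blkg_lookup(blkcg, q);
		if (unlikely(!blkg))
			blkg = __blkg_lookup_create(blkcg, q);
		if (!blkg_try_get(blkg))
			return &q->root_rl;

		return &blkg->rl;
	}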
Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 2951ea3541b1..d2f7f1b00fcf 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -586,28 +586,36 @@ static inline struct request_list *blk_get_rl(struct request_queue *q, rcu_read_lock(); - blkcg = bio_blkcg(bio); - if (!blkcg) - blkcg = css_to_blkcg(blkcg_css()); + if (bio && bio->bi_blkg) { + blkcg = bio->bi_blkg->blkcg; + if (blkcg == &blkcg_root) + goto rl_use_root; + + blkg_get(bio->bi_blkg); + rcu_read_unlock(); + return &bio->bi_blkg->rl; + } - /* bypass blkg lookup and use @q->root_rl directly for root */ + blkcg = css_to_blkcg(blkcg_css()); if (blkcg == &blkcg_root) - goto root_rl; + goto rl_use_root; - /* - * Try to use blkg->rl. blkg lookup may fail under memory pressure - * or if either the blkcg or queue is going away. Fall back to - * root_rl in such cases. - */ blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) - goto root_rl; + blkg = __blkg_lookup_create(blkcg, q); if (!blkg_try_get(blkg)) - goto root_rl; + goto rl_use_root; + rcu_read_unlock(); return &blkg->rl; -root_rl: + + /* + * Each blkg has its own request_list, however, the root blkcg + * uses the request_queue's root_rl. This is to avoid most + * overhead for the root blkcg. + */ +rl_use_root: rcu_read_unlock(); return &q->root_rl; } -- cgit From b3b9f24f5fcc099c41f7dc1d02350635830888e5 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Tue, 11 Sep 2018 14:41:36 -0400 Subject: blkcg: change blkg reference counting to use percpu_ref Now that every bio is associated with a blkg, this puts the use of blkg_get, blkg_try_get, and blkg_put on the hot path. This switches over the refcnt in blkg to use percpu_ref. Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 64 ++++++++++++++++++++++++++++------------------ include/linux/blk-cgroup.h | 15 ++++------- 2 files changed, 44 insertions(+), 35 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index e9e3a955f61a..ab3676e1e15e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -84,6 +84,37 @@ static void blkg_free(struct blkcg_gq *blkg) kfree(blkg); } +static void __blkg_release(struct rcu_head *rcu) +{ + struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); + + percpu_ref_exit(&blkg->refcnt); + + /* release the blkcg and parent blkg refs this blkg has been holding */ + css_put(&blkg->blkcg->css); + if (blkg->parent) + blkg_put(blkg->parent); + + wb_congested_put(blkg->wb_congested); + + blkg_free(blkg); +} + +/* + * A group is RCU protected, but having an rcu lock does not mean that one + * can access all the fields of blkg and assume these are valid. For + * example, don't try to follow throtl_data and request queue links. + * + * Having a reference to blkg under an rcu allows accesses to only values + * local to groups like group stats and group rate limits. 
+ */ +static void blkg_release(struct percpu_ref *ref) +{ + struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt); + + call_rcu(&blkg->rcu_head, __blkg_release); +} + /** * blkg_alloc - allocate a blkg * @blkcg: block cgroup the new blkg is associated with @@ -110,7 +141,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, blkg->q = q; INIT_LIST_HEAD(&blkg->q_node); blkg->blkcg = blkcg; - atomic_set(&blkg->refcnt, 1); /* root blkg uses @q->root_rl, init rl only for !root blkgs */ if (blkcg != &blkcg_root) { @@ -217,6 +247,11 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, blkg_get(blkg->parent); } + ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0, + GFP_NOWAIT | __GFP_NOWARN); + if (ret) + goto err_cancel_ref; + /* invoke per-policy init */ for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -249,6 +284,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, blkg_put(blkg); return ERR_PTR(ret); +err_cancel_ref: + percpu_ref_exit(&blkg->refcnt); err_put_congested: wb_congested_put(wb_congested); err_put_css: @@ -387,7 +424,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. */ - blkg_put(blkg); + percpu_ref_kill(&blkg->refcnt); } /** @@ -414,29 +451,6 @@ static void blkg_destroy_all(struct request_queue *q) q->root_rl.blkg = NULL; } -/* - * A group is RCU protected, but having an rcu lock does not mean that one - * can access all the fields of blkg and assume these are valid. For - * example, don't try to follow throtl_data and request queue links. - * - * Having a reference to blkg under an rcu allows accesses to only values - * local to groups like group stats and group rate limits. - */ -void __blkg_release_rcu(struct rcu_head *rcu_head) -{ - struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head); - - /* release the blkcg and parent blkg refs this blkg has been holding */ - css_put(&blkg->blkcg->css); - if (blkg->parent) - blkg_put(blkg->parent); - - wb_congested_put(blkg->wb_congested); - - blkg_free(blkg); -} -EXPORT_SYMBOL_GPL(__blkg_release_rcu); - /* * The next function used by blk_queue_for_each_rl(). It's a bit tricky * because the root blkg uses @q->root_rl instead of its own rl. diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index d2f7f1b00fcf..7ff5d8ba8c7a 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -126,7 +126,7 @@ struct blkcg_gq { struct request_list rl; /* reference count */ - atomic_t refcnt; + struct percpu_ref refcnt; /* is this blkg online? 
protected by both blkcg and q locks */ bool online; @@ -490,8 +490,7 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) */ static inline void blkg_get(struct blkcg_gq *blkg) { - WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); - atomic_inc(&blkg->refcnt); + percpu_ref_get(&blkg->refcnt); } /** @@ -503,7 +502,7 @@ static inline void blkg_get(struct blkcg_gq *blkg) */ static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) { - if (atomic_inc_not_zero(&blkg->refcnt)) + if (percpu_ref_tryget(&blkg->refcnt)) return blkg; return NULL; } @@ -517,23 +516,19 @@ static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) */ static inline struct blkcg_gq *blkg_try_get_closest(struct blkcg_gq *blkg) { - while (!atomic_inc_not_zero(&blkg->refcnt)) + while (!percpu_ref_tryget(&blkg->refcnt)) blkg = blkg->parent; return blkg; } -void __blkg_release_rcu(struct rcu_head *rcu); - /** * blkg_put - put a blkg reference * @blkg: blkg to put */ static inline void blkg_put(struct blkcg_gq *blkg) { - WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); - if (atomic_dec_and_test(&blkg->refcnt)) - call_rcu(&blkg->rcu_head, __blkg_release_rcu); + percpu_ref_put(&blkg->refcnt); } /** -- cgit From 101246ec02b54adf6a77180a01ccbe310add2c32 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Tue, 11 Sep 2018 14:41:37 -0400 Subject: blkcg: rename blkg_try_get to blkg_tryget blkg reference counting now uses percpu_ref rather than atomic_t. Let's make this consistent with css_tryget. This renames blkg_try_get to blkg_tryget and now returns a bool rather than the blkg or NULL. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/bio.c | 2 +- block/blk-cgroup.c | 3 +-- block/blk-iolatency.c | 2 +- include/linux/blk-cgroup.h | 14 ++++++-------- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/block/bio.c b/block/bio.c index c39251e69447..1cd47f218200 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1974,7 +1974,7 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) { if (unlikely(bio->bi_blkg)) return -EBUSY; - bio->bi_blkg = blkg_try_get_closest(blkg); + bio->bi_blkg = blkg_tryget_closest(blkg); return 0; } diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index ab3676e1e15e..76136bea7a7f 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1794,8 +1794,7 @@ void blkcg_maybe_throttle_current(void) blkg = blkg_lookup(blkcg, q); if (!blkg) goto out; - blkg = blkg_try_get(blkg); - if (!blkg) + if (!blkg_tryget(blkg)) goto out; rcu_read_unlock(); diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 2d848b2f8b87..27c14f8d2576 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -625,7 +625,7 @@ static void blkiolatency_timer_fn(struct timer_list *t) * We could be exiting, don't access the pd unless we have a * ref on the blkg. */ - if (!blkg_try_get(blkg)) + if (!blkg_tryget(blkg)) continue; iolat = blkg_to_lat(blkg); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 7ff5d8ba8c7a..b7fd08013de2 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -494,27 +494,25 @@ static inline void blkg_get(struct blkcg_gq *blkg) } /** - * blkg_try_get - try and get a blkg reference + * blkg_tryget - try and get a blkg reference * @blkg: blkg to get * * This is for use when doing an RCU lookup of the blkg. We may be in the midst * of freeing this blkg, so we can only use it if the refcnt is not zero. 
*/ -static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) +static inline bool blkg_tryget(struct blkcg_gq *blkg) { - if (percpu_ref_tryget(&blkg->refcnt)) - return blkg; - return NULL; + return percpu_ref_tryget(&blkg->refcnt); } /** - * blkg_try_get_closest - try and get a blkg ref on the closet blkg + * blkg_tryget_closest - try and get a blkg ref on the closet blkg * @blkg: blkg to get * * This walks up the blkg tree to find the closest non-dying blkg and returns * the blkg that it did association with as it may not be the passed in blkg. */ -static inline struct blkcg_gq *blkg_try_get_closest(struct blkcg_gq *blkg) +static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) { while (!percpu_ref_tryget(&blkg->refcnt)) blkg = blkg->parent; @@ -599,7 +597,7 @@ static inline struct request_list *blk_get_rl(struct request_queue *q, if (unlikely(!blkg)) blkg = __blkg_lookup_create(blkcg, q); - if (!blkg_try_get(blkg)) + if (!blkg_tryget(blkg)) goto rl_use_root; rcu_read_unlock(); -- cgit From c7b1bf5cca76a31845a7d9e58cec7ff8f1cb0d4d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 21 Sep 2018 13:34:46 -0700 Subject: blk-mq: Document the functions that iterate over requests Make the functions that iterate over requests easier to understand by documenting their purpose. Fix several minor spelling and grammar mistakes in comments in these functions. Signed-off-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Cc: Christoph Hellwig Cc: Ming Lei Cc: Jianchao Wang Cc: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 7 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 94e1ed667b6e..40d1667bceac 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -232,13 +232,26 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) /* * We can hit rq == NULL here, because the tagging functions - * test and set the bit before assining ->rqs[]. + * test and set the bit before assigning ->rqs[]. */ if (rq && rq->q == hctx->queue) iter_data->fn(hctx, rq, iter_data->data, reserved); return true; } +/** + * bt_for_each - iterate over the requests associated with a hardware queue + * @hctx: Hardware queue to examine. + * @bt: sbitmap to examine. This is either the breserved_tags member + * or the bitmap_tags member of struct blk_mq_tags. + * @fn: Pointer to the function that will be called for each request + * associated with @hctx that has been assigned a driver tag. + * @fn will be called as follows: @fn(@hctx, rq, @data, @reserved) + * where rq is a pointer to a request. + * @data: Will be passed as third argument to @fn. + * @reserved: Indicates whether @bt is the breserved_tags member or the + * bitmap_tags member of struct blk_mq_tags. + */ static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt, busy_iter_fn *fn, void *data, bool reserved) { @@ -280,6 +293,18 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) return true; } +/** + * bt_tags_for_each - iterate over the requests in a tag map + * @tags: Tag map to iterate over. + * @bt: sbitmap to examine. This is either the breserved_tags member + * or the bitmap_tags member of struct blk_mq_tags. + * @fn: Pointer to the function that will be called for each started + * request. @fn will be called as follows: @fn(rq, @data, + * @reserved) where rq is a pointer to a request. 
+ * @data: Will be passed as second argument to @fn. + * @reserved: Indicates whether @bt is the breserved_tags member or the + * bitmap_tags member of struct blk_mq_tags. + */ static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt, busy_tag_iter_fn *fn, void *data, bool reserved) { @@ -294,6 +319,15 @@ static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt, sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data); } +/** + * blk_mq_all_tag_busy_iter - iterate over all started requests in a tag map + * @tags: Tag map to iterate over. + * @fn: Pointer to the function that will be called for each started + * request. @fn will be called as follows: @fn(rq, @priv, + * reserved) where rq is a pointer to a request. 'reserved' + * indicates whether or not @rq is a reserved request. + * @priv: Will be passed as second argument to @fn. + */ static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv) { @@ -302,6 +336,15 @@ static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false); } +/** + * blk_mq_tagset_busy_iter - iterate over all started requests in a tag set + * @tagset: Tag set to iterate over. + * @fn: Pointer to the function that will be called for each started + * request. @fn will be called as follows: @fn(rq, @priv, + * reserved) where rq is a pointer to a request. 'reserved' + * indicates whether or not @rq is a reserved request. + * @priv: Will be passed as second argument to @fn. + */ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv) { @@ -314,6 +357,20 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, } EXPORT_SYMBOL(blk_mq_tagset_busy_iter); +/** + * blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag + * @q: Request queue to examine. + * @fn: Pointer to the function that will be called for each request + * on @q. @fn will be called as follows: @fn(hctx, rq, @priv, + * reserved) where rq is a pointer to a request and hctx points + * to the hardware queue associated with the request. 'reserved' + * indicates whether or not @rq is a reserved request. + * @priv: Will be passed as third argument to @fn. + * + * Note: if @q->tag_set is shared with other request queues then @fn will be + * called for all requests on all queues that share that tag set and not only + * for requests associated with @q. + */ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void *priv) { @@ -321,11 +378,11 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, int i; /* - * __blk_mq_update_nr_hw_queues will update the nr_hw_queues and - * queue_hw_ctx after freeze the queue. So we could use q_usage_counter - * to avoid race with it. __blk_mq_update_nr_hw_queues will users - * synchronize_rcu to ensure all of the users go out of the critical - * section below and see zeroed q_usage_counter. + * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx + * while the queue is frozen. So we can use q_usage_counter to avoid + * racing with it. __blk_mq_update_nr_hw_queues() uses + * synchronize_rcu() to ensure this function left the critical section + * below. 
*/ rcu_read_lock(); if (percpu_ref_is_zero(&q->q_usage_counter)) { @@ -337,7 +394,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, struct blk_mq_tags *tags = hctx->tags; /* - * If not software queues are currently mapped to this + * If no software queues are currently mapped to this * hardware queue, there's nothing to check */ if (!blk_mq_hw_queue_mapped(hctx)) -- cgit From 43b729bfe9cf30ad11499a66e3b7bd300c716d44 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Sep 2018 09:43:47 +0200 Subject: block: move integrity_req_gap_{back,front}_merge to blk.h No need to expose these to drivers. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk.h | 35 +++++++++++++++++++++++++++++++++-- include/linux/blkdev.h | 31 ------------------------------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/block/blk.h b/block/blk.h index 9db4e389582c..441c2de1d4b9 100644 --- a/block/blk.h +++ b/block/blk.h @@ -158,7 +158,38 @@ static inline bool bio_integrity_endio(struct bio *bio) return __bio_integrity_endio(bio); return true; } -#else + +static inline bool integrity_req_gap_back_merge(struct request *req, + struct bio *next) +{ + struct bio_integrity_payload *bip = bio_integrity(req->bio); + struct bio_integrity_payload *bip_next = bio_integrity(next); + + return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], + bip_next->bip_vec[0].bv_offset); +} + +static inline bool integrity_req_gap_front_merge(struct request *req, + struct bio *bio) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + struct bio_integrity_payload *bip_next = bio_integrity(req->bio); + + return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], + bip_next->bip_vec[0].bv_offset); +} +#else /* CONFIG_BLK_DEV_INTEGRITY */ +static inline bool integrity_req_gap_back_merge(struct request *req, + struct bio *next) +{ + return false; +} +static inline bool integrity_req_gap_front_merge(struct request *req, + struct bio *bio) +{ + return false; +} + static inline void blk_flush_integrity(void) { } @@ -166,7 +197,7 @@ static inline bool bio_integrity_endio(struct bio *bio) { return true; } -#endif +#endif /* CONFIG_BLK_DEV_INTEGRITY */ void blk_timeout_work(struct work_struct *work); unsigned long blk_rq_timeout(unsigned long timeout); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d6869e0e2b64..bc534c857344 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1843,26 +1843,6 @@ queue_max_integrity_segments(struct request_queue *q) return q->limits.max_integrity_segments; } -static inline bool integrity_req_gap_back_merge(struct request *req, - struct bio *next) -{ - struct bio_integrity_payload *bip = bio_integrity(req->bio); - struct bio_integrity_payload *bip_next = bio_integrity(next); - - return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], - bip_next->bip_vec[0].bv_offset); -} - -static inline bool integrity_req_gap_front_merge(struct request *req, - struct bio *bio) -{ - struct bio_integrity_payload *bip = bio_integrity(bio); - struct bio_integrity_payload *bip_next = bio_integrity(req->bio); - - return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], - bip_next->bip_vec[0].bv_offset); -} - /** * bio_integrity_intervals - Return number of integrity intervals for a bio * @bi: blk_integrity profile for device @@ -1947,17 +1927,6 @@ static inline bool blk_integrity_merge_bio(struct request_queue *rq, return true; } -static inline bool integrity_req_gap_back_merge(struct 
request *req, - struct bio *next) -{ - return false; -} -static inline bool integrity_req_gap_front_merge(struct request *req, - struct bio *bio) -{ - return false; -} - static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi, unsigned int sectors) { -- cgit From e9907009cbfc0c93d987d5a8fdf3d6c3c7b89717 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Sep 2018 09:43:48 +0200 Subject: block: move req_gap_{back,front}_merge to blk-merge.c Keep it close to the actual users instead of exposing the function to all drivers. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-merge.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 69 -------------------------------------------------- 2 files changed, 65 insertions(+), 69 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index aaec38cc37b8..ad8a226347a6 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -12,6 +12,71 @@ #include "blk.h" +/* + * Check if the two bvecs from two bios can be merged to one segment. If yes, + * no need to check gap between the two bios since the 1st bio and the 1st bvec + * in the 2nd bio can be handled in one segment. + */ +static inline bool bios_segs_mergeable(struct request_queue *q, + struct bio *prev, struct bio_vec *prev_last_bv, + struct bio_vec *next_first_bv) +{ + if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv)) + return false; + if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv)) + return false; + if (prev->bi_seg_back_size + next_first_bv->bv_len > + queue_max_segment_size(q)) + return false; + return true; +} + +static inline bool bio_will_gap(struct request_queue *q, + struct request *prev_rq, struct bio *prev, struct bio *next) +{ + struct bio_vec pb, nb; + + if (!bio_has_data(prev) || !queue_virt_boundary(q)) + return false; + + /* + * Don't merge if the 1st bio starts with non-zero offset, otherwise it + * is quite difficult to respect the sg gap limit. We work hard to + * merge a huge number of small single bios in case of mkfs. + */ + if (prev_rq) + bio_get_first_bvec(prev_rq->bio, &pb); + else + bio_get_first_bvec(prev, &pb); + if (pb.bv_offset) + return true; + + /* + * We don't need to worry about the situation that the merged segment + * ends in unaligned virt boundary: + * + * - if 'pb' ends aligned, the merged segment ends aligned + * - if 'pb' ends unaligned, the next bio must include + * one single bvec of 'nb', otherwise the 'nb' can't + * merge with 'pb' + */ + bio_get_last_bvec(prev, &pb); + bio_get_first_bvec(next, &nb); + if (bios_segs_mergeable(q, prev, &pb, &nb)) + return false; + return __bvec_gap_to_prev(q, &pb, nb.bv_offset); +} + +static inline bool req_gap_back_merge(struct request *req, struct bio *bio) +{ + return bio_will_gap(req->q, req, req->biotail, bio); +} + +static inline bool req_gap_front_merge(struct request *req, struct bio *bio) +{ + return bio_will_gap(req->q, NULL, bio, req->bio); +} + static struct bio *blk_bio_discard_split(struct request_queue *q, struct bio *bio, struct bio_set *bs, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bc534c857344..b7e676bb01bc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1695,75 +1695,6 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, return __bvec_gap_to_prev(q, bprv, offset); } -/* - * Check if the two bvecs from two bios can be merged to one segment. 
- * If yes, no need to check gap between the two bios since the 1st bio - * and the 1st bvec in the 2nd bio can be handled in one segment. - */ -static inline bool bios_segs_mergeable(struct request_queue *q, - struct bio *prev, struct bio_vec *prev_last_bv, - struct bio_vec *next_first_bv) -{ - if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv)) - return false; - if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv)) - return false; - if (prev->bi_seg_back_size + next_first_bv->bv_len > - queue_max_segment_size(q)) - return false; - return true; -} - -static inline bool bio_will_gap(struct request_queue *q, - struct request *prev_rq, - struct bio *prev, - struct bio *next) -{ - if (bio_has_data(prev) && queue_virt_boundary(q)) { - struct bio_vec pb, nb; - - /* - * don't merge if the 1st bio starts with non-zero - * offset, otherwise it is quite difficult to respect - * sg gap limit. We work hard to merge a huge number of small - * single bios in case of mkfs. - */ - if (prev_rq) - bio_get_first_bvec(prev_rq->bio, &pb); - else - bio_get_first_bvec(prev, &pb); - if (pb.bv_offset) - return true; - - /* - * We don't need to worry about the situation that the - * merged segment ends in unaligned virt boundary: - * - * - if 'pb' ends aligned, the merged segment ends aligned - * - if 'pb' ends unaligned, the next bio must include - * one single bvec of 'nb', otherwise the 'nb' can't - * merge with 'pb' - */ - bio_get_last_bvec(prev, &pb); - bio_get_first_bvec(next, &nb); - - if (!bios_segs_mergeable(q, prev, &pb, &nb)) - return __bvec_gap_to_prev(q, &pb, nb.bv_offset); - } - - return false; -} - -static inline bool req_gap_back_merge(struct request *req, struct bio *bio) -{ - return bio_will_gap(req->q, req, req->biotail, bio); -} - -static inline bool req_gap_front_merge(struct request *req, struct bio *bio) -{ - return bio_will_gap(req->q, NULL, bio, req->bio); -} - int kblockd_schedule_work(struct work_struct *work); int kblockd_schedule_work_on(int cpu, struct work_struct *work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); -- cgit From 27ca1d4ed04ea29dc77b47190a3cc82697023e76 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Sep 2018 09:43:49 +0200 Subject: block: move req_gap_back_merge to blk.h No need to expose these helpers outside the block layer. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk.h | 19 +++++++++++++++++++ include/linux/blkdev.h | 19 ------------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/block/blk.h b/block/blk.h index 441c2de1d4b9..63035c95689c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -149,6 +149,25 @@ static inline void blk_queue_enter_live(struct request_queue *q) percpu_ref_get(&q->q_usage_counter); } +static inline bool __bvec_gap_to_prev(struct request_queue *q, + struct bio_vec *bprv, unsigned int offset) +{ + return offset || + ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); +} + +/* + * Check if adding a bio_vec after bprv with offset would create a gap in + * the SG list. Most drivers don't care about this, but some do. 
+ */ +static inline bool bvec_gap_to_prev(struct request_queue *q, + struct bio_vec *bprv, unsigned int offset) +{ + if (!queue_virt_boundary(q)) + return false; + return __bvec_gap_to_prev(q, bprv, offset); +} + #ifdef CONFIG_BLK_DEV_INTEGRITY void blk_flush_integrity(void); bool __bio_integrity_endio(struct bio *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b7e676bb01bc..1d5e14139795 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1676,25 +1676,6 @@ static inline void put_dev_sector(Sector p) put_page(p.v); } -static inline bool __bvec_gap_to_prev(struct request_queue *q, - struct bio_vec *bprv, unsigned int offset) -{ - return offset || - ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); -} - -/* - * Check if adding a bio_vec after bprv with offset would create a gap in - * the SG list. Most drivers don't care about this, but some do. - */ -static inline bool bvec_gap_to_prev(struct request_queue *q, - struct bio_vec *bprv, unsigned int offset) -{ - if (!queue_virt_boundary(q)) - return false; - return __bvec_gap_to_prev(q, bprv, offset); -} - int kblockd_schedule_work(struct work_struct *work); int kblockd_schedule_work_on(int cpu, struct work_struct *work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); -- cgit From 6a9f5f240adfdced863a098d34f8f05ca6ab9d5f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Sep 2018 09:43:50 +0200 Subject: block: simplify BIOVEC_PHYS_MERGEABLE Turn the macro into an inline, move it to blk.h and simplify the arch hooks a bit. Also rename the function to biovec_phys_mergeable as there is no need to shout. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- arch/arm/include/asm/io.h | 5 ++--- arch/arm64/include/asm/io.h | 5 ++--- arch/x86/include/asm/io.h | 5 ++--- block/bio.c | 2 +- block/blk-integrity.c | 4 ++-- block/blk-merge.c | 10 +++++----- block/blk.h | 14 ++++++++++++++ include/linux/bio.h | 13 ------------- 8 files changed, 28 insertions(+), 30 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 2cfbc531f63b..3c835d6263fa 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -469,9 +469,8 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr); struct bio_vec; extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, const struct bio_vec *vec2); -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) +#define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)) #ifdef CONFIG_MMU #define ARCH_HAS_VALID_PHYS_ADDR_RANGE diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 35b2e50f17fb..774e03ea1bb0 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -208,9 +208,8 @@ extern int devmem_is_allowed(unsigned long pfn); struct bio_vec; extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, const struct bio_vec *vec2); -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) +#define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)) #endif /* __KERNEL__ */ #endif /* __ASM_IO_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 6de64840dd22..7c6106216d9c 100644 --- a/arch/x86/include/asm/io.h +++ 
b/arch/x86/include/asm/io.h @@ -376,9 +376,8 @@ struct bio_vec; extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, const struct bio_vec *vec2); -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) +#define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)) #endif /* CONFIG_XEN */ #define IO_SPACE_LIMIT 0xffff diff --git a/block/bio.c b/block/bio.c index 1cd47f218200..81d90b839e05 100644 --- a/block/bio.c +++ b/block/bio.c @@ -731,7 +731,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page } /* If we may be able to merge these biovecs, force a recount */ - if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) + if (bio->bi_vcnt > 1 && biovec_phys_mergeable(bvec-1, bvec)) bio_clear_flag(bio, BIO_SEG_VALID); done: diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 6121611e1316..0f7267916509 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -49,7 +49,7 @@ int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio) bio_for_each_integrity_vec(iv, bio, iter) { if (prev) { - if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv)) + if (!biovec_phys_mergeable(&ivprv, &iv)) goto new_segment; if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv)) @@ -95,7 +95,7 @@ int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio, bio_for_each_integrity_vec(iv, bio, iter) { if (prev) { - if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv)) + if (!biovec_phys_mergeable(&ivprv, &iv)) goto new_segment; if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv)) diff --git a/block/blk-merge.c b/block/blk-merge.c index ad8a226347a6..5e63e8259f92 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -21,7 +21,7 @@ static inline bool bios_segs_mergeable(struct request_queue *q, struct bio *prev, struct bio_vec *prev_last_bv, struct bio_vec *next_first_bv) { - if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv)) + if (!biovec_phys_mergeable(prev_last_bv, next_first_bv)) return false; if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv)) return false; @@ -199,7 +199,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, if (bvprvp && blk_queue_cluster(q)) { if (seg_size + bv.bv_len > queue_max_segment_size(q)) goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(bvprvp, &bv)) + if (!biovec_phys_mergeable(bvprvp, &bv)) goto new_segment; if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv)) goto new_segment; @@ -332,7 +332,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, if (seg_size + bv.bv_len > queue_max_segment_size(q)) goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv)) + if (!biovec_phys_mergeable(&bvprv, &bv)) goto new_segment; if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv)) goto new_segment; @@ -414,7 +414,7 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, bio_get_last_bvec(bio, &end_bv); bio_get_first_bvec(nxt, &nxt_bv); - if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv)) + if (!biovec_phys_mergeable(&end_bv, &nxt_bv)) return 0; /* @@ -439,7 +439,7 @@ __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, if ((*sg)->length + nbytes > queue_max_segment_size(q)) goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) + if (!biovec_phys_mergeable(bvprv, bvec)) goto new_segment; if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) goto new_segment; diff --git a/block/blk.h b/block/blk.h index 63035c95689c..aed99cbc1bca 100644 --- a/block/blk.h +++ 
b/block/blk.h @@ -149,6 +149,20 @@ static inline void blk_queue_enter_live(struct request_queue *q) percpu_ref_get(&q->q_usage_counter); } +#ifndef ARCH_BIOVEC_PHYS_MERGEABLE +#define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2) true +#endif + +static inline bool biovec_phys_mergeable(const struct bio_vec *vec1, + const struct bio_vec *vec2) +{ + if (bvec_to_phys(vec1) + vec1->bv_len != bvec_to_phys(vec2)) + return false; + if (!ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2)) + return false; + return true; +} + static inline bool __bvec_gap_to_prev(struct request_queue *q, struct bio_vec *bprv, unsigned int offset) { diff --git a/include/linux/bio.h b/include/linux/bio.h index e973876625a8..e2adb96346f0 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -140,19 +140,6 @@ static inline bool bio_full(struct bio *bio) /* * merge helpers etc */ - -/* Default implementation of BIOVEC_PHYS_MERGEABLE */ -#define __BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) - -/* - * allow arch override, for eg virtualized architectures (put in asm/io.h) - */ -#ifndef BIOVEC_PHYS_MERGEABLE -#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - __BIOVEC_PHYS_MERGEABLE(vec1, vec2) -#endif - #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ (((addr1) | (mask)) == (((addr2) - 1) | (mask))) #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ -- cgit From 0e253391a970300fe4ae69d0c1d1ab494eb07508 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Sep 2018 09:43:51 +0200 Subject: block: add a missing BIOVEC_SEG_BOUNDARY check in bio_add_pc_page The actual recalculation of segments in __blk_recalc_rq_segments will do this check, so there is no point in forcing it if we know it won't succeed. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index 81d90b839e05..c254e5aa331f 100644 --- a/block/bio.c +++ b/block/bio.c @@ -731,7 +731,9 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page } /* If we may be able to merge these biovecs, force a recount */ - if (bio->bi_vcnt > 1 && biovec_phys_mergeable(bvec-1, bvec)) + if (bio->bi_vcnt > 1 && + biovec_phys_mergeable(bvec - 1, bvec) && + BIOVEC_SEG_BOUNDARY(q, bvec - 1, bvec)) bio_clear_flag(bio, BIO_SEG_VALID); done: -- cgit From 3dccdae54fe836a22cee9dc6df9fd1708ae075ce Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Sep 2018 09:43:52 +0200 Subject: block: merge BIOVEC_SEG_BOUNDARY into biovec_phys_mergeable These two checks should always be performed together, so merge them into a single helper.
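The combined predicate is easiest to see with the bio_vec plumbing stripped away. The following stand-alone C sketch is illustrative only (hypothetical names, plain integers instead of struct bio_vec and struct request_queue); it mirrors the two conditions that the merged helper applies in one place:

#include <stdbool.h>
#include <stdint.h>

/*
 * Two ranges may share a DMA segment iff they are physically contiguous
 * and do not straddle the controller's segment boundary. ORing an address
 * with the boundary mask maps every byte within one boundary window to
 * the same value, so comparing the first byte of the merged span with its
 * last byte detects a crossing. Example: with a 64K boundary (mask
 * 0xffff), a 16-byte range at 0x1fff0 followed by a range ending at
 * 0x20fff gives 0x1ffff vs 0x2ffff, so the two must not be merged.
 */
static bool phys_mergeable(uint64_t addr1, uint32_t len1,
                           uint64_t addr2, uint32_t len2,
                           uint64_t boundary_mask)
{
	if (addr1 + len1 != addr2)	/* not physically contiguous */
		return false;
	return (addr1 | boundary_mask) ==
	       ((addr2 + len2 - 1) | boundary_mask);
}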
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio.c | 4 +--- block/blk-integrity.c | 12 ++---------- block/blk-merge.c | 29 +++++------------------------ block/blk.h | 12 +++++++++--- include/linux/bio.h | 8 -------- 5 files changed, 17 insertions(+), 48 deletions(-) diff --git a/block/bio.c b/block/bio.c index c254e5aa331f..e9f92b50724d 100644 --- a/block/bio.c +++ b/block/bio.c @@ -731,9 +731,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page } /* If we may be able to merge these biovecs, force a recount */ - if (bio->bi_vcnt > 1 && - biovec_phys_mergeable(bvec - 1, bvec) && - BIOVEC_SEG_BOUNDARY(q, bvec - 1, bvec)) + if (bio->bi_vcnt > 1 && biovec_phys_mergeable(q, bvec - 1, bvec)) bio_clear_flag(bio, BIO_SEG_VALID); done: diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 0f7267916509..d1ab089e0919 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -49,12 +49,8 @@ int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio) bio_for_each_integrity_vec(iv, bio, iter) { if (prev) { - if (!biovec_phys_mergeable(&ivprv, &iv)) + if (!biovec_phys_mergeable(q, &ivprv, &iv)) goto new_segment; - - if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv)) - goto new_segment; - if (seg_size + iv.bv_len > queue_max_segment_size(q)) goto new_segment; @@ -95,12 +91,8 @@ int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio, bio_for_each_integrity_vec(iv, bio, iter) { if (prev) { - if (!biovec_phys_mergeable(&ivprv, &iv)) + if (!biovec_phys_mergeable(q, &ivprv, &iv)) goto new_segment; - - if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv)) - goto new_segment; - if (sg->length + iv.bv_len > queue_max_segment_size(q)) goto new_segment; diff --git a/block/blk-merge.c b/block/blk-merge.c index 5e63e8259f92..42a46744c11b 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -21,9 +21,7 @@ static inline bool bios_segs_mergeable(struct request_queue *q, struct bio *prev, struct bio_vec *prev_last_bv, struct bio_vec *next_first_bv) { - if (!biovec_phys_mergeable(prev_last_bv, next_first_bv)) - return false; - if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv)) + if (!biovec_phys_mergeable(q, prev_last_bv, next_first_bv)) return false; if (prev->bi_seg_back_size + next_first_bv->bv_len > queue_max_segment_size(q)) @@ -199,9 +197,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, if (bvprvp && blk_queue_cluster(q)) { if (seg_size + bv.bv_len > queue_max_segment_size(q)) goto new_segment; - if (!biovec_phys_mergeable(bvprvp, &bv)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv)) + if (!biovec_phys_mergeable(q, bvprvp, &bv)) goto new_segment; seg_size += bv.bv_len; @@ -332,9 +328,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, if (seg_size + bv.bv_len > queue_max_segment_size(q)) goto new_segment; - if (!biovec_phys_mergeable(&bvprv, &bv)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv)) + if (!biovec_phys_mergeable(q, &bvprv, &bv)) goto new_segment; seg_size += bv.bv_len; @@ -414,17 +408,7 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, bio_get_last_bvec(bio, &end_bv); bio_get_first_bvec(nxt, &nxt_bv); - if (!biovec_phys_mergeable(&end_bv, &nxt_bv)) - return 0; - - /* - * bio and nxt are contiguous in memory; check if the queue allows - * these two to be merged into one - */ - if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv)) - return 1; - - return 0; + return biovec_phys_mergeable(q, &end_bv, &nxt_bv); } static inline 
void @@ -438,10 +422,7 @@ __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, if (*sg && *cluster) { if ((*sg)->length + nbytes > queue_max_segment_size(q)) goto new_segment; - - if (!biovec_phys_mergeable(bvprv, bvec)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) + if (!biovec_phys_mergeable(q, bvprv, bvec)) goto new_segment; (*sg)->length += nbytes; diff --git a/block/blk.h b/block/blk.h index aed99cbc1bca..8f7229b6f63e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -153,13 +153,19 @@ static inline void blk_queue_enter_live(struct request_queue *q) #define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2) true #endif -static inline bool biovec_phys_mergeable(const struct bio_vec *vec1, - const struct bio_vec *vec2) +static inline bool biovec_phys_mergeable(struct request_queue *q, + struct bio_vec *vec1, struct bio_vec *vec2) { - if (bvec_to_phys(vec1) + vec1->bv_len != bvec_to_phys(vec2)) + unsigned long mask = queue_segment_boundary(q); + phys_addr_t addr1 = bvec_to_phys(vec1); + phys_addr_t addr2 = bvec_to_phys(vec2); + + if (addr1 + vec1->bv_len != addr2) return false; if (!ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2)) return false; + if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask)) + return false; return true; } diff --git a/include/linux/bio.h b/include/linux/bio.h index e2adb96346f0..9b580e1cb2e8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -137,14 +137,6 @@ static inline bool bio_full(struct bio *bio) */ #define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset) -/* - * merge helpers etc - */ -#define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ - (((addr1) | (mask)) == (((addr2) - 1) | (mask))) -#define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ - __BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q))) - /* * drivers should _never_ use the all version - the bio may have been split * before it got to the driver and the driver won't own all of it -- cgit From 6e768461c215eaf8912e6c23e40fdff1cd962aca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Sep 2018 09:43:53 +0200 Subject: block: remove bvec_to_phys We only use it in biovec_phys_mergeable and a m68k paravirt driver, so just opencode it there. Also remove the pointless unsigned long cast for the offset in the opencoded instances. 
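For orientation, the expression being open-coded computes the physical address of the first byte a bio_vec describes. A stand-alone restatement (illustrative names only, 4K pages assumed):

#include <stdint.h>

#define PAGE_SHIFT 12	/* assumption: 4K pages */

/* page_to_phys() reduced to page-frame-number arithmetic. */
static inline uint64_t pfn_to_phys(uint64_t pfn)
{
	return pfn << PAGE_SHIFT;
}

/* What bvec_to_phys(bv) expanded to: the physical base address of the
 * page holding the data, plus the data's offset within that page. */
static inline uint64_t bvec_start_phys(uint64_t pfn, uint32_t bv_offset)
{
	return pfn_to_phys(pfn) + bv_offset;
}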
Signed-off-by: Christoph Hellwig Reviewed-by: Geert Uytterhoeven Signed-off-by: Jens Axboe --- arch/m68k/emu/nfblock.c | 2 +- block/blk.h | 4 ++-- include/linux/bio.h | 5 ----- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index e9110b9b8bcd..38049357d6d3 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -73,7 +73,7 @@ static blk_qc_t nfhd_make_request(struct request_queue *queue, struct bio *bio) len = bvec.bv_len; len >>= 9; nfhd_read_write(dev->id, 0, dir, sec >> shift, len >> shift, - bvec_to_phys(&bvec)); + page_to_phys(bvec.bv_page) + bvec.bv_offset); sec += len; } bio_endio(bio); diff --git a/block/blk.h b/block/blk.h index 8f7229b6f63e..50f74ce60453 100644 --- a/block/blk.h +++ b/block/blk.h @@ -157,8 +157,8 @@ static inline bool biovec_phys_mergeable(struct request_queue *q, struct bio_vec *vec1, struct bio_vec *vec2) { unsigned long mask = queue_segment_boundary(q); - phys_addr_t addr1 = bvec_to_phys(vec1); - phys_addr_t addr2 = bvec_to_phys(vec2); + phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset; + phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset; if (addr1 + vec1->bv_len != addr2) return false; diff --git a/include/linux/bio.h b/include/linux/bio.h index 9b580e1cb2e8..9ad4b0a487a4 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -132,11 +132,6 @@ static inline bool bio_full(struct bio *bio) return bio->bi_vcnt >= bio->bi_max_vecs; } -/* - * will die - */ -#define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset) - /* * drivers should _never_ use the all version - the bio may have been split * before it got to the driver and the driver won't own all of it -- cgit From bceacbfa48bf132d8d88058904cf5859eeb4f3e6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Sep 2018 09:43:54 +0200 Subject: block: don't include io.h from bio.h Now that we don't need an override for BIOVEC_PHYS_MERGEABLE there is no need to drag this header in. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index 9ad4b0a487a4..b3d47862b1b4 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -24,9 +24,6 @@ #include #ifdef CONFIG_BLOCK - -#include - /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */ #include -- cgit From 65969e5cb249b765cee261c8f0458ec778b0c22f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Sep 2018 09:43:55 +0200 Subject: block: don't include bug.h from bio.h No need to pull in the BUG() definition.
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index b3d47862b1b4..f447b0ebb288 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -21,7 +21,6 @@ #include #include #include -#include #ifdef CONFIG_BLOCK /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */ -- cgit From a5bb207ada5b6631906818ed0b7f62a5475cafcc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 Sep 2018 13:30:05 -0700 Subject: arm: remove the unused BIOVEC_MERGEABLE define Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- arch/arm/include/asm/io.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 3c835d6263fa..e58ca25eddb7 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -459,13 +459,6 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr); #include -/* - * can the hardware map this into one segment or not, given no other - * constraints. - */ -#define BIOVEC_MERGEABLE(vec1, vec2) \ - ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) - struct bio_vec; extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, const struct bio_vec *vec2); -- cgit From cd11b1d344784b1a4c0d7aa18edc0830fd3989d1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 Sep 2018 13:30:06 -0700 Subject: xen: remove the xen_biovec_phys_mergeable export BIOVEC_PHYS_MERGEABLE is only called from core block code. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/xen/biomerge.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c index 55ed80c3a17c..399c4e30f723 100644 --- a/drivers/xen/biomerge.c +++ b/drivers/xen/biomerge.c @@ -20,4 +20,3 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, return false; #endif } -EXPORT_SYMBOL(xen_biovec_phys_mergeable); -- cgit From 20e3267601f95ff62d7a3116a17a680e9f5cbcc9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 Sep 2018 13:30:07 -0700 Subject: xen: provide a prototype for xen_biovec_phys_mergeable in xen.h Having multiple externs in arch headers is not a good way to provide a common interface. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- arch/arm/include/asm/io.h | 3 --- arch/arm64/include/asm/io.h | 3 --- arch/x86/include/asm/io.h | 4 ---- include/xen/xen.h | 4 ++++ 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index e58ca25eddb7..cf5cd88e7289 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -459,9 +459,6 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr); #include -struct bio_vec; -extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, - const struct bio_vec *vec2); #define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)) diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 774e03ea1bb0..06119ee511cd 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -205,9 +205,6 @@ extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); extern int devmem_is_allowed(unsigned long pfn); -struct bio_vec; -extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, - const struct bio_vec *vec2); #define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 7c6106216d9c..abdb501a551d 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -371,10 +371,6 @@ extern bool is_early_ioremap_ptep(pte_t *ptep); #ifdef CONFIG_XEN #include -struct bio_vec; - -extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, - const struct bio_vec *vec2); #define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)) diff --git a/include/xen/xen.h b/include/xen/xen.h index 1e1d9bd0bd37..d7a2678da77f 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -39,4 +39,8 @@ extern uint32_t xen_start_flags; #define xen_initial_domain() (0) #endif /* CONFIG_XEN_DOM0 */ +struct bio_vec; +bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, + const struct bio_vec *vec2); + #endif /* _XEN_XEN_H */ -- cgit From c39ae60dfbda66922f644193b91850abcd4d588c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 Sep 2018 13:30:08 -0700 Subject: block: remove ARCH_BIOVEC_PHYS_MERGEABLE Take the Xen check into the core code instead of delegating it to the architectures. 
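The end state is worth spelling out: the per-arch ARCH_BIOVEC_PHYS_MERGEABLE override disappears, and its only real user, Xen, is tested at runtime inside the one core predicate. A stand-alone sketch in the same illustrative style as above (hypothetical names; in the kernel the test is xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2)):

#include <stdbool.h>
#include <stdint.h>

/* Stand-ins for the Xen hooks. Under Xen, ranges that look contiguous in
 * pseudo-physical address space may be backed by discontiguous machine
 * frames, so an extra check is needed there. */
static bool running_under_xen(void)
{
	return false;
}

static bool xen_ranges_mergeable(uint64_t addr1, uint64_t addr2)
{
	(void)addr1;
	(void)addr2;
	return true;
}

static bool phys_mergeable(uint64_t addr1, uint32_t len1,
                           uint64_t addr2, uint32_t len2, uint64_t mask)
{
	if (addr1 + len1 != addr2)
		return false;
	if (running_under_xen() && !xen_ranges_mergeable(addr1, addr2))
		return false;
	return (addr1 | mask) == ((addr2 + len2 - 1) | mask);
}

The trade is a cheap runtime branch on non-Xen kernels in exchange for deleting three identical per-arch macro definitions.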
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- arch/arm/include/asm/io.h | 3 --- arch/arm64/include/asm/io.h | 3 --- arch/x86/include/asm/io.h | 3 --- block/blk.h | 7 ++----- drivers/xen/biomerge.c | 2 +- 5 files changed, 3 insertions(+), 15 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index cf5cd88e7289..6ae7674da0d6 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -459,9 +459,6 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr); #include -#define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)) - #ifdef CONFIG_MMU #define ARCH_HAS_VALID_PHYS_ADDR_RANGE extern int valid_phys_addr_range(phys_addr_t addr, size_t size); diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 06119ee511cd..18f6ae6a43f1 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -205,8 +205,5 @@ extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); extern int devmem_is_allowed(unsigned long pfn); -#define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)) - #endif /* __KERNEL__ */ #endif /* __ASM_IO_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index abdb501a551d..232d8e9ee8a0 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -371,9 +371,6 @@ extern bool is_early_ioremap_ptep(pte_t *ptep); #ifdef CONFIG_XEN #include - -#define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)) #endif /* CONFIG_XEN */ #define IO_SPACE_LIMIT 0xffff diff --git a/block/blk.h b/block/blk.h index 50f74ce60453..58c030f727e9 100644 --- a/block/blk.h +++ b/block/blk.h @@ -4,6 +4,7 @@ #include #include +#include #include "blk-mq.h" /* Amount of time in which a process may batch requests */ @@ -149,10 +150,6 @@ static inline void blk_queue_enter_live(struct request_queue *q) percpu_ref_get(&q->q_usage_counter); } -#ifndef ARCH_BIOVEC_PHYS_MERGEABLE -#define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2) true -#endif - static inline bool biovec_phys_mergeable(struct request_queue *q, struct bio_vec *vec1, struct bio_vec *vec2) { @@ -162,7 +159,7 @@ static inline bool biovec_phys_mergeable(struct request_queue *q, if (addr1 + vec1->bv_len != addr2) return false; - if (!ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2)) + if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2)) return false; if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask)) return false; diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c index 399c4e30f723..f3fbb700f569 100644 --- a/drivers/xen/biomerge.c +++ b/drivers/xen/biomerge.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include #include +#include #include bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, -- cgit From 3cfa210bf3fe0803cca17f3775d8cf2360d5f443 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 Sep 2018 13:30:09 -0700 Subject: xen: don't include <xen/xen.h> from <asm/io.h> and <asm/dma-mapping.h> Nothing Xen specific in these headers, which get included from a lot of code in the kernel. So prune the includes and move them to the Xen-specific files that actually use them instead.
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- arch/arm/include/asm/io.h | 1 - arch/arm64/include/asm/io.h | 2 -- arch/x86/include/asm/io.h | 4 ---- arch/x86/include/asm/xen/events.h | 2 ++ arch/x86/xen/enlighten.c | 1 + arch/x86/xen/enlighten_pvh.c | 1 + arch/x86/xen/platform-pci-unplug.c | 1 + arch/x86/xen/pmu.c | 1 + drivers/xen/xen-acpi-pad.c | 1 + 9 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 6ae7674da0d6..6b51826ab3d1 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -28,7 +28,6 @@ #include #include #include -#include /* * ISA I/O bus memory addresses are 1:1 with the physical address. */ diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 18f6ae6a43f1..9f8b915af3a7 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -31,8 +31,6 @@ #include #include -#include - /* * Generic IO read/write. These perform native-endian accesses. */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 232d8e9ee8a0..9a92a3ac2ac5 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -369,10 +369,6 @@ extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size); extern bool is_early_ioremap_ptep(pte_t *ptep); -#ifdef CONFIG_XEN -#include -#endif /* CONFIG_XEN */ - #define IO_SPACE_LIMIT 0xffff #include diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index d383140e1dc8..068d9b067c83 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_XEN_EVENTS_H #define _ASM_X86_XEN_EVENTS_H +#include + enum ipi_vector { XEN_RESCHEDULE_VECTOR, XEN_CALL_FUNCTION_VECTOR, diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 2eeddd814653..0ca46e03b830 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index c85d1a88f476..2a9025343534 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 33a783c77d96..b99585034dd2 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -23,6 +23,7 @@ #include #include +#include #include #include "xen-ops.h" diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 7d00d4ad44d4..5f7d530fc679 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/drivers/xen/xen-acpi-pad.c b/drivers/xen/xen-acpi-pad.c index 23d1808fe027..e25ab76b9c99 100644 --- a/drivers/xen/xen-acpi-pad.c +++ b/drivers/xen/xen-acpi-pad.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include -- cgit From bca6b067b0b269a7b8ba129e2a918309ca8b4a55 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 26 Sep 2018 14:01:03 -0700 Subject: block: Move power management code into a new source file Move the code for runtime power management from blk-core.c into the new source file blk-pm.c. Move the corresponding declarations from <linux/blkdev.h> into <linux/blk-pm.h>. For CONFIG_PM=n, leave out the declarations of the functions that are not used in that mode.
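To make the intended use of the relocated API concrete, a request-based driver would wire it up roughly as follows. This is a hedged sketch, not part of the patch: mydrv, mydrv_power_down() and mydrv_power_up() are invented, and error handling is trimmed. The call placement follows the kerneldoc of the functions being moved.

/* probe: after the queue exists and before any I/O is issued */
static void mydrv_setup_pm(struct mydrv *md, struct device *dev)
{
	blk_pm_runtime_init(md->queue, dev);
	/* the block layer leaves the autosuspend delay at -1; pick one */
	pm_runtime_set_autosuspend_delay(dev, 5000);
}

static int mydrv_runtime_suspend(struct device *dev)
{
	struct mydrv *md = dev_get_drvdata(dev);
	int err;

	err = blk_pre_runtime_suspend(md->queue);
	if (err)
		return err;	/* requests pending: stay active */
	err = mydrv_power_down(md);
	blk_post_runtime_suspend(md->queue, err);
	return err;
}

static int mydrv_runtime_resume(struct device *dev)
{
	struct mydrv *md = dev_get_drvdata(dev);
	int err;

	blk_pre_runtime_resume(md->queue);
	err = mydrv_power_up(md);
	blk_post_runtime_resume(md->queue, err);
	return err;
}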
This patch not only reduces the number of #ifdefs in the block layer core code but also reduces the size of header file <linux/blkdev.h> and hence should help to reduce the build time of the Linux kernel if CONFIG_PM is not defined. Signed-off-by: Bart Van Assche Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Cc: Jianchao Wang Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: Alan Stern Signed-off-by: Jens Axboe --- block/Kconfig | 3 + block/Makefile | 1 + block/blk-core.c | 196 +------------------------------------------------ block/blk-pm.c | 188 +++++++++++++++++++++++++++++++++++++++++++++++ block/blk-pm.h | 43 +++++++++++ block/elevator.c | 22 +----- drivers/scsi/scsi_pm.c | 1 + drivers/scsi/sd.c | 1 + drivers/scsi/sr.c | 1 + include/linux/blk-pm.h | 24 ++++++ include/linux/blkdev.h | 23 ------ 11 files changed, 264 insertions(+), 239 deletions(-) create mode 100644 block/blk-pm.c create mode 100644 block/blk-pm.h create mode 100644 include/linux/blk-pm.h diff --git a/block/Kconfig b/block/Kconfig index 1f2469a0123c..85263e7bded6 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -228,4 +228,7 @@ config BLK_MQ_RDMA depends on BLOCK && INFINIBAND default y +config BLK_PM + def_bool BLOCK && PM + source block/Kconfig.iosched diff --git a/block/Makefile b/block/Makefile index 572b33f32c07..27eac600474f 100644 --- a/block/Makefile +++ b/block/Makefile @@ -37,3 +37,4 @@ obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o +obj-$(CONFIG_BLK_PM) += blk-pm.o diff --git a/block/blk-core.c b/block/blk-core.c index 4dbc93f43b38..6d4dd176bd9d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -42,6 +42,7 @@ #include "blk.h" #include "blk-mq.h" #include "blk-mq-sched.h" +#include "blk-pm.h" #include "blk-rq-qos.h" #ifdef CONFIG_DEBUG_FS @@ -1726,16 +1727,6 @@ void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part) } EXPORT_SYMBOL_GPL(part_round_stats); -#ifdef CONFIG_PM -static void blk_pm_put_request(struct request *rq) -{ - if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending) - pm_runtime_mark_last_busy(rq->q->dev); -} -#else -static inline void blk_pm_put_request(struct request *rq) {} -#endif - void __blk_put_request(struct request_queue *q, struct request *req) { req_flags_t rq_flags = req->rq_flags; @@ -3757,191 +3748,6 @@ void blk_finish_plug(struct blk_plug *plug) } EXPORT_SYMBOL(blk_finish_plug); -#ifdef CONFIG_PM -/** - * blk_pm_runtime_init - Block layer runtime PM initialization routine - * @q: the queue of the device - * @dev: the device the queue belongs to - * - * Description: - * Initialize runtime-PM-related fields for @q and start auto suspend for - * @dev. Drivers that want to take advantage of request-based runtime PM - * should call this function after @dev has been initialized, and its - * request queue @q has been allocated, and runtime PM for it can not happen - * yet(either due to disabled/forbidden or its usage_count > 0). In most - * cases, driver should call this function before any I/O has taken place. - * - * This function takes care of setting up using auto suspend for the device, - * the autosuspend delay is set to -1 to make runtime suspend impossible - * until an updated value is either set by user or by driver. Drivers do - not need to touch other autosuspend settings.
- * - * The block layer runtime PM is request based, so only works for drivers - * that use request as their IO unit instead of those directly use bio's. - */ -void blk_pm_runtime_init(struct request_queue *q, struct device *dev) -{ - /* Don't enable runtime PM for blk-mq until it is ready */ - if (q->mq_ops) { - pm_runtime_disable(dev); - return; - } - - q->dev = dev; - q->rpm_status = RPM_ACTIVE; - pm_runtime_set_autosuspend_delay(q->dev, -1); - pm_runtime_use_autosuspend(q->dev); -} -EXPORT_SYMBOL(blk_pm_runtime_init); - -/** - * blk_pre_runtime_suspend - Pre runtime suspend check - * @q: the queue of the device - * - * Description: - * This function will check if runtime suspend is allowed for the device - * by examining if there are any requests pending in the queue. If there - * are requests pending, the device can not be runtime suspended; otherwise, - * the queue's status will be updated to SUSPENDING and the driver can - * proceed to suspend the device. - * - * For the not allowed case, we mark last busy for the device so that - * runtime PM core will try to autosuspend it some time later. - * - * This function should be called near the start of the device's - * runtime_suspend callback. - * - * Return: - * 0 - OK to runtime suspend the device - * -EBUSY - Device should not be runtime suspended - */ -int blk_pre_runtime_suspend(struct request_queue *q) -{ - int ret = 0; - - if (!q->dev) - return ret; - - spin_lock_irq(q->queue_lock); - if (q->nr_pending) { - ret = -EBUSY; - pm_runtime_mark_last_busy(q->dev); - } else { - q->rpm_status = RPM_SUSPENDING; - } - spin_unlock_irq(q->queue_lock); - return ret; -} -EXPORT_SYMBOL(blk_pre_runtime_suspend); - -/** - * blk_post_runtime_suspend - Post runtime suspend processing - * @q: the queue of the device - * @err: return value of the device's runtime_suspend function - * - * Description: - * Update the queue's runtime status according to the return value of the - * device's runtime suspend function and mark last busy for the device so - * that PM core will try to auto suspend the device at a later time. - * - * This function should be called near the end of the device's - * runtime_suspend callback. - */ -void blk_post_runtime_suspend(struct request_queue *q, int err) -{ - if (!q->dev) - return; - - spin_lock_irq(q->queue_lock); - if (!err) { - q->rpm_status = RPM_SUSPENDED; - } else { - q->rpm_status = RPM_ACTIVE; - pm_runtime_mark_last_busy(q->dev); - } - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_post_runtime_suspend); - -/** - * blk_pre_runtime_resume - Pre runtime resume processing - * @q: the queue of the device - * - * Description: - * Update the queue's runtime status to RESUMING in preparation for the - * runtime resume of the device. - * - * This function should be called near the start of the device's - * runtime_resume callback. - */ -void blk_pre_runtime_resume(struct request_queue *q) -{ - if (!q->dev) - return; - - spin_lock_irq(q->queue_lock); - q->rpm_status = RPM_RESUMING; - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_pre_runtime_resume); - -/** - * blk_post_runtime_resume - Post runtime resume processing - * @q: the queue of the device - * @err: return value of the device's runtime_resume function - * - * Description: - * Update the queue's runtime status according to the return value of the - * device's runtime_resume function. 
If it is successfully resumed, process - * the requests that are queued into the device's queue when it is resuming - * and then mark last busy and initiate autosuspend for it. - * - * This function should be called near the end of the device's - * runtime_resume callback. - */ -void blk_post_runtime_resume(struct request_queue *q, int err) -{ - if (!q->dev) - return; - - spin_lock_irq(q->queue_lock); - if (!err) { - q->rpm_status = RPM_ACTIVE; - __blk_run_queue(q); - pm_runtime_mark_last_busy(q->dev); - pm_request_autosuspend(q->dev); - } else { - q->rpm_status = RPM_SUSPENDED; - } - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_post_runtime_resume); - -/** - * blk_set_runtime_active - Force runtime status of the queue to be active - * @q: the queue of the device - * - * If the device is left runtime suspended during system suspend the resume - * hook typically resumes the device and corrects runtime status - * accordingly. However, that does not affect the queue runtime PM status - * which is still "suspended". This prevents processing requests from the - * queue. - * - * This function can be used in driver's resume hook to correct queue - * runtime PM status and re-enable peeking requests from the queue. It - * should be called before first request is added to the queue. - */ -void blk_set_runtime_active(struct request_queue *q) -{ - spin_lock_irq(q->queue_lock); - q->rpm_status = RPM_ACTIVE; - pm_runtime_mark_last_busy(q->dev); - pm_request_autosuspend(q->dev); - spin_unlock_irq(q->queue_lock); -} -EXPORT_SYMBOL(blk_set_runtime_active); -#endif - int __init blk_dev_init(void) { BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS)); diff --git a/block/blk-pm.c b/block/blk-pm.c new file mode 100644 index 000000000000..9b636960d285 --- /dev/null +++ b/block/blk-pm.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +/** + * blk_pm_runtime_init - Block layer runtime PM initialization routine + * @q: the queue of the device + * @dev: the device the queue belongs to + * + * Description: + * Initialize runtime-PM-related fields for @q and start auto suspend for + * @dev. Drivers that want to take advantage of request-based runtime PM + * should call this function after @dev has been initialized, and its + * request queue @q has been allocated, and runtime PM for it can not happen + * yet(either due to disabled/forbidden or its usage_count > 0). In most + * cases, driver should call this function before any I/O has taken place. + * + * This function takes care of setting up using auto suspend for the device, + * the autosuspend delay is set to -1 to make runtime suspend impossible + * until an updated value is either set by user or by driver. Drivers do + * not need to touch other autosuspend settings. + * + * The block layer runtime PM is request based, so only works for drivers + * that use request as their IO unit instead of those directly use bio's. 
+ */ +void blk_pm_runtime_init(struct request_queue *q, struct device *dev) +{ + /* Don't enable runtime PM for blk-mq until it is ready */ + if (q->mq_ops) { + pm_runtime_disable(dev); + return; + } + + q->dev = dev; + q->rpm_status = RPM_ACTIVE; + pm_runtime_set_autosuspend_delay(q->dev, -1); + pm_runtime_use_autosuspend(q->dev); +} +EXPORT_SYMBOL(blk_pm_runtime_init); + +/** + * blk_pre_runtime_suspend - Pre runtime suspend check + * @q: the queue of the device + * + * Description: + * This function will check if runtime suspend is allowed for the device + * by examining if there are any requests pending in the queue. If there + * are requests pending, the device can not be runtime suspended; otherwise, + * the queue's status will be updated to SUSPENDING and the driver can + * proceed to suspend the device. + * + * For the not allowed case, we mark last busy for the device so that + * runtime PM core will try to autosuspend it some time later. + * + * This function should be called near the start of the device's + * runtime_suspend callback. + * + * Return: + * 0 - OK to runtime suspend the device + * -EBUSY - Device should not be runtime suspended + */ +int blk_pre_runtime_suspend(struct request_queue *q) +{ + int ret = 0; + + if (!q->dev) + return ret; + + spin_lock_irq(q->queue_lock); + if (q->nr_pending) { + ret = -EBUSY; + pm_runtime_mark_last_busy(q->dev); + } else { + q->rpm_status = RPM_SUSPENDING; + } + spin_unlock_irq(q->queue_lock); + return ret; +} +EXPORT_SYMBOL(blk_pre_runtime_suspend); + +/** + * blk_post_runtime_suspend - Post runtime suspend processing + * @q: the queue of the device + * @err: return value of the device's runtime_suspend function + * + * Description: + * Update the queue's runtime status according to the return value of the + * device's runtime suspend function and mark last busy for the device so + * that PM core will try to auto suspend the device at a later time. + * + * This function should be called near the end of the device's + * runtime_suspend callback. + */ +void blk_post_runtime_suspend(struct request_queue *q, int err) +{ + if (!q->dev) + return; + + spin_lock_irq(q->queue_lock); + if (!err) { + q->rpm_status = RPM_SUSPENDED; + } else { + q->rpm_status = RPM_ACTIVE; + pm_runtime_mark_last_busy(q->dev); + } + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_post_runtime_suspend); + +/** + * blk_pre_runtime_resume - Pre runtime resume processing + * @q: the queue of the device + * + * Description: + * Update the queue's runtime status to RESUMING in preparation for the + * runtime resume of the device. + * + * This function should be called near the start of the device's + * runtime_resume callback. + */ +void blk_pre_runtime_resume(struct request_queue *q) +{ + if (!q->dev) + return; + + spin_lock_irq(q->queue_lock); + q->rpm_status = RPM_RESUMING; + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_pre_runtime_resume); + +/** + * blk_post_runtime_resume - Post runtime resume processing + * @q: the queue of the device + * @err: return value of the device's runtime_resume function + * + * Description: + * Update the queue's runtime status according to the return value of the + * device's runtime_resume function. If it is successfully resumed, process + * the requests that are queued into the device's queue when it is resuming + * and then mark last busy and initiate autosuspend for it. + * + * This function should be called near the end of the device's + * runtime_resume callback. 
+ */ +void blk_post_runtime_resume(struct request_queue *q, int err) +{ + if (!q->dev) + return; + + spin_lock_irq(q->queue_lock); + if (!err) { + q->rpm_status = RPM_ACTIVE; + __blk_run_queue(q); + pm_runtime_mark_last_busy(q->dev); + pm_request_autosuspend(q->dev); + } else { + q->rpm_status = RPM_SUSPENDED; + } + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_post_runtime_resume); + +/** + * blk_set_runtime_active - Force runtime status of the queue to be active + * @q: the queue of the device + * + * If the device is left runtime suspended during system suspend the resume + * hook typically resumes the device and corrects runtime status + * accordingly. However, that does not affect the queue runtime PM status + * which is still "suspended". This prevents processing requests from the + * queue. + * + * This function can be used in driver's resume hook to correct queue + * runtime PM status and re-enable peeking requests from the queue. It + * should be called before first request is added to the queue. + */ +void blk_set_runtime_active(struct request_queue *q) +{ + spin_lock_irq(q->queue_lock); + q->rpm_status = RPM_ACTIVE; + pm_runtime_mark_last_busy(q->dev); + pm_request_autosuspend(q->dev); + spin_unlock_irq(q->queue_lock); +} +EXPORT_SYMBOL(blk_set_runtime_active); diff --git a/block/blk-pm.h b/block/blk-pm.h new file mode 100644 index 000000000000..1ffc8ef203ec --- /dev/null +++ b/block/blk-pm.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _BLOCK_BLK_PM_H_ +#define _BLOCK_BLK_PM_H_ + +#include + +#ifdef CONFIG_PM +static inline void blk_pm_requeue_request(struct request *rq) +{ + if (rq->q->dev && !(rq->rq_flags & RQF_PM)) + rq->q->nr_pending--; +} + +static inline void blk_pm_add_request(struct request_queue *q, + struct request *rq) +{ + if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 && + (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING)) + pm_request_resume(q->dev); +} + +static inline void blk_pm_put_request(struct request *rq) +{ + if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending) + pm_runtime_mark_last_busy(rq->q->dev); +} +#else +static inline void blk_pm_requeue_request(struct request *rq) +{ +} + +static inline void blk_pm_add_request(struct request_queue *q, + struct request *rq) +{ +} + +static inline void blk_pm_put_request(struct request *rq) +{ +} +#endif + +#endif /* _BLOCK_BLK_PM_H_ */ diff --git a/block/elevator.c b/block/elevator.c index 6a06b5d040e5..e18ac68626e3 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -41,6 +41,7 @@ #include "blk.h" #include "blk-mq-sched.h" +#include "blk-pm.h" #include "blk-wbt.h" static DEFINE_SPINLOCK(elv_list_lock); @@ -557,27 +558,6 @@ void elv_bio_merged(struct request_queue *q, struct request *rq, e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio); } -#ifdef CONFIG_PM -static void blk_pm_requeue_request(struct request *rq) -{ - if (rq->q->dev && !(rq->rq_flags & RQF_PM)) - rq->q->nr_pending--; -} - -static void blk_pm_add_request(struct request_queue *q, struct request *rq) -{ - if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 && - (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING)) - pm_request_resume(q->dev); -} -#else -static inline void blk_pm_requeue_request(struct request *rq) {} -static inline void blk_pm_add_request(struct request_queue *q, - struct request *rq) -{ -} -#endif - void elv_requeue_request(struct request_queue *q, struct request *rq) { /* diff --git a/drivers/scsi/scsi_pm.c 
index b44c1bb687a2..a2b4179bfdf7 100644
--- a/drivers/scsi/scsi_pm.c
+++ b/drivers/scsi/scsi_pm.c
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include <linux/blk-pm.h>
 #include
 #include
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index b79b366a94f7..64514e8359e4 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -45,6 +45,7 @@
 #include
 #include
 #include
+#include <linux/blk-pm.h>
 #include
 #include
 #include
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index d0389b20574d..4f07b3410595 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -43,6 +43,7 @@
 #include
 #include
 #include
+#include <linux/blk-pm.h>
 #include
 #include
 #include
diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h
new file mode 100644
index 000000000000..b80c65aba249
--- /dev/null
+++ b/include/linux/blk-pm.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BLK_PM_H_
+#define _BLK_PM_H_
+
+struct device;
+struct request_queue;
+
+/*
+ * block layer runtime pm functions
+ */
+#ifdef CONFIG_PM
+extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev);
+extern int blk_pre_runtime_suspend(struct request_queue *q);
+extern void blk_post_runtime_suspend(struct request_queue *q, int err);
+extern void blk_pre_runtime_resume(struct request_queue *q);
+extern void blk_post_runtime_resume(struct request_queue *q, int err);
+extern void blk_set_runtime_active(struct request_queue *q);
+#else
+static inline void blk_pm_runtime_init(struct request_queue *q,
+				       struct device *dev) {}
+#endif
+
+#endif /* _BLK_PM_H_ */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1d5e14139795..cd863511dedb 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1280,29 +1280,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
 extern void blk_put_queue(struct request_queue *);
 extern void blk_set_queue_dying(struct request_queue *);
 
-/*
- * block layer runtime pm functions
- */
-#ifdef CONFIG_PM
-extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev);
-extern int blk_pre_runtime_suspend(struct request_queue *q);
-extern void blk_post_runtime_suspend(struct request_queue *q, int err);
-extern void blk_pre_runtime_resume(struct request_queue *q);
-extern void blk_post_runtime_resume(struct request_queue *q, int err);
-extern void blk_set_runtime_active(struct request_queue *q);
-#else
-static inline void blk_pm_runtime_init(struct request_queue *q,
-				       struct device *dev) {}
-static inline int blk_pre_runtime_suspend(struct request_queue *q)
-{
-	return -ENOSYS;
-}
-static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {}
-static inline void blk_pre_runtime_resume(struct request_queue *q) {}
-static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
-static inline void blk_set_runtime_active(struct request_queue *q) {}
-#endif
-
 /*
  * blk_plug permits building a queue of related requests by holding the I/O
  * fragments for a short period. This allows merging of sequential requests
-- 
cgit

From cd84a62e0078dce09f4ed349bec84f86c9d54b30 Mon Sep 17 00:00:00 2001
From: Bart Van Assche
Date: Wed, 26 Sep 2018 14:01:04 -0700
Subject: block, scsi: Change the preempt-only flag into a counter

The RQF_PREEMPT flag is used for three purposes:
- In the SCSI core, for making sure that power management requests
  are executed even if a device is in the "quiesced" state.
- For domain validation by SCSI drivers that use the parallel port.
- In the IDE driver, for IDE preempt requests.
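For orientation, the queue-entry gate that this patch converts from a flag
test to a counter test looks roughly like this before the change (a
simplified sketch of the blk_queue_enter() logic touched in the diff below,
not a verbatim copy):

    /* Pre-patch gate (sketch): one queue flag decides admission. */
    const bool preempt = flags & BLK_MQ_REQ_PREEMPT;

    if (percpu_ref_tryget_live(&q->q_usage_counter)) {
            if (preempt || !blk_queue_preempt_only(q))
                    return 0;                        /* request may enter */
            percpu_ref_put(&q->q_usage_counter);     /* preempt-only: back off */
    }

After the conversion, blk_queue_pm_only(q) reads an atomic counter instead
of a flag bit, so several contexts can independently require "PM requests
only" without stepping on each other.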
Rename "preempt-only" into "pm-only" because the primary purpose of this mode is power management. Since the power management core may but does not have to resume a runtime suspended device before performing system-wide suspend and since a later patch will set "pm-only" mode as long as a block device is runtime suspended, make it possible to set "pm-only" mode from more than one context. Since with this change scsi_device_quiesce() is no longer idempotent, make that function return early if it is called for a quiesced queue. Signed-off-by: Bart Van Assche Acked-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Cc: Jianchao Wang Cc: Johannes Thumshirn Cc: Alan Stern Signed-off-by: Jens Axboe --- block/blk-core.c | 35 ++++++++++++++++++----------------- block/blk-mq-debugfs.c | 10 +++++++++- drivers/scsi/scsi_lib.c | 11 +++++++---- include/linux/blkdev.h | 14 +++++++++----- 4 files changed, 43 insertions(+), 27 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 6d4dd176bd9d..1a691f5269bb 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -422,24 +422,25 @@ void blk_sync_queue(struct request_queue *q) EXPORT_SYMBOL(blk_sync_queue); /** - * blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY + * blk_set_pm_only - increment pm_only counter * @q: request queue pointer - * - * Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not - * set and 1 if the flag was already set. */ -int blk_set_preempt_only(struct request_queue *q) +void blk_set_pm_only(struct request_queue *q) { - return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q); + atomic_inc(&q->pm_only); } -EXPORT_SYMBOL_GPL(blk_set_preempt_only); +EXPORT_SYMBOL_GPL(blk_set_pm_only); -void blk_clear_preempt_only(struct request_queue *q) +void blk_clear_pm_only(struct request_queue *q) { - blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q); - wake_up_all(&q->mq_freeze_wq); + int pm_only; + + pm_only = atomic_dec_return(&q->pm_only); + WARN_ON_ONCE(pm_only < 0); + if (pm_only == 0) + wake_up_all(&q->mq_freeze_wq); } -EXPORT_SYMBOL_GPL(blk_clear_preempt_only); +EXPORT_SYMBOL_GPL(blk_clear_pm_only); /** * __blk_run_queue_uncond - run a queue whether or not it has been stopped @@ -918,7 +919,7 @@ EXPORT_SYMBOL(blk_alloc_queue); */ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) { - const bool preempt = flags & BLK_MQ_REQ_PREEMPT; + const bool pm = flags & BLK_MQ_REQ_PREEMPT; while (true) { bool success = false; @@ -926,11 +927,11 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) rcu_read_lock(); if (percpu_ref_tryget_live(&q->q_usage_counter)) { /* - * The code that sets the PREEMPT_ONLY flag is - * responsible for ensuring that that flag is globally - * visible before the queue is unfrozen. + * The code that increments the pm_only counter is + * responsible for ensuring that that counter is + * globally visible before the queue is unfrozen. 
*/ - if (preempt || !blk_queue_preempt_only(q)) { + if (pm || !blk_queue_pm_only(q)) { success = true; } else { percpu_ref_put(&q->q_usage_counter); @@ -955,7 +956,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) wait_event(q->mq_freeze_wq, (atomic_read(&q->mq_freeze_depth) == 0 && - (preempt || !blk_queue_preempt_only(q))) || + (pm || !blk_queue_pm_only(q))) || blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index cb1e6cf7ac48..a5ea86835fcb 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -102,6 +102,14 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags, return 0; } +static int queue_pm_only_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + + seq_printf(m, "%d\n", atomic_read(&q->pm_only)); + return 0; +} + #define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(QUEUED), @@ -132,7 +140,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(REGISTERED), QUEUE_FLAG_NAME(SCSI_PASSTHROUGH), QUEUE_FLAG_NAME(QUIESCED), - QUEUE_FLAG_NAME(PREEMPT_ONLY), }; #undef QUEUE_FLAG_NAME @@ -209,6 +216,7 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf, static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { { "poll_stat", 0400, queue_poll_stat_show }, { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops }, + { "pm_only", 0600, queue_pm_only_show, NULL }, { "state", 0600, queue_state_show, queue_state_write }, { "write_hints", 0600, queue_write_hint_show, queue_write_hint_store }, { "zone_wlock", 0400, queue_zone_wlock_show, NULL }, diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index eb97d2dd3651..62348412ed1b 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -3046,11 +3046,14 @@ scsi_device_quiesce(struct scsi_device *sdev) */ WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current); - blk_set_preempt_only(q); + if (sdev->quiesced_by == current) + return 0; + + blk_set_pm_only(q); blk_mq_freeze_queue(q); /* - * Ensure that the effect of blk_set_preempt_only() will be visible + * Ensure that the effect of blk_set_pm_only() will be visible * for percpu_ref_tryget() callers that occur after the queue * unfreeze even if the queue was already frozen before this function * was called. See also https://lwn.net/Articles/573497/. @@ -3063,7 +3066,7 @@ scsi_device_quiesce(struct scsi_device *sdev) if (err == 0) sdev->quiesced_by = current; else - blk_clear_preempt_only(q); + blk_clear_pm_only(q); mutex_unlock(&sdev->state_mutex); return err; @@ -3088,7 +3091,7 @@ void scsi_device_resume(struct scsi_device *sdev) mutex_lock(&sdev->state_mutex); WARN_ON_ONCE(!sdev->quiesced_by); sdev->quiesced_by = NULL; - blk_clear_preempt_only(sdev->request_queue); + blk_clear_pm_only(sdev->request_queue); if (sdev->sdev_state == SDEV_QUIESCE) scsi_device_set_state(sdev, SDEV_RUNNING); mutex_unlock(&sdev->state_mutex); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cd863511dedb..13bb54f26736 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -504,6 +504,12 @@ struct request_queue { * various queue flags, see QUEUE_* below */ unsigned long queue_flags; + /* + * Number of contexts that have called blk_set_pm_only(). If this + * counter is above zero then only RQF_PM and RQF_PREEMPT requests are + * processed. 
+ */ + atomic_t pm_only; /* * ida allocated id for this queue. Used to index queues from @@ -698,7 +704,6 @@ struct request_queue { #define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */ #define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */ #define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */ -#define QUEUE_FLAG_PREEMPT_ONLY 29 /* only process REQ_PREEMPT requests */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ @@ -736,12 +741,11 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ REQ_FAILFAST_DRIVER)) #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) -#define blk_queue_preempt_only(q) \ - test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags) +#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) #define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags) -extern int blk_set_preempt_only(struct request_queue *q); -extern void blk_clear_preempt_only(struct request_queue *q); +extern void blk_set_pm_only(struct request_queue *q); +extern void blk_clear_pm_only(struct request_queue *q); static inline int queue_in_flight(struct request_queue *q) { -- cgit From 154b00d566e221152514ba8259f38b21571081ef Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 26 Sep 2018 14:01:05 -0700 Subject: block: Split blk_pm_add_request() and blk_pm_put_request() Move the pm_request_resume() and pm_runtime_mark_last_busy() calls into two new functions and thereby separate legacy block layer code from code that works for both the legacy block layer and blk-mq. A later patch will add calls to the new functions in the blk-mq code. Signed-off-by: Bart Van Assche Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Cc: Martin K. 
Petersen Cc: Jianchao Wang Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: Alan Stern Signed-off-by: Jens Axboe --- block/blk-core.c | 1 + block/blk-pm.h | 36 +++++++++++++++++++++++++++++++----- block/elevator.c | 1 + 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 1a691f5269bb..fd91e9bf2893 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1744,6 +1744,7 @@ void __blk_put_request(struct request_queue *q, struct request *req) blk_req_zone_write_unlock(req); blk_pm_put_request(req); + blk_pm_mark_last_busy(req); elv_completed_request(q, req); diff --git a/block/blk-pm.h b/block/blk-pm.h index 1ffc8ef203ec..a8564ea72a41 100644 --- a/block/blk-pm.h +++ b/block/blk-pm.h @@ -6,8 +6,23 @@ #include #ifdef CONFIG_PM +static inline void blk_pm_request_resume(struct request_queue *q) +{ + if (q->dev && (q->rpm_status == RPM_SUSPENDED || + q->rpm_status == RPM_SUSPENDING)) + pm_request_resume(q->dev); +} + +static inline void blk_pm_mark_last_busy(struct request *rq) +{ + if (rq->q->dev && !(rq->rq_flags & RQF_PM)) + pm_runtime_mark_last_busy(rq->q->dev); +} + static inline void blk_pm_requeue_request(struct request *rq) { + lockdep_assert_held(rq->q->queue_lock); + if (rq->q->dev && !(rq->rq_flags & RQF_PM)) rq->q->nr_pending--; } @@ -15,17 +30,28 @@ static inline void blk_pm_requeue_request(struct request *rq) static inline void blk_pm_add_request(struct request_queue *q, struct request *rq) { - if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 && - (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING)) - pm_request_resume(q->dev); + lockdep_assert_held(q->queue_lock); + + if (q->dev && !(rq->rq_flags & RQF_PM)) + q->nr_pending++; } static inline void blk_pm_put_request(struct request *rq) { - if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending) - pm_runtime_mark_last_busy(rq->q->dev); + lockdep_assert_held(rq->q->queue_lock); + + if (rq->q->dev && !(rq->rq_flags & RQF_PM)) + --rq->q->nr_pending; } #else +static inline void blk_pm_request_resume(struct request_queue *q) +{ +} + +static inline void blk_pm_mark_last_busy(struct request *rq) +{ +} + static inline void blk_pm_requeue_request(struct request *rq) { } diff --git a/block/elevator.c b/block/elevator.c index e18ac68626e3..1c992bf6cfb1 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -601,6 +601,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) trace_block_rq_insert(q, rq); blk_pm_add_request(q, rq); + blk_pm_request_resume(q); rq->q = q; -- cgit From 0d25bd072b494a0290a7855a2e0286c4a0c92041 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 26 Sep 2018 14:01:06 -0700 Subject: block: Schedule runtime resume earlier Instead of scheduling runtime resume of a request queue after a request has been queued, schedule asynchronous resume during request allocation. The new pm_request_resume() calls occur after blk_queue_enter() has increased the q_usage_counter request queue member. This change is needed for a later patch that will make request allocation block while the queue status is not RPM_ACTIVE. 
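The wait condition in blk_queue_enter() shown in the first hunk below leans
on the C comma operator: the left operand is evaluated purely for its side
effect, and the right operand supplies the value of the expression. In
isolation (an illustrative sketch, not the kernel code itself):

    /* A resume is (re)requested each time the condition is re-evaluated;
     * the value of the subexpression is still !blk_queue_pm_only(q). */
    if (pm || (blk_pm_request_resume(q), !blk_queue_pm_only(q)))
            /* proceed: a runtime resume has been kicked off if needed */;

so every wakeup of a waiter re-triggers an asynchronous resume until
pm-only mode is cleared.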
Signed-off-by: Bart Van Assche
Reviewed-by: Ming Lei
Reviewed-by: Christoph Hellwig
Cc: Jianchao Wang
Cc: Hannes Reinecke
Cc: Johannes Thumshirn
Cc: Alan Stern
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 3 ++-
 block/elevator.c | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index fd91e9bf2893..fec135ae52cf 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -956,7 +956,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 
 		wait_event(q->mq_freeze_wq,
 			   (atomic_read(&q->mq_freeze_depth) == 0 &&
-			    (pm || !blk_queue_pm_only(q))) ||
+			    (pm || (blk_pm_request_resume(q),
+				    !blk_queue_pm_only(q)))) ||
 			   blk_queue_dying(q));
 		if (blk_queue_dying(q))
 			return -ENODEV;
diff --git a/block/elevator.c b/block/elevator.c
index 1c992bf6cfb1..e18ac68626e3 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -601,7 +601,6 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 		trace_block_rq_insert(q, rq);
 
 	blk_pm_add_request(q, rq);
-	blk_pm_request_resume(q);
 
 	rq->q = q;
-- 
cgit

From 18c9a6bbe0645a05172a900740b9d2d379d54320 Mon Sep 17 00:00:00 2001
From: Bart Van Assche
Date: Wed, 26 Sep 2018 14:01:07 -0700
Subject: percpu-refcount: Introduce percpu_ref_resurrect()

This function will be used in a later patch to switch the struct
request_queue q_usage_counter from killed back to live. In contrast to
percpu_ref_reinit(), this new function does not require that the
refcount is zero.

Signed-off-by: Bart Van Assche
Acked-by: Tejun Heo
Reviewed-by: Ming Lei
Cc: Christoph Hellwig
Cc: Jianchao Wang
Cc: Hannes Reinecke
Cc: Johannes Thumshirn
Signed-off-by: Jens Axboe
---
 include/linux/percpu-refcount.h |  1 +
 lib/percpu-refcount.c           | 28 ++++++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 009cdf3d65b6..b297cd1cd4f1 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -108,6 +108,7 @@ void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref);
 void percpu_ref_switch_to_percpu(struct percpu_ref *ref);
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill);
+void percpu_ref_resurrect(struct percpu_ref *ref);
 void percpu_ref_reinit(struct percpu_ref *ref);
 
 /**
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 9f96fa7bc000..de10b8c0bff6 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -356,11 +356,35 @@ EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
  */
 void percpu_ref_reinit(struct percpu_ref *ref)
 {
+	WARN_ON_ONCE(!percpu_ref_is_zero(ref));
+
+	percpu_ref_resurrect(ref);
+}
+EXPORT_SYMBOL_GPL(percpu_ref_reinit);
+
+/**
+ * percpu_ref_resurrect - modify a percpu refcount from dead to live
+ * @ref: percpu_ref to resurrect
+ *
+ * Modify @ref so that it's in the same state as before percpu_ref_kill() was
+ * called. @ref must be dead but must not yet have exited.
+ *
+ * If @ref->release() frees @ref then the caller is responsible for
+ * guaranteeing that @ref->release() does not get called while this
+ * function is in progress.
+ *
+ * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
+ * this function is in progress.
+ */
+void percpu_ref_resurrect(struct percpu_ref *ref)
+{
+	unsigned long __percpu *percpu_count;
 	unsigned long flags;
 
 	spin_lock_irqsave(&percpu_ref_switch_lock, flags);
 
-	WARN_ON_ONCE(!percpu_ref_is_zero(ref));
+	WARN_ON_ONCE(!(ref->percpu_count_ptr & __PERCPU_REF_DEAD));
+	WARN_ON_ONCE(__ref_is_percpu(ref, &percpu_count));
 
 	ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
 	percpu_ref_get(ref);
@@ -368,4 +392,4 @@ void percpu_ref_reinit(struct percpu_ref *ref)
 
 	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
 }
-EXPORT_SYMBOL_GPL(percpu_ref_reinit);
+EXPORT_SYMBOL_GPL(percpu_ref_resurrect);
-- 
cgit

From bdd6316094e0370cd183bc979dd7e322b68dc993 Mon Sep 17 00:00:00 2001
From: Bart Van Assche
Date: Wed, 26 Sep 2018 14:01:08 -0700
Subject: block: Allow unfreezing of a queue while requests are in progress

A later patch will call blk_freeze_queue_start() followed by
blk_mq_unfreeze_queue() without waiting for q_usage_counter to drop to
zero. Make sure that this doesn't cause a kernel warning to appear by
switching from percpu_ref_reinit() to percpu_ref_resurrect(); the
former requires that the refcount it operates on is zero.

Signed-off-by: Bart Van Assche
Reviewed-by: Ming Lei
Reviewed-by: Christoph Hellwig
Cc: Jianchao Wang
Cc: Hannes Reinecke
Cc: Johannes Thumshirn
Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 85a1c1a59c72..96d501e8663c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -198,7 +198,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
 	freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
 	WARN_ON_ONCE(freeze_depth < 0);
 	if (!freeze_depth) {
-		percpu_ref_reinit(&q->q_usage_counter);
+		percpu_ref_resurrect(&q->q_usage_counter);
 		wake_up_all(&q->mq_freeze_wq);
 	}
 }
-- 
cgit

From 7cedffec8e759480f7f7a9be9cd0d7ebf0aafff2 Mon Sep 17 00:00:00 2001
From: Bart Van Assche
Date: Wed, 26 Sep 2018 14:01:09 -0700
Subject: block: Make blk_get_request() block for non-PM requests while
 suspended

Instead of allowing requests that are not power management requests
to enter the queue in runtime suspended status (RPM_SUSPENDED), make
the blk_get_request() caller block. This change fixes a starvation
issue: it is now guaranteed that power management requests will be
executed no matter how many blk_get_request() callers are waiting.
For blk-mq, instead of maintaining the q->nr_pending counter, rely
on q->q_usage_counter. Call pm_runtime_mark_last_busy() every time a
request finishes instead of only if the queue depth drops to zero.
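The reworked blk_pre_runtime_suspend() in the diff below boils down to the
following sequence (a simplified sketch; the rpm_status update under
q->queue_lock and the locking details are elided):

    blk_set_pm_only(q);            /* new non-PM blk_queue_enter() calls block */
    ret = -EBUSY;
    blk_freeze_queue_start(q);     /* start switching q_usage_counter...       */
    percpu_ref_switch_to_atomic_sync(&q->q_usage_counter); /* ...to atomic mode */
    if (percpu_ref_is_zero(&q->q_usage_counter))
            ret = 0;               /* nothing in flight: safe to suspend       */
    blk_mq_unfreeze_queue(q);      /* back to per-cpu mode                     */
    if (ret)
            blk_clear_pm_only(q);  /* busy: let normal requests through again  */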
Signed-off-by: Bart Van Assche
Reviewed-by: Ming Lei
Reviewed-by: Christoph Hellwig
Cc: Jianchao Wang
Cc: Hannes Reinecke
Cc: Johannes Thumshirn
Cc: Alan Stern
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 37 ++++++-----------------
 block/blk-pm.c   | 44 +++++++++++++++++++++++++++++++++++++-----
 2 files changed, 47 insertions(+), 34 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index fec135ae52cf..16dd3a989753 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2746,30 +2746,6 @@ void blk_account_io_done(struct request *req, u64 now)
 	}
 }
 
-#ifdef CONFIG_PM
-/*
- * Don't process normal requests when queue is suspended
- * or in the process of suspending/resuming
- */
-static bool blk_pm_allow_request(struct request *rq)
-{
-	switch (rq->q->rpm_status) {
-	case RPM_RESUMING:
-	case RPM_SUSPENDING:
-		return rq->rq_flags & RQF_PM;
-	case RPM_SUSPENDED:
-		return false;
-	default:
-		return true;
-	}
-}
-#else
-static bool blk_pm_allow_request(struct request *rq)
-{
-	return true;
-}
-#endif
-
 void blk_account_io_start(struct request *rq, bool new_io)
 {
 	struct hd_struct *part;
@@ -2815,11 +2791,14 @@ static struct request *elv_next_request(struct request_queue *q)
 
 	while (1) {
 		list_for_each_entry(rq, &q->queue_head, queuelist) {
-			if (blk_pm_allow_request(rq))
-				return rq;
-
-			if (rq->rq_flags & RQF_SOFTBARRIER)
-				break;
+#ifdef CONFIG_PM
+			/*
+			 * If a request gets queued in state RPM_SUSPENDED
+			 * then that's a kernel bug.
+			 */
+			WARN_ON_ONCE(q->rpm_status == RPM_SUSPENDED);
+#endif
+			return rq;
 		}
 
 		/*
diff --git a/block/blk-pm.c b/block/blk-pm.c
index 9b636960d285..972fbc656846 100644
--- a/block/blk-pm.c
+++ b/block/blk-pm.c
@@ -1,8 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 
+#include <linux/blk-mq.h>
 #include
 #include
 #include
+#include "blk-mq.h"
+#include "blk-mq-tag.h"
 
 /**
  * blk_pm_runtime_init - Block layer runtime PM initialization routine
@@ -68,14 +71,40 @@ int blk_pre_runtime_suspend(struct request_queue *q)
 	if (!q->dev)
 		return ret;
 
+	WARN_ON_ONCE(q->rpm_status != RPM_ACTIVE);
+
+	/*
+	 * Increase the pm_only counter before checking whether any
+	 * non-PM blk_queue_enter() calls are in progress to avoid that any
+	 * new non-PM blk_queue_enter() calls succeed before the pm_only
+	 * counter is decreased again.
+	 */
+	blk_set_pm_only(q);
+	ret = -EBUSY;
+	/* Switch q_usage_counter from per-cpu to atomic mode. */
+	blk_freeze_queue_start(q);
+	/*
+	 * Wait until atomic mode has been reached. Since that
+	 * involves calling call_rcu(), it is guaranteed that later
+	 * blk_queue_enter() calls see the pm-only state. See also
+	 * http://lwn.net/Articles/573497/.
+	 */
+	percpu_ref_switch_to_atomic_sync(&q->q_usage_counter);
+	if (percpu_ref_is_zero(&q->q_usage_counter))
+		ret = 0;
+	/* Switch q_usage_counter back to per-cpu mode.
*/ + blk_mq_unfreeze_queue(q); + spin_lock_irq(q->queue_lock); - if (q->nr_pending) { - ret = -EBUSY; + if (ret < 0) pm_runtime_mark_last_busy(q->dev); - } else { + else q->rpm_status = RPM_SUSPENDING; - } spin_unlock_irq(q->queue_lock); + + if (ret) + blk_clear_pm_only(q); + return ret; } EXPORT_SYMBOL(blk_pre_runtime_suspend); @@ -106,6 +135,9 @@ void blk_post_runtime_suspend(struct request_queue *q, int err) pm_runtime_mark_last_busy(q->dev); } spin_unlock_irq(q->queue_lock); + + if (err) + blk_clear_pm_only(q); } EXPORT_SYMBOL(blk_post_runtime_suspend); @@ -153,13 +185,15 @@ void blk_post_runtime_resume(struct request_queue *q, int err) spin_lock_irq(q->queue_lock); if (!err) { q->rpm_status = RPM_ACTIVE; - __blk_run_queue(q); pm_runtime_mark_last_busy(q->dev); pm_request_autosuspend(q->dev); } else { q->rpm_status = RPM_SUSPENDED; } spin_unlock_irq(q->queue_lock); + + if (!err) + blk_clear_pm_only(q); } EXPORT_SYMBOL(blk_post_runtime_resume); -- cgit From 986d413b7c156e69198dfc80fb74aa18d0ddef44 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 26 Sep 2018 14:01:10 -0700 Subject: blk-mq: Enable support for runtime power management Now that the blk-mq core processes power management requests (marked with RQF_PREEMPT) in other states than RPM_ACTIVE, enable runtime power management for blk-mq. Signed-off-by: Bart Van Assche Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Cc: Jianchao Wang Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: Alan Stern Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 ++ block/blk-pm.c | 6 ------ 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 96d501e8663c..d384ab700afd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -33,6 +33,7 @@ #include "blk-mq.h" #include "blk-mq-debugfs.h" #include "blk-mq-tag.h" +#include "blk-pm.h" #include "blk-stat.h" #include "blk-mq-sched.h" #include "blk-rq-qos.h" @@ -475,6 +476,7 @@ static void __blk_mq_free_request(struct request *rq) struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); const int sched_tag = rq->internal_tag; + blk_pm_mark_last_busy(rq); if (rq->tag != -1) blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); if (sched_tag != -1) diff --git a/block/blk-pm.c b/block/blk-pm.c index 972fbc656846..f8fdae01bea2 100644 --- a/block/blk-pm.c +++ b/block/blk-pm.c @@ -30,12 +30,6 @@ */ void blk_pm_runtime_init(struct request_queue *q, struct device *dev) { - /* Don't enable runtime PM for blk-mq until it is ready */ - if (q->mq_ops) { - pm_runtime_disable(dev); - return; - } - q->dev = dev; q->rpm_status = RPM_ACTIVE; pm_runtime_set_autosuspend_delay(q->dev, -1); -- cgit From ed88660a5372faa67c168c3db5201e33e488c9fd Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 27 Sep 2018 15:55:51 -0700 Subject: block: move call of scheduler's ->completed_request() hook Commit 4bc6339a583c ("block: move blk_stat_add() to __blk_mq_end_request()") consolidated some calls using ktime_get() so we'd only need to call it once. Kyber's ->completed_request() hook also calls ktime_get(), so let's move it to the same place, too. 
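After this change, an elevator's completed_request() hook receives the
timestamp instead of reading the clock itself. A scheduler's hook would
look roughly like this (an illustrative sketch; example_completed_request()
is a hypothetical function, not an in-tree one):

    /* 'now' is the single ktime_get_ns() value taken in
     * __blk_mq_end_request(). */
    static void example_completed_request(struct request *rq, u64 now)
    {
            u64 total_lat = now - rq->start_time_ns;     /* time since allocation */
            u64 io_lat = now - rq->io_start_time_ns;     /* time on the device    */

            /* feed total_lat/io_lat into the scheduler's heuristics... */
    }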
Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq-sched.h | 4 ++-- block/blk-mq.c | 5 +++-- block/kyber-iosched.c | 5 ++--- include/linux/elevator.h | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 4e028ee42430..8a9544203173 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -49,12 +49,12 @@ blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, return true; } -static inline void blk_mq_sched_completed_request(struct request *rq) +static inline void blk_mq_sched_completed_request(struct request *rq, u64 now) { struct elevator_queue *e = rq->q->elevator; if (e && e->type->ops.mq.completed_request) - e->type->ops.mq.completed_request(rq); + e->type->ops.mq.completed_request(rq, now); } static inline void blk_mq_sched_started_request(struct request *rq) diff --git a/block/blk-mq.c b/block/blk-mq.c index d384ab700afd..1e72d53e8f2d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -528,6 +528,9 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error) blk_stat_add(rq, now); } + if (rq->internal_tag != -1) + blk_mq_sched_completed_request(rq, now); + blk_account_io_done(rq, now); if (rq->end_io) { @@ -564,8 +567,6 @@ static void __blk_mq_complete_request(struct request *rq) if (!blk_mq_mark_complete(rq)) return; - if (rq->internal_tag != -1) - blk_mq_sched_completed_request(rq); if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) { rq->q->softirq_done_fn(rq); diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index a1660bafc912..95d062c07c61 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -558,12 +558,12 @@ static void kyber_finish_request(struct request *rq) rq_clear_domain_token(kqd, rq); } -static void kyber_completed_request(struct request *rq) +static void kyber_completed_request(struct request *rq, u64 now) { struct request_queue *q = rq->q; struct kyber_queue_data *kqd = q->elevator->elevator_data; unsigned int sched_domain; - u64 now, latency, target; + u64 latency, target; /* * Check if this request met our latency goal. If not, quickly gather @@ -585,7 +585,6 @@ static void kyber_completed_request(struct request *rq) if (blk_stat_is_active(kqd->cb)) return; - now = ktime_get_ns(); if (now < rq->io_start_time_ns) return; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index a02deea30185..015bb59c0331 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -111,7 +111,7 @@ struct elevator_mq_ops { void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); bool (*has_work)(struct blk_mq_hw_ctx *); - void (*completed_request)(struct request *); + void (*completed_request)(struct request *, u64); void (*started_request)(struct request *); void (*requeue_request)(struct request *); struct request *(*former_request)(struct request_queue *, struct request *); -- cgit From f8232f29ca268b0ba9e98638c9ed71e337e7f0a4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 27 Sep 2018 15:55:52 -0700 Subject: block: export blk_stat_enable_accounting() Kyber will need this in a future change if it is built as a module. 
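With the symbol exported, an I/O scheduler built as a module can enable
block-layer statistics from its init path, e.g. (a sketch;
example_init_sched() is a hypothetical elevator init callback, though
Kyber's own init_sched() does exactly this in a later patch of this
series):

    static int example_init_sched(struct request_queue *q,
                                  struct elevator_type *e)
    {
            /* ...allocate and set up per-queue scheduler data... */
            blk_stat_enable_accounting(q);  /* record rq->io_start_time_ns */
            return 0;
    }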
Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-stat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-stat.c b/block/blk-stat.c index 7587b1c3caaf..90561af85a62 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -190,6 +190,7 @@ void blk_stat_enable_accounting(struct request_queue *q) blk_queue_flag_set(QUEUE_FLAG_STATS, q); spin_unlock(&q->stats->lock); } +EXPORT_SYMBOL_GPL(blk_stat_enable_accounting); struct blk_queue_stats *blk_alloc_queue_stats(void) { -- cgit From fa2a1f609e6491383ab63ff6329e0aaa2db2b9f7 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 27 Sep 2018 15:55:53 -0700 Subject: kyber: don't make domain token sbitmap larger than necessary The domain token sbitmaps are currently initialized to the device queue depth or 256, whichever is larger, and immediately resized to the maximum depth for that domain (256, 128, or 64 for read, write, and other, respectively). The sbitmap is never resized larger than that, so it's unnecessary to allocate a bitmap larger than the maximum depth. Let's just allocate it to the maximum depth to begin with. This will use marginally less memory, and more importantly, give us a more appropriate number of bits per sbitmap word. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/kyber-iosched.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 95d062c07c61..08eb5295c18d 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -40,8 +40,6 @@ enum { }; enum { - KYBER_MIN_DEPTH = 256, - /* * In order to prevent starvation of synchronous requests by a flood of * asynchronous requests, we reserve 25% of requests for synchronous @@ -305,7 +303,6 @@ static int kyber_bucket_fn(const struct request *rq) static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) { struct kyber_queue_data *kqd; - unsigned int max_tokens; unsigned int shift; int ret = -ENOMEM; int i; @@ -320,25 +317,17 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) if (!kqd->cb) goto err_kqd; - /* - * The maximum number of tokens for any scheduling domain is at least - * the queue depth of a single hardware queue. If the hardware doesn't - * have many tags, still provide a reasonable number. - */ - max_tokens = max_t(unsigned int, q->tag_set->queue_depth, - KYBER_MIN_DEPTH); for (i = 0; i < KYBER_NUM_DOMAINS; i++) { WARN_ON(!kyber_depth[i]); WARN_ON(!kyber_batch_size[i]); ret = sbitmap_queue_init_node(&kqd->domain_tokens[i], - max_tokens, -1, false, GFP_KERNEL, - q->node); + kyber_depth[i], -1, false, + GFP_KERNEL, q->node); if (ret) { while (--i >= 0) sbitmap_queue_free(&kqd->domain_tokens[i]); goto err_cb; } - sbitmap_queue_resize(&kqd->domain_tokens[i], kyber_depth[i]); } shift = kyber_sched_tags_shift(kqd); -- cgit From 6e25cb01ea206362616a2be469d4f3635f58ca63 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 27 Sep 2018 15:55:54 -0700 Subject: kyber: implement improved heuristics Kyber's current heuristics have a few flaws: - It's based on the mean latency, but p99 latency tends to be more meaningful to anyone who cares about latency. The mean can also be skewed by rare outliers that the scheduler can't do anything about. - The statistics calculations are purely time-based with a short window. This works for steady, high load, but is more sensitive to outliers with bursty workloads. 
- It only considers the latency once an I/O has been submitted to the device, but the user cares about the time spent in the kernel, as well. These are shortcomings of the generic blk-stat code which doesn't quite fit the ideal use case for Kyber. So, this replaces the statistics with a histogram used to calculate percentiles of total latency and I/O latency, which we then use to adjust depths in a slightly more intelligent manner: - Sync and async writes are now the same domain. - Discards are a separate domain. - Domain queue depths are scaled by the ratio of the p99 total latency to the target latency (e.g., if the p99 latency is double the target latency, we will double the queue depth; if the p99 latency is half of the target latency, we can halve the queue depth). - We use the I/O latency to determine whether we should scale queue depths down: we will only scale down if any domain's I/O latency exceeds the target latency, which is an indicator of congestion in the device. These new heuristics are just as scalable as the heuristics they replace. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/kyber-iosched.c | 497 ++++++++++++++++++++++++++++---------------------- 1 file changed, 279 insertions(+), 218 deletions(-) diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 08eb5295c18d..adc8e6393829 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -29,13 +29,16 @@ #include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-mq-tag.h" -#include "blk-stat.h" -/* Scheduling domains. */ +/* + * Scheduling domains: the device is divided into multiple domains based on the + * request type. + */ enum { KYBER_READ, - KYBER_SYNC_WRITE, - KYBER_OTHER, /* Async writes, discard, etc. */ + KYBER_WRITE, + KYBER_DISCARD, + KYBER_OTHER, KYBER_NUM_DOMAINS, }; @@ -49,25 +52,82 @@ enum { }; /* - * Initial device-wide depths for each scheduling domain. + * Maximum device-wide depth for each scheduling domain. * - * Even for fast devices with lots of tags like NVMe, you can saturate - * the device with only a fraction of the maximum possible queue depth. - * So, we cap these to a reasonable value. + * Even for fast devices with lots of tags like NVMe, you can saturate the + * device with only a fraction of the maximum possible queue depth. So, we cap + * these to a reasonable value. */ static const unsigned int kyber_depth[] = { [KYBER_READ] = 256, - [KYBER_SYNC_WRITE] = 128, - [KYBER_OTHER] = 64, + [KYBER_WRITE] = 128, + [KYBER_DISCARD] = 64, + [KYBER_OTHER] = 16, }; /* - * Scheduling domain batch sizes. We favor reads. + * Default latency targets for each scheduling domain. + */ +static const u64 kyber_latency_targets[] = { + [KYBER_READ] = 2 * NSEC_PER_MSEC, + [KYBER_WRITE] = 10 * NSEC_PER_MSEC, + [KYBER_DISCARD] = 5 * NSEC_PER_SEC, +}; + +/* + * Batch size (number of requests we'll dispatch in a row) for each scheduling + * domain. 
 */
 static const unsigned int kyber_batch_size[] = {
 	[KYBER_READ] = 16,
-	[KYBER_SYNC_WRITE] = 8,
-	[KYBER_OTHER] = 8,
+	[KYBER_WRITE] = 8,
+	[KYBER_DISCARD] = 1,
+	[KYBER_OTHER] = 1,
+};
+
+/*
+ * Request latencies are recorded in a histogram with buckets defined relative
+ * to the target latency:
+ *
+ * <= 1/4 * target latency
+ * <= 1/2 * target latency
+ * <= 3/4 * target latency
+ * <= target latency
+ * <= 1 1/4 * target latency
+ * <= 1 1/2 * target latency
+ * <= 1 3/4 * target latency
+ * > 1 3/4 * target latency
+ */
+enum {
+	/*
+	 * The width of the latency histogram buckets is
+	 * 1 / (1 << KYBER_LATENCY_SHIFT) * target latency.
+	 */
+	KYBER_LATENCY_SHIFT = 2,
+	/*
+	 * The first (1 << KYBER_LATENCY_SHIFT) buckets are <= target latency,
+	 * thus, "good".
+	 */
+	KYBER_GOOD_BUCKETS = 1 << KYBER_LATENCY_SHIFT,
+	/* There are also (1 << KYBER_LATENCY_SHIFT) "bad" buckets. */
+	KYBER_LATENCY_BUCKETS = 2 << KYBER_LATENCY_SHIFT,
+};
+
+/*
+ * We measure both the total latency and the I/O latency (i.e., latency after
+ * submitting to the device).
+ */
+enum {
+	KYBER_TOTAL_LATENCY,
+	KYBER_IO_LATENCY,
+};
+
+/*
+ * Per-cpu latency histograms: total latency and I/O latency for each scheduling
+ * domain except for KYBER_OTHER.
+ */
+struct kyber_cpu_latency {
+	atomic_t buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
 };
 
 /*
@@ -84,14 +144,9 @@ struct kyber_ctx_queue {
 } ____cacheline_aligned_in_smp;
 
 struct kyber_queue_data {
-	struct request_queue *q;
-
-	struct blk_stat_callback *cb;
-
 	/*
-	 * The device is divided into multiple scheduling domains based on the
-	 * request type. Each domain has a fixed number of in-flight requests of
-	 * that type device-wide, limited by these tokens.
+	 * Each scheduling domain has a limited number of in-flight requests
+	 * device-wide, limited by these tokens.
 	 */
 	struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];
 
@@ -101,8 +156,19 @@ struct kyber_queue_data {
 	 */
 	unsigned int async_depth;
 
+	struct kyber_cpu_latency __percpu *cpu_latency;
+
+	/* Timer for stats aggregation and adjusting domain tokens. */
+	struct timer_list timer;
+
+	unsigned int latency_buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
+
+	unsigned long latency_timeout[KYBER_OTHER];
+
+	int domain_p99[KYBER_OTHER];
+
 	/* Target latencies in nanoseconds.
*/ - u64 read_lat_nsec, write_lat_nsec; + u64 latency_targets[KYBER_OTHER]; }; struct kyber_hctx_data { @@ -122,182 +188,165 @@ static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags, static unsigned int kyber_sched_domain(unsigned int op) { - if ((op & REQ_OP_MASK) == REQ_OP_READ) + switch (op & REQ_OP_MASK) { + case REQ_OP_READ: return KYBER_READ; - else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op)) - return KYBER_SYNC_WRITE; - else + case REQ_OP_WRITE: + return KYBER_WRITE; + case REQ_OP_DISCARD: + return KYBER_DISCARD; + default: return KYBER_OTHER; + } } -enum { - NONE = 0, - GOOD = 1, - GREAT = 2, - BAD = -1, - AWFUL = -2, -}; - -#define IS_GOOD(status) ((status) > 0) -#define IS_BAD(status) ((status) < 0) - -static int kyber_lat_status(struct blk_stat_callback *cb, - unsigned int sched_domain, u64 target) +static void flush_latency_buckets(struct kyber_queue_data *kqd, + struct kyber_cpu_latency *cpu_latency, + unsigned int sched_domain, unsigned int type) { - u64 latency; - - if (!cb->stat[sched_domain].nr_samples) - return NONE; + unsigned int *buckets = kqd->latency_buckets[sched_domain][type]; + atomic_t *cpu_buckets = cpu_latency->buckets[sched_domain][type]; + unsigned int bucket; - latency = cb->stat[sched_domain].mean; - if (latency >= 2 * target) - return AWFUL; - else if (latency > target) - return BAD; - else if (latency <= target / 2) - return GREAT; - else /* (latency <= target) */ - return GOOD; + for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++) + buckets[bucket] += atomic_xchg(&cpu_buckets[bucket], 0); } /* - * Adjust the read or synchronous write depth given the status of reads and - * writes. The goal is that the latencies of the two domains are fair (i.e., if - * one is good, then the other is good). + * Calculate the histogram bucket with the given percentile rank, or -1 if there + * aren't enough samples yet. */ -static void kyber_adjust_rw_depth(struct kyber_queue_data *kqd, - unsigned int sched_domain, int this_status, - int other_status) +static int calculate_percentile(struct kyber_queue_data *kqd, + unsigned int sched_domain, unsigned int type, + unsigned int percentile) { - unsigned int orig_depth, depth; + unsigned int *buckets = kqd->latency_buckets[sched_domain][type]; + unsigned int bucket, samples = 0, percentile_samples; + + for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++) + samples += buckets[bucket]; + + if (!samples) + return -1; /* - * If this domain had no samples, or reads and writes are both good or - * both bad, don't adjust the depth. + * We do the calculation once we have 500 samples or one second passes + * since the first sample was recorded, whichever comes first. 
*/ - if (this_status == NONE || - (IS_GOOD(this_status) && IS_GOOD(other_status)) || - (IS_BAD(this_status) && IS_BAD(other_status))) - return; - - orig_depth = depth = kqd->domain_tokens[sched_domain].sb.depth; + if (!kqd->latency_timeout[sched_domain]) + kqd->latency_timeout[sched_domain] = max(jiffies + HZ, 1UL); + if (samples < 500 && + time_is_after_jiffies(kqd->latency_timeout[sched_domain])) { + return -1; + } + kqd->latency_timeout[sched_domain] = 0; - if (other_status == NONE) { - depth++; - } else { - switch (this_status) { - case GOOD: - if (other_status == AWFUL) - depth -= max(depth / 4, 1U); - else - depth -= max(depth / 8, 1U); - break; - case GREAT: - if (other_status == AWFUL) - depth /= 2; - else - depth -= max(depth / 4, 1U); + percentile_samples = DIV_ROUND_UP(samples * percentile, 100); + for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++) { + if (buckets[bucket] >= percentile_samples) break; - case BAD: - depth++; - break; - case AWFUL: - if (other_status == GREAT) - depth += 2; - else - depth++; - break; - } + percentile_samples -= buckets[bucket]; } + memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type])); + return bucket; +} + +static void kyber_resize_domain(struct kyber_queue_data *kqd, + unsigned int sched_domain, unsigned int depth) +{ depth = clamp(depth, 1U, kyber_depth[sched_domain]); - if (depth != orig_depth) + if (depth != kqd->domain_tokens[sched_domain].sb.depth) sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth); } -/* - * Adjust the depth of other requests given the status of reads and synchronous - * writes. As long as either domain is doing fine, we don't throttle, but if - * both domains are doing badly, we throttle heavily. - */ -static void kyber_adjust_other_depth(struct kyber_queue_data *kqd, - int read_status, int write_status, - bool have_samples) -{ - unsigned int orig_depth, depth; - int status; - - orig_depth = depth = kqd->domain_tokens[KYBER_OTHER].sb.depth; - - if (read_status == NONE && write_status == NONE) { - depth += 2; - } else if (have_samples) { - if (read_status == NONE) - status = write_status; - else if (write_status == NONE) - status = read_status; - else - status = max(read_status, write_status); - switch (status) { - case GREAT: - depth += 2; - break; - case GOOD: - depth++; - break; - case BAD: - depth -= max(depth / 4, 1U); - break; - case AWFUL: - depth /= 2; - break; +static void kyber_timer_fn(struct timer_list *t) +{ + struct kyber_queue_data *kqd = from_timer(kqd, t, timer); + unsigned int sched_domain; + int cpu; + bool bad = false; + + /* Sum all of the per-cpu latency histograms. */ + for_each_online_cpu(cpu) { + struct kyber_cpu_latency *cpu_latency; + + cpu_latency = per_cpu_ptr(kqd->cpu_latency, cpu); + for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) { + flush_latency_buckets(kqd, cpu_latency, sched_domain, + KYBER_TOTAL_LATENCY); + flush_latency_buckets(kqd, cpu_latency, sched_domain, + KYBER_IO_LATENCY); } } - depth = clamp(depth, 1U, kyber_depth[KYBER_OTHER]); - if (depth != orig_depth) - sbitmap_queue_resize(&kqd->domain_tokens[KYBER_OTHER], depth); -} - -/* - * Apply heuristics for limiting queue depths based on gathered latency - * statistics. 
- */ -static void kyber_stat_timer_fn(struct blk_stat_callback *cb) -{ - struct kyber_queue_data *kqd = cb->data; - int read_status, write_status; - - read_status = kyber_lat_status(cb, KYBER_READ, kqd->read_lat_nsec); - write_status = kyber_lat_status(cb, KYBER_SYNC_WRITE, kqd->write_lat_nsec); + /* + * Check if any domains have a high I/O latency, which might indicate + * congestion in the device. Note that we use the p90; we don't want to + * be too sensitive to outliers here. + */ + for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) { + int p90; - kyber_adjust_rw_depth(kqd, KYBER_READ, read_status, write_status); - kyber_adjust_rw_depth(kqd, KYBER_SYNC_WRITE, write_status, read_status); - kyber_adjust_other_depth(kqd, read_status, write_status, - cb->stat[KYBER_OTHER].nr_samples != 0); + p90 = calculate_percentile(kqd, sched_domain, KYBER_IO_LATENCY, + 90); + if (p90 >= KYBER_GOOD_BUCKETS) + bad = true; + } /* - * Continue monitoring latencies if we aren't hitting the targets or - * we're still throttling other requests. + * Adjust the scheduling domain depths. If we determined that there was + * congestion, we throttle all domains with good latencies. Either way, + * we ease up on throttling domains with bad latencies. */ - if (!blk_stat_is_active(kqd->cb) && - ((IS_BAD(read_status) || IS_BAD(write_status) || - kqd->domain_tokens[KYBER_OTHER].sb.depth < kyber_depth[KYBER_OTHER]))) - blk_stat_activate_msecs(kqd->cb, 100); + for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) { + unsigned int orig_depth, depth; + int p99; + + p99 = calculate_percentile(kqd, sched_domain, + KYBER_TOTAL_LATENCY, 99); + /* + * This is kind of subtle: different domains will not + * necessarily have enough samples to calculate the latency + * percentiles during the same window, so we have to remember + * the p99 for the next time we observe congestion; once we do, + * we don't want to throttle again until we get more data, so we + * reset it to -1. + */ + if (bad) { + if (p99 < 0) + p99 = kqd->domain_p99[sched_domain]; + kqd->domain_p99[sched_domain] = -1; + } else if (p99 >= 0) { + kqd->domain_p99[sched_domain] = p99; + } + if (p99 < 0) + continue; + + /* + * If this domain has bad latency, throttle less. Otherwise, + * throttle more iff we determined that there is congestion. + * + * The new depth is scaled linearly with the p99 latency vs the + * latency target. E.g., if the p99 is 3/4 of the target, then + * we throttle down to 3/4 of the current depth, and if the p99 + * is 2x the target, then we double the depth. + */ + if (bad || p99 >= KYBER_GOOD_BUCKETS) { + orig_depth = kqd->domain_tokens[sched_domain].sb.depth; + depth = (orig_depth * (p99 + 1)) >> KYBER_LATENCY_SHIFT; + kyber_resize_domain(kqd, sched_domain, depth); + } + } } -static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd) +static unsigned int kyber_sched_tags_shift(struct request_queue *q) { /* * All of the hardware queues have the same depth, so we can just grab * the shift of the first one. 
*/ - return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift; -} - -static int kyber_bucket_fn(const struct request *rq) -{ - return kyber_sched_domain(rq->cmd_flags); + return q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift; } static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) @@ -307,16 +356,17 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) int ret = -ENOMEM; int i; - kqd = kmalloc_node(sizeof(*kqd), GFP_KERNEL, q->node); + kqd = kzalloc_node(sizeof(*kqd), GFP_KERNEL, q->node); if (!kqd) goto err; - kqd->q = q; - kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, kyber_bucket_fn, - KYBER_NUM_DOMAINS, kqd); - if (!kqd->cb) + kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency, + GFP_KERNEL | __GFP_ZERO); + if (!kqd->cpu_latency) goto err_kqd; + timer_setup(&kqd->timer, kyber_timer_fn, 0); + for (i = 0; i < KYBER_NUM_DOMAINS; i++) { WARN_ON(!kyber_depth[i]); WARN_ON(!kyber_batch_size[i]); @@ -326,20 +376,22 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) if (ret) { while (--i >= 0) sbitmap_queue_free(&kqd->domain_tokens[i]); - goto err_cb; + goto err_buckets; } } - shift = kyber_sched_tags_shift(kqd); - kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U; + for (i = 0; i < KYBER_OTHER; i++) { + kqd->domain_p99[i] = -1; + kqd->latency_targets[i] = kyber_latency_targets[i]; + } - kqd->read_lat_nsec = 2000000ULL; - kqd->write_lat_nsec = 10000000ULL; + shift = kyber_sched_tags_shift(q); + kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U; return kqd; -err_cb: - blk_stat_free_callback(kqd->cb); +err_buckets: + free_percpu(kqd->cpu_latency); err_kqd: kfree(kqd); err: @@ -361,25 +413,24 @@ static int kyber_init_sched(struct request_queue *q, struct elevator_type *e) return PTR_ERR(kqd); } + blk_stat_enable_accounting(q); + eq->elevator_data = kqd; q->elevator = eq; - blk_stat_add_callback(q, kqd->cb); - return 0; } static void kyber_exit_sched(struct elevator_queue *e) { struct kyber_queue_data *kqd = e->elevator_data; - struct request_queue *q = kqd->q; int i; - blk_stat_remove_callback(q, kqd->cb); + del_timer_sync(&kqd->timer); for (i = 0; i < KYBER_NUM_DOMAINS; i++) sbitmap_queue_free(&kqd->domain_tokens[i]); - blk_stat_free_callback(kqd->cb); + free_percpu(kqd->cpu_latency); kfree(kqd); } @@ -547,40 +598,44 @@ static void kyber_finish_request(struct request *rq) rq_clear_domain_token(kqd, rq); } -static void kyber_completed_request(struct request *rq, u64 now) +static void add_latency_sample(struct kyber_cpu_latency *cpu_latency, + unsigned int sched_domain, unsigned int type, + u64 target, u64 latency) { - struct request_queue *q = rq->q; - struct kyber_queue_data *kqd = q->elevator->elevator_data; - unsigned int sched_domain; - u64 latency, target; + unsigned int bucket; + u64 divisor; - /* - * Check if this request met our latency goal. If not, quickly gather - * some statistics and start throttling. - */ - sched_domain = kyber_sched_domain(rq->cmd_flags); - switch (sched_domain) { - case KYBER_READ: - target = kqd->read_lat_nsec; - break; - case KYBER_SYNC_WRITE: - target = kqd->write_lat_nsec; - break; - default: - return; + if (latency > 0) { + divisor = max_t(u64, target >> KYBER_LATENCY_SHIFT, 1); + bucket = min_t(unsigned int, div64_u64(latency - 1, divisor), + KYBER_LATENCY_BUCKETS - 1); + } else { + bucket = 0; } - /* If we are already monitoring latencies, don't check again. 
*/ - if (blk_stat_is_active(kqd->cb)) - return; + atomic_inc(&cpu_latency->buckets[sched_domain][type][bucket]); +} - if (now < rq->io_start_time_ns) +static void kyber_completed_request(struct request *rq, u64 now) +{ + struct kyber_queue_data *kqd = rq->q->elevator->elevator_data; + struct kyber_cpu_latency *cpu_latency; + unsigned int sched_domain; + u64 target; + + sched_domain = kyber_sched_domain(rq->cmd_flags); + if (sched_domain == KYBER_OTHER) return; - latency = now - rq->io_start_time_ns; + cpu_latency = get_cpu_ptr(kqd->cpu_latency); + target = kqd->latency_targets[sched_domain]; + add_latency_sample(cpu_latency, sched_domain, KYBER_TOTAL_LATENCY, + target, now - rq->start_time_ns); + add_latency_sample(cpu_latency, sched_domain, KYBER_IO_LATENCY, target, + now - rq->io_start_time_ns); + put_cpu_ptr(kqd->cpu_latency); - if (latency > target) - blk_stat_activate_msecs(kqd->cb, 10); + timer_reduce(&kqd->timer, jiffies + HZ / 10); } struct flush_kcq_data { @@ -778,17 +833,17 @@ static bool kyber_has_work(struct blk_mq_hw_ctx *hctx) return false; } -#define KYBER_LAT_SHOW_STORE(op) \ -static ssize_t kyber_##op##_lat_show(struct elevator_queue *e, \ - char *page) \ +#define KYBER_LAT_SHOW_STORE(domain, name) \ +static ssize_t kyber_##name##_lat_show(struct elevator_queue *e, \ + char *page) \ { \ struct kyber_queue_data *kqd = e->elevator_data; \ \ - return sprintf(page, "%llu\n", kqd->op##_lat_nsec); \ + return sprintf(page, "%llu\n", kqd->latency_targets[domain]); \ } \ \ -static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \ - const char *page, size_t count) \ +static ssize_t kyber_##name##_lat_store(struct elevator_queue *e, \ + const char *page, size_t count) \ { \ struct kyber_queue_data *kqd = e->elevator_data; \ unsigned long long nsec; \ @@ -798,12 +853,12 @@ static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \ if (ret) \ return ret; \ \ - kqd->op##_lat_nsec = nsec; \ + kqd->latency_targets[domain] = nsec; \ \ return count; \ } -KYBER_LAT_SHOW_STORE(read); -KYBER_LAT_SHOW_STORE(write); +KYBER_LAT_SHOW_STORE(KYBER_READ, read); +KYBER_LAT_SHOW_STORE(KYBER_WRITE, write); #undef KYBER_LAT_SHOW_STORE #define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store) @@ -870,7 +925,8 @@ static int kyber_##name##_waiting_show(void *data, struct seq_file *m) \ return 0; \ } KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read) -KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_SYNC_WRITE, sync_write) +KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_WRITE, write) +KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_DISCARD, discard) KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other) #undef KYBER_DEBUGFS_DOMAIN_ATTRS @@ -892,8 +948,11 @@ static int kyber_cur_domain_show(void *data, struct seq_file *m) case KYBER_READ: seq_puts(m, "READ\n"); break; - case KYBER_SYNC_WRITE: - seq_puts(m, "SYNC_WRITE\n"); + case KYBER_WRITE: + seq_puts(m, "WRITE\n"); + break; + case KYBER_DISCARD: + seq_puts(m, "DISCARD\n"); break; case KYBER_OTHER: seq_puts(m, "OTHER\n"); @@ -918,7 +977,8 @@ static int kyber_batching_show(void *data, struct seq_file *m) {#name "_tokens", 0400, kyber_##name##_tokens_show} static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = { KYBER_QUEUE_DOMAIN_ATTRS(read), - KYBER_QUEUE_DOMAIN_ATTRS(sync_write), + KYBER_QUEUE_DOMAIN_ATTRS(write), + KYBER_QUEUE_DOMAIN_ATTRS(discard), KYBER_QUEUE_DOMAIN_ATTRS(other), {"async_depth", 0400, kyber_async_depth_show}, {}, @@ -930,7 +990,8 @@ static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = { {#name 
"_waiting", 0400, kyber_##name##_waiting_show} static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = { KYBER_HCTX_DOMAIN_ATTRS(read), - KYBER_HCTX_DOMAIN_ATTRS(sync_write), + KYBER_HCTX_DOMAIN_ATTRS(write), + KYBER_HCTX_DOMAIN_ATTRS(discard), KYBER_HCTX_DOMAIN_ATTRS(other), {"cur_domain", 0400, kyber_cur_domain_show}, {"batching", 0400, kyber_batching_show}, -- cgit From 6c3b7af1c975b87b86dcb2af233d1ae21eb05107 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 27 Sep 2018 15:55:55 -0700 Subject: kyber: add tracepoints When debugging Kyber, it's really useful to know what latencies we've been having, how the domain depths have been adjusted, and if we've actually been throttling. Add three tracepoints, kyber_latency, kyber_adjust, and kyber_throttled, to record that. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/kyber-iosched.c | 52 +++++++++++++++--------- include/trace/events/kyber.h | 96 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 18 deletions(-) create mode 100644 include/trace/events/kyber.h diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index adc8e6393829..2b62e362fb36 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -30,6 +30,9 @@ #include "blk-mq-sched.h" #include "blk-mq-tag.h" +#define CREATE_TRACE_POINTS +#include + /* * Scheduling domains: the device is divided into multiple domains based on the * request type. @@ -42,6 +45,13 @@ enum { KYBER_NUM_DOMAINS, }; +static const char *kyber_domain_names[] = { + [KYBER_READ] = "READ", + [KYBER_WRITE] = "WRITE", + [KYBER_DISCARD] = "DISCARD", + [KYBER_OTHER] = "OTHER", +}; + enum { /* * In order to prevent starvation of synchronous requests by a flood of @@ -122,6 +132,11 @@ enum { KYBER_IO_LATENCY, }; +static const char *kyber_latency_type_names[] = { + [KYBER_TOTAL_LATENCY] = "total", + [KYBER_IO_LATENCY] = "I/O", +}; + /* * Per-cpu latency histograms: total latency and I/O latency for each scheduling * domain except for KYBER_OTHER. @@ -144,6 +159,8 @@ struct kyber_ctx_queue { } ____cacheline_aligned_in_smp; struct kyber_queue_data { + struct request_queue *q; + /* * Each scheduling domain has a limited number of in-flight requests * device-wide, limited by these tokens. 
@@ -249,6 +266,10 @@ static int calculate_percentile(struct kyber_queue_data *kqd, } memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type])); + trace_kyber_latency(kqd->q, kyber_domain_names[sched_domain], + kyber_latency_type_names[type], percentile, + bucket + 1, 1 << KYBER_LATENCY_SHIFT, samples); + return bucket; } @@ -256,8 +277,11 @@ static void kyber_resize_domain(struct kyber_queue_data *kqd, unsigned int sched_domain, unsigned int depth) { depth = clamp(depth, 1U, kyber_depth[sched_domain]); - if (depth != kqd->domain_tokens[sched_domain].sb.depth) + if (depth != kqd->domain_tokens[sched_domain].sb.depth) { sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth); + trace_kyber_adjust(kqd->q, kyber_domain_names[sched_domain], + depth); + } } static void kyber_timer_fn(struct timer_list *t) @@ -360,6 +384,8 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) if (!kqd) goto err; + kqd->q = q; + kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency, GFP_KERNEL | __GFP_ZERO); if (!kqd->cpu_latency) @@ -756,6 +782,9 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd, rq_set_domain_token(rq, nr); list_del_init(&rq->queuelist); return rq; + } else { + trace_kyber_throttled(kqd->q, + kyber_domain_names[khd->cur_domain]); } } else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) { nr = kyber_get_domain_token(kqd, khd, hctx); @@ -766,6 +795,9 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd, rq_set_domain_token(rq, nr); list_del_init(&rq->queuelist); return rq; + } else { + trace_kyber_throttled(kqd->q, + kyber_domain_names[khd->cur_domain]); } } @@ -944,23 +976,7 @@ static int kyber_cur_domain_show(void *data, struct seq_file *m) struct blk_mq_hw_ctx *hctx = data; struct kyber_hctx_data *khd = hctx->sched_data; - switch (khd->cur_domain) { - case KYBER_READ: - seq_puts(m, "READ\n"); - break; - case KYBER_WRITE: - seq_puts(m, "WRITE\n"); - break; - case KYBER_DISCARD: - seq_puts(m, "DISCARD\n"); - break; - case KYBER_OTHER: - seq_puts(m, "OTHER\n"); - break; - default: - seq_printf(m, "%u\n", khd->cur_domain); - break; - } + seq_printf(m, "%s\n", kyber_domain_names[khd->cur_domain]); return 0; } diff --git a/include/trace/events/kyber.h b/include/trace/events/kyber.h new file mode 100644 index 000000000000..a9834c37ac40 --- /dev/null +++ b/include/trace/events/kyber.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kyber + +#if !defined(_TRACE_KYBER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KYBER_H + +#include <linux/blkdev.h> +#include <linux/tracepoint.h> + +#define DOMAIN_LEN 16 +#define LATENCY_TYPE_LEN 8 + +TRACE_EVENT(kyber_latency, + + TP_PROTO(struct request_queue *q, const char *domain, const char *type, + unsigned int percentile, unsigned int numerator, + unsigned int denominator, unsigned int samples), + + TP_ARGS(q, domain, type, percentile, numerator, denominator, samples), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __array( char, domain, DOMAIN_LEN ) + __array( char, type, LATENCY_TYPE_LEN ) + __field( u8, percentile ) + __field( u8, numerator ) + __field( u8, denominator ) + __field( unsigned int, samples ) + ), + + TP_fast_assign( + __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); + strlcpy(__entry->domain, domain, DOMAIN_LEN); + strlcpy(__entry->type, type, LATENCY_TYPE_LEN); + __entry->percentile = percentile; + __entry->numerator = numerator; + __entry->denominator = denominator; + __entry->samples = samples; + ), + + TP_printk("%d,%d %s %s p%u
%u/%u samples=%u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->domain, + __entry->type, __entry->percentile, __entry->numerator, + __entry->denominator, __entry->samples) +); + +TRACE_EVENT(kyber_adjust, + + TP_PROTO(struct request_queue *q, const char *domain, + unsigned int depth), + + TP_ARGS(q, domain, depth), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __array( char, domain, DOMAIN_LEN ) + __field( unsigned int, depth ) + ), + + TP_fast_assign( + __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); + strlcpy(__entry->domain, domain, DOMAIN_LEN); + __entry->depth = depth; + ), + + TP_printk("%d,%d %s %u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->domain, + __entry->depth) +); + +TRACE_EVENT(kyber_throttled, + + TP_PROTO(struct request_queue *q, const char *domain), + + TP_ARGS(q, domain), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __array( char, domain, DOMAIN_LEN ) + ), + + TP_fast_assign( + __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); + strlcpy(__entry->domain, domain, DOMAIN_LEN); + ), + + TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->domain) +); + +#define _TRACE_KYBER_H +#endif /* _TRACE_KYBER_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> -- cgit From fef912bf860e8e7e48a2bfb978a356bba743a8b7 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 28 Sep 2018 08:17:19 +0200 Subject: block: genhd: add 'groups' argument to device_add_disk Update device_add_disk() to take a 'groups' argument so that individual drivers can register a device with additional sysfs attributes. This avoids a race condition the driver would otherwise have if these groups were to be created with sysfs_create_groups(). Signed-off-by: Martin Wilck Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- arch/um/drivers/ubd_kern.c | 2 +- block/genhd.c | 19 ++++++++++++++----- drivers/block/floppy.c | 2 +- drivers/block/mtip32xx/mtip32xx.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/block/ps3vram.c | 2 +- drivers/block/rsxx/dev.c | 2 +- drivers/block/skd_main.c | 2 +- drivers/block/sunvdc.c | 2 +- drivers/block/virtio_blk.c | 2 +- drivers/block/xen-blkfront.c | 2 +- drivers/ide/ide-cd.c | 2 +- drivers/ide/ide-gd.c | 2 +- drivers/memstick/core/ms_block.c | 2 +- drivers/memstick/core/mspro_block.c | 2 +- drivers/mmc/core/block.c | 2 +- drivers/mtd/mtd_blkdevs.c | 2 +- drivers/nvdimm/blk.c | 2 +- drivers/nvdimm/btt.c | 2 +- drivers/nvdimm/pmem.c | 2 +- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/multipath.c | 2 +- drivers/s390/block/dasd_genhd.c | 2 +- drivers/s390/block/dcssblk.c | 2 +- drivers/s390/block/scm_blk.c | 2 +- drivers/scsi/sd.c | 2 +- drivers/scsi/sr.c | 2 +- include/linux/genhd.h | 5 +++-- 28 files changed, 43 insertions(+), 33 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 83c470364dfb..6ee4c56032f7 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -891,7 +891,7 @@ static int ubd_disk_register(int major, u64 size, int unit, disk->private_data = &ubd_devs[unit]; disk->queue = ubd_devs[unit].queue; - device_add_disk(parent, disk); + device_add_disk(parent, disk, NULL); *disk_out = disk; return 0; diff --git a/block/genhd.c b/block/genhd.c index 8cc719a37b32..ef0936184d69 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -567,7 +567,8 @@ static int exact_lock(dev_t devt, void *data) return 0; } -static void register_disk(struct device *parent, struct gendisk
*disk) +static void register_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups) { struct device *ddev = disk_to_dev(disk); struct block_device *bdev; @@ -582,6 +583,10 @@ static void register_disk(struct device *parent, struct gendisk *disk) /* delay uevents, until we scanned partition table */ dev_set_uevent_suppress(ddev, 1); + if (groups) { + WARN_ON(ddev->groups); + ddev->groups = groups; + } if (device_add(ddev)) return; if (!sysfs_deprecated) { @@ -647,6 +652,7 @@ exit: * __device_add_disk - add disk information to kernel list * @parent: parent device for the disk * @disk: per-device partitioning information + * @groups: Additional per-device sysfs groups * @register_queue: register the queue if set to true * * This function registers the partitioning information in @disk @@ -655,6 +661,7 @@ exit: * FIXME: error handling */ static void __device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups, bool register_queue) { dev_t devt; @@ -698,7 +705,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); } - register_disk(parent, disk); + register_disk(parent, disk, groups); if (register_queue) blk_register_queue(disk); @@ -712,15 +719,17 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, blk_integrity_add(disk); } -void device_add_disk(struct device *parent, struct gendisk *disk) +void device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups) + { - __device_add_disk(parent, disk, true); + __device_add_disk(parent, disk, groups, true); } EXPORT_SYMBOL(device_add_disk); void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk) { - __device_add_disk(parent, disk, false); + __device_add_disk(parent, disk, NULL, false); } EXPORT_SYMBOL(device_add_disk_no_queue_reg); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 48f622728ce6..1bc99e9dfaee 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4676,7 +4676,7 @@ static int __init do_floppy_init(void) /* to be cleaned up... 
*/ disks[drive]->private_data = (void *)(long)drive; disks[drive]->flags |= GENHD_FL_REMOVABLE; - device_add_disk(&floppy_device[drive].dev, disks[drive]); + device_add_disk(&floppy_device[drive].dev, disks[drive], NULL); } return 0; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index d0666f5ce003..1d7d48d8a205 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3861,7 +3861,7 @@ skip_create_disk: set_capacity(dd->disk, capacity); /* Enable the block device and add it to /dev */ - device_add_disk(&dd->pdev->dev, dd->disk); + device_add_disk(&dd->pdev->dev, dd->disk, NULL); dd->bdev = bdget_disk(dd->disk, 0); /* diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index afe1508d82c6..29a4419e8ba3 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -500,7 +500,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) gendisk->disk_name, priv->model, priv->raw_capacity >> 11, get_capacity(gendisk) >> 11); - device_add_disk(&dev->sbd.core, gendisk); + device_add_disk(&dev->sbd.core, gendisk, NULL); return 0; fail_cleanup_queue: diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 1e3d5de9d838..c0c50816a10b 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -769,7 +769,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) dev_info(&dev->core, "%s: Using %lu MiB of GPU memory\n", gendisk->disk_name, get_capacity(gendisk) >> 11); - device_add_disk(&dev->core, gendisk); + device_add_disk(&dev->core, gendisk, NULL); return 0; fail_cleanup_queue: diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 1a92f9e65937..3894aa0f350b 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -226,7 +226,7 @@ int rsxx_attach_dev(struct rsxx_cardinfo *card) set_capacity(card->gendisk, card->size8 >> 9); else set_capacity(card->gendisk, 0); - device_add_disk(CARD_TO_DEV(card), card->gendisk); + device_add_disk(CARD_TO_DEV(card), card->gendisk, NULL); card->bdev_attached = 1; } diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 87b9e7fbf062..a85c9a622c41 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -3104,7 +3104,7 @@ static int skd_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo) static int skd_bdev_attach(struct device *parent, struct skd_device *skdev) { dev_dbg(&skdev->pdev->dev, "add_disk\n"); - device_add_disk(parent, skdev->disk); + device_add_disk(parent, skdev->disk, NULL); return 0; } diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 5ca56bfae63c..09409edce384 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -850,7 +850,7 @@ static int probe_disk(struct vdc_port *port) port->vdisk_size, (port->vdisk_size >> (20 - 9)), port->vio.ver.major, port->vio.ver.minor); - device_add_disk(&port->vio.vdev->dev, g); + device_add_disk(&port->vio.vdev->dev, g, NULL); return 0; } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 23752dc99b00..fe80560000a1 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -780,7 +780,7 @@ static int virtblk_probe(struct virtio_device *vdev) virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); - device_add_disk(&vdev->dev, vblk->disk); + device_add_disk(&vdev->dev, vblk->disk, NULL); err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); if (err) goto out_del_disk; diff --git a/drivers/block/xen-blkfront.c 
b/drivers/block/xen-blkfront.c index a71d817e900d..e5e40272d233 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2420,7 +2420,7 @@ static void blkfront_connect(struct blkfront_info *info) for (i = 0; i < info->nr_rings; i++) kick_pending_request_queues(&info->rinfo[i]); - device_add_disk(&info->xbdev->dev, info->gd); + device_add_disk(&info->xbdev->dev, info->gd, NULL); info->is_ready = 1; return; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 44a7a255ef74..f9b59d41813f 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1784,7 +1784,7 @@ static int ide_cd_probe(ide_drive_t *drive) ide_cd_read_toc(drive); g->fops = &idecd_ops; g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - device_add_disk(&drive->gendev, g); + device_add_disk(&drive->gendev, g, NULL); return 0; out_free_disk: diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index e823394ed543..04e008e8f6f9 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -416,7 +416,7 @@ static int ide_gd_probe(ide_drive_t *drive) if (drive->dev_flags & IDE_DFLAG_REMOVABLE) g->flags = GENHD_FL_REMOVABLE; g->fops = &ide_gd_ops; - device_add_disk(&drive->gendev, g); + device_add_disk(&drive->gendev, g, NULL); return 0; out_free_disk: diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 716fc8ed31d3..8a02f11076f9 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -2146,7 +2146,7 @@ static int msb_init_disk(struct memstick_dev *card) set_disk_ro(msb->disk, 1); msb_start(card); - device_add_disk(&card->dev, msb->disk); + device_add_disk(&card->dev, msb->disk, NULL); dbg("Disk added"); return 0; diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 5ee932631fae..0cd30dcb6801 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -1236,7 +1236,7 @@ static int mspro_block_init_disk(struct memstick_dev *card) set_capacity(msb->disk, capacity); dev_dbg(&card->dev, "capacity set %ld\n", capacity); - device_add_disk(&card->dev, msb->disk); + device_add_disk(&card->dev, msb->disk, NULL); msb->active = 1; return 0; diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index a0b9102c4c6e..de8e1a8be690 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2698,7 +2698,7 @@ static int mmc_add_disk(struct mmc_blk_data *md) int ret; struct mmc_card *card = md->queue.card; - device_add_disk(md->parent, md->disk); + device_add_disk(md->parent, md->disk, NULL); md->force_ro.show = force_ro_show; md->force_ro.store = force_ro_store; sysfs_attr_init(&md->force_ro.attr); diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 29c0bfd74e8a..6a41dfa3c36b 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -447,7 +447,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) if (new->readonly) set_disk_ro(gd, 1); - device_add_disk(&new->mtd->dev, gd); + device_add_disk(&new->mtd->dev, gd, NULL); if (new->disk_attributes) { ret = sysfs_create_group(&disk_to_dev(gd)->kobj, diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 62e9cb167aad..db45c6bbb7bb 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -290,7 +290,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) } set_capacity(disk, available_disk_size >> SECTOR_SHIFT); - device_add_disk(dev, disk); + device_add_disk(dev, disk, NULL); revalidate_disk(disk); return 0; } diff 
--git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 0360c015f658..b123b0dcf274 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1556,7 +1556,7 @@ static int btt_blk_init(struct btt *btt) } } set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); - device_add_disk(&btt->nd_btt->dev, btt->btt_disk); + device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL); btt->nd_btt->size = btt->nlba * (u64)btt->sector_size; revalidate_disk(btt->btt_disk); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 6071e2942053..a75d10c23d80 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -474,7 +474,7 @@ static int pmem_attach_disk(struct device *dev, gendev = disk_to_dev(disk); gendev->groups = pmem_attribute_groups; - device_add_disk(dev, disk); + device_add_disk(dev, disk, NULL); if (devm_add_action_or_reset(dev, pmem_release_disk, pmem)) return -ENOMEM; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dd8ec1dd9219..0e824e8c8fd7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3099,7 +3099,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nvme_get_ctrl(ctrl); - device_add_disk(ctrl->device, ns->disk); + device_add_disk(ctrl->device, ns->disk, NULL); if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj, &nvme_ns_id_attr_group)) pr_warn("%s: failed to create sysfs group for identification\n", diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 5a9562881d4e..477af51d01e8 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -283,7 +283,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) return; if (!(head->disk->flags & GENHD_FL_UP)) { - device_add_disk(&head->subsys->dev, head->disk); + device_add_disk(&head->subsys->dev, head->disk, NULL); if (sysfs_create_group(&disk_to_dev(head->disk)->kobj, &nvme_ns_id_attr_group)) dev_warn(&head->subsys->dev, diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 7036a6c6f86f..5542d9eadfe0 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -76,7 +76,7 @@ int dasd_gendisk_alloc(struct dasd_block *block) gdp->queue = block->request_queue; block->gdp = gdp; set_capacity(block->gdp, 0); - device_add_disk(&base->cdev->dev, block->gdp); + device_add_disk(&base->cdev->dev, block->gdp, NULL); return 0; } diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 23e526cda5c1..4e8aedd50cb0 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -685,7 +685,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char } get_device(&dev_info->dev); - device_add_disk(&dev_info->dev, dev_info->gd); + device_add_disk(&dev_info->dev, dev_info->gd, NULL); switch (dev_info->segment_type) { case SEG_TYPE_SR: diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 98f66b7b6794..e01889394c84 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -500,7 +500,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) /* 512 byte sectors */ set_capacity(bdev->gendisk, scmdev->size >> 9); - device_add_disk(&scmdev->dev, bdev->gendisk); + device_add_disk(&scmdev->dev, bdev->gendisk, NULL); return 0; out_queue: diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 64514e8359e4..67ed5906b462 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3272,7 +3272,7 @@ static void sd_probe_async(void *data, 
async_cookie_t cookie) } blk_pm_runtime_init(sdp->request_queue, dev); - device_add_disk(dev, gd); + device_add_disk(dev, gd, NULL); if (sdkp->capacity) sd_dif_config_host(sdkp); diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 4f07b3410595..54dd70ae9731 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -759,7 +759,7 @@ static int sr_probe(struct device *dev) dev_set_drvdata(dev, cd); disk->flags |= GENHD_FL_REMOVABLE; - device_add_disk(&sdev->sdev_gendev, disk); + device_add_disk(&sdev->sdev_gendev, disk, NULL); sdev_printk(KERN_DEBUG, sdev, "Attached scsi CD-ROM %s\n", cd->cdi.name); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 57864422a2c8..0b820ff05839 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -399,10 +399,11 @@ static inline void free_part_info(struct hd_struct *part) extern void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part); /* block/genhd.c */ -extern void device_add_disk(struct device *parent, struct gendisk *disk); +extern void device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups); static inline void add_disk(struct gendisk *disk) { - device_add_disk(NULL, disk); + device_add_disk(NULL, disk, NULL); } extern void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk); static inline void add_disk_no_queue_reg(struct gendisk *disk) -- cgit From 33b14f67a4e1eabd219fd6543da8f15ed86b641c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 28 Sep 2018 08:17:20 +0200 Subject: nvme: register ns_id attributes as default sysfs groups We should register the ns_id attributes as a default sysfs attribute group; otherwise we have a race condition between the uevent and the attributes appearing in sysfs.
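The pattern that this and the following conversion patches move drivers to looks roughly like the sketch below. This is a hedged illustration only: the foo_* names and the serial string are invented, not taken from any driver in this series; only the device_add_disk() signature matches the one introduced above.

static ssize_t foo_serial_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	/* Invented attribute; a real driver would read device state here. */
	return sprintf(buf, "FOO-0000\n");
}
static DEVICE_ATTR_RO(foo_serial);

static struct attribute *foo_disk_attrs[] = {
	&dev_attr_foo_serial.attr,
	NULL,
};

static const struct attribute_group foo_disk_attr_group = {
	.attrs = foo_disk_attrs,
};

static const struct attribute_group *foo_disk_attr_groups[] = {
	&foo_disk_attr_group,
	NULL,
};

/*
 * In the probe path: the groups are created by device_add() before the
 * ADD uevent is emitted, so udev never sees a window without them and
 * no sysfs_create_group() call is needed afterwards.
 */
device_add_disk(parent, disk, foo_disk_attr_groups);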
Suggested-by: Bart van Assche Reviewed-by: Keith Busch Signed-off-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 21 ++++----- drivers/nvme/host/lightnvm.c | 105 ++++++++++++++++++------------------------ drivers/nvme/host/multipath.c | 15 ++---- drivers/nvme/host/nvme.h | 10 +--- 4 files changed, 59 insertions(+), 92 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0e824e8c8fd7..e0a9e1c5b30e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2734,6 +2734,14 @@ const struct attribute_group nvme_ns_id_attr_group = { .is_visible = nvme_ns_id_attrs_are_visible, }; +const struct attribute_group *nvme_ns_id_attr_groups[] = { + &nvme_ns_id_attr_group, +#ifdef CONFIG_NVM + &nvme_nvm_attr_group, +#endif + NULL, +}; + #define nvme_show_str_function(field) \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -3099,14 +3107,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nvme_get_ctrl(ctrl); - device_add_disk(ctrl->device, ns->disk, NULL); - if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvme_ns_id_attr_group)) - pr_warn("%s: failed to create sysfs group for identification\n", - ns->disk->disk_name); - if (ns->ndev && nvme_nvm_register_sysfs(ns)) - pr_warn("%s: failed to register lightnvm sysfs group for identification\n", - ns->disk->disk_name); + device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups); nvme_mpath_add_disk(ns, id); nvme_fault_inject_init(ns); @@ -3132,10 +3133,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) nvme_fault_inject_fini(ns); if (ns->disk && ns->disk->flags & GENHD_FL_UP) { - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvme_ns_id_attr_group); - if (ns->ndev) - nvme_nvm_unregister_sysfs(ns); del_gendisk(ns->disk); blk_cleanup_queue(ns->queue); if (blk_get_integrity(ns->disk)) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 6fe5923c95d4..1e4f97538838 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -1190,10 +1190,29 @@ static NVM_DEV_ATTR_12_RO(multiplane_modes); static NVM_DEV_ATTR_12_RO(media_capabilities); static NVM_DEV_ATTR_12_RO(max_phys_secs); -static struct attribute *nvm_dev_attrs_12[] = { +/* 2.0 values */ +static NVM_DEV_ATTR_20_RO(groups); +static NVM_DEV_ATTR_20_RO(punits); +static NVM_DEV_ATTR_20_RO(chunks); +static NVM_DEV_ATTR_20_RO(clba); +static NVM_DEV_ATTR_20_RO(ws_min); +static NVM_DEV_ATTR_20_RO(ws_opt); +static NVM_DEV_ATTR_20_RO(maxoc); +static NVM_DEV_ATTR_20_RO(maxocpu); +static NVM_DEV_ATTR_20_RO(mw_cunits); +static NVM_DEV_ATTR_20_RO(write_typ); +static NVM_DEV_ATTR_20_RO(write_max); +static NVM_DEV_ATTR_20_RO(reset_typ); +static NVM_DEV_ATTR_20_RO(reset_max); + +static struct attribute *nvm_dev_attrs[] = { + /* version agnostic attrs */ &dev_attr_version.attr, &dev_attr_capabilities.attr, + &dev_attr_read_typ.attr, + &dev_attr_read_max.attr, + /* 1.2 attrs */ &dev_attr_vendor_opcode.attr, &dev_attr_device_mode.attr, &dev_attr_media_manager.attr, @@ -1208,8 +1227,6 @@ static struct attribute *nvm_dev_attrs_12[] = { &dev_attr_page_size.attr, &dev_attr_hw_sector_size.attr, &dev_attr_oob_sector_size.attr, - &dev_attr_read_typ.attr, - &dev_attr_read_max.attr, &dev_attr_prog_typ.attr, &dev_attr_prog_max.attr, &dev_attr_erase_typ.attr, @@ -1218,33 +1235,7 @@ static struct attribute *nvm_dev_attrs_12[] = { &dev_attr_media_capabilities.attr, &dev_attr_max_phys_secs.attr, - NULL, -}; - -static const struct 
attribute_group nvm_dev_attr_group_12 = { - .name = "lightnvm", - .attrs = nvm_dev_attrs_12, -}; - -/* 2.0 values */ -static NVM_DEV_ATTR_20_RO(groups); -static NVM_DEV_ATTR_20_RO(punits); -static NVM_DEV_ATTR_20_RO(chunks); -static NVM_DEV_ATTR_20_RO(clba); -static NVM_DEV_ATTR_20_RO(ws_min); -static NVM_DEV_ATTR_20_RO(ws_opt); -static NVM_DEV_ATTR_20_RO(maxoc); -static NVM_DEV_ATTR_20_RO(maxocpu); -static NVM_DEV_ATTR_20_RO(mw_cunits); -static NVM_DEV_ATTR_20_RO(write_typ); -static NVM_DEV_ATTR_20_RO(write_max); -static NVM_DEV_ATTR_20_RO(reset_typ); -static NVM_DEV_ATTR_20_RO(reset_max); - -static struct attribute *nvm_dev_attrs_20[] = { - &dev_attr_version.attr, - &dev_attr_capabilities.attr, - + /* 2.0 attrs */ &dev_attr_groups.attr, &dev_attr_punits.attr, &dev_attr_chunks.attr, @@ -1255,8 +1246,6 @@ static struct attribute *nvm_dev_attrs_20[] = { &dev_attr_maxocpu.attr, &dev_attr_mw_cunits.attr, - &dev_attr_read_typ.attr, - &dev_attr_read_max.attr, &dev_attr_write_typ.attr, &dev_attr_write_max.attr, &dev_attr_reset_typ.attr, @@ -1265,44 +1254,38 @@ static struct attribute *nvm_dev_attrs_20[] = { NULL, }; -static const struct attribute_group nvm_dev_attr_group_20 = { - .name = "lightnvm", - .attrs = nvm_dev_attrs_20, -}; - -int nvme_nvm_register_sysfs(struct nvme_ns *ns) +static umode_t nvm_dev_attrs_visible(struct kobject *kobj, + struct attribute *attr, int index) { + struct device *dev = container_of(kobj, struct device, kobj); + struct gendisk *disk = dev_to_disk(dev); + struct nvme_ns *ns = disk->private_data; struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; + struct device_attribute *dev_attr = + container_of(attr, typeof(*dev_attr), attr); if (!ndev) - return -EINVAL; - - switch (geo->major_ver_id) { - case 1: - return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_12); - case 2: - return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_20); - } - - return -EINVAL; -} + return 0; -void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) -{ - struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; + if (dev_attr->show == nvm_dev_attr_show) + return attr->mode; - switch (geo->major_ver_id) { + switch (ndev->geo.major_ver_id) { case 1: - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_12); + if (dev_attr->show == nvm_dev_attr_show_12) + return attr->mode; break; case 2: - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_20); + if (dev_attr->show == nvm_dev_attr_show_20) + return attr->mode; break; } + + return 0; } + +const struct attribute_group nvme_nvm_attr_group = { + .name = "lightnvm", + .attrs = nvm_dev_attrs, + .is_visible = nvm_dev_attrs_visible, +}; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 477af51d01e8..8e846095c42d 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -282,13 +282,9 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) if (!head->disk) return; - if (!(head->disk->flags & GENHD_FL_UP)) { - device_add_disk(&head->subsys->dev, head->disk, NULL); - if (sysfs_create_group(&disk_to_dev(head->disk)->kobj, - &nvme_ns_id_attr_group)) - dev_warn(&head->subsys->dev, - "failed to create id group.\n"); - } + if (!(head->disk->flags & GENHD_FL_UP)) + device_add_disk(&head->subsys->dev, head->disk, + nvme_ns_id_attr_groups); kblockd_schedule_work(&ns->head->requeue_work); } @@ -494,11 +490,8 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) { if (!head->disk) return; - if 
(head->disk->flags & GENHD_FL_UP) { - sysfs_remove_group(&disk_to_dev(head->disk)->kobj, - &nvme_ns_id_attr_group); + if (head->disk->flags & GENHD_FL_UP) del_gendisk(head->disk); - } blk_set_queue_dying(head->disk->queue); /* make sure all pending bios are cleaned up */ kblockd_schedule_work(&head->requeue_work); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index bb4a2003c097..2503f8fd54da 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -459,7 +459,7 @@ int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, void *log, size_t size, u64 offset); -extern const struct attribute_group nvme_ns_id_attr_group; +extern const struct attribute_group *nvme_ns_id_attr_groups[]; extern const struct block_device_operations nvme_ns_head_ops; #ifdef CONFIG_NVME_MULTIPATH @@ -551,8 +551,7 @@ static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl) void nvme_nvm_update_nvm_info(struct nvme_ns *ns); int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); -int nvme_nvm_register_sysfs(struct nvme_ns *ns); -void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); +extern const struct attribute_group nvme_nvm_attr_group; int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg); #else static inline void nvme_nvm_update_nvm_info(struct nvme_ns *ns) {}; @@ -563,11 +562,6 @@ static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, } static inline void nvme_nvm_unregister(struct nvme_ns *ns) {}; -static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns) -{ - return 0; -} -static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {}; static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg) { -- cgit From 95cf7809bf9169fec4e4b7bb24b8069d8f354f96 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 28 Sep 2018 08:17:21 +0200 Subject: aoe: register default groups with device_add_disk() Register default sysfs groups during device_add_disk() to avoid a race condition with udev during startup. Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Acked-by: Ed L. 
Cachin Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- drivers/block/aoe/aoe.h | 1 - drivers/block/aoe/aoeblk.c | 21 +++++++-------------- drivers/block/aoe/aoedev.c | 1 - 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index c0ebda1283cc..015c68017a1c 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -201,7 +201,6 @@ int aoeblk_init(void); void aoeblk_exit(void); void aoeblk_gdalloc(void *); void aoedisk_rm_debugfs(struct aoedev *d); -void aoedisk_rm_sysfs(struct aoedev *d); int aoechr_init(void); void aoechr_exit(void); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 429ebb84b592..ff770e7d9e52 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -177,10 +177,15 @@ static struct attribute *aoe_attrs[] = { NULL, }; -static const struct attribute_group attr_group = { +static const struct attribute_group aoe_attr_group = { .attrs = aoe_attrs, }; +static const struct attribute_group *aoe_attr_groups[] = { + &aoe_attr_group, + NULL, +}; + static const struct file_operations aoe_debugfs_fops = { .open = aoe_debugfs_open, .read = seq_read, @@ -219,17 +224,6 @@ aoedisk_rm_debugfs(struct aoedev *d) d->debugfs = NULL; } -static int -aoedisk_add_sysfs(struct aoedev *d) -{ - return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group); -} -void -aoedisk_rm_sysfs(struct aoedev *d) -{ - sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group); -} - static int aoeblk_open(struct block_device *bdev, fmode_t mode) { @@ -417,8 +411,7 @@ aoeblk_gdalloc(void *vp) spin_unlock_irqrestore(&d->lock, flags); - add_disk(gd); - aoedisk_add_sysfs(d); + device_add_disk(NULL, gd, aoe_attr_groups); aoedisk_add_debugfs(d); spin_lock_irqsave(&d->lock, flags); diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 41060e9cedf2..f29a140cdbc1 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -275,7 +275,6 @@ freedev(struct aoedev *d) del_timer_sync(&d->timer); if (d->gd) { aoedisk_rm_debugfs(d); - aoedisk_rm_sysfs(d); del_gendisk(d->gd); put_disk(d->gd); blk_cleanup_queue(d->blkq); -- cgit From 98af4d4df889dcea3bc0ce6b8a04759658ba8826 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 28 Sep 2018 08:17:22 +0200 Subject: zram: register default groups with device_add_disk() Register default sysfs groups during device_add_disk() to avoid a race condition with udev during startup. Signed-off-by: Hannes Reinecke Cc: Minchan Kim Cc: Nitin Gupta Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- drivers/block/zram/zram_drv.c | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a1d6b5597c17..4879595200e1 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1636,6 +1636,11 @@ static const struct attribute_group zram_disk_attr_group = { .attrs = zram_disk_attrs, }; +static const struct attribute_group *zram_disk_attr_groups[] = { + &zram_disk_attr_group, + NULL, +}; + /* * Allocate and initialize new zram device. the function returns * '>= 0' device_id upon success, and negative value otherwise. 
@@ -1716,24 +1721,14 @@ static int zram_add(void) zram->disk->queue->backing_dev_info->capabilities |= (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO); - add_disk(zram->disk); - - ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, - &zram_disk_attr_group); - if (ret < 0) { - pr_err("Error creating sysfs group for device %d\n", - device_id); - goto out_free_disk; - } + device_add_disk(NULL, zram->disk, zram_disk_attr_groups); + strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); zram_debugfs_register(zram); pr_info("Added device: %s\n", zram->disk->disk_name); return device_id; -out_free_disk: - del_gendisk(zram->disk); - put_disk(zram->disk); out_free_queue: blk_cleanup_queue(queue); out_free_idr: @@ -1762,15 +1757,6 @@ static int zram_remove(struct zram *zram) mutex_unlock(&bdev->bd_mutex); zram_debugfs_unregister(zram); - /* - * Remove sysfs first, so no one will perform a disksize - * store while we destroy the devices. This also helps during - * hot_remove -- zram_reset_device() is the last holder of - * ->init_lock, no later/concurrent disksize_store() or any - * other sysfs handlers are possible. - */ - sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, - &zram_disk_attr_group); /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); -- cgit From e982c4d0a29b1d61fbe7716a8dcf8984936d6730 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 28 Sep 2018 08:17:23 +0200 Subject: virtio-blk: modernize sysfs attribute creation Use new-style DEVICE_ATTR_RO/DEVICE_ATTR_RW to create the sysfs attributes and register the disk with default sysfs attribute groups. Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Acked-by: Michael S. Tsirkin Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 68 ++++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index fe80560000a1..086c6bb12baa 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -351,8 +351,8 @@ static int minor_to_index(int minor) return minor >> PART_BITS; } -static ssize_t virtblk_serial_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t serial_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); int err; @@ -371,7 +371,7 @@ static ssize_t virtblk_serial_show(struct device *dev, return err; } -static DEVICE_ATTR(serial, 0444, virtblk_serial_show, NULL); +static DEVICE_ATTR_RO(serial); /* The queue's logical block size must be set before calling this */ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) @@ -545,8 +545,8 @@ static const char *const virtblk_cache_types[] = { }; static ssize_t -virtblk_cache_type_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +cache_type_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct gendisk *disk = dev_to_disk(dev); struct virtio_blk *vblk = disk->private_data; @@ -564,8 +564,7 @@ virtblk_cache_type_store(struct device *dev, struct device_attribute *attr, } static ssize_t -virtblk_cache_type_show(struct device *dev, struct device_attribute *attr, - char *buf) +cache_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); struct virtio_blk *vblk = disk->private_data; @@ -575,12 +574,38 @@ virtblk_cache_type_show(struct 
device *dev, struct device_attribute *attr, return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]); } -static const struct device_attribute dev_attr_cache_type_ro = - __ATTR(cache_type, 0444, - virtblk_cache_type_show, NULL); -static const struct device_attribute dev_attr_cache_type_rw = - __ATTR(cache_type, 0644, - virtblk_cache_type_show, virtblk_cache_type_store); +static DEVICE_ATTR_RW(cache_type); + +static struct attribute *virtblk_attrs[] = { + &dev_attr_serial.attr, + &dev_attr_cache_type.attr, + NULL, +}; + +static umode_t virtblk_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct gendisk *disk = dev_to_disk(dev); + struct virtio_blk *vblk = disk->private_data; + struct virtio_device *vdev = vblk->vdev; + + if (a == &dev_attr_cache_type.attr && + !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) + return S_IRUGO; + + return a->mode; +} + +static const struct attribute_group virtblk_attr_group = { + .attrs = virtblk_attrs, + .is_visible = virtblk_attrs_are_visible, +}; + +static const struct attribute_group *virtblk_attr_groups[] = { + &virtblk_attr_group, + NULL, +}; static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) @@ -780,24 +805,9 @@ static int virtblk_probe(struct virtio_device *vdev) virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); - device_add_disk(&vdev->dev, vblk->disk, NULL); - err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); - if (err) - goto out_del_disk; - - if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) - err = device_create_file(disk_to_dev(vblk->disk), - &dev_attr_cache_type_rw); - else - err = device_create_file(disk_to_dev(vblk->disk), - &dev_attr_cache_type_ro); - if (err) - goto out_del_disk; + device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); return 0; -out_del_disk: - del_gendisk(vblk->disk); - blk_cleanup_queue(vblk->disk->queue); out_free_tags: blk_mq_free_tag_set(&vblk->tag_set); out_put_disk: -- cgit From f0a0cdddb14c7a32e7ca68f45fbc44aa347e959d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 28 Sep 2018 09:22:50 -0700 Subject: kyber: fix integer overflow of latency targets on 32-bit NSEC_PER_SEC has type long, so 5 * NSEC_PER_SEC is calculated as a long. However, 5 seconds is 5,000,000,000 nanoseconds, which overflows a 32-bit long. Make sure all of the targets are calculated as 64-bit values. Fixes: 6e25cb01ea20 ("kyber: implement improved heuristics") Reported-by: Stephen Rothwell Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/kyber-iosched.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 2b62e362fb36..eccac01a10b6 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -79,9 +79,9 @@ static const unsigned int kyber_depth[] = { * Default latency targets for each scheduling domain. 
*/ static const u64 kyber_latency_targets[] = { - [KYBER_READ] = 2 * NSEC_PER_MSEC, - [KYBER_WRITE] = 10 * NSEC_PER_MSEC, - [KYBER_DISCARD] = 5 * NSEC_PER_SEC, + [KYBER_READ] = 2ULL * NSEC_PER_MSEC, + [KYBER_WRITE] = 10ULL * NSEC_PER_MSEC, + [KYBER_DISCARD] = 5ULL * NSEC_PER_SEC, }; /* -- cgit From ff4cee0898756f07f8b50b30ea00efc773de24e6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Sep 2018 13:45:39 -0400 Subject: blk-iolatency: use q->nr_requests directly We were using blk_queue_depth() assuming that it would return nr_requests, but we hit a case in production on drives that had to have NCQ turned off in order for them to not shit the bed, which resulted in a qd of 1, even though the nr_requests was much larger. iolatency really only cares about requests we are allowed to queue up, as any io that gets onto the request list is going to be serviced soonish, so we want to be throttling before the bio gets onto the request list. To make iolatency work as expected, simply use q->nr_requests instead of blk_queue_depth() as that is what we actually care about. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- block/blk-iolatency.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 27c14f8d2576..c2e38bc12f27 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -255,7 +255,7 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat, struct child_latency_info *lat_info, bool up) { - unsigned long qd = blk_queue_depth(blkiolat->rqos.q); + unsigned long qd = blkiolat->rqos.q->nr_requests; unsigned long scale = scale_amount(qd, up); unsigned long old = atomic_read(&lat_info->scale_cookie); unsigned long max_scale = qd << 1; @@ -295,7 +295,7 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat, */ static void scale_change(struct iolatency_grp *iolat, bool up) { - unsigned long qd = blk_queue_depth(iolat->blkiolat->rqos.q); + unsigned long qd = iolat->blkiolat->rqos.q->nr_requests; unsigned long scale = scale_amount(qd, up); unsigned long old = iolat->rq_depth.max_depth; @@ -857,7 +857,7 @@ static void iolatency_pd_init(struct blkg_policy_data *pd) rq_wait_init(&iolat->rq_wait); spin_lock_init(&iolat->child_lat.lock); - iolat->rq_depth.queue_depth = blk_queue_depth(blkg->q); + iolat->rq_depth.queue_depth = blkg->q->nr_requests; iolat->rq_depth.max_depth = UINT_MAX; iolat->rq_depth.default_depth = iolat->rq_depth.queue_depth; iolat->blkiolat = blkiolat; -- cgit From 9f60511a021e677c43b3e2d1a890e3d0d372e394 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Sep 2018 13:45:40 -0400 Subject: blk-iolatency: deal with nr_requests == 1 Hitting the case where blk_queue_depth() returned 1 uncovered the fact that iolatency doesn't actually handle this case properly; it simply doesn't scale down anybody. For this case we should go straight into applying the time delay, which we weren't doing. Since we already limit the floor at 1 request, this if statement is not needed, and this allows us to set our depth to 1, which lets us apply the delay if needed.
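Condensed, the down-scaling path this patch arrives at behaves like the following sketch. This is a hedged simplification of scale_change(), not the verbatim kernel code; the surrounding up-scaling branch and locking are elided.

static void scale_down(struct iolatency_grp *iolat)
{
	unsigned long old = iolat->rq_depth.max_depth;

	/*
	 * Halve the depth but clamp at 1 instead of bailing out, so a
	 * queue already at depth 1 still reaches the time-delay
	 * throttling path on nr_requests == 1 queues.
	 */
	old >>= 1;
	iolat->rq_depth.max_depth = max(old, 1UL);
}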
Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- block/blk-iolatency.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index c2e38bc12f27..8daea7a4fe49 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -312,7 +312,7 @@ static void scale_change(struct iolatency_grp *iolat, bool up) iolat->rq_depth.max_depth = old; wake_up_all(&iolat->rq_wait.wait); } - } else if (old > 1) { + } else { old >>= 1; iolat->rq_depth.max_depth = max(old, 1UL); } -- cgit From 22ed8a93adc7a9cbb2c0a0fc1d7f10068a1f84c1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Sep 2018 13:45:41 -0400 Subject: blk-iolatency: deal with small samples There is logic to keep cgroups that haven't done a lot of IO in the most recent scale window from being punished for over-active higher priority groups. However, for things like ssd's where the windows are pretty short, we'll end up with small numbers of samples, so 5% of samples will come out to 0 if there aren't enough. Make the floor 1 sample to keep us from improperly bailing out of scaling down. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- block/blk-iolatency.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 8daea7a4fe49..e7be77b0ce8b 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -366,7 +366,7 @@ static void check_scale_change(struct iolatency_grp *iolat) * scale down event. */ samples_thresh = lat_info->nr_samples * 5; - samples_thresh = div64_u64(samples_thresh, 100); + samples_thresh = max(1ULL, div64_u64(samples_thresh, 100)); if (iolat->nr_samples <= samples_thresh) return; } -- cgit From 1fa2840e56f9032e14a75fcf67edfe0f21102e4b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Sep 2018 13:45:42 -0400 Subject: blk-iolatency: use a percentile approach for ssd's We use an average latency approach for determining if we're missing our latency target. This works well for rotational storage where we have generally consistent latencies, but for ssd's and other low latency devices you have more of a spiky behavior, which means we often won't throttle misbehaving groups because a lot of IO completes at drastically faster times than our latency target. Instead, keep track of how many IO's miss our target and how many IO's are done in our time window. If the p(90) latency is above our target then we know we need to throttle. With this change in place we are seeing the same throttling behavior with our testcase on ssd's as we see with rotational drives. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- block/blk-iolatency.c | 179 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 145 insertions(+), 34 deletions(-) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index e7be77b0ce8b..fd246805b0be 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -115,9 +115,21 @@ struct child_latency_info { atomic_t scale_cookie; }; +struct percentile_stats { + u64 total; + u64 missed; +}; + +struct latency_stat { + union { + struct percentile_stats ps; + struct blk_rq_stat rqs; + }; +}; + struct iolatency_grp { struct blkg_policy_data pd; - struct blk_rq_stat __percpu *stats; + struct latency_stat __percpu *stats; struct blk_iolatency *blkiolat; struct rq_depth rq_depth; struct rq_wait rq_wait; @@ -132,6 +144,7 @@ struct iolatency_grp { /* Our current number of IO's for the last summation.
*/ u64 nr_samples; + bool ssd; struct child_latency_info child_lat; }; @@ -172,6 +185,80 @@ static inline struct blkcg_gq *lat_to_blkg(struct iolatency_grp *iolat) return pd_to_blkg(&iolat->pd); } +static inline void latency_stat_init(struct iolatency_grp *iolat, + struct latency_stat *stat) +{ + if (iolat->ssd) { + stat->ps.total = 0; + stat->ps.missed = 0; + } else + blk_rq_stat_init(&stat->rqs); +} + +static inline void latency_stat_sum(struct iolatency_grp *iolat, + struct latency_stat *sum, + struct latency_stat *stat) +{ + if (iolat->ssd) { + sum->ps.total += stat->ps.total; + sum->ps.missed += stat->ps.missed; + } else + blk_rq_stat_sum(&sum->rqs, &stat->rqs); +} + +static inline void latency_stat_record_time(struct iolatency_grp *iolat, + u64 req_time) +{ + struct latency_stat *stat = get_cpu_ptr(iolat->stats); + if (iolat->ssd) { + if (req_time >= iolat->min_lat_nsec) + stat->ps.missed++; + stat->ps.total++; + } else + blk_rq_stat_add(&stat->rqs, req_time); + put_cpu_ptr(stat); +} + +static inline bool latency_sum_ok(struct iolatency_grp *iolat, + struct latency_stat *stat) +{ + if (iolat->ssd) { + u64 thresh = div64_u64(stat->ps.total, 10); + thresh = max(thresh, 1ULL); + return stat->ps.missed < thresh; + } + return stat->rqs.mean <= iolat->min_lat_nsec; +} + +static inline u64 latency_stat_samples(struct iolatency_grp *iolat, + struct latency_stat *stat) +{ + if (iolat->ssd) + return stat->ps.total; + return stat->rqs.nr_samples; +} + +static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat, + struct latency_stat *stat) +{ + int exp_idx; + + if (iolat->ssd) + return; + + /* + * CALC_LOAD takes in a number stored in fixed point representation. + * Because we are using this for IO time in ns, the values stored + * are significantly larger than the FIXED_1 denominator (2048). + * Therefore, rounding errors in the calculation are negligible and + * can be ignored. 
+ */ + exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1, + div64_u64(iolat->cur_win_nsec, + BLKIOLATENCY_EXP_BUCKET_SIZE)); + CALC_LOAD(iolat->lat_avg, iolatency_exp_factors[exp_idx], stat->rqs.mean); +} + static inline bool iolatency_may_queue(struct iolatency_grp *iolat, wait_queue_entry_t *wait, bool first_block) @@ -418,7 +505,6 @@ static void iolatency_record_time(struct iolatency_grp *iolat, struct bio_issue *issue, u64 now, bool issue_as_root) { - struct blk_rq_stat *rq_stat; u64 start = bio_issue_time(issue); u64 req_time; @@ -444,9 +530,7 @@ static void iolatency_record_time(struct iolatency_grp *iolat, return; } - rq_stat = get_cpu_ptr(iolat->stats); - blk_rq_stat_add(rq_stat, req_time); - put_cpu_ptr(rq_stat); + latency_stat_record_time(iolat, req_time); } #define BLKIOLATENCY_MIN_ADJUST_TIME (500 * NSEC_PER_MSEC) @@ -457,17 +541,17 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now) struct blkcg_gq *blkg = lat_to_blkg(iolat); struct iolatency_grp *parent; struct child_latency_info *lat_info; - struct blk_rq_stat stat; + struct latency_stat stat; unsigned long flags; - int cpu, exp_idx; + int cpu; - blk_rq_stat_init(&stat); + latency_stat_init(iolat, &stat); preempt_disable(); for_each_online_cpu(cpu) { - struct blk_rq_stat *s; + struct latency_stat *s; s = per_cpu_ptr(iolat->stats, cpu); - blk_rq_stat_sum(&stat, s); - blk_rq_stat_init(s); + latency_stat_sum(iolat, &stat, s); + latency_stat_init(iolat, s); } preempt_enable(); @@ -477,41 +561,33 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now) lat_info = &parent->child_lat; - /* - * CALC_LOAD takes in a number stored in fixed point representation. - * Because we are using this for IO time in ns, the values stored - * are significantly larger than the FIXED_1 denominator (2048). - * Therefore, rounding errors in the calculation are negligible and - * can be ignored. - */ - exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1, - div64_u64(iolat->cur_win_nsec, - BLKIOLATENCY_EXP_BUCKET_SIZE)); - CALC_LOAD(iolat->lat_avg, iolatency_exp_factors[exp_idx], stat.mean); + iolat_update_total_lat_avg(iolat, &stat); /* Everything is ok and we don't need to adjust the scale. */ - if (stat.mean <= iolat->min_lat_nsec && + if (latency_sum_ok(iolat, &stat) && atomic_read(&lat_info->scale_cookie) == DEFAULT_SCALE_COOKIE) return; /* Somebody beat us to the punch, just bail. 
*/ spin_lock_irqsave(&lat_info->lock, flags); lat_info->nr_samples -= iolat->nr_samples; - lat_info->nr_samples += stat.nr_samples; - iolat->nr_samples = stat.nr_samples; + lat_info->nr_samples += latency_stat_samples(iolat, &stat); + iolat->nr_samples = latency_stat_samples(iolat, &stat); if ((lat_info->last_scale_event >= now || now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME) && lat_info->scale_lat <= iolat->min_lat_nsec) goto out; - if (stat.mean <= iolat->min_lat_nsec && - stat.nr_samples >= BLKIOLATENCY_MIN_GOOD_SAMPLES) { + if (latency_sum_ok(iolat, &stat)) { + if (latency_stat_samples(iolat, &stat) < + BLKIOLATENCY_MIN_GOOD_SAMPLES) + goto out; if (lat_info->scale_grp == iolat) { lat_info->last_scale_event = now; scale_cookie_change(iolat->blkiolat, lat_info, true); } - } else if (stat.mean > iolat->min_lat_nsec) { + } else { lat_info->last_scale_event = now; if (!lat_info->scale_grp || lat_info->scale_lat > iolat->min_lat_nsec) { @@ -808,13 +884,43 @@ static int iolatency_print_limit(struct seq_file *sf, void *v) return 0; } +static size_t iolatency_ssd_stat(struct iolatency_grp *iolat, char *buf, + size_t size) +{ + struct latency_stat stat; + int cpu; + + latency_stat_init(iolat, &stat); + preempt_disable(); + for_each_online_cpu(cpu) { + struct latency_stat *s; + s = per_cpu_ptr(iolat->stats, cpu); + latency_stat_sum(iolat, &stat, s); + } + preempt_enable(); + + if (iolat->rq_depth.max_depth == UINT_MAX) + return scnprintf(buf, size, " missed=%llu total=%llu depth=max", + (unsigned long long)stat.ps.missed, + (unsigned long long)stat.ps.total); + return scnprintf(buf, size, " missed=%llu total=%llu depth=%u", + (unsigned long long)stat.ps.missed, + (unsigned long long)stat.ps.total, + iolat->rq_depth.max_depth); +} + static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf, size_t size) { struct iolatency_grp *iolat = pd_to_lat(pd); - unsigned long long avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC); - unsigned long long cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC); + unsigned long long avg_lat; + unsigned long long cur_win; + + if (iolat->ssd) + return iolatency_ssd_stat(iolat, buf, size); + avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC); + cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC); if (iolat->rq_depth.max_depth == UINT_MAX) return scnprintf(buf, size, " depth=max avg_lat=%llu win=%llu", avg_lat, cur_win); @@ -831,8 +937,8 @@ static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, int node) iolat = kzalloc_node(sizeof(*iolat), gfp, node); if (!iolat) return NULL; - iolat->stats = __alloc_percpu_gfp(sizeof(struct blk_rq_stat), - __alignof__(struct blk_rq_stat), gfp); + iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat), + __alignof__(struct latency_stat), gfp); if (!iolat->stats) { kfree(iolat); return NULL; @@ -849,10 +955,15 @@ static void iolatency_pd_init(struct blkg_policy_data *pd) u64 now = ktime_to_ns(ktime_get()); int cpu; + if (blk_queue_nonrot(blkg->q)) + iolat->ssd = true; + else + iolat->ssd = false; + for_each_possible_cpu(cpu) { - struct blk_rq_stat *stat; + struct latency_stat *stat; stat = per_cpu_ptr(iolat->stats, cpu); - blk_rq_stat_init(stat); + latency_stat_init(iolat, stat); } rq_wait_init(&iolat->rq_wait); -- cgit From 451bb7c3319739997d2e5a2527aef62d1f9200ac Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Sep 2018 13:45:43 -0400 Subject: blk-iolatency: keep track of previous windows stats We apply a smoothing to the scale changes in order to keep sawtoothy behavior 
from occurring. However, our window for checking if we've missed our target can sometimes be lower than the smoothing interval (500ms), especially on faster drives like ssd's. To deal with this, keep track of a running tally of the previous intervals that we threw away because we had already done a scale event recently. This is needed for the ssd case as these low latency drives will have bursts of latency, and if it happens to be ok for the window that directly follows the opening of the scale window we could unthrottle even though we were missing our target in previous windows. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- block/blk-iolatency.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index fd246805b0be..35c48d7b8f78 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -130,6 +130,7 @@ struct latency_stat { struct iolatency_grp { struct blkg_policy_data pd; struct latency_stat __percpu *stats; + struct latency_stat cur_stat; struct blk_iolatency *blkiolat; struct rq_depth rq_depth; struct rq_wait rq_wait; @@ -570,24 +571,27 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now) /* Somebody beat us to the punch, just bail. */ spin_lock_irqsave(&lat_info->lock, flags); + + latency_stat_sum(iolat, &iolat->cur_stat, &stat); lat_info->nr_samples -= iolat->nr_samples; - lat_info->nr_samples += latency_stat_samples(iolat, &stat); - iolat->nr_samples = latency_stat_samples(iolat, &stat); + lat_info->nr_samples += latency_stat_samples(iolat, &iolat->cur_stat); + iolat->nr_samples = latency_stat_samples(iolat, &iolat->cur_stat); if ((lat_info->last_scale_event >= now || - now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME) && - lat_info->scale_lat <= iolat->min_lat_nsec) + now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME)) goto out; - if (latency_sum_ok(iolat, &stat)) { - if (latency_stat_samples(iolat, &stat) < + if (latency_sum_ok(iolat, &iolat->cur_stat) && + latency_sum_ok(iolat, &stat)) { + if (latency_stat_samples(iolat, &iolat->cur_stat) < BLKIOLATENCY_MIN_GOOD_SAMPLES) goto out; if (lat_info->scale_grp == iolat) { lat_info->last_scale_event = now; scale_cookie_change(iolat->blkiolat, lat_info, true); } - } else { + } else if (lat_info->scale_lat == 0 || + lat_info->scale_lat >= iolat->min_lat_nsec) { lat_info->last_scale_event = now; if (!lat_info->scale_grp || lat_info->scale_lat > iolat->min_lat_nsec) { @@ -596,6 +600,7 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now) } scale_cookie_change(iolat->blkiolat, lat_info, false); } + latency_stat_init(iolat, &iolat->cur_stat); out: spin_unlock_irqrestore(&lat_info->lock, flags); } @@ -966,6 +971,7 @@ static void iolatency_pd_init(struct blkg_policy_data *pd) latency_stat_init(iolat, stat); } + latency_stat_init(iolat, &iolat->cur_stat); rq_wait_init(&iolat->rq_wait); spin_lock_init(&iolat->child_lat.lock); iolat->rq_depth.queue_depth = blkg->q->nr_requests; -- cgit From 53b3a66163ea6cc7a86e0a3a04b1166d96665824 Mon Sep 17 00:00:00 2001 From: "Milan P. Gandhi" Date: Thu, 9 Aug 2018 21:49:24 +0530 Subject: nvme: fix typo in nvme_identify_ns_descs Signed-off-by: Milan P.
Gandhi Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e0a9e1c5b30e..f0778d3dd2f8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -971,7 +971,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, uuid_copy(&ids->uuid, data + pos + sizeof(*cur)); break; default: - /* Skip unnkown types */ + /* Skip unknown types */ len = cur->nidl; break; } -- cgit From d93cb3927ca5528bd21f2635a2300f9a1426ac46 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 10 Sep 2018 17:39:33 -0700 Subject: nvmet: remove redundant module prefix This patch removes the redundant module prefix used in the pr_err() when nvmet_get_smart_log_nsid() fails to find the namespace provided as part of the smart-log command. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 2008fa62a373..7a45f4477679 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -58,7 +58,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid); if (!ns) { - pr_err("nvmet : Could not find namespace id : %d\n", + pr_err("Could not find namespace id : %d\n", le32_to_cpu(req->cmd->get_log_page.nsid)); return NVME_SC_INVALID_NS; } -- cgit From d4e4230c8f5646a19a0e58765a30fb2bab5f1dcc Mon Sep 17 00:00:00 2001 From: "Milan P. Gandhi" Date: Fri, 10 Aug 2018 14:54:02 +0530 Subject: nvme-fc: fix minor typos Signed-off-by: Milan P.
Gandhi Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 4 ++-- drivers/nvme/target/fc.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 611e70cae754..fd700073d312 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1385,7 +1385,7 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) __nvme_fc_finish_ls_req(lsop); - /* fc-nvme iniator doesn't care about success or failure of cmd */ + /* fc-nvme initiator doesn't care about success or failure of cmd */ kfree(lsop); } @@ -3159,7 +3159,7 @@ nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) substring_t wwn = { name, &name[sizeof(name)-1] }; int nnoffset, pnoffset; - /* validate it string one of the 2 allowed formats */ + /* validate if string is one of the 2 allowed formats */ if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 29b4b236afd8..a3905673e17f 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2468,7 +2468,7 @@ nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) substring_t wwn = { name, &name[sizeof(name)-1] }; int nnoffset, pnoffset; - /* validate it string one of the 2 allowed formats */ + /* validate if string is one of the 2 allowed formats */ if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], -- cgit From ea96d6496ff59b2b26dc9e13dc8f57d77731eb37 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 9 Aug 2018 16:48:14 -0700 Subject: nvmet_fc: support target port removal with nvmet layer Currently, if a targetport has been connected to via the nvmet config (in other words, the add_port() transport routine was called and the nvmet port pointer stored for use in upcalls on new I/O), and the targetport is then removed (say the lldd driver decides to unload or fully reset its hardware) and then re-added (the lldd driver reloads or reinits its hardware), the port pointer has been lost, so there is no way to continue to post commands up to nvmet via the transport port. Correct this by allocating a small "port context" structure that the targetport links to. The context saves the targetport WWNs and the nvmet port pointer to use for it. Initial allocation occurs when the targetport is bound to via add_port(). The context is deallocated when remove_port() is called. If a targetport is removed while nvmet holds an active port context, the targetport is unlinked from the port context before removal. If a new targetport is registered, the port contexts without a binding are searched, and if the WWNs match (so it is the same as nvmet's port context), the port context is linked to the new targetport. Thus new I/O can be received on the new targetport and operation resumes with nvmet. This also resolves the nvmet configuration changing out from underneath the nvme-fc target port (for example, an nvmetcli clear).
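A rough sketch of the port-entry lifecycle described above (assumed flow; names follow the patch below):

/*
 * nvmet_fc_add_port(port):        pe = kzalloc(); bind pe <-> tgtport,
 *                                 saving the WWNs and the nvmet port pointer
 * LLDD unregisters the tgtport:   pe->tgtport = NULL; pe survives on the
 *                                 global list with the saved WWNs/port
 * LLDD re-registers the tgtport:  walk the unbound entries, match by WWNs,
 *                                 relink pe <-> new tgtport, I/O resumes
 * nvmet_fc_remove_port(port):     unbind pe and kfree(pe)
 */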
Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 128 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 120 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index a3905673e17f..ef286b72d958 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -110,11 +110,19 @@ struct nvmet_fc_tgtport { struct list_head ls_busylist; struct list_head assoc_list; struct ida assoc_cnt; - struct nvmet_port *port; + struct nvmet_fc_port_entry *pe; struct kref ref; u32 max_sg_cnt; }; +struct nvmet_fc_port_entry { + struct nvmet_fc_tgtport *tgtport; + struct nvmet_port *port; + u64 node_name; + u64 port_name; + struct list_head pe_list; +}; + struct nvmet_fc_defer_fcp_req { struct list_head req_list; struct nvmefc_tgt_fcp_req *fcp_req; @@ -132,7 +140,6 @@ struct nvmet_fc_tgt_queue { atomic_t zrspcnt; atomic_t rsn; spinlock_t qlock; - struct nvmet_port *port; struct nvmet_cq nvme_cq; struct nvmet_sq nvme_sq; struct nvmet_fc_tgt_assoc *assoc; @@ -221,6 +228,7 @@ static DEFINE_SPINLOCK(nvmet_fc_tgtlock); static LIST_HEAD(nvmet_fc_target_list); static DEFINE_IDA(nvmet_fc_tgtport_cnt); +static LIST_HEAD(nvmet_fc_portentry_list); static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work); @@ -645,7 +653,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, queue->qid = qid; queue->sqsize = sqsize; queue->assoc = assoc; - queue->port = assoc->tgtport->port; queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid); INIT_LIST_HEAD(&queue->fod_list); INIT_LIST_HEAD(&queue->avail_defer_list); @@ -957,6 +964,83 @@ nvmet_fc_find_target_assoc(struct nvmet_fc_tgtport *tgtport, return ret; } +static void +nvmet_fc_portentry_bind(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_port_entry *pe, + struct nvmet_port *port) +{ + lockdep_assert_held(&nvmet_fc_tgtlock); + + pe->tgtport = tgtport; + tgtport->pe = pe; + + pe->port = port; + port->priv = pe; + + pe->node_name = tgtport->fc_target_port.node_name; + pe->port_name = tgtport->fc_target_port.port_name; + INIT_LIST_HEAD(&pe->pe_list); + + list_add_tail(&pe->pe_list, &nvmet_fc_portentry_list); +} + +static void +nvmet_fc_portentry_unbind(struct nvmet_fc_port_entry *pe) +{ + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + if (pe->tgtport) + pe->tgtport->pe = NULL; + list_del(&pe->pe_list); + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); +} + +/* + * called when a targetport deregisters. Breaks the relationship + * with the nvmet port, but leaves the port_entry in place so that + * re-registration can resume operation. + */ +static void +nvmet_fc_portentry_unbind_tgt(struct nvmet_fc_tgtport *tgtport) +{ + struct nvmet_fc_port_entry *pe; + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + pe = tgtport->pe; + if (pe) + pe->tgtport = NULL; + tgtport->pe = NULL; + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); +} + +/* + * called when a new targetport is registered. Looks in the + * existing nvmet port_entries to see if the nvmet layer is + * configured for the targetport's wwn's. (the targetport existed, + * nvmet configured, the lldd unregistered the tgtport, and is now + * reregistering the same targetport). If so, set the nvmet port + * port entry on the targetport. 
+ */ +static void +nvmet_fc_portentry_rebind_tgt(struct nvmet_fc_tgtport *tgtport) +{ + struct nvmet_fc_port_entry *pe; + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + list_for_each_entry(pe, &nvmet_fc_portentry_list, pe_list) { + if (tgtport->fc_target_port.node_name == pe->node_name && + tgtport->fc_target_port.port_name == pe->port_name) { + WARN_ON(pe->tgtport); + tgtport->pe = pe; + pe->tgtport = tgtport; + break; + } + } + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); +} /** * nvme_fc_register_targetport - transport entry point called by an @@ -1034,6 +1118,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, goto out_free_newrec; } + nvmet_fc_portentry_rebind_tgt(newrec); + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); list_add_tail(&newrec->tgt_list, &nvmet_fc_target_list); spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); @@ -1171,6 +1257,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) { struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); + nvmet_fc_portentry_unbind_tgt(tgtport); + /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); @@ -2147,7 +2235,7 @@ nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req) /* - * Actual processing routine for received FC-NVME LS Requests from the LLD + * Actual processing routine for received FC-NVME I/O Requests from the LLD */ static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, @@ -2157,6 +2245,13 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, u32 xfrlen = be32_to_cpu(cmdiu->data_len); int ret; + /* + * if there is no nvmet mapping to the targetport there + * shouldn't be requests. just terminate them. + */ + if (!tgtport->pe) + goto transport_error; + /* * Fused commands are currently not supported in the linux * implementation. 
@@ -2184,7 +2279,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->req.cmd = &fod->cmdiubuf.sqe; fod->req.rsp = &fod->rspiubuf.cqe; - fod->req.port = fod->queue->port; + fod->req.port = tgtport->pe->port; /* clear any response payload */ memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); @@ -2508,6 +2603,7 @@ static int nvmet_fc_add_port(struct nvmet_port *port) { struct nvmet_fc_tgtport *tgtport; + struct nvmet_fc_port_entry *pe; struct nvmet_fc_traddr traddr = { 0L, 0L }; unsigned long flags; int ret; @@ -2524,24 +2620,40 @@ nvmet_fc_add_port(struct nvmet_port *port) if (ret) return ret; + pe = kzalloc(sizeof(*pe), GFP_KERNEL); + if (!pe) + return -ENOMEM; + ret = -ENXIO; spin_lock_irqsave(&nvmet_fc_tgtlock, flags); list_for_each_entry(tgtport, &nvmet_fc_target_list, tgt_list) { if ((tgtport->fc_target_port.node_name == traddr.nn) && (tgtport->fc_target_port.port_name == traddr.pn)) { - tgtport->port = port; - ret = 0; + /* a FC port can only be 1 nvmet port id */ + if (!tgtport->pe) { + nvmet_fc_portentry_bind(tgtport, pe, port); + ret = 0; + } else + ret = -EALREADY; break; } } spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); + + if (ret) + kfree(pe); + return ret; } static void nvmet_fc_remove_port(struct nvmet_port *port) { - /* nothing to do */ + struct nvmet_fc_port_entry *pe = port->priv; + + nvmet_fc_portentry_unbind(pe); + + kfree(pe); } static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { -- cgit From 97faec531460c949d7120672b8c77e2f41f8d6d7 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 13 Sep 2018 16:17:38 -0700 Subject: nvme_fc: add 'nvme_discovery' sysfs attribute to fc transport device The fc transport device should allow for a rediscovery, as userspace might have lost the events. An example is udev events not handled during system startup. This patch adds a sysfs entry 'nvme_discovery' on the fc class to have it replay all udev discovery events for all local port/remote port address pairs. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 104 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 95 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index fd700073d312..9d201b35397d 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -122,6 +122,7 @@ struct nvme_fc_rport { struct list_head endp_list; /* for lport->endp_list */ struct list_head ctrl_list; struct list_head ls_req_list; + struct list_head disc_list; struct device *dev; /* physical device for dma */ struct nvme_fc_lport *lport; spinlock_t lock; @@ -210,7 +211,6 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt); * These items are short-term. They will eventually be moved into * a generic FC class. See comments in module init.
*/ -static struct class *fc_class; static struct device *fc_udev_device; @@ -507,6 +507,7 @@ nvme_fc_free_rport(struct kref *ref) list_del(&rport->endp_list); spin_unlock_irqrestore(&nvme_fc_lock, flags); + WARN_ON(!list_empty(&rport->disc_list)); ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); kfree(rport); @@ -694,6 +695,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, INIT_LIST_HEAD(&newrec->endp_list); INIT_LIST_HEAD(&newrec->ctrl_list); INIT_LIST_HEAD(&newrec->ls_req_list); + INIT_LIST_HEAD(&newrec->disc_list); kref_init(&newrec->ref); atomic_set(&newrec->act_ctrl_cnt, 0); spin_lock_init(&newrec->lock); @@ -3254,6 +3256,90 @@ static struct nvmf_transport_ops nvme_fc_transport = { .create_ctrl = nvme_fc_create_ctrl, }; +/* Arbitrary successive failures max. With lots of subsystems could be high */ +#define DISCOVERY_MAX_FAIL 20 + +static ssize_t nvme_fc_nvme_discovery_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned long flags; + LIST_HEAD(local_disc_list); + struct nvme_fc_lport *lport; + struct nvme_fc_rport *rport; + int failcnt = 0; + + spin_lock_irqsave(&nvme_fc_lock, flags); +restart: + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { + list_for_each_entry(rport, &lport->endp_list, endp_list) { + if (!nvme_fc_lport_get(lport)) + continue; + if (!nvme_fc_rport_get(rport)) { + /* + * This is a temporary condition. Upon restart + * this rport will be gone from the list. + * + * Revert the lport put and retry. Anything + * added to the list already will be skipped (as + * they are no longer list_empty). Loops should + * resume at rports that were not yet seen. + */ + nvme_fc_lport_put(lport); + + if (failcnt++ < DISCOVERY_MAX_FAIL) + goto restart; + + pr_err("nvme_discovery: too many reference " + "failures\n"); + goto process_local_list; + } + if (list_empty(&rport->disc_list)) + list_add_tail(&rport->disc_list, + &local_disc_list); + } + } + +process_local_list: + while (!list_empty(&local_disc_list)) { + rport = list_first_entry(&local_disc_list, + struct nvme_fc_rport, disc_list); + list_del_init(&rport->disc_list); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + lport = rport->lport; + /* signal discovery. Won't hurt if it repeats */ + nvme_fc_signal_discovery_scan(lport, rport); + nvme_fc_rport_put(rport); + nvme_fc_lport_put(lport); + + spin_lock_irqsave(&nvme_fc_lock, flags); + } + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return count; +} +static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store); + +static struct attribute *nvme_fc_attrs[] = { + &dev_attr_nvme_discovery.attr, + NULL +}; + +static struct attribute_group nvme_fc_attr_group = { + .attrs = nvme_fc_attrs, +}; + +static const struct attribute_group *nvme_fc_attr_groups[] = { + &nvme_fc_attr_group, + NULL +}; + +static struct class fc_class = { + .name = "fc", + .dev_groups = nvme_fc_attr_groups, + .owner = THIS_MODULE, +}; + static int __init nvme_fc_init_module(void) { int ret; @@ -3272,16 +3358,16 @@ static int __init nvme_fc_init_module(void) * put in place, this code will move to a more generic * location for the class. 
 */ - fc_class = class_create(THIS_MODULE, "fc"); - if (IS_ERR(fc_class)) { + ret = class_register(&fc_class); + if (ret) { pr_err("couldn't register class fc\n"); - return PTR_ERR(fc_class); + return ret; } /* * Create a device for the FC-centric udev events */ - fc_udev_device = device_create(fc_class, NULL, MKDEV(0, 0), NULL, + fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL, "fc_udev_device"); if (IS_ERR(fc_udev_device)) { pr_err("couldn't create fc_udev device!\n"); @@ -3296,9 +3382,9 @@ static int __init nvme_fc_init_module(void) return 0; out_destroy_device: - device_destroy(fc_class, MKDEV(0, 0)); + device_destroy(&fc_class, MKDEV(0, 0)); out_destroy_class: - class_destroy(fc_class); + class_unregister(&fc_class); return ret; } @@ -3313,8 +3399,8 @@ static void __exit nvme_fc_exit_module(void) ida_destroy(&nvme_fc_local_port_cnt); ida_destroy(&nvme_fc_ctrl_cnt); - device_destroy(fc_class, MKDEV(0, 0)); - class_destroy(fc_class); + device_destroy(&fc_class, MKDEV(0, 0)); + class_unregister(&fc_class); } module_init(nvme_fc_init_module); -- cgit From 09bd1ff4b15143bc0e6dd2adf39f59f6ab6e2621 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 17 Sep 2018 10:47:06 -0700 Subject: nvme-core: add async event trace helper This patch adds a new trace event for nvme async event notification. We print the async event in decoded format when we recognize the event; otherwise we just dump the result. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 11 +++++++++-- drivers/nvme/host/trace.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f0778d3dd2f8..089d744e5065 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3408,16 +3408,21 @@ static void nvme_fw_act_work(struct work_struct *work) static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) { - switch ((result & 0xff00) >> 8) { + u32 aer_notice_type = (result & 0xff00) >> 8; + + switch (aer_notice_type) { case NVME_AER_NOTICE_NS_CHANGED: + trace_nvme_async_event(ctrl, aer_notice_type); set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events); nvme_queue_scan(ctrl); break; case NVME_AER_NOTICE_FW_ACT_STARTING: + trace_nvme_async_event(ctrl, aer_notice_type); queue_work(nvme_wq, &ctrl->fw_act_work); break; #ifdef CONFIG_NVME_MULTIPATH case NVME_AER_NOTICE_ANA: + trace_nvme_async_event(ctrl, aer_notice_type); if (!ctrl->ana_log_buf) break; queue_work(nvme_wq, &ctrl->ana_work); @@ -3432,11 +3437,12 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, volatile union nvme_result *res) { u32 result = le32_to_cpu(res->u32); + u32 aer_type = result & 0x07; if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS) return; - switch (result & 0x7) { + switch (aer_type) { case NVME_AER_NOTICE: nvme_handle_aen_notice(ctrl, result); break; @@ -3444,6 +3450,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, case NVME_AER_SMART: case NVME_AER_CSS: case NVME_AER_VS: + trace_nvme_async_event(ctrl, aer_type); ctrl->aen_result = result; break; default: diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index a490790d6691..196d5bd56718 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -156,6 +156,34 @@ TRACE_EVENT(nvme_complete_rq, ); +#define aer_name(aer) { aer, #aer } + +TRACE_EVENT(nvme_async_event, + TP_PROTO(struct nvme_ctrl *ctrl, u32 result), + TP_ARGS(ctrl, result), +
TP_STRUCT__entry( + __field(int, ctrl_id) + __field(u32, result) + ), + TP_fast_assign( + __entry->ctrl_id = ctrl->instance; + __entry->result = result; + ), + TP_printk("nvme%d: NVME_AEN=%#08x [%s]", + __entry->ctrl_id, __entry->result, + __print_symbolic(__entry->result, + aer_name(NVME_AER_NOTICE_NS_CHANGED), + aer_name(NVME_AER_NOTICE_ANA), + aer_name(NVME_AER_NOTICE_FW_ACT_STARTING), + aer_name(NVME_AER_ERROR), + aer_name(NVME_AER_SMART), + aer_name(NVME_AER_CSS), + aer_name(NVME_AER_VS)) + ) +); + +#undef aer_name + #endif /* _TRACE_NVME_H */ #undef TRACE_INCLUDE_PATH -- cgit From 783f4a4408e1251d17f333ad56abac24dde988b9 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 27 Sep 2018 16:58:54 -0700 Subject: nvme: call nvme_complete_rq when nvmf_check_ready fails for mpath I/O When an I/O is rejected by nvmf_check_ready() due to validation of the controller state, nvmf_fail_nonready_command() will normally return BLK_STS_RESOURCE to requeue and retry. However, if the controller is dying or the I/O is marked for NVMe multipath, the I/O is failed so that the controller can terminate or so that the I/O can be issued on a different path. Unfortunately, as this reject point is before the transport has accepted the command, blk-mq ends up completing the I/O and never calls nvme_complete_rq(), which is where multipath may preserve or re-route the I/O. The end result is that the device user sees an EIO error. Example: single path connectivity, controller is under load, and a reset is induced. An I/O is received: a) while the reset state has been set but the queues have yet to be stopped; or b) after queues are started (at end of reset) but before the reconnect has completed. The I/O finishes with an EIO status. This patch makes the following changes: - Adds the HOST_PATH_ERROR pathing status from TP4028 - Modifies the reject point such that it appears to queue successfully, but actually completes the I/O with the new pathing status and calls nvme_complete_rq(). - nvme_complete_rq() recognizes the new status, avoids resetting the controller (likely was already done in order to get this new status), and calls the multipather to clear the current path that errored. This allows the next command (retry or new command) to select a new path if there is one.
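In condensed form, the new reject path looks like this (a sketch of the change in the diff that follows, not a complete function):

	/* mpath or dying controller: report "queued OK" to blk-mq, but
	 * immediately complete the request through the normal completion
	 * path, so nvme_complete_rq() can fail over instead of blk-mq
	 * surfacing an EIO to the device user */
	nvme_req(rq)->status = NVME_SC_HOST_PATH_ERROR;
	blk_mq_start_request(rq);
	nvme_complete_rq(rq);
	return BLK_STS_OK;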
Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 7 +++++-- drivers/nvme/host/multipath.c | 7 +++++++ include/linux/nvme.h | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 206d63cb1afc..bcd09d3a44da 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -552,8 +552,11 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl, ctrl->state != NVME_CTRL_DEAD && !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) return BLK_STS_RESOURCE; - nvme_req(rq)->status = NVME_SC_ABORT_REQ; - return BLK_STS_IOERR; + + nvme_req(rq)->status = NVME_SC_HOST_PATH_ERROR; + blk_mq_start_request(rq); + nvme_complete_rq(rq); + return BLK_STS_OK; } EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index bfbc6d5b1d93..ac16093a7928 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -77,6 +77,13 @@ void nvme_failover_req(struct request *req) queue_work(nvme_wq, &ns->ctrl->ana_work); } break; + case NVME_SC_HOST_PATH_ERROR: + /* + * Temporary transport disruption in talking to the controller. + * Try to send on a new path. + */ + nvme_mpath_clear_current_path(ns); + break; default: /* * Reset the controller for any non-ANA error as we don't know diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 68e91ef5494c..818dbe9331be 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1241,6 +1241,7 @@ enum { NVME_SC_ANA_PERSISTENT_LOSS = 0x301, NVME_SC_ANA_INACCESSIBLE = 0x302, NVME_SC_ANA_TRANSITION = 0x303, + NVME_SC_HOST_PATH_ERROR = 0x370, NVME_SC_DNR = 0x4000, }; -- cgit From 73383adfad245bb84e6d6ef7830f01048fcfc217 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 28 Sep 2018 15:40:43 -0700 Subject: nvmet: don't split large I/Os unconditionally If we know that the I/O size exceeds our inline bio vec, there is no point in using it and then splitting the rest; allocate a large enough bio to begin with. We could in theory reuse the inline bio and only allocate the bio_vec, but it's really not worth optimizing for.
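Concretely, assuming 4 KiB pages (so the inline limit works out to 8 * 4 KiB = 32 KiB): a 128 KiB write arriving as a 32-entry scatterlist would previously start in the 8-vec inline bio and rely on splitting the rest into follow-up bios; after this change the size check skips the inline path and a single properly sized bio is allocated up front:

	bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));	/* 32 vecs here */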
Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-bdev.c | 9 +++++++-- drivers/nvme/target/nvmet.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 7bc9f6240432..f93fb5711142 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -58,7 +58,7 @@ static void nvmet_bio_done(struct bio *bio) static void nvmet_bdev_execute_rw(struct nvmet_req *req) { int sg_cnt = req->sg_cnt; - struct bio *bio = &req->b.inline_bio; + struct bio *bio; struct scatterlist *sg; sector_t sector; blk_qc_t cookie; @@ -81,7 +81,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sector = le64_to_cpu(req->cmd->rw.slba); sector <<= (req->ns->blksize_shift - 9); - bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + if (req->data_len <= NVMET_MAX_INLINE_DATA_LEN) { + bio = &req->b.inline_bio; + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + } else { + bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); + } bio_set_dev(bio, req->ns->bdev); bio->bi_iter.bi_sector = sector; bio->bi_private = req; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index ec9af4ee03b6..08f7b57a1203 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -264,6 +264,7 @@ struct nvmet_fabrics_ops { }; #define NVMET_MAX_INLINE_BIOVEC 8 +#define NVMET_MAX_INLINE_DATA_LEN NVMET_MAX_INLINE_BIOVEC * PAGE_SIZE struct nvmet_req { struct nvme_command *cmd; -- cgit From f333444708f82c4a4d3ccac004da0bfd9cfdfa42 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Sep 2018 09:51:29 +0200 Subject: nvme: take node locality into account when selecting a path Make current_path an array with an entry for every possible node, and cache the best path on a per-node basis. Take the node distance into account when selecting it. This is primarily useful for dual-ported PCIe devices which are connected to PCIe root ports on different sockets. 
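For example (hypothetical two-socket box; node_distance() values follow the common NUMA convention of 10 for local and 20 for remote):

/*
 * Dual-ported NVMe device, one controller per socket, both paths
 * ANA-optimized:
 *   ctrl A lives on node 0, ctrl B on node 1
 * For I/O submitted on node 0:
 *   node_distance(0, dev_to_node(ctrl A->dev)) == 10
 *   node_distance(0, dev_to_node(ctrl B->dev)) == 20
 * so the node-0 lookup caches ctrl A in head->current_path[0], while
 * node 1 independently caches ctrl B in head->current_path[1].
 */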
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Hannes Reinecke --- drivers/nvme/host/core.c | 7 +++++- drivers/nvme/host/multipath.c | 50 +++++++++++++++++++++++++++++++++---------- drivers/nvme/host/nvme.h | 25 ++++++++-------------- 3 files changed, 54 insertions(+), 28 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 089d744e5065..2db33a752e2b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2908,9 +2908,14 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_id_ns *id) { struct nvme_ns_head *head; + size_t size = sizeof(*head); int ret = -ENOMEM; - head = kzalloc(sizeof(*head), GFP_KERNEL); +#ifdef CONFIG_NVME_MULTIPATH + size += num_possible_nodes() * sizeof(struct nvme_ns *); +#endif + + head = kzalloc(size, GFP_KERNEL); if (!head) goto out; ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index ac16093a7928..52987052b7fc 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -117,29 +117,55 @@ static const char *nvme_ana_state_names[] = { [NVME_ANA_CHANGE] = "change", }; -static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head) +void nvme_mpath_clear_current_path(struct nvme_ns *ns) { - struct nvme_ns *ns, *fallback = NULL; + struct nvme_ns_head *head = ns->head; + int node; + + if (!head) + return; + + for_each_node(node) { + if (ns == rcu_access_pointer(head->current_path[node])) + rcu_assign_pointer(head->current_path[node], NULL); + } +} + +static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node) +{ + int found_distance = INT_MAX, fallback_distance = INT_MAX, distance; + struct nvme_ns *found = NULL, *fallback = NULL, *ns; list_for_each_entry_rcu(ns, &head->list, siblings) { if (ns->ctrl->state != NVME_CTRL_LIVE || test_bit(NVME_NS_ANA_PENDING, &ns->flags)) continue; + + distance = node_distance(node, dev_to_node(ns->ctrl->dev)); + switch (ns->ana_state) { case NVME_ANA_OPTIMIZED: - rcu_assign_pointer(head->current_path, ns); - return ns; + if (distance < found_distance) { + found_distance = distance; + found = ns; + } + break; case NVME_ANA_NONOPTIMIZED: - fallback = ns; + if (distance < fallback_distance) { + fallback_distance = distance; + fallback = ns; + } break; default: break; } } - if (fallback) - rcu_assign_pointer(head->current_path, fallback); - return fallback; + if (!found) + found = fallback; + if (found) + rcu_assign_pointer(head->current_path[node], found); + return found; } static inline bool nvme_path_is_optimized(struct nvme_ns *ns) @@ -150,10 +176,12 @@ static inline bool nvme_path_is_optimized(struct nvme_ns *ns) inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head) { - struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu); + int node = numa_node_id(); + struct nvme_ns *ns; + ns = srcu_dereference(head->current_path[node], &head->srcu); if (unlikely(!ns || !nvme_path_is_optimized(ns))) - ns = __nvme_find_path(head); + ns = __nvme_find_path(head, node); return ns; } @@ -200,7 +228,7 @@ static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc) int srcu_idx; srcu_idx = srcu_read_lock(&head->srcu); - ns = srcu_dereference(head->current_path, &head->srcu); + ns = srcu_dereference(head->current_path[numa_node_id()], &head->srcu); if (likely(ns && nvme_path_is_optimized(ns))) found = ns->queue->poll_fn(q, qc); srcu_read_unlock(&head->srcu, srcu_idx); diff 
--git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2503f8fd54da..9fefba039d1e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -277,14 +277,6 @@ struct nvme_ns_ids { * only ever has a single entry for private namespaces. */ struct nvme_ns_head { -#ifdef CONFIG_NVME_MULTIPATH - struct gendisk *disk; - struct nvme_ns __rcu *current_path; - struct bio_list requeue_list; - spinlock_t requeue_lock; - struct work_struct requeue_work; - struct mutex lock; -#endif struct list_head list; struct srcu_struct srcu; struct nvme_subsystem *subsys; @@ -293,6 +285,14 @@ struct nvme_ns_head { struct list_head entry; struct kref ref; int instance; +#ifdef CONFIG_NVME_MULTIPATH + struct gendisk *disk; + struct bio_list requeue_list; + spinlock_t requeue_lock; + struct work_struct requeue_work; + struct mutex lock; + struct nvme_ns __rcu *current_path[]; +#endif }; #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS @@ -474,14 +474,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); - -static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) -{ - struct nvme_ns_head *head = ns->head; - - if (head && ns == rcu_access_pointer(head->current_path)) - rcu_assign_pointer(head->current_path, NULL); -} +void nvme_mpath_clear_current_path(struct nvme_ns *ns); struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) -- cgit From fb6360b1ef33b7799e6a81e1075a47e3b8ae01fd Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 2 Oct 2018 12:24:40 +0200 Subject: pktcdvd: fix fall-through annotation Replace "fallthru" with a proper "fall through" annotation. This fix is part of the ongoing effort to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 6f1d25c1eb64..9381f4e3b221 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2645,7 +2645,7 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, */ if (pd->refcnt == 1) pkt_lock_door(pd, 0); - /* fallthru */ + /* fall through */ /* * forward selected CDROM ioctls to CD-ROM, for UDF */ -- cgit From e4f3aa2e1e67bb48dfbaaf1cad59013d5a5bc276 Mon Sep 17 00:00:00 2001 From: Young_X Date: Wed, 3 Oct 2018 12:54:29 +0000 Subject: cdrom: fix improper type cast, which can lead to information leak. There is another cast from unsigned long to int that lets specially crafted input bypass a bounds check. The value is then used as an index into the slot array in cdrom_slot_status(). This issue is similar to CVE-2018-16658 and CVE-2018-10940.
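To see why the cast was exploitable, consider this illustration (standalone sketch with made-up values, not the driver code; assume cdi->capacity == 4):

static int check_slot(unsigned long arg, int capacity)
{
	if ((int)arg >= capacity)	/* (int)0x100000000UL == 0: check passes! */
		return -EINVAL;
	return 0;	/* caller goes on to index a capacity-entry array */
}

/* check_slot(0x100000000UL, 4) wrongly returns 0 with the cast in place;
 * dropping the cast makes the comparison unsigned, so the crafted value
 * is rejected. */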
Signed-off-by: Young_X Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index a5d5a96479bf..10802d1fc554 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2445,7 +2445,7 @@ static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi, return -ENOSYS; if (arg != CDSL_CURRENT && arg != CDSL_NONE) { - if ((int)arg >= cdi->capacity) + if (arg >= cdi->capacity) return -EINVAL; } -- cgit From 9305455acfa65a2749cd2329d027bf944b26e14c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 3 Oct 2018 13:56:25 -0700 Subject: block: Finish renaming REQ_DISCARD into REQ_OP_DISCARD Some time ago REQ_DISCARD was renamed into REQ_OP_DISCARD. Some comments and documentation files were not updated however. Update these comments and documentation files. See also commit 4e1b2d52a80d ("block, fs, drivers: remove REQ_OP compat defs and related code"). Signed-off-by: Bart Van Assche Cc: Mike Christie Cc: Martin K. Petersen Cc: Philipp Reisner Cc: Lars Ellenberg Signed-off-by: Jens Axboe --- Documentation/blockdev/zram.txt | 2 +- Documentation/device-mapper/log-writes.txt | 2 +- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_protocol.h | 4 ++-- drivers/block/drbd/drbd_req.c | 2 +- drivers/block/drbd/drbd_worker.c | 2 +- drivers/target/target_core_spc.c | 6 +++--- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 875b2b56b87f..3c1b5ab54bc0 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -190,7 +190,7 @@ whitespace: notify_free Depending on device usage scenario it may account a) the number of pages freed because of swap slot free notifications or b) the number of pages freed because of - REQ_DISCARD requests sent by bio. The former ones are + REQ_OP_DISCARD requests sent by bio. The former ones are sent to a swap block device when a swap slot is freed, which implies that this disk is being used as a swap disk. The latter ones are sent by filesystem mounted with diff --git a/Documentation/device-mapper/log-writes.txt b/Documentation/device-mapper/log-writes.txt index f4ebcbaf50f3..b638d124be6a 100644 --- a/Documentation/device-mapper/log-writes.txt +++ b/Documentation/device-mapper/log-writes.txt @@ -38,7 +38,7 @@ inconsistent file system. Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as they complete as those requests will obviously bypass the device cache. -Any REQ_DISCARD requests are treated like WRITE requests. Otherwise we would +Any REQ_OP_DISCARD requests are treated like WRITE requests. Otherwise we would have all the DISCARD requests, and then the WRITE requests and then the FLUSH request. Consider the following example: diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8a4c1328e6f9..1e47db57b9d2 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -429,7 +429,7 @@ enum { __EE_CALL_AL_COMPLETE_IO, __EE_MAY_SET_IN_SYNC, - /* is this a TRIM aka REQ_DISCARD? */ + /* is this a TRIM aka REQ_OP_DISCARD? 
*/ __EE_IS_TRIM, /* In case a barrier failed, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 15a9ffce9012..55fd104f1ed4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1673,7 +1673,7 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection, return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0; } -/* Used to send write or TRIM aka REQ_DISCARD requests +/* Used to send write or TRIM aka REQ_OP_DISCARD requests * R_PRIMARY -> Peer (P_DATA, P_TRIM) */ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req) diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h index c3081f93051c..48dabbb21e11 100644 --- a/drivers/block/drbd/drbd_protocol.h +++ b/drivers/block/drbd/drbd_protocol.h @@ -57,7 +57,7 @@ enum drbd_packet { P_PROTOCOL_UPDATE = 0x2d, /* data sock: is used in established connections */ /* 0x2e to 0x30 reserved, used in drbd 9 */ - /* REQ_DISCARD. We used "discard" in different contexts before, + /* REQ_OP_DISCARD. We used "discard" in different contexts before, * which is why I chose TRIM here, to disambiguate. */ P_TRIM = 0x31, @@ -126,7 +126,7 @@ struct p_header100 { #define DP_UNPLUG 8 /* not used anymore */ #define DP_FUA 16 /* equals REQ_FUA */ #define DP_FLUSH 32 /* equals REQ_PREFLUSH */ -#define DP_DISCARD 64 /* equals REQ_DISCARD */ +#define DP_DISCARD 64 /* equals REQ_OP_DISCARD */ #define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */ #define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */ #define DP_WSAME 512 /* equiv. REQ_WRITE_SAME */ diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 19cac36e9737..1c4da17e902e 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -650,7 +650,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case DISCARD_COMPLETED_NOTSUPP: case DISCARD_COMPLETED_WITH_ERROR: /* I'd rather not detach from local disk just because it - * failed a REQ_DISCARD. */ + * failed a REQ_OP_DISCARD. */ mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); break; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index c7ef48efe871..99255d0c9e2f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -152,7 +152,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee); - /* FIXME do we want to detach for failed REQ_DISCARD? + /* FIXME do we want to detach for failed REQ_OP_DISCARD? * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */ if (peer_req->flags & EE_WAS_ERROR) __drbd_chk_io_error(device, DRBD_WRITE_ERROR); diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index cb0461a10808..f459118bc11b 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -636,9 +636,9 @@ spc_emulate_evpd_b2(struct se_cmd *cmd, unsigned char *buf) /* * The unmap_zeroes_data set means that the underlying device supports - * REQ_DISCARD and has the discard_zeroes_data bit set. This satisfies - * the SBC requirements for LBPRZ, meaning that a subsequent read - * will return zeroes after an UNMAP or WRITE SAME (16) to an LBA + * REQ_OP_DISCARD and has the discard_zeroes_data bit set. 
This + * satisfies the SBC requirements for LBPRZ, meaning that a subsequent + * read will return zeroes after an UNMAP or WRITE SAME (16) to an LBA * See sbc4r36 6.6.4. */ if (((dev->dev_attrib.emulate_tpu != 0) || -- cgit From 2acf70ade79d26b97611a8df52eb22aa33814cd4 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 27 Sep 2018 11:00:31 -0700 Subject: nvmet-rdma: use a private workqueue for delete Queue deletion is done asynchronous when the last reference on the queue is dropped. Thus, in order to make sure we don't over allocate under a connect/disconnect storm, we let queue deletion complete before making forward progress. However, given that we flush the system_wq from rdma_cm context which runs from a workqueue context, we can have a circular locking complaint [1]. Fix that by using a private workqueue for queue deletion. [1]: ====================================================== WARNING: possible circular locking dependency detected 4.19.0-rc4-dbg+ #3 Not tainted ------------------------------------------------------ kworker/5:0/39 is trying to acquire lock: 00000000a10b6db9 (&id_priv->handler_mutex){+.+.}, at: rdma_destroy_id+0x6f/0x440 [rdma_cm] but task is already holding lock: 00000000331b4e2c ((work_completion)(&queue->release_work)){+.+.}, at: process_one_work+0x3ed/0xa20 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 ((work_completion)(&queue->release_work)){+.+.}: process_one_work+0x474/0xa20 worker_thread+0x63/0x5a0 kthread+0x1cf/0x1f0 ret_from_fork+0x24/0x30 -> #2 ((wq_completion)"events"){+.+.}: flush_workqueue+0xf3/0x970 nvmet_rdma_cm_handler+0x133d/0x1734 [nvmet_rdma] cma_ib_req_handler+0x72f/0xf90 [rdma_cm] cm_process_work+0x2e/0x110 [ib_cm] cm_req_handler+0x135b/0x1c30 [ib_cm] cm_work_handler+0x2b7/0x38cd [ib_cm] process_one_work+0x4ae/0xa20 nvmet_rdma:nvmet_rdma_cm_handler: nvmet_rdma: disconnected (10): status 0 id 0000000040357082 worker_thread+0x63/0x5a0 kthread+0x1cf/0x1f0 ret_from_fork+0x24/0x30 nvme nvme0: Reconnecting in 10 seconds... 
-> #1 (&id_priv->handler_mutex/1){+.+.}: __mutex_lock+0xfe/0xbe0 mutex_lock_nested+0x1b/0x20 cma_ib_req_handler+0x6aa/0xf90 [rdma_cm] cm_process_work+0x2e/0x110 [ib_cm] cm_req_handler+0x135b/0x1c30 [ib_cm] cm_work_handler+0x2b7/0x38cd [ib_cm] process_one_work+0x4ae/0xa20 worker_thread+0x63/0x5a0 kthread+0x1cf/0x1f0 ret_from_fork+0x24/0x30 -> #0 (&id_priv->handler_mutex){+.+.}: lock_acquire+0xc5/0x200 __mutex_lock+0xfe/0xbe0 mutex_lock_nested+0x1b/0x20 rdma_destroy_id+0x6f/0x440 [rdma_cm] nvmet_rdma_release_queue_work+0x8e/0x1b0 [nvmet_rdma] process_one_work+0x4ae/0xa20 worker_thread+0x63/0x5a0 kthread+0x1cf/0x1f0 ret_from_fork+0x24/0x30 Fixes: 777dc82395de ("nvmet-rdma: occasionally flush ongoing controller teardown") Reported-by: Bart Van Assche Signed-off-by: Sagi Grimberg Tested-by: Bart Van Assche Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index bfc4da660bb4..5becca88ccbe 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -122,6 +122,7 @@ struct nvmet_rdma_device { int inline_page_count; }; +struct workqueue_struct *nvmet_rdma_delete_wq; static bool nvmet_rdma_use_srq; module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444); MODULE_PARM_DESC(use_srq, "Use shared receive queue."); @@ -1267,12 +1268,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, if (queue->host_qid == 0) { /* Let inflight controller teardown complete */ - flush_scheduled_work(); + flush_workqueue(nvmet_rdma_delete_wq); } ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); if (ret) { - schedule_work(&queue->release_work); + queue_work(nvmet_rdma_delete_wq, &queue->release_work); /* Destroying rdma_cm id is not needed here */ return 0; } @@ -1337,7 +1338,7 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) if (disconnect) { rdma_disconnect(queue->cm_id); - schedule_work(&queue->release_work); + queue_work(nvmet_rdma_delete_wq, &queue->release_work); } } @@ -1367,7 +1368,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, mutex_unlock(&nvmet_rdma_queue_mutex); pr_err("failed to connect queue %d\n", queue->idx); - schedule_work(&queue->release_work); + queue_work(nvmet_rdma_delete_wq, &queue->release_work); } /** @@ -1649,8 +1650,17 @@ static int __init nvmet_rdma_init(void) if (ret) goto err_ib_client; + nvmet_rdma_delete_wq = alloc_workqueue("nvmet-rdma-delete-wq", + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); + if (!nvmet_rdma_delete_wq) { + ret = -ENOMEM; + goto err_unreg_transport; + } + return 0; +err_unreg_transport: + nvmet_unregister_transport(&nvmet_rdma_ops); err_ib_client: ib_unregister_client(&nvmet_rdma_ib_client); return ret; @@ -1658,6 +1668,7 @@ err_ib_client: static void __exit nvmet_rdma_exit(void) { + destroy_workqueue(nvmet_rdma_delete_wq); nvmet_unregister_transport(&nvmet_rdma_ops); ib_unregister_client(&nvmet_rdma_ib_client); WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list)); -- cgit From 6d8623a71135d8e2d397c1534f35e04dcf867749 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 4 Oct 2018 10:35:24 -0700 Subject: blk-mq-debugfs: Also show requests that have not yet been started When debugging e.g. the SCSI timeout handler it is important that requests that have not yet been started or that already have completed are also reported through debugfs. Cc: Christoph Hellwig Cc: Ming Lei Cc: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index a5ea86835fcb..41b86f50d126 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -431,8 +431,7 @@ static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved) { const struct show_busy_params *params = data; - if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx && - blk_mq_rq_state(rq) != MQ_RQ_IDLE) + if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx) __blk_mq_debugfs_rq_show(params->m, list_entry_rq(&rq->queuelist)); } -- cgit From 7a55948d38eb9b274cbbdd56dc1dd4b96ebfbe04 Mon Sep 17 00:00:00 2001 From: Shenghui Wang Date: Mon, 8 Oct 2018 20:41:07 +0800 Subject: bcache: account size of buckets used in uuid write to ca->meta_sectors_written UUIDs are considered metadata. __uuid_write should add the number of buckets (in sectors) written to disk to ca->meta_sectors_written. Currently only 1 bucket is used in uuid write. Steps to test:
1) Create a fresh backing device and a fresh cache device separately. The backing device is not attached to any cache set.
2) cd /sys/block//bcache
   cat metadata_written // record the output value
   cat bucket_size
3) Attach the backing device to the cache set.
4) cat metadata_written
The output value is almost the same as the value in step 2 before the change. After the change, the value is bigger by about 1 bucket size. Signed-off-by: Shenghui Wang Reviewed-by: Tang Junhui Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 30ba9aeb5ee8..5fd1466b34e4 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -418,6 +418,7 @@ static int __uuid_write(struct cache_set *c) { BKEY_PADDED(key) k; struct closure cl; + struct cache *ca; closure_init_stack(&cl); lockdep_assert_held(&bch_register_lock); @@ -429,6 +430,10 @@ static int __uuid_write(struct cache_set *c) uuid_io(c, REQ_OP_WRITE, 0, &k.key, &cl); closure_sync(&cl); + /* Only one bucket used for uuid write */ + ca = PTR_CACHE(c, &k.key, 0); + atomic_long_add(ca->sb.bucket_size, &ca->meta_sectors_written); + bkey_copy(&c->uuid_bucket, &k.key); bkey_put(c, &k.key); return 0; -- cgit From 502b291568fc7faf1ebdb2c2590f12851db0ff76 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Mon, 8 Oct 2018 20:41:08 +0800 Subject: bcache: trace missed reading by cache_missed Missed read IOs are identified by s->cache_missed, not s->cache_miss, so use s->cache_missed in trace_bcache_read() to identify whether the IO missed or not.
Signed-off-by: Tang Junhui Cc: stable@vger.kernel.org Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 51be355a3309..4946d486f734 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -850,7 +850,7 @@ static void cached_dev_read_done_bh(struct closure *cl) bch_mark_cache_accounting(s->iop.c, s->d, !s->cache_missed, s->iop.bypass); - trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass); + trace_bcache_read(s->orig_bio, !s->cache_missed, s->iop.bypass); if (s->iop.status) continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq); -- cgit From 752f66a75abad2ae40b4570b3c77b422389cffcf Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 8 Oct 2018 20:41:09 +0800 Subject: bcache: use REQ_PRIO to indicate bio for metadata In cached_dev_cache_miss() and check_should_bypass(), REQ_META is used to check whether a bio is for a metadata request. REQ_META is meant for blktrace; the correct REQ_ flag is REQ_PRIO. This flag means the bio should take priority over other bios, and it is frequently used to indicate metadata I/O in file system code. This patch replaces REQ_META with the correct flag, REQ_PRIO. CC Adam Manzanares because he explained to me what REQ_PRIO is for. Signed-off-by: Coly Li Cc: Adam Manzanares Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 4946d486f734..ee15fb039fd0 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -395,7 +395,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) * unless the read-ahead request is for metadata (eg, for gfs2). */ if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) && - !(bio->bi_opf & REQ_META)) + !(bio->bi_opf & REQ_PRIO)) goto skip; if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || @@ -877,7 +877,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, } if (!(bio->bi_opf & REQ_RAHEAD) && - !(bio->bi_opf & REQ_META) && + !(bio->bi_opf & REQ_PRIO) && s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA) reada = min_t(sector_t, dc->readahead >> 9, get_capacity(bio->bi_disk) - bio_end_sector(bio)); -- cgit From dd0c91793b7c2658ea32c6b3a2247a8ceca45dc0 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Mon, 8 Oct 2018 20:41:10 +0800 Subject: bcache: fix ioctl in flash device When doing an ioctl on a flash device, ioctl_dev() in super.c is called, and we should not look up the cached device there, since a flash-only device has no backing device. This patch moves the dc->io_disable check into cached_dev_ioctl() so that ioctls on flash devices work correctly.
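Condensed from the diff that follows, the dispatch after the change (flash-only volumes never reach the cached_dev-specific check):

/* super.c: generic entry point, safe for flash-only volumes */
static int ioctl_dev(struct block_device *b, fmode_t mode,
		     unsigned int cmd, unsigned long arg)
{
	struct bcache_device *d = b->bd_disk->private_data;

	return d->ioctl(d, mode, cmd, arg);
}

/* request.c: only the cached-device handler may look at dc->io_disable */
static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
			    unsigned int cmd, unsigned long arg)
{
	struct cached_dev *dc = container_of(d, struct cached_dev, disk);

	if (dc->io_disable)
		return -EIO;
	return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg);
}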
Fixes: 0f0709e6bfc3c ("bcache: stop bcache device when backing device is offline") Signed-off-by: Tang Junhui Cc: stable@vger.kernel.org Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 3 +++ drivers/md/bcache/super.c | 4 ---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index ee15fb039fd0..3bf35914bb57 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1218,6 +1218,9 @@ static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, { struct cached_dev *dc = container_of(d, struct cached_dev, disk); + if (dc->io_disable) + return -EIO; + return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg); } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 5fd1466b34e4..9de6695195bf 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -648,10 +648,6 @@ static int ioctl_dev(struct block_device *b, fmode_t mode, unsigned int cmd, unsigned long arg) { struct bcache_device *d = b->bd_disk->private_data; - struct cached_dev *dc = container_of(d, struct cached_dev, disk); - - if (dc->io_disable) - return -EIO; return d->ioctl(d, mode, cmd, arg); } -- cgit From 4516da427fcf214af7d826273ab275f1d6e56ef0 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 8 Oct 2018 20:41:11 +0800 Subject: bcache: fix typo in code comments of closure_return_with_destructor() The code comment of closure_return_with_destructor() in closure.h marks the function name as closure_return(). This patch fixes this typo with the correct name - closure_return_with_destructor. Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/closure.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index eca0d496b686..c88cdc4ae4ec 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -345,7 +345,8 @@ do { \ } while (0) /** - * closure_return - finish execution of a closure, with destructor + * closure_return_with_destructor - finish execution of a closure, + * with destructor * * Works like closure_return(), except @destructor will be called when all * outstanding refs on @cl have been dropped; @destructor may be used to safely -- cgit From 2e17a262a2371d38d2ec03614a2675a32cef9912 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Mon, 8 Oct 2018 20:41:12 +0800 Subject: bcache: correct dirty data statistics When a bcache device is clean, dirty keys may still exist after journal replay, so we need to count these dirty keys even when the device is in clean status; otherwise, after writeback, the amount of dirty data would be incorrect.
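Condensed, the attach path changes from initializing the dirty counter only for dirty devices to doing it unconditionally (the full hunk is in the diff below):

	if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
		atomic_set(&dc->has_dirty, 1);
		bch_writeback_queue(dc);
	}
	/* journal replay can leave dirty keys behind even for a "clean"
	 * device, so count them in every case */
	bch_sectors_dirty_init(&dc->disk);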
Signed-off-by: Tang Junhui Cc: stable@vger.kernel.org Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 9de6695195bf..c116b6e0d85a 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1153,11 +1153,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, } if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - bch_sectors_dirty_init(&dc->disk); atomic_set(&dc->has_dirty, 1); bch_writeback_queue(dc); } + bch_sectors_dirty_init(&dc->disk); + bch_cached_dev_run(dc); bcache_device_link(&dc->disk, c, "bdev"); atomic_inc(&c->attached_dev_nr); -- cgit From 7567c2a2ad9e80a2ce977eef535e64b61899633e Mon Sep 17 00:00:00 2001 From: Ben Peddell Date: Mon, 8 Oct 2018 20:41:13 +0800 Subject: bcache: Populate writeback_rate_minimum attribute Somewhere between Michael Lyle's original "bcache: PI controller for writeback rate V2" patch dated 07 Sep 2017 and commit 1d316e6 ("bcache: implement PI controller for writeback rate"), the mapping of the writeback_rate_minimum attribute was dropped. Re-add the missing sysfs writeback_rate_minimum attribute mapping to "allow the user to specify a minimum rate at which dirty blocks are retired." Fixes: 1d316e6 ("bcache: implement PI controller for writeback rate") Signed-off-by: Ben Peddell Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 150cf4f4cf74..26f035a0c5b9 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -285,6 +285,7 @@ STORE(__cached_dev) 1, WRITEBACK_RATE_UPDATE_SECS_MAX); d_strtoul(writeback_rate_i_term_inverse); d_strtoul_nonzero(writeback_rate_p_term_inverse); + d_strtoul_nonzero(writeback_rate_minimum); sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX); @@ -412,6 +413,7 @@ static struct attribute *bch_cached_dev_files[] = { &sysfs_writeback_rate_update_seconds, &sysfs_writeback_rate_i_term_inverse, &sysfs_writeback_rate_p_term_inverse, + &sysfs_writeback_rate_minimum, &sysfs_writeback_rate_debug, &sysfs_errors, &sysfs_io_error_limit, -- cgit From 2d6cb6edd2c7fb4f40998895bda45006281b1ac5 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Mon, 8 Oct 2018 20:41:14 +0800 Subject: bcache: fix miss key refill->end in writeback refill->end records the last key of writeback. For example, on the first pass, keys (1,128K) to (1,1024K) are flushed to the backend device, but the end key (1,1024K) is not included, because of the code below: if (bkey_cmp(k, refill->end) >= 0) { ret = MAP_DONE; goto out; } The next time we refill the writeback keybuf, we search for keys starting from (1,1024K) and get a key bigger than it, so the key (1,1024K) is missed. This patch modifies the above check and lets the end key be included in the writeback key buffer.
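A boundary illustration using the hypothetical keys from the message above:

/*
 * refill->end == (1,1024K), candidate key k == (1,1024K):
 *   old check: bkey_cmp(k, refill->end) >= 0  -> MAP_DONE, end key dropped
 *   new check: bkey_cmp(k, refill->end) >  0  -> end key still added
 * Since the next refill searches strictly from (1,1024K) onward, only
 * the inclusive comparison guarantees the end key ever gets written back.
 */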
Signed-off-by: Tang Junhui Cc: stable@vger.kernel.org Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index e7d4817681f2..3f4211b5cd33 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -2434,7 +2434,7 @@ static int refill_keybuf_fn(struct btree_op *op, struct btree *b, struct keybuf *buf = refill->buf; int ret = MAP_CONTINUE; - if (bkey_cmp(k, refill->end) >= 0) { + if (bkey_cmp(k, refill->end) > 0) { ret = MAP_DONE; goto out; } -- cgit From 46010141da6677b81cc77f9b47f8ac62bd1cbfd3 Mon Sep 17 00:00:00 2001 From: Shenghui Wang Date: Mon, 8 Oct 2018 20:41:15 +0800 Subject: bcache: recal cached_dev_sectors on detach Recalculate cached_dev_sectors when a cached_dev is detached, as is done when a cached_dev is attached. Update cached_dev_sectors before bcache_device_detach() is called, as bcache_device_detach() will set bcache_device->c to NULL. Signed-off-by: Shenghui Wang Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index c116b6e0d85a..1155bd823b6f 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1009,6 +1009,7 @@ static void cached_dev_detach_finish(struct work_struct *w) bch_write_bdev_super(dc, &cl); closure_sync(&cl); + calc_cached_dev_sectors(dc->disk.c); bcache_device_detach(&dc->disk); list_move(&dc->list, &uncached_devices); -- cgit From 3fd3c5c02b2865e506bee1212c8423eac0d4cddf Mon Sep 17 00:00:00 2001 From: Shenghui Wang Date: Mon, 8 Oct 2018 20:41:16 +0800 Subject: bcache: remove unused bch_passthrough_cache struct kmem_cache *bch_passthrough_cache is not used in the bcache code. Remove it. Signed-off-by: Shenghui Wang Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/request.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index aa055cfeb099..721bf336ed1a 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -39,6 +39,6 @@ void bch_data_insert(struct closure *cl); void bch_cached_dev_request_init(struct cached_dev *dc); void bch_flash_dev_request_init(struct bcache_device *d); -extern struct kmem_cache *bch_search_cache, *bch_passthrough_cache; +extern struct kmem_cache *bch_search_cache; #endif /* _BCACHE_REQUEST_H_ */ -- cgit From 91bafdf081b8ad8ab4977918ee45dffe3d744060 Mon Sep 17 00:00:00 2001 From: Dongbo Cao Date: Mon, 8 Oct 2018 20:41:17 +0800 Subject: bcache: remove useless parameter of bch_debug_init() The parameter "struct kobject *kobj" in bch_debug_init() is unused; remove it in this patch.
Signed-off-by: Dongbo Cao Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 2 +- drivers/md/bcache/debug.c | 2 +- drivers/md/bcache/super.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 954dad29e6e8..b61b83bbcfff 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -1004,7 +1004,7 @@ void bch_open_buckets_free(struct cache_set *c); int bch_cache_allocator_start(struct cache *ca); void bch_debug_exit(void); -void bch_debug_init(struct kobject *kobj); +void bch_debug_init(void); void bch_request_exit(void); int bch_request_init(void); diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 06da66b2488a..8f448b9c96a1 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -253,7 +253,7 @@ void bch_debug_exit(void) debugfs_remove_recursive(bcache_debug); } -void __init bch_debug_init(struct kobject *kobj) +void __init bch_debug_init(void) { /* * it is unnecessary to check return value of diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 1155bd823b6f..2ce3fe9a1efe 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2389,7 +2389,7 @@ static int __init bcache_init(void) sysfs_create_files(bcache_kobj, files)) goto err; - bch_debug_init(bcache_kobj); + bch_debug_init(); closure_debug_init(); return 0; -- cgit From 149d0efada7777ad5a5242b095692af142f533d8 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 8 Oct 2018 20:41:18 +0800 Subject: bcache: replace hard coded number with BUCKET_GC_GEN_MAX In extents.c:bch_extent_bad(), the number 96 is used as a parameter in the call to btree_bug_on(). The purpose is to check whether the stale gen value exceeds BUCKET_GC_GEN_MAX, so it is better to use the macro BUCKET_GC_GEN_MAX to make the code more understandable. Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index c809724e6571..956004366699 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -553,7 +553,7 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k) for (i = 0; i < KEY_PTRS(k); i++) { stale = ptr_stale(b->c, k, i); - btree_bug_on(stale > 96, b, + btree_bug_on(stale > BUCKET_GC_GEN_MAX, b, "key too stale: %i, need_gc %u", stale, b->c->need_gc); -- cgit From 8792099f9ad487cf381f4e8199ff2158ba0f6eb5 Mon Sep 17 00:00:00 2001 From: Shenghui Wang Date: Mon, 8 Oct 2018 20:41:19 +0800 Subject: bcache: use MAX_CACHES_PER_SET instead of magic number 8 in __bch_bucket_alloc_set A cache_set currently has at most MAX_CACHES_PER_SET caches, and the macro is used for "struct cache *cache_by_alloc[MAX_CACHES_PER_SET];" in the definition of struct cache_set. Use MAX_CACHES_PER_SET instead of the magic number 8 in __bch_bucket_alloc_set.
Signed-off-by: Shenghui Wang Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 7a28232d868b..5002838ea476 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -484,7 +484,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, int i; lockdep_assert_held(&c->bucket_lock); - BUG_ON(!n || n > c->caches_loaded || n > 8); + BUG_ON(!n || n > c->caches_loaded || n > MAX_CACHES_PER_SET); bkey_init(k); -- cgit From f6027bca9e382efc4d4f28a2d9678e0a07428363 Mon Sep 17 00:00:00 2001 From: Dongbo Cao Date: Mon, 8 Oct 2018 20:41:20 +0800 Subject: bcache: split combined if-condition code into separate ones Split the combined '||' statements in the if() check to make the code easier to debug. Signed-off-by: Dongbo Cao Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 90 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 14 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 2ce3fe9a1efe..4e89d6d4bbd6 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2052,6 +2052,8 @@ static int cache_alloc(struct cache *ca) size_t free; size_t btree_buckets; struct bucket *b; + int ret = -ENOMEM; + const char *err = NULL; __module_get(THIS_MODULE); kobject_init(&ca->kobj, &bch_cache_ktype); @@ -2070,26 +2072,86 @@ static int cache_alloc(struct cache *ca) btree_buckets = ca->sb.njournal_buckets ?: 8; free = roundup_pow_of_two(ca->sb.nbuckets) >> 10; - if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, GFP_KERNEL) || - !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || - !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) || - !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) || - !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) || - !init_heap(&ca->heap, free << 3, GFP_KERNEL) || - !(ca->buckets = vzalloc(array_size(sizeof(struct bucket), - ca->sb.nbuckets))) || - !(ca->prio_buckets = kzalloc(array3_size(sizeof(uint64_t), - prio_buckets(ca), 2), - GFP_KERNEL)) || - !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca))) - return -ENOMEM; + if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, + GFP_KERNEL)) { + err = "ca->free[RESERVE_BTREE] alloc failed"; + goto err_btree_alloc; + } + + if (!init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), + GFP_KERNEL)) { + err = "ca->free[RESERVE_PRIO] alloc failed"; + goto err_prio_alloc; + } + + if (!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL)) { + err = "ca->free[RESERVE_MOVINGGC] alloc failed"; + goto err_movinggc_alloc; + } + + if (!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL)) { + err = "ca->free[RESERVE_NONE] alloc failed"; + goto err_none_alloc; + } + + if (!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL)) { + err = "ca->free_inc alloc failed"; + goto err_free_inc_alloc; + } + + if (!init_heap(&ca->heap, free << 3, GFP_KERNEL)) { + err = "ca->heap alloc failed"; + goto err_heap_alloc; + } + + ca->buckets = vzalloc(array_size(sizeof(struct bucket), + ca->sb.nbuckets)); + if (!ca->buckets) { + err = "ca->buckets alloc failed"; + goto err_buckets_alloc; + } + + ca->prio_buckets = kzalloc(array3_size(sizeof(uint64_t), + prio_buckets(ca), 2), + GFP_KERNEL); + if (!ca->prio_buckets) { + err = "ca->prio_buckets alloc failed"; + goto err_prio_buckets_alloc; + } + + ca->disk_buckets =
alloc_bucket_pages(GFP_KERNEL, ca); + if (!ca->disk_buckets) { + err = "ca->disk_buckets alloc failed"; + goto err_disk_buckets_alloc; + } ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca); for_each_bucket(b, ca) atomic_set(&b->pin, 0); return 0; + +err_disk_buckets_alloc: + kfree(ca->prio_buckets); +err_prio_buckets_alloc: + vfree(ca->buckets); +err_buckets_alloc: + free_heap(&ca->heap); +err_heap_alloc: + free_fifo(&ca->free_inc); +err_free_inc_alloc: + free_fifo(&ca->free[RESERVE_NONE]); +err_none_alloc: + free_fifo(&ca->free[RESERVE_MOVINGGC]); +err_movinggc_alloc: + free_fifo(&ca->free[RESERVE_PRIO]); +err_prio_alloc: + free_fifo(&ca->free[RESERVE_BTREE]); +err_btree_alloc: + module_put(THIS_MODULE); + if (err) + pr_notice("error %s: %s", ca->cache_dev_name, err); + return ret; } static int register_cache(struct cache_sb *sb, struct page *sb_page, -- cgit From 3a646fd77684dd5fbe20748bb04e12077bbecddc Mon Sep 17 00:00:00 2001 From: Dongbo Cao Date: Mon, 8 Oct 2018 20:41:21 +0800 Subject: bcache: panic fix for making cache device When the nbuckets of a cache device is smaller than 1024, making the cache device will trigger a BUG_ON in the kernel; add a condition to avoid this. Reported-by: nitroxis Signed-off-by: Dongbo Cao Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 4e89d6d4bbd6..7bbd670a5a84 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2071,6 +2071,11 @@ static int cache_alloc(struct cache *ca) */ btree_buckets = ca->sb.njournal_buckets ?: 8; free = roundup_pow_of_two(ca->sb.nbuckets) >> 10; + if (!free) { + ret = -EPERM; + err = "ca->sb.nbuckets is too small"; + goto err_free; + } if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, GFP_KERNEL)) { @@ -2148,6 +2153,7 @@ err_movinggc_alloc: err_prio_alloc: free_fifo(&ca->free[RESERVE_BTREE]); err_btree_alloc: +err_free: module_put(THIS_MODULE); if (err) pr_notice("error %s: %s", ca->cache_dev_name, err); @@ -2177,6 +2183,8 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); if (ret == -ENOMEM) err = "cache_alloc(): -ENOMEM"; + else if (ret == -EPERM) + err = "cache_alloc(): cache device is too small"; else err = "cache_alloc(): unknown error"; goto err; -- cgit From 36e765392e48e0322222347c4d21078c0b94758c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 28 Sep 2018 16:42:20 +0800 Subject: blk-mq: complete req in softirq context in case of single queue Lots of controllers may have only one irq vector for completing IO requests. Usually the affinity of that single irq vector covers all possible CPUs; however, on most architectures there may be only one specific CPU handling this interrupt. So if all IOs are completed in hardirq context, IO performance inevitably degrades because of increased irq latency. This patch addresses the issue by allowing requests to be completed in softirq context, like the legacy IO path. IOPS improves by ~13% in the following randread test on raid0 over virtio-scsi.
mdadm --create --verbose /dev/md0 --level=0 --chunk=1024 --raid-devices=8 /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg /dev/sdh /dev/sdi fio --time_based --name=benchmark --runtime=30 --filename=/dev/md0 --nrfiles=1 --ioengine=libaio --iodepth=32 --direct=1 --invalidate=1 --verify=0 --verify_fatal=0 --numjobs=32 --rw=randread --blocksize=4k Cc: Dongli Zhang Cc: Zach Marano Cc: Christoph Hellwig Cc: Bart Van Assche Cc: Jianchao Wang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 14 ++++++++++++++ block/blk-softirq.c | 5 ++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 1dc157c85a83..89bd9cb9defc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -568,6 +568,20 @@ static void __blk_mq_complete_request(struct request *rq) if (!blk_mq_mark_complete(rq)) return; + /* + * For most single queue controllers, there is only one irq vector + * for handling IO completion, and the only irq's affinity is set + * as all possible CPUs. On most archs, this affinity means the + * irq is handled on one specific CPU. + * + * So complete the IO request in softirq context in case of single queue + * for not degrading IO performance by irqsoff latency. + */ + if (rq->q->nr_hw_queues == 1) { + __blk_complete_request(rq); + return; + } + if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) { rq->q->softirq_done_fn(rq); return; diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 15c1f5e12eb8..e47a2f751884 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -97,8 +97,8 @@ static int blk_softirq_cpu_dead(unsigned int cpu) void __blk_complete_request(struct request *req) { - int ccpu, cpu; struct request_queue *q = req->q; + int cpu, ccpu = q->mq_ops ? req->mq_ctx->cpu : req->cpu; unsigned long flags; bool shared = false; @@ -110,8 +110,7 @@ void __blk_complete_request(struct request *req) /* * Select completion CPU */ - if (req->cpu != -1) { - ccpu = req->cpu; + if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && ccpu != -1) { if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) shared = cpus_share_cache(cpu, ccpu); } else -- cgit From 73569e11032fc5a9b314b6351632cfca7793afd5 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Tue, 9 Oct 2018 13:11:31 +0200 Subject: lightnvm: remove dependencies on BLK_DEV_NVME and PCI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to force the NVMe device driver to be compiled in if the lightnvm subsystem is selected. There is also no need for PCI to be selected, as it will be selected by the device driver that hooks into the subsystem. Signed-off-by: Matias Bjørling Reviewed-by: Javier González Signed-off-by: Jens Axboe --- drivers/lightnvm/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 439bf90d084d..a872cd720967 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -4,8 +4,7 @@ menuconfig NVM bool "Open-Channel SSD target support" - depends on BLOCK && PCI - select BLK_DEV_NVME + depends on BLOCK help Say Y here to get to enable Open-channel SSDs.
-- cgit From d7b6801673f95e5f72efd3ffba1bcbb606883049 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Tue, 9 Oct 2018 13:11:32 +0200 Subject: lightnvm: combine 1.2 and 2.0 command flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an nvm_set_flags helper to enable the core to appropriately set the command flags for read/write/erase depending on which version a drive supports. The flags argument can be distilled into the access hint, scrambling, and program/erase suspend. Replace the access hint with an "is_seq" parameter. The rest of the flags are dependent on the command opcode, which is trivial to detect and set. Signed-off-by: Matias Bjørling Reviewed-by: Javier González Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 20 ++++++++++++++++++++ drivers/lightnvm/pblk-core.c | 13 ++++--------- drivers/lightnvm/pblk-read.c | 8 +------- drivers/lightnvm/pblk-recovery.c | 14 ++++---------- drivers/lightnvm/pblk-write.c | 2 +- drivers/lightnvm/pblk.h | 38 -------------------------------------- include/linux/lightnvm.h | 2 ++ 7 files changed, 32 insertions(+), 65 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 60aa7bc5a630..68553c7ae937 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -752,6 +752,24 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, } EXPORT_SYMBOL(nvm_set_tgt_bb_tbl); +static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd) +{ + int flags = 0; + + if (geo->version == NVM_OCSSD_SPEC_20) + return 0; + + if (rqd->is_seq) + flags |= geo->pln_mode >> 1; + + if (rqd->opcode == NVM_OP_PREAD) + flags |= (NVM_IO_SCRAMBLE_ENABLE | NVM_IO_SUSPEND); + else if (rqd->opcode == NVM_OP_PWRITE) + flags |= NVM_IO_SCRAMBLE_ENABLE; + + return flags; +} + int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) { struct nvm_dev *dev = tgt_dev->parent; @@ -763,6 +781,7 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) nvm_rq_tgt_to_dev(tgt_dev, rqd); rqd->dev = tgt_dev; + rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd); /* In case of error, fail with right address format */ ret = dev->ops->submit_io(dev, rqd); @@ -783,6 +802,7 @@ int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) nvm_rq_tgt_to_dev(tgt_dev, rqd); rqd->dev = tgt_dev; + rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd); /* In case of error, fail with right address format */ ret = dev->ops->submit_io_sync(dev, rqd); diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 00984b486fea..72acf2f6dbd6 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -688,7 +688,7 @@ next_rq: if (dir == PBLK_WRITE) { struct pblk_sec_meta *meta_list = rqd.meta_list; - rqd.flags = pblk_set_progr_mode(pblk, PBLK_WRITE); + rqd.is_seq = 1; for (i = 0; i < rqd.nr_ppas; ) { spin_lock(&line->lock); paddr = __pblk_alloc_page(pblk, line, min); @@ -703,11 +703,9 @@ next_rq: for (i = 0; i < rqd.nr_ppas; ) { struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id); int pos = pblk_ppa_to_pos(geo, ppa); - int read_type = PBLK_READ_RANDOM; if (pblk_io_aligned(pblk, rq_ppas)) - read_type = PBLK_READ_SEQUENTIAL; - rqd.flags = pblk_set_read_mode(pblk, read_type); + rqd.is_seq = 1; while (test_bit(pos, line->blk_bitmap)) { paddr += min; @@ -787,17 +785,14 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, __le64 *lba_list = NULL; int i, ret; int cmd_op, bio_op; - int flags; if (dir == PBLK_WRITE) { bio_op
= REQ_OP_WRITE; cmd_op = NVM_OP_PWRITE; - flags = pblk_set_progr_mode(pblk, PBLK_WRITE); lba_list = emeta_to_lbas(pblk, line->emeta->buf); } else if (dir == PBLK_READ_RECOV || dir == PBLK_READ) { bio_op = REQ_OP_READ; cmd_op = NVM_OP_PREAD; - flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); } else return -EINVAL; @@ -822,7 +817,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, rqd.bio = bio; rqd.opcode = cmd_op; - rqd.flags = flags; + rqd.is_seq = 1; rqd.nr_ppas = lm->smeta_sec; for (i = 0; i < lm->smeta_sec; i++, paddr++) { @@ -885,7 +880,7 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd, rqd->opcode = NVM_OP_ERASE; rqd->ppa_addr = ppa; rqd->nr_ppas = 1; - rqd->flags = pblk_set_progr_mode(pblk, PBLK_ERASE); + rqd->is_seq = 1; rqd->bio = NULL; } diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 5a46d7f9302f..07b7f82c7595 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -93,9 +93,7 @@ next: } if (pblk_io_aligned(pblk, nr_secs)) - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); - else - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); + rqd->is_seq = 1; #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_add(nr_secs, &pblk->inflight_reads); @@ -344,7 +342,6 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd, rqd->bio = new_bio; rqd->nr_ppas = nr_holes; - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); pr_ctx->ppa_ptr = NULL; pr_ctx->orig_bio = bio; @@ -438,8 +435,6 @@ retry: } else { rqd->ppa_addr = ppa; } - - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); } int pblk_submit_read(struct pblk *pblk, struct bio *bio) @@ -663,7 +658,6 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) rqd.opcode = NVM_OP_PREAD; rqd.nr_ppas = gc_rq->secs_to_gc; - rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); rqd.bio = bio; if (pblk_submit_io_sync(pblk, &rqd)) { diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index e232e47e1353..cf629ab016ba 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -159,9 +159,7 @@ next_read_rq: rqd->dma_meta_list = dma_meta_list; if (pblk_io_aligned(pblk, rq_ppas)) - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); - else - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); + rqd->is_seq = 1; for (i = 0; i < rqd->nr_ppas; ) { struct ppa_addr ppa; @@ -302,7 +300,7 @@ next_pad_rq: rqd->bio = bio; rqd->opcode = NVM_OP_PWRITE; - rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE); + rqd->is_seq = 1; rqd->meta_list = meta_list; rqd->nr_ppas = rq_ppas; rqd->ppa_list = ppa_list; @@ -436,9 +434,7 @@ next_rq: rqd->dma_meta_list = dma_meta_list; if (pblk_io_aligned(pblk, rq_ppas)) - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); - else - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); + rqd->is_seq = 1; for (i = 0; i < rqd->nr_ppas; ) { struct ppa_addr ppa; @@ -567,9 +563,7 @@ next_rq: rqd->dma_meta_list = dma_meta_list; if (pblk_io_aligned(pblk, rq_ppas)) - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); - else - rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); + rqd->is_seq = 1; for (i = 0; i < rqd->nr_ppas; ) { struct ppa_addr ppa; diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index ee774a86cf1e..508c63701eda 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -302,7 +302,7 @@ static int 
pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, /* Setup write request */ rqd->opcode = NVM_OP_PWRITE; rqd->nr_ppas = nr_secs; - rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE); + rqd->is_seq = 1; rqd->private = pblk; rqd->end_io = end_io; diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 4760af7b6499..48b3035df3c4 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -1255,44 +1255,6 @@ static inline u32 pblk_calc_emeta_crc(struct pblk *pblk, return crc; } -static inline int pblk_set_progr_mode(struct pblk *pblk, int type) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int flags; - - if (geo->version == NVM_OCSSD_SPEC_20) - return 0; - - flags = geo->pln_mode >> 1; - - if (type == PBLK_WRITE) - flags |= NVM_IO_SCRAMBLE_ENABLE; - - return flags; -} - -enum { - PBLK_READ_RANDOM = 0, - PBLK_READ_SEQUENTIAL = 1, -}; - -static inline int pblk_set_read_mode(struct pblk *pblk, int type) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int flags; - - if (geo->version == NVM_OCSSD_SPEC_20) - return 0; - - flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE; - if (type == PBLK_READ_SEQUENTIAL) - flags |= geo->pln_mode >> 1; - - return flags; -} - static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs) { return !(nr_secs % pblk->min_write_pgs); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index e9e0d1c7eaf5..8acc2fe277d6 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -305,6 +305,8 @@ struct nvm_rq { u64 ppa_status; /* ppa media status */ int error; + int is_seq; /* Sequential hint flag. 1.2 only */ + void *private; }; -- cgit From 4b5d56edb8fc565c5db029aecaea598eadfba7f6 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Tue, 9 Oct 2018 13:11:33 +0200 Subject: lightnvm: pblk: fix rqd.error return value in pblk_blk_erase_sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rqd.error is masked by the return value of pblk_submit_io_sync. The rqd structure is then passed on to the end_io function, which assumes that any error should lead to a chunk being marked offline/bad. Since pblk_submit_io_sync can fail before the command is issued to the device, the error value may not correspond to a media failure, leading to chunks being prematurely retired. Also, the pblk_blk_erase_sync function prints an error message in case the erase fails. Since the caller prints an error message by itself, remove the error message in this function. Signed-off-by: Matias Bjørling Reviewed-by: Javier González Reviewed-by: Hans Holmberg Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 72acf2f6dbd6..72de7456845b 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -886,10 +886,8 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd, static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) { - struct nvm_rq rqd; - int ret = 0; - - memset(&rqd, 0, sizeof(struct nvm_rq)); + struct nvm_rq rqd = {NULL}; + int ret; pblk_setup_e_rq(pblk, &rqd, ppa); @@ -897,19 +895,6 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) * with writes. Thus, there is no need to take the LUN semaphore.
*/ ret = pblk_submit_io_sync(pblk, &rqd); - if (ret) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - pblk_err(pblk, "could not sync erase line:%d,blk:%d\n", - pblk_ppa_to_line(ppa), - pblk_ppa_to_pos(geo, ppa)); - - rqd.error = ret; - goto out; - } - -out: rqd.private = pblk; __pblk_end_io_erase(pblk, &rqd); -- cgit From 656e33ca3d405196f94133babc4e38454a49cb73 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Tue, 9 Oct 2018 13:11:34 +0200 Subject: lightnvm: move device L2P detection to core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A 1.2 device is able to manage the logical to physical mapping table internally or leave it to the host. A target only supports one of those approaches, and therefore must check this on initialization. Move this check to the core to avoid having each target implement the check. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 5 +++++ drivers/lightnvm/pblk-init.c | 7 ------- include/linux/lightnvm.h | 6 ++++++ 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 68553c7ae937..964352720a03 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -355,6 +355,11 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) return -EINVAL; } + if ((tt->flags & NVM_TGT_F_HOST_L2P) != (dev->geo.dom & NVM_RSP_L2P)) { + pr_err("nvm: device is incompatible with target L2P type.\n"); + return -EINVAL; + } + if (nvm_target_exists(create->tgtname)) { pr_err("nvm: target name already exists (%s)\n", create->tgtname); diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 537e98f2b24a..039f62d05e84 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1210,13 +1210,6 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, return ERR_PTR(-EINVAL); } - if (geo->version == NVM_OCSSD_SPEC_12 && geo->dom & NVM_RSP_L2P) { - pblk_err(pblk, "host-side L2P table not supported. (%x)\n", - geo->dom); - kfree(pblk); - return ERR_PTR(-EINVAL); - } - spin_lock_init(&pblk->resubmit_lock); spin_lock_init(&pblk->trans_lock); spin_lock_init(&pblk->lock); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 8acc2fe277d6..f4a84694e5e2 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -495,9 +495,15 @@ typedef void (nvm_tgt_exit_fn)(void *, bool); typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *); typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *); +enum { + NVM_TGT_F_DEV_L2P = 0, + NVM_TGT_F_HOST_L2P = 1 << 0, +}; + struct nvm_tgt_type { const char *name; unsigned int version[3]; + int flags; /* target entry points */ nvm_tgt_make_rq_fn *make_rq; -- cgit From d8adaa3b86324c6186d0adf74bc256bdacfffdb6 Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:11:35 +0200 Subject: lightnvm: pblk: fix race condition on metadata I/O MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In pblk, when a new line is allocated, metadata for the previously written line is scheduled. This is done through a fixed memory region that is shared through time and contexts across different lines and therefore protected by a lock. Unfortunately, this lock does not properly cover all the metadata used for sharing this memory region, resulting in a race condition. This patch fixes this race condition by protecting this metadata properly.
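A condensed, userspace sketch of the bug class (POSIX threads; the names are illustrative only, and the real fix moves the emeta->mem update and check under l_mg->close_lock):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t close_lock = PTHREAD_MUTEX_INITIALIZER;
static int emeta_mem;	/* shared: metadata bytes scheduled so far */
static int use_lock;

/* Racy shape (before the fix): the shared counter is read and updated
 * outside the lock, so concurrent submitters can lose updates and act
 * on stale values when deciding to retire the line. */
static void submit_racy(int rq_len)
{
	emeta_mem += rq_len;	/* unprotected read-modify-write */
	if (emeta_mem >= 4096) {
		pthread_mutex_lock(&close_lock);
		/* list_del(&meta_line->list); */
		pthread_mutex_unlock(&close_lock);
	}
}

/* Fixed shape (after the fix): update and check share one critical
 * section, so the decision is made on a consistent value. */
static void submit_fixed(int rq_len)
{
	pthread_mutex_lock(&close_lock);
	emeta_mem += rq_len;
	if (emeta_mem >= 4096) {
		/* list_del(&meta_line->list); */
	}
	pthread_mutex_unlock(&close_lock);
}

static void *writer(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100000; i++)
		use_lock ? submit_fixed(1) : submit_racy(1);
	return NULL;
}

int main(void)
{
	for (use_lock = 0; use_lock <= 1; use_lock++) {
		pthread_t a, b;

		emeta_mem = 0;
		pthread_create(&a, NULL, writer, NULL);
		pthread_create(&b, NULL, writer, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		printf("%s: emeta_mem=%d (expected 200000)\n",
		       use_lock ? "locked" : "racy", emeta_mem);
	}
	return 0;
}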
Fixes: dd2a43437337 ("lightnvm: pblk: sched. metadata on write thread") Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-write.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 508c63701eda..df99c45778d4 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -417,12 +417,11 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id); } + spin_lock(&l_mg->close_lock); emeta->mem += rq_len; - if (emeta->mem >= lm->emeta_len[0]) { - spin_lock(&l_mg->close_lock); + if (emeta->mem >= lm->emeta_len[0]) list_del(&meta_line->list); - spin_unlock(&l_mg->close_lock); - } + spin_unlock(&l_mg->close_lock); pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas); @@ -491,14 +490,15 @@ static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line; spin_lock(&l_mg->close_lock); -retry: if (list_empty(&l_mg->emeta_list)) { spin_unlock(&l_mg->close_lock); return NULL; } meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list); - if (meta_line->emeta->mem >= lm->emeta_len[0]) - goto retry; + if (meta_line->emeta->mem >= lm->emeta_len[0]) { + spin_unlock(&l_mg->close_lock); + return NULL; + } spin_unlock(&l_mg->close_lock); if (!pblk_valid_meta_ppa(pblk, meta_line, data_rqd)) -- cgit From aff3fb18f957de93e629c7d3d2c4ef1f360aa511 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Tue, 9 Oct 2018 13:11:36 +0200 Subject: lightnvm: move bad block and chunk state logic to core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk implements two data paths for recovering line state: one for 1.2 and another for 2.0. Instead of having pblk implement these, combine them in the core to reduce complexity and make them available to other targets. The new interface will adhere to the 2.0 chunk definition, including managing open chunks with an active write pointer. To provide this interface, a 1.2 device recovers the state of the chunks by manually detecting if a chunk is either free/open/closed/offline, and if open, scanning the flash pages sequentially to find the next writeable page. This process takes on average ~10 seconds on a device with 64 dies, 1024 blocks and 60us read access time. The process can be parallelized but is left out for maintenance simplicity, as the 1.2 specification is deprecated. For 2.0 devices, the logic is maintained internally in the drive and retrieved through the 2.0 interface.
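A condensed, standalone sketch of the 1.2 recovery decision (page_empty() is a hypothetical stand-in for a synchronous media read; the real implementation is nvm_bb_chunk_scan() below):

#include <stdio.h>

enum chunk_state { CHK_FREE, CHK_OPEN, CHK_CLOSED };

/* stand-in for sensing one page: 1 if still erased, 0 if written */
static int page_empty(const int *pages, int idx)
{
	return pages[idx];
}

/* Sense the first and last page to classify the chunk; only when the
 * chunk turns out to be open, scan sequentially for the write pointer. */
static enum chunk_state recover_chunk(const int *pages, int num_pg, int *wp)
{
	*wp = 0;

	if (page_empty(pages, 0))
		return CHK_FREE;	/* never written */

	if (!page_empty(pages, num_pg - 1)) {
		*wp = num_pg;		/* fully written */
		return CHK_CLOSED;
	}

	while (*wp < num_pg && !page_empty(pages, *wp))
		(*wp)++;		/* write pointer = first empty page */

	return CHK_OPEN;
}

int main(void)
{
	int pages[8] = { 0, 0, 0, 1, 1, 1, 1, 1 };	/* an open chunk */
	int wp;
	enum chunk_state st = recover_chunk(pages, 8, &wp);

	printf("state=%d wp=%d\n", st, wp);	/* state=1 (open), wp=3 */
	return 0;
}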
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 309 +++++++++++++++++++++++++++++++++++-------- drivers/lightnvm/pblk-core.c | 6 +- drivers/lightnvm/pblk-init.c | 116 +--------------- drivers/lightnvm/pblk.h | 2 +- drivers/nvme/host/lightnvm.c | 4 +- include/linux/lightnvm.h | 15 +-- 6 files changed, 265 insertions(+), 187 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 964352720a03..8df188e0767e 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -717,46 +717,6 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list); } -int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct nvm_chk_meta *meta, - struct ppa_addr ppa, int nchks) -{ - struct nvm_dev *dev = tgt_dev->parent; - - nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1); - - return dev->ops->get_chk_meta(tgt_dev->parent, meta, - (sector_t)ppa.ppa, nchks); -} -EXPORT_SYMBOL(nvm_get_chunk_meta); - -int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, - int nr_ppas, int type) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_rq rqd; - int ret; - - if (nr_ppas > NVM_MAX_VLBA) { - pr_err("nvm: unable to update all blocks atomically\n"); - return -EINVAL; - } - - memset(&rqd, 0, sizeof(struct nvm_rq)); - - nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); - nvm_rq_tgt_to_dev(tgt_dev, &rqd); - - ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); - nvm_free_rqd_ppalist(tgt_dev, &rqd); - if (ret) { - pr_err("nvm: failed bb mark\n"); - return -EINVAL; - } - - return 0; -} -EXPORT_SYMBOL(nvm_set_tgt_bb_tbl); - static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd) { int flags = 0; @@ -830,27 +790,159 @@ void nvm_end_io(struct nvm_rq *rqd) } EXPORT_SYMBOL(nvm_end_io); +static int nvm_submit_io_sync_raw(struct nvm_dev *dev, struct nvm_rq *rqd) +{ + if (!dev->ops->submit_io_sync) + return -ENODEV; + + rqd->flags = nvm_set_flags(&dev->geo, rqd); + + return dev->ops->submit_io_sync(dev, rqd); +} + +static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa) +{ + struct nvm_rq rqd = { NULL }; + struct bio bio; + struct bio_vec bio_vec; + struct page *page; + int ret; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + bio_init(&bio, &bio_vec, 1); + bio_add_page(&bio, page, PAGE_SIZE, 0); + bio_set_op_attrs(&bio, REQ_OP_READ, 0); + + rqd.bio = &bio; + rqd.opcode = NVM_OP_PREAD; + rqd.is_seq = 1; + rqd.nr_ppas = 1; + rqd.ppa_addr = generic_to_dev_addr(dev, ppa); + + ret = nvm_submit_io_sync_raw(dev, &rqd); + if (ret) + return ret; + + __free_page(page); + + return rqd.error; + } + /* - * folds a bad block list from its plane representation to its virtual - * block representation. The fold is done in place and reduced size is - * returned. - * - * If any of the planes status are bad or grown bad block, the virtual block - * is marked bad. If not bad, the first plane state acts as the block state. + * Scans a 1.2 chunk's first and last page to determine its state. + * If the chunk is found to be open, also scan it to update the write + * pointer.
*/ -int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks) +static int nvm_bb_chunk_scan(struct nvm_dev *dev, struct ppa_addr ppa, + struct nvm_chk_meta *meta) { struct nvm_geo *geo = &dev->geo; - int blk, offset, pl, blktype; + int ret, pg, pl; - if (nr_blks != geo->num_chk * geo->pln_mode) - return -EINVAL; + /* sense first page */ + ret = nvm_bb_chunk_sense(dev, ppa); + if (ret < 0) /* io error */ + return ret; + else if (ret == 0) /* valid data */ + meta->state = NVM_CHK_ST_OPEN; + else if (ret > 0) { + /* + * If empty page, the chunk is free, else it is an + * actual io error. In that case, mark it offline. + */ + switch (ret) { + case NVM_RSP_ERR_EMPTYPAGE: + meta->state = NVM_CHK_ST_FREE; + return 0; + case NVM_RSP_ERR_FAILCRC: + case NVM_RSP_ERR_FAILECC: + case NVM_RSP_WARN_HIGHECC: + meta->state = NVM_CHK_ST_OPEN; + goto scan; + default: + return -ret; /* other io error */ + } + } + + /* sense last page */ + ppa.g.pg = geo->num_pg - 1; + ppa.g.pl = geo->num_pln - 1; + + ret = nvm_bb_chunk_sense(dev, ppa); + if (ret < 0) /* io error */ + return ret; + else if (ret == 0) { /* Chunk fully written */ + meta->state = NVM_CHK_ST_CLOSED; + meta->wp = geo->clba; + return 0; + } else if (ret > 0) { + switch (ret) { + case NVM_RSP_ERR_EMPTYPAGE: + case NVM_RSP_ERR_FAILCRC: + case NVM_RSP_ERR_FAILECC: + case NVM_RSP_WARN_HIGHECC: + meta->state = NVM_CHK_ST_OPEN; + break; + default: + return -ret; /* other io error */ + } + } + +scan: + /* + * chunk is open, we scan sequentially to update the write pointer. + * We make the assumption that targets write data across all planes + * before moving to the next page. + */ + for (pg = 0; pg < geo->num_pg; pg++) { + for (pl = 0; pl < geo->num_pln; pl++) { + ppa.g.pg = pg; + ppa.g.pl = pl; + + ret = nvm_bb_chunk_sense(dev, ppa); + if (ret < 0) /* io error */ + return ret; + else if (ret == 0) { + meta->wp += geo->ws_min; + } else if (ret > 0) { + switch (ret) { + case NVM_RSP_ERR_EMPTYPAGE: + return 0; + case NVM_RSP_ERR_FAILCRC: + case NVM_RSP_ERR_FAILECC: + case NVM_RSP_WARN_HIGHECC: + meta->wp += geo->ws_min; + break; + default: + return -ret; /* other io error */ + } + } + } + } + + return 0; +} + +/* + * folds a bad block list from its plane representation to its + * chunk representation. + * + * If any of the plane states is bad or grown-bad, the chunk is marked + * offline. If not bad, the first plane state acts as the chunk state.
+ */ +static int nvm_bb_to_chunk(struct nvm_dev *dev, struct ppa_addr ppa, + u8 *blks, int nr_blks, struct nvm_chk_meta *meta) +{ + struct nvm_geo *geo = &dev->geo; + int ret, blk, pl, offset, blktype; for (blk = 0; blk < geo->num_chk; blk++) { offset = blk * geo->pln_mode; blktype = blks[offset]; - /* Bad blocks on any planes take precedence over other types */ for (pl = 0; pl < geo->pln_mode; pl++) { if (blks[offset + pl] & (NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) { @@ -859,23 +951,124 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks) } } - blks[blk] = blktype; + ppa.g.blk = blk; + + meta->wp = 0; + meta->type = NVM_CHK_TP_W_SEQ; + meta->wi = 0; + meta->slba = generic_to_dev_addr(dev, ppa).ppa; + meta->cnlb = dev->geo.clba; + + if (blktype == NVM_BLK_T_FREE) { + ret = nvm_bb_chunk_scan(dev, ppa, meta); + if (ret) + return ret; + } else { + meta->state = NVM_CHK_ST_OFFLINE; + } + + meta++; } - return geo->num_chk; + return 0; +} + +static int nvm_get_bb_meta(struct nvm_dev *dev, sector_t slba, + int nchks, struct nvm_chk_meta *meta) +{ + struct nvm_geo *geo = &dev->geo; + struct ppa_addr ppa; + u8 *blks; + int ch, lun, nr_blks; + int ret; + + ppa.ppa = slba; + ppa = dev_to_generic_addr(dev, ppa); + + if (ppa.g.blk != 0) + return -EINVAL; + + if ((nchks % geo->num_chk) != 0) + return -EINVAL; + + nr_blks = geo->num_chk * geo->pln_mode; + + blks = kmalloc(nr_blks, GFP_KERNEL); + if (!blks) + return -ENOMEM; + + for (ch = ppa.g.ch; ch < geo->num_ch; ch++) { + for (lun = ppa.g.lun; lun < geo->num_lun; lun++) { + struct ppa_addr ppa_gen, ppa_dev; + + if (!nchks) + goto done; + + ppa_gen.ppa = 0; + ppa_gen.g.ch = ch; + ppa_gen.g.lun = lun; + ppa_dev = generic_to_dev_addr(dev, ppa_gen); + + ret = dev->ops->get_bb_tbl(dev, ppa_dev, blks); + if (ret) + goto done; + + ret = nvm_bb_to_chunk(dev, ppa_gen, blks, nr_blks, + meta); + if (ret) + goto done; + + meta += geo->num_chk; + nchks -= geo->num_chk; + } + } +done: + kfree(blks); + return ret; } -EXPORT_SYMBOL(nvm_bb_tbl_fold); -int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, - u8 *blks) +int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, + int nchks, struct nvm_chk_meta *meta) { struct nvm_dev *dev = tgt_dev->parent; nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1); - return dev->ops->get_bb_tbl(dev, ppa, blks); + if (dev->geo.version == NVM_OCSSD_SPEC_12) + return nvm_get_bb_meta(dev, (sector_t)ppa.ppa, nchks, meta); + + return dev->ops->get_chk_meta(dev, (sector_t)ppa.ppa, nchks, meta); +} +EXPORT_SYMBOL_GPL(nvm_get_chunk_meta); + +int nvm_set_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, + int nr_ppas, int type) +{ + struct nvm_dev *dev = tgt_dev->parent; + struct nvm_rq rqd; + int ret; + + if (dev->geo.version == NVM_OCSSD_SPEC_20) + return 0; + + if (nr_ppas > NVM_MAX_VLBA) { + pr_err("nvm: unable to update all blocks atomically\n"); + return -EINVAL; + } + + memset(&rqd, 0, sizeof(struct nvm_rq)); + + nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); + nvm_rq_tgt_to_dev(tgt_dev, &rqd); + + ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); + nvm_free_rqd_ppalist(tgt_dev, &rqd); + if (ret) + return -EINVAL; + + return 0; } -EXPORT_SYMBOL(nvm_get_tgt_bb_tbl); +EXPORT_SYMBOL_GPL(nvm_set_chunk_meta); static int nvm_core_init(struct nvm_dev *dev) { diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 72de7456845b..e0b513d07e14 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -27,7 +27,7 @@ static void 
pblk_line_mark_bb(struct work_struct *work) struct ppa_addr *ppa = line_ws->priv; int ret; - ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD); + ret = nvm_set_chunk_meta(dev, ppa, 1, NVM_BLK_T_GRWN_BAD); if (ret) { struct pblk_line *line; int pos; @@ -110,7 +110,7 @@ static void pblk_end_io_erase(struct nvm_rq *rqd) * * The caller is responsible for freeing the returned structure */ -struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk) +struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; @@ -126,7 +126,7 @@ struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk) if (!meta) return ERR_PTR(-ENOMEM); - ret = nvm_get_chunk_meta(dev, meta, ppa, geo->all_chunks); + ret = nvm_get_chunk_meta(dev, ppa, geo->all_chunks, meta); if (ret) { kfree(meta); return ERR_PTR(-EIO); diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 039f62d05e84..53bd52114aee 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -540,67 +540,6 @@ static void pblk_lines_free(struct pblk *pblk) kfree(pblk->lines); } -static int pblk_bb_get_tbl(struct nvm_tgt_dev *dev, struct pblk_lun *rlun, - u8 *blks, int nr_blks) -{ - struct ppa_addr ppa; - int ret; - - ppa.ppa = 0; - ppa.g.ch = rlun->bppa.g.ch; - ppa.g.lun = rlun->bppa.g.lun; - - ret = nvm_get_tgt_bb_tbl(dev, ppa, blks); - if (ret) - return ret; - - nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks); - if (nr_blks < 0) - return -EIO; - - return 0; -} - -static void *pblk_bb_get_meta(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - u8 *meta; - int i, nr_blks, blk_per_lun; - int ret; - - blk_per_lun = geo->num_chk * geo->pln_mode; - nr_blks = blk_per_lun * geo->all_luns; - - meta = kmalloc(nr_blks, GFP_KERNEL); - if (!meta) - return ERR_PTR(-ENOMEM); - - for (i = 0; i < geo->all_luns; i++) { - struct pblk_lun *rlun = &pblk->luns[i]; - u8 *meta_pos = meta + i * blk_per_lun; - - ret = pblk_bb_get_tbl(dev, rlun, meta_pos, blk_per_lun); - if (ret) { - kfree(meta); - return ERR_PTR(-EIO); - } - } - - return meta; -} - -static void *pblk_chunk_get_meta(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - if (geo->version == NVM_OCSSD_SPEC_12) - return pblk_bb_get_meta(pblk); - else - return pblk_chunk_get_info(pblk); -} - static int pblk_luns_init(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; @@ -699,51 +638,7 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) atomic_set(&pblk->rl.free_user_blocks, nr_free_blks); } -static int pblk_setup_line_meta_12(struct pblk *pblk, struct pblk_line *line, - void *chunk_meta) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - int i, chk_per_lun, nr_bad_chks = 0; - - chk_per_lun = geo->num_chk * geo->pln_mode; - - for (i = 0; i < lm->blk_per_line; i++) { - struct pblk_lun *rlun = &pblk->luns[i]; - struct nvm_chk_meta *chunk; - int pos = pblk_ppa_to_pos(geo, rlun->bppa); - u8 *lun_bb_meta = chunk_meta + pos * chk_per_lun; - - chunk = &line->chks[pos]; - - /* - * In 1.2 spec. chunk state is not persisted by the device. Thus - * some of the values are reset each time pblk is instantiated, - * so we have to assume that the block is closed. 
- */ - if (lun_bb_meta[line->id] == NVM_BLK_T_FREE) - chunk->state = NVM_CHK_ST_CLOSED; - else - chunk->state = NVM_CHK_ST_OFFLINE; - - chunk->type = NVM_CHK_TP_W_SEQ; - chunk->wi = 0; - chunk->slba = -1; - chunk->cnlb = geo->clba; - chunk->wp = 0; - - if (!(chunk->state & NVM_CHK_ST_OFFLINE)) - continue; - - set_bit(pos, line->blk_bitmap); - nr_bad_chks++; - } - - return nr_bad_chks; -} - -static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line, +static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line, struct nvm_chk_meta *meta) { struct nvm_tgt_dev *dev = pblk->dev; @@ -790,8 +685,6 @@ static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line, static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line, void *chunk_meta, int line_id) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; long nr_bad_chks, chk_in_line; @@ -804,10 +697,7 @@ static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line, line->vsc = &l_mg->vsc_list[line_id]; spin_lock_init(&line->lock); - if (geo->version == NVM_OCSSD_SPEC_12) - nr_bad_chks = pblk_setup_line_meta_12(pblk, line, chunk_meta); - else - nr_bad_chks = pblk_setup_line_meta_20(pblk, line, chunk_meta); + nr_bad_chks = pblk_setup_line_meta_chk(pblk, line, chunk_meta); chk_in_line = lm->blk_per_line - nr_bad_chks; if (nr_bad_chks < 0 || nr_bad_chks > lm->blk_per_line || @@ -1058,7 +948,7 @@ static int pblk_lines_init(struct pblk *pblk) if (ret) goto fail_free_meta; - chunk_meta = pblk_chunk_get_meta(pblk); + chunk_meta = pblk_get_chunk_meta(pblk); if (IS_ERR(chunk_meta)) { ret = PTR_ERR(chunk_meta); goto fail_free_luns; diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 48b3035df3c4..579b4ea9716c 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -774,7 +774,7 @@ void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write); int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_c_ctx *c_ctx); void pblk_discard(struct pblk *pblk, struct bio *bio); -struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk); +struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk); struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, struct nvm_chk_meta *lp, struct ppa_addr ppa); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 1e4f97538838..e42af7771fe5 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -567,8 +567,8 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas, * Expect the lba in device format */ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, - struct nvm_chk_meta *meta, - sector_t slba, int nchks) + sector_t slba, int nchks, + struct nvm_chk_meta *meta) { struct nvm_geo *geo = &ndev->geo; struct nvme_ns *ns = ndev->q->queuedata; diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index f4a84694e5e2..0106984400bc 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -86,8 +86,8 @@ struct nvm_chk_meta; typedef int (nvm_id_fn)(struct nvm_dev *); typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *); typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int); -typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, struct nvm_chk_meta *, - sector_t, int); +typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int, + struct nvm_chk_meta 
*); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *); typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); @@ -532,18 +532,13 @@ extern struct nvm_dev *nvm_alloc_dev(int); extern int nvm_register(struct nvm_dev *); extern void nvm_unregister(struct nvm_dev *); - -extern int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, - struct nvm_chk_meta *meta, struct ppa_addr ppa, - int nchks); - -extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, +extern int nvm_get_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr, + int, struct nvm_chk_meta *); +extern int nvm_set_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr *, int, int); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *); extern void nvm_end_io(struct nvm_rq *); -extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); -extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); #else /* CONFIG_NVM */ struct nvm_dev_ops; -- cgit From afdc23c91e085c56d1b0c119563c202b07255599 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Tue, 9 Oct 2018 13:11:37 +0200 Subject: lightnvm: pblk: unify vector max req constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both NVM_MAX_VLBA and PBLK_MAX_REQ_ADDRS define how many LBAs are available in a vector command. pblk uses them interchangeably in its implementation. Use NVM_MAX_VLBA as the main one and remove usages of PBLK_MAX_REQ_ADDRS. Also remove the power representation that only has one user, and instead calculate it at runtime. Signed-off-by: Matias Bjørling Reviewed-by: Javier González Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 6 ------ drivers/lightnvm/pblk-read.c | 6 +++--- drivers/lightnvm/pblk-rl.c | 4 ++-- drivers/lightnvm/pblk.h | 10 ++++------ 4 files changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 53bd52114aee..9119c64d6f62 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -376,12 +376,6 @@ static int pblk_core_init(struct pblk *pblk) pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA); pblk_set_sec_per_write(pblk, pblk->min_write_pgs); - if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) { - pblk_err(pblk, "vector list too big(%u > %u)\n", - pblk->max_write_pgs, PBLK_MAX_REQ_ADDRS); - return -EINVAL; - } - pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t), GFP_KERNEL); if (!pblk->pad_dist) diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 07b7f82c7595..9c61c19be5dc 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -43,7 +43,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned long *read_bitmap) { struct pblk_sec_meta *meta_list = rqd->meta_list; - struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS]; + struct ppa_addr ppas[NVM_MAX_VLBA]; int nr_secs = rqd->nr_ppas; bool advanced_bio = false; int i, j = 0; @@ -450,7 +450,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) int ret = NVM_IO_ERR; /* logic error: lba out-of-bounds.
Ignore read request */ - if (blba >= pblk->rl.nr_secs || nr_secs > PBLK_MAX_REQ_ADDRS) { + if (blba >= pblk->rl.nr_secs || nr_secs > NVM_MAX_VLBA) { WARN(1, "pblk: read lba out of bounds (lba:%llu, nr:%d)\n", (unsigned long long)blba, nr_secs); return NVM_IO_ERR; @@ -547,7 +547,7 @@ static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_line *line, u64 *lba_list, u64 *paddr_list_gc, unsigned int nr_secs) { - struct ppa_addr ppa_list_l2p[PBLK_MAX_REQ_ADDRS]; + struct ppa_addr ppa_list_l2p[NVM_MAX_VLBA]; struct ppa_addr ppa_gc; int valid_secs = 0; int i; diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c index 6a0616a6fcaf..a32790f7b7fc 100644 --- a/drivers/lightnvm/pblk-rl.c +++ b/drivers/lightnvm/pblk-rl.c @@ -127,7 +127,7 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl, } else if (free_blocks < rl->high) { int shift = rl->high_pw - rl->rb_windows_pw; int user_windows = free_blocks >> shift; - int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW; + int user_max = user_windows << ilog2(NVM_MAX_VLBA); rl->rb_user_max = user_max; rl->rb_gc_max = max - user_max; @@ -228,7 +228,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget) rl->rsv_blocks = min_blocks; /* This will always be a power-of-2 */ - rb_windows = budget / PBLK_MAX_REQ_ADDRS; + rb_windows = budget / NVM_MAX_VLBA; rl->rb_windows_pw = get_count_order(rb_windows); /* To start with, all buffer is available to user I/O writers */ diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 579b4ea9716c..64ae0c7ed3bb 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -37,8 +37,6 @@ #define PBLK_SECTOR (512) #define PBLK_EXPOSED_PAGE_SIZE (4096) -#define PBLK_MAX_REQ_ADDRS (64) -#define PBLK_MAX_REQ_ADDRS_PW (6) #define PBLK_NR_CLOSE_JOBS (4) @@ -99,8 +97,8 @@ enum { PBLK_RL_LOW = 4 }; -#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS) -#define pblk_dma_ppa_size (sizeof(u64) * PBLK_MAX_REQ_ADDRS) +#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * NVM_MAX_VLBA) +#define pblk_dma_ppa_size (sizeof(u64) * NVM_MAX_VLBA) /* write buffer completion context */ struct pblk_c_ctx { @@ -218,8 +216,8 @@ struct pblk_lun { struct pblk_gc_rq { struct pblk_line *line; void *data; - u64 paddr_list[PBLK_MAX_REQ_ADDRS]; - u64 lba_list[PBLK_MAX_REQ_ADDRS]; + u64 paddr_list[NVM_MAX_VLBA]; + u64 lba_list[NVM_MAX_VLBA]; int nr_secs; int secs_to_gc; struct list_head list; -- cgit From 8bbd45d02a118cbefdf4e1a6274bd965a6aa3c59 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Tue, 9 Oct 2018 13:11:38 +0200 Subject: lightnvm: pblk: fix incorrect min_write_pgs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The calculation of pblk->min_write_pgs should only use the optimal write size attribute provided by the drive; it does not correlate to the memory page size of the system, which can be smaller or larger than the reported LBA size.
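A worked example of why the PAGE_SIZE factor is wrong (hypothetical geometry; ws_opt = 8, 4 KiB memory pages):

#include <stdio.h>

int main(void)
{
	int ws_opt = 8;				/* drive's optimal write size */
	int page_size = 4096;			/* system memory page size */
	int csecs[] = { 512, 4096, 16384 };	/* possible LBA sizes */

	for (int i = 0; i < 3; i++)
		printf("csecs=%5d old=%2d new=%d\n", csecs[i],
		       ws_opt * (csecs[i] / page_size),	/* old formula */
		       ws_opt);				/* new formula */

	/* The old formula truncates to 0 for a 512-byte LBA and inflates
	 * to 32 for a 16 KiB LBA; neither matches what the drive asked
	 * for, so only ws_opt itself should be used. */
	return 0;
}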
Signed-off-by: Matias Bjørling Reviewed-by: Javier González Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 9119c64d6f62..8adc8ac8b03c 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -371,7 +371,7 @@ static int pblk_core_init(struct pblk *pblk) atomic64_set(&pblk->nr_flush, 0); pblk->nr_flush_rst = 0; - pblk->min_write_pgs = geo->ws_opt * (geo->csecs / PAGE_SIZE); + pblk->min_write_pgs = geo->ws_opt; max_write_ppas = pblk->min_write_pgs * geo->all_luns; pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA); pblk_set_sec_per_write(pblk, pblk->min_write_pgs); -- cgit From d20be90ae0bb938ee0dfc8277d531191341d8e39 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Tue, 9 Oct 2018 13:11:39 +0200 Subject: lightnvm: pblk: remove size and out of bounds read check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The I/O size and capacity checks are already done by the block layer. Signed-off-by: Matias Bjørling Reviewed-by: Javier González Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-read.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 9c61c19be5dc..ba3dcb6be4c3 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -449,13 +449,6 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) DECLARE_BITMAP(read_bitmap, NVM_MAX_VLBA); int ret = NVM_IO_ERR; - /* logic error: lba out-of-bounds. Ignore read request */ - if (blba >= pblk->rl.nr_secs || nr_secs > NVM_MAX_VLBA) { - WARN(1, "pblk: read lba out of bounds (lba:%llu, nr:%d)\n", - (unsigned long long)blba, nr_secs); - return NVM_IO_ERR; - } - generic_start_io_acct(q, REQ_OP_READ, bio_sectors(bio), &pblk->disk->part0); -- cgit From ae14cc044b5988148d819c377fd0cc1c7504bc3c Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Tue, 9 Oct 2018 13:11:40 +0200 Subject: lightnvm: pblk: refactor put line fn on read completion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The read completion path uses the put_line variable to decide whether the reference on a line should be released. The function name used for that is pblk_read_put_rqd_kref, which could lead one to believe that it is the rqd that is releasing the reference, while it is the line reference that is put. Rename and also split the function in two to account for either rqd or single ppa callers and move it to core, such that it later can be used in the write path as well. 
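A minimal sketch of the resulting helper shapes (simplified stand-in types, not the pblk structures):

#include <stdio.h>

struct line { int id; int ref; };
struct rq { struct line *lines[4]; int nr_ppas; };

/* drop one reference; it is the line, not the request, that owns it */
static void line_put(struct line *l)
{
	if (--l->ref == 0)
		printf("line %d fully released\n", l->id);
}

/* single-ppa flavour (pblk_ppa_to_line_put in this patch) */
static void ppa_to_line_put(struct line *l)
{
	line_put(l);
}

/* whole-request flavour (pblk_rq_to_line_put): loop over the ppas */
static void rq_to_line_put(struct rq *r)
{
	for (int i = 0; i < r->nr_ppas; i++)
		ppa_to_line_put(r->lines[i]);
}

int main(void)
{
	struct line l = { .id = 7, .ref = 2 };	/* refs taken at map time */
	struct rq r = { .lines = { &l, &l }, .nr_ppas = 2 };

	rq_to_line_put(&r);	/* read completion drops both references */
	return 0;
}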
Signed-off-by: Matias Bjørling Reviewed-by: Javier González Reviewed-by: Heiner Litz Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 19 +++++++++++++++++++ drivers/lightnvm/pblk-read.c | 18 +----------------- drivers/lightnvm/pblk.h | 2 ++ 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index e0b513d07e14..5f99cf396072 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1426,6 +1426,25 @@ retry_setup: return line; } +void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa) +{ + struct pblk_line *line; + + line = &pblk->lines[pblk_ppa_to_line(ppa)]; + kref_put(&line->ref, pblk_line_put_wq); +} + +void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct ppa_addr *ppa_list; + int i; + + ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; + + for (i = 0; i < rqd->nr_ppas; i++) + pblk_ppa_to_line_put(pblk, ppa_list[i]); +} + static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line) { lockdep_assert_held(&pblk->l_mg.free_lock); diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index ba3dcb6be4c3..ad1d7713bbda 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -165,22 +165,6 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd, WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n"); } -static void pblk_read_put_rqd_kref(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct ppa_addr *ppa_list; - int i; - - ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; - - for (i = 0; i < rqd->nr_ppas; i++) { - struct ppa_addr ppa = ppa_list[i]; - struct pblk_line *line; - - line = &pblk->lines[pblk_ppa_to_line(ppa)]; - kref_put(&line->ref, pblk_line_put_wq); - } -} - static void pblk_end_user_read(struct bio *bio) { #ifdef CONFIG_NVM_PBLK_DEBUG @@ -208,7 +192,7 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, bio_put(int_bio); if (put_line) - pblk_read_put_rqd_kref(pblk, rqd); + pblk_rq_to_line_put(pblk, rqd); #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_add(rqd->nr_ppas, &pblk->sync_reads); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 64ae0c7ed3bb..be67bbfa3d0a 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -787,6 +787,8 @@ struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, struct pblk_line *pblk_line_get(struct pblk *pblk); struct pblk_line *pblk_line_get_first_data(struct pblk *pblk); struct pblk_line *pblk_line_replace_data(struct pblk *pblk); +void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa); +void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd); int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line); void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line); struct pblk_line *pblk_line_get_data(struct pblk *pblk); -- cgit From 2cf99bbd106f89fc72f778e8ad9d5538f1ef939b Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:11:41 +0200 Subject: lightnvm: pblk: add helpers for chunk addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement helpers to go from ppas to a chunk within a line and an address within a chunk. These helpers will be used in the patches adding trace support in pblk, which will be sent in this window.
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk.h | 19 +++++++++++++++++++ include/linux/lightnvm.h | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index be67bbfa3d0a..f95fe75fef6e 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -1034,6 +1034,25 @@ static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr, return ppa; } +static inline struct nvm_chk_meta *pblk_dev_ppa_to_chunk(struct pblk *pblk, + struct ppa_addr p) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line *line = &pblk->lines[pblk_ppa_to_line(p)]; + int pos = pblk_ppa_to_pos(geo, p); + + return &line->chks[pos]; +} + +static inline u64 pblk_dev_ppa_to_chunk_addr(struct pblk *pblk, + struct ppa_addr p) +{ + struct nvm_tgt_dev *dev = pblk->dev; + + return dev_to_chunk_addr(dev->parent, &pblk->addrf, p); +} + static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, struct ppa_addr p) { diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 0106984400bc..77743a02ec0d 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -487,6 +487,25 @@ static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev, return l; } +static inline u64 dev_to_chunk_addr(struct nvm_dev *dev, void *addrf, + struct ppa_addr p) +{ + struct nvm_geo *geo = &dev->geo; + u64 caddr; + + if (geo->version == NVM_OCSSD_SPEC_12) { + struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)addrf; + + caddr = (u64)p.g.pg << ppaf->pg_offset; + caddr |= (u64)p.g.pl << ppaf->pln_offset; + caddr |= (u64)p.g.sec << ppaf->sec_offset; + } else { + caddr = p.m.sec; + } + + return caddr; +} + typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, -- cgit From cb21665c8d13bb7e1f6e211442c53f4675f1569d Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:11:42 +0200 Subject: lightnvm: pblk: improve line helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current helper to obtain a line from a ppa returns the line id, which requires its users to explicitly retrieve the pointer to the line with the id. Make 2 different helpers: one returning the line id and one returning the line directly. 
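The pattern is the common one of layering a pointer-returning helper over an id-returning one. A minimal sketch, assuming a toy line table indexed by the id encoded in the ppa (toy_* names are illustrative, not the driver's):

#include <stdio.h>

struct toy_line { int id; };
static struct toy_line toy_lines[8];

/* keep the id helper for callers that only need the number */
static inline int toy_ppa_to_line_id(int ppa_blk)
{
    return ppa_blk;    /* the line id is encoded in the ppa */
}

/* layer the lookup on top for the common case */
static inline struct toy_line *toy_ppa_to_line(int ppa_blk)
{
    return &toy_lines[toy_ppa_to_line_id(ppa_blk)];
}

int main(void)
{
    toy_lines[5].id = 5;
    printf("line id via pointer helper: %d\n", toy_ppa_to_line(5)->id);
    return 0;
}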
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 15 ++++++--------- drivers/lightnvm/pblk-rb.c | 2 +- drivers/lightnvm/pblk-read.c | 4 ++-- drivers/lightnvm/pblk-write.c | 4 ++-- drivers/lightnvm/pblk.h | 13 +++++++++---- 5 files changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 5f99cf396072..36ac9eff8ebd 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -32,7 +32,7 @@ static void pblk_line_mark_bb(struct work_struct *work) struct pblk_line *line; int pos; - line = &pblk->lines[pblk_ppa_to_line(*ppa)]; + line = pblk_ppa_to_line(pblk, *ppa); pos = pblk_ppa_to_pos(&dev->geo, *ppa); pblk_err(pblk, "failed to mark bb, line:%d, pos:%d\n", @@ -80,7 +80,7 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) struct pblk_line *line; int pos; - line = &pblk->lines[pblk_ppa_to_line(rqd->ppa_addr)]; + line = pblk_ppa_to_line(pblk, rqd->ppa_addr); pos = pblk_ppa_to_pos(geo, rqd->ppa_addr); chunk = &line->chks[pos]; @@ -192,7 +192,6 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa) { struct pblk_line *line; u64 paddr; - int line_id; #ifdef CONFIG_NVM_PBLK_DEBUG /* Callers must ensure that the ppa points to a device address */ @@ -200,8 +199,7 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa) BUG_ON(pblk_ppa_empty(ppa)); #endif - line_id = pblk_ppa_to_line(ppa); - line = &pblk->lines[line_id]; + line = pblk_ppa_to_line(pblk, ppa); paddr = pblk_dev_ppa_to_line_addr(pblk, ppa); __pblk_map_invalidate(pblk, line, paddr); @@ -1430,7 +1428,7 @@ void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa) { struct pblk_line *line; - line = &pblk->lines[pblk_ppa_to_line(ppa)]; + line = pblk_ppa_to_line(pblk, ppa); kref_put(&line->ref, pblk_line_put_wq); } @@ -1688,7 +1686,7 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa) struct nvm_geo *geo = &dev->geo; pblk_err(pblk, "could not async erase line:%d,blk:%d\n", - pblk_ppa_to_line(ppa), + pblk_ppa_to_line_id(ppa), pblk_ppa_to_pos(geo, ppa)); } @@ -2059,8 +2057,7 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, /* If the L2P entry maps to a line, the reference is valid */ if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) { - int line_id = pblk_ppa_to_line(ppa); - struct pblk_line *line = &pblk->lines[line_id]; + struct pblk_line *line = pblk_ppa_to_line(pblk, ppa); kref_get(&line->ref); } diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index f6eec0212dfc..a7648e12f54f 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -225,7 +225,7 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update) pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa, entry->cacheline); - line = &pblk->lines[pblk_ppa_to_line(w_ctx->ppa)]; + line = pblk_ppa_to_line(pblk, w_ctx->ppa); kref_put(&line->ref, pblk_line_put); clean_wctx(w_ctx); rb->l2p_update = (rb->l2p_update + 1) & (rb->nr_entries - 1); diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index ad1d7713bbda..49744caaa300 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -252,9 +252,9 @@ static void pblk_end_partial_read(struct nvm_rq *rqd) i = 0; hole = find_first_zero_bit(read_bitmap, nr_secs); do { - int line_id = pblk_ppa_to_line(rqd->ppa_list[i]); - struct pblk_line *line = &pblk->lines[line_id]; + struct pblk_line *line; + line = 
pblk_ppa_to_line(pblk, rqd->ppa_list[i]); kref_put(&line->ref, pblk_line_put); meta_list[hole].lba = lba_list_media[i]; diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index df99c45778d4..c23b65aaa27b 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -113,7 +113,7 @@ static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa) u64 paddr; int done = 0; - line = &pblk->lines[pblk_ppa_to_line(*ppa)]; + line = pblk_ppa_to_line(pblk, *ppa); spin_lock(&line->lock); while (!done) { @@ -171,7 +171,7 @@ static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry, /* Decrese the reference count to the line as we will * re-map these entries */ - line = &pblk->lines[pblk_ppa_to_line(w_ctx->ppa)]; + line = pblk_ppa_to_line(pblk, w_ctx->ppa); kref_put(&line->ref, pblk_line_put); pos = (pos + 1) & (rb->nr_entries - 1); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index f95fe75fef6e..af9477b7e803 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -984,11 +984,17 @@ static inline int pblk_pad_distance(struct pblk *pblk) return geo->mw_cunits * geo->all_luns * geo->ws_opt; } -static inline int pblk_ppa_to_line(struct ppa_addr p) +static inline int pblk_ppa_to_line_id(struct ppa_addr p) { return p.a.blk; } +static inline struct pblk_line *pblk_ppa_to_line(struct pblk *pblk, + struct ppa_addr p) +{ + return &pblk->lines[pblk_ppa_to_line_id(p)]; +} + static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) { return p.a.lun * geo->num_ch + p.a.ch; @@ -1039,7 +1045,7 @@ static inline struct nvm_chk_meta *pblk_dev_ppa_to_chunk(struct pblk *pblk, { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct pblk_line *line = &pblk->lines[pblk_ppa_to_line(p)]; + struct pblk_line *line = pblk_ppa_to_line(pblk, p); int pos = pblk_ppa_to_pos(geo, p); return &line->chks[pos]; @@ -1371,8 +1377,7 @@ static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd) int i; for (i = 0; i < rqd->nr_ppas; i++) { - ppa = ppa_list[i]; - line = &pblk->lines[pblk_ppa_to_line(ppa)]; + line = pblk_ppa_to_line(pblk, ppa_list[i]); spin_lock(&line->lock); if (line->state != PBLK_LINESTATE_OPEN) { -- cgit From 2e696f9093d0778d66d486c42577d651b60534ab Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:11:43 +0200 Subject: lightnvm: pblk: fix comment typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix comment typo Decrese -> Decrease Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index c23b65aaa27b..625ed5a3a020 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -168,7 +168,7 @@ static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry, /* Release flags on write context. Protect from writes */ smp_store_release(&w_ctx->flags, flags); - /* Decrese the reference count to the line as we will + /* Decrease the reference count to the line as we will * re-map these entries */ line = pblk_ppa_to_line(pblk, w_ctx->ppa); -- cgit From 7a7d6f9b48cce82a1a666ec958125f257e5786fc Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:11:44 +0200 Subject: lightnvm: pblk: remove unused variable. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removed unused struct ppa_addr variable. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index af9477b7e803..1865ac1560fa 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -1373,7 +1373,6 @@ static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd) if (rqd->opcode == NVM_OP_PWRITE) { struct pblk_line *line; - struct ppa_addr ppa; int i; for (i = 0; i < rqd->nr_ppas; i++) { -- cgit From 9cc85bc761f83da41935cdd6edcdb7c122bc90bf Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:11:45 +0200 Subject: lightnvm: pblk: guarantee emeta on line close MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a line is recovered from open chunks, the memory structures for emeta have not necessarily been properly set on line initialization. When closing a line, make sure that emeta is consistent so that the line can be recovered on the fast path on next reboot. Also, remove a couple of empty lines at the end of the function. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 36ac9eff8ebd..a31417682c90 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1775,6 +1775,17 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) wa->pad = cpu_to_le64(atomic64_read(&pblk->pad_wa)); wa->gc = cpu_to_le64(atomic64_read(&pblk->gc_wa)); + if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC) { + emeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC); + memcpy(emeta_buf->header.uuid, pblk->instance_uuid, 16); + emeta_buf->header.id = cpu_to_le32(line->id); + emeta_buf->header.type = cpu_to_le16(line->type); + emeta_buf->header.version_major = EMETA_VERSION_MAJOR; + emeta_buf->header.version_minor = EMETA_VERSION_MINOR; + emeta_buf->header.crc = cpu_to_le32( + pblk_calc_meta_header_crc(pblk, &emeta_buf->header)); + } + emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas); emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf)); @@ -1792,8 +1803,6 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) spin_unlock(&l_mg->close_lock); pblk_line_should_sync_meta(pblk); - - } static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line) -- cgit From d68a9344041b6dd304ff382d0c7805869f09944f Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 9 Oct 2018 13:11:46 +0200 Subject: lightnvm: introduce nvm_rq_to_ppa_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a number of places in the lightnvm subsystem where the user iterates over the ppa list. Before iterating, the user must know if it is a single or multiple LBAs due to vector commands using either the nvm_rq ->ppa_addr or ->ppa_list fields on command submission, which leads to open-coding the if/else statement. Instead of having multiple if/else's, move it into a function that can be called by its users. A nice side effect of this cleanup is that this patch fixes up a bunch of cases where we don't consider the single-ppa case in pblk. 
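The accessor pattern can be shown in a few lines of user-space C; the toy_rq struct below is a stripped-down stand-in for struct nvm_rq, keeping only the three fields the example needs:

#include <stdio.h>

struct toy_rq {
    int nr_ppas;
    int ppa_addr;     /* used when nr_ppas == 1 */
    int *ppa_list;    /* used when nr_ppas > 1 */
};

/* one accessor instead of an open-coded if/else at every call site */
static inline int *toy_rq_to_ppa_list(struct toy_rq *rqd)
{
    return (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
}

int main(void)
{
    int list[3] = { 10, 11, 12 };
    struct toy_rq single = { .nr_ppas = 1, .ppa_addr = 7 };
    struct toy_rq vector = { .nr_ppas = 3, .ppa_list = list };
    struct toy_rq *reqs[2] = { &single, &vector };
    int r, i;

    for (r = 0; r < 2; r++) {
        int *ppas = toy_rq_to_ppa_list(reqs[r]);

        for (i = 0; i < reqs[r]->nr_ppas; i++)
            printf("req %d ppa %d\n", r, ppas[i]);
    }
    return 0;
}

Every iteration site then starts from the accessor and loops up to nr_ppas; the single-ppa case falls out naturally because &ppa_addr acts as a one-element list.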
Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 14 ++++---------- drivers/lightnvm/pblk-map.c | 10 ++++++---- drivers/lightnvm/pblk-read.c | 11 ++++------- drivers/lightnvm/pblk-recovery.c | 9 ++++++--- drivers/lightnvm/pblk-write.c | 18 ++++++++---------- drivers/lightnvm/pblk.h | 4 +--- include/linux/lightnvm.h | 5 +++++ 7 files changed, 34 insertions(+), 37 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 8df188e0767e..efb976a863d2 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -603,22 +603,16 @@ static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, static void nvm_rq_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) { - if (rqd->nr_ppas == 1) { - nvm_ppa_tgt_to_dev(tgt_dev, &rqd->ppa_addr, 1); - return; - } + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - nvm_ppa_tgt_to_dev(tgt_dev, rqd->ppa_list, rqd->nr_ppas); + nvm_ppa_tgt_to_dev(tgt_dev, ppa_list, rqd->nr_ppas); } static void nvm_rq_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) { - if (rqd->nr_ppas == 1) { - nvm_ppa_dev_to_tgt(tgt_dev, &rqd->ppa_addr, 1); - return; - } + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - nvm_ppa_dev_to_tgt(tgt_dev, rqd->ppa_list, rqd->nr_ppas); + nvm_ppa_dev_to_tgt(tgt_dev, ppa_list, rqd->nr_ppas); } int nvm_register_tgt_type(struct nvm_tgt_type *tt) diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c index 953ca31dda68..dc0efb852475 100644 --- a/drivers/lightnvm/pblk-map.c +++ b/drivers/lightnvm/pblk-map.c @@ -88,13 +88,14 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, unsigned int off) { struct pblk_sec_meta *meta_list = rqd->meta_list; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); unsigned int map_secs; int min = pblk->min_write_pgs; int i; for (i = off; i < rqd->nr_ppas; i += min) { map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; - if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], + if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i], lun_bitmap, &meta_list[i], map_secs)) { bio_put(rqd->bio); pblk_free_rqd(pblk, rqd, PBLK_WRITE); @@ -112,6 +113,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, struct nvm_geo *geo = &dev->geo; struct pblk_line_meta *lm = &pblk->lm; struct pblk_sec_meta *meta_list = rqd->meta_list; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); struct pblk_line *e_line, *d_line; unsigned int map_secs; int min = pblk->min_write_pgs; @@ -119,14 +121,14 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, for (i = 0; i < rqd->nr_ppas; i += min) { map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; - if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], + if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i], lun_bitmap, &meta_list[i], map_secs)) { bio_put(rqd->bio); pblk_free_rqd(pblk, rqd, PBLK_WRITE); pblk_pipeline_stop(pblk); } - erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]); + erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]); /* line can change after page map. We might also be writing the * last line. 
@@ -141,7 +143,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, set_bit(erase_lun, e_line->erase_bitmap); atomic_dec(&e_line->left_eblks); - *erase_ppa = rqd->ppa_list[i]; + *erase_ppa = ppa_list[i]; erase_ppa->a.blk = e_line->id; spin_unlock(&e_line->lock); diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 49744caaa300..d3ff8c3e9010 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -116,10 +116,9 @@ static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd, if (lba != blba + i) { #ifdef CONFIG_NVM_PBLK_DEBUG - struct ppa_addr *p; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - p = (nr_lbas == 1) ? &rqd->ppa_list[i] : &rqd->ppa_addr; - print_ppa(pblk, p, "seq", i); + print_ppa(pblk, &ppa_list[i], "seq", i); #endif pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n", lba, (u64)blba + i); @@ -148,11 +147,9 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd, if (lba != meta_lba) { #ifdef CONFIG_NVM_PBLK_DEBUG - struct ppa_addr *p; - int nr_ppas = rqd->nr_ppas; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - p = (nr_ppas == 1) ? &rqd->ppa_list[j] : &rqd->ppa_addr; - print_ppa(pblk, p, "seq", j); + print_ppa(pblk, &ppa_list[j], "seq", j); #endif pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n", lba, meta_lba); diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index cf629ab016ba..3bd2b6b0a359 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -161,6 +161,8 @@ next_read_rq: if (pblk_io_aligned(pblk, rq_ppas)) rqd->is_seq = 1; + ppa_list = nvm_rq_to_ppa_list(rqd); + for (i = 0; i < rqd->nr_ppas; ) { struct ppa_addr ppa; int pos; @@ -175,7 +177,7 @@ next_read_rq: } for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++) - rqd->ppa_list[i] = + ppa_list[i] = addr_to_gen_ppa(pblk, r_ptr_int, line->id); } @@ -202,7 +204,7 @@ next_read_rq: if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) continue; - pblk_update_map(pblk, lba, rqd->ppa_list[i]); + pblk_update_map(pblk, lba, ppa_list[i]); } left_ppas -= rq_ppas; @@ -221,10 +223,11 @@ static void pblk_recov_complete(struct kref *ref) static void pblk_end_io_recov(struct nvm_rq *rqd) { + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); struct pblk_pad_rq *pad_rq = rqd->private; struct pblk *pblk = pad_rq->pblk; - pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_up_page(pblk, ppa_list, rqd->nr_ppas); pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 625ed5a3a020..c20bb7f6d703 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -208,15 +208,10 @@ static void pblk_submit_rec(struct work_struct *work) struct pblk *pblk = recovery->pblk; struct nvm_rq *rqd = recovery->rqd; struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - struct ppa_addr *ppa_list; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); pblk_log_write_err(pblk, rqd); - if (rqd->nr_ppas == 1) - ppa_list = &rqd->ppa_addr; - else - ppa_list = rqd->ppa_list; - pblk_map_remaining(pblk, ppa_list); pblk_queue_resubmit(pblk, c_ctx); @@ -273,9 +268,10 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd); struct pblk_line *line = m_ctx->private; struct pblk_emeta *emeta = line->emeta; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); int sync; - pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_up_page(pblk, ppa_list, rqd->nr_ppas); if 
(rqd->error) { pblk_log_write_err(pblk, rqd); @@ -375,6 +371,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; struct pblk_emeta *emeta = meta_line->emeta; + struct ppa_addr *ppa_list; struct pblk_g_ctx *m_ctx; struct bio *bio; struct nvm_rq *rqd; @@ -409,12 +406,13 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) if (ret) goto fail_free_bio; + ppa_list = nvm_rq_to_ppa_list(rqd); for (i = 0; i < rqd->nr_ppas; ) { spin_lock(&meta_line->lock); paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas); spin_unlock(&meta_line->lock); for (j = 0; j < rq_ppas; j++, i++, paddr++) - rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id); + ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id); } spin_lock(&l_mg->close_lock); @@ -423,7 +421,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) list_del(&meta_line->list); spin_unlock(&l_mg->close_lock); - pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_down_page(pblk, ppa_list, rqd->nr_ppas); ret = pblk_submit_io(pblk, rqd); if (ret) { @@ -434,7 +432,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) return NVM_IO_OK; fail_rollback: - pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_up_page(pblk, ppa_list, rqd->nr_ppas); spin_lock(&l_mg->close_lock); pblk_dealloc_page(pblk, meta_line, rq_ppas); list_add(&meta_line->list, &meta_line->list); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 1865ac1560fa..60c509a00574 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -1362,9 +1362,7 @@ static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev, static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd) { struct nvm_tgt_dev *dev = pblk->dev; - struct ppa_addr *ppa_list; - - ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) { WARN_ON(1); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 77743a02ec0d..50ac5b21297c 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -320,6 +320,11 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata) return rqdata + 1; } +static inline struct ppa_addr *nvm_rq_to_ppa_list(struct nvm_rq *rqd) +{ + return (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; +} + enum { NVM_BLK_ST_FREE = 0x1, /* Free block */ NVM_BLK_ST_TGT = 0x2, /* Block in use by target */ -- cgit From 53d82db693fe1fd1926066583fd24285fb5aae16 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 9 Oct 2018 13:11:47 +0200 Subject: lightnvm: pblk: allocate line map bitmaps using a mempool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Line map bitmap allocations are fairly large and can fail. Allocation failures are fatal to pblk, stopping the write pipeline. To avoid this, allocate the bitmaps using a mempool instead. Mempool allocations never fail if called from a process context, and pblk *should* only allocate map bitmaps in process context, but keep the failure handling for robustness sake. 
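For readers unfamiliar with the mempool guarantee being relied on here, the sketch below models it in user space: try a regular allocation first, and fall back to a small pre-allocated reserve, so the allocator only fails once the reserve is exhausted as well. This is an illustration of the idea, not the kernel mempool API:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define RESERVE 2
#define BITMAP_LEN 64

static unsigned char reserve[RESERVE][BITMAP_LEN];
static int reserve_used[RESERVE];

static void *toy_pool_alloc(void)
{
    void *p = malloc(BITMAP_LEN);
    int i;

    if (p)
        return p;
    for (i = 0; i < RESERVE; i++)    /* fall back to the reserve */
        if (!reserve_used[i]) {
            reserve_used[i] = 1;
            return reserve[i];
        }
    return NULL;    /* only if the reserve is exhausted too */
}

static void toy_pool_free(void *p)
{
    int i;

    for (i = 0; i < RESERVE; i++)
        if (p == reserve[i]) {
            reserve_used[i] = 0;
            return;
        }
    free(p);
}

int main(void)
{
    void *bitmap = toy_pool_alloc();

    memset(bitmap, 0, BITMAP_LEN);    /* pool memory is not zeroed */
    toy_pool_free(bitmap);
    return 0;
}

Note the memset after allocation: pool memory is recycled rather than freshly zeroed, which is why the patch replaces kzalloc with mempool_alloc plus an explicit memset for the map bitmap.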
Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 22 +++++++++++++++------- drivers/lightnvm/pblk-init.c | 18 ++++++++++++++++++ drivers/lightnvm/pblk-recovery.c | 2 +- drivers/lightnvm/pblk.h | 4 ++++ 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index a31417682c90..e1207a4f9d54 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1049,15 +1049,18 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line, static int pblk_line_alloc_bitmaps(struct pblk *pblk, struct pblk_line *line) { struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; - line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); + line->map_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL); if (!line->map_bitmap) return -ENOMEM; + memset(line->map_bitmap, 0, lm->sec_bitmap_len); + /* will be initialized using bb info from map_bitmap */ - line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL); + line->invalid_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL); if (!line->invalid_bitmap) { - kfree(line->map_bitmap); + mempool_free(line->map_bitmap, l_mg->bitmap_pool); line->map_bitmap = NULL; return -ENOMEM; } @@ -1243,7 +1246,9 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line) { - kfree(line->map_bitmap); + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + + mempool_free(line->map_bitmap, l_mg->bitmap_pool); line->map_bitmap = NULL; line->smeta = NULL; line->emeta = NULL; @@ -1261,8 +1266,11 @@ static void pblk_line_reinit(struct pblk_line *line) void pblk_line_free(struct pblk_line *line) { - kfree(line->map_bitmap); - kfree(line->invalid_bitmap); + struct pblk *pblk = line->pblk; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + + mempool_free(line->map_bitmap, l_mg->bitmap_pool); + mempool_free(line->invalid_bitmap, l_mg->bitmap_pool); pblk_line_reinit(line); } @@ -1741,7 +1749,7 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line) list_add_tail(&line->list, move_list); - kfree(line->map_bitmap); + mempool_free(line->map_bitmap, l_mg->bitmap_pool); line->map_bitmap = NULL; line->smeta = NULL; line->emeta = NULL; diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 8adc8ac8b03c..76a4a271b9cf 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -498,6 +498,9 @@ static void pblk_line_mg_free(struct pblk *pblk) pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type); kfree(l_mg->eline_meta[i]); } + + mempool_destroy(l_mg->bitmap_pool); + kmem_cache_destroy(l_mg->bitmap_cache); } static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg, @@ -797,6 +800,17 @@ static int pblk_line_mg_init(struct pblk *pblk) goto fail_free_smeta; } + l_mg->bitmap_cache = kmem_cache_create("pblk_lm_bitmap", + lm->sec_bitmap_len, 0, 0, NULL); + if (!l_mg->bitmap_cache) + goto fail_free_smeta; + + /* the bitmap pool is used for both valid and map bitmaps */ + l_mg->bitmap_pool = mempool_create_slab_pool(PBLK_DATA_LINES * 2, + l_mg->bitmap_cache); + if (!l_mg->bitmap_pool) + goto fail_destroy_bitmap_cache; + /* emeta allocates three different buffers for managing metadata with * in-memory and in-media layouts */ @@ -849,6 +863,10 @@ fail_free_emeta: kfree(l_mg->eline_meta[i]->buf); kfree(l_mg->eline_meta[i]); } + + mempool_destroy(l_mg->bitmap_pool); 
+fail_destroy_bitmap_cache: + kmem_cache_destroy(l_mg->bitmap_cache); fail_free_smeta: for (i = 0; i < PBLK_DATA_LINES; i++) kfree(l_mg->sline_meta[i]); diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 3bd2b6b0a359..eea901d7cebc 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -939,7 +939,7 @@ next: list_move_tail(&line->list, move_list); spin_unlock(&l_mg->gc_lock); - kfree(line->map_bitmap); + mempool_free(line->map_bitmap, l_mg->bitmap_pool); line->map_bitmap = NULL; line->smeta = NULL; line->emeta = NULL; diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 60c509a00574..9068b158de22 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -530,6 +530,10 @@ struct pblk_line_mgmt { struct pblk_emeta *eline_meta[PBLK_DATA_LINES]; unsigned long meta_bitmap; + /* Cache and mempool for map/invalid bitmaps */ + struct kmem_cache *bitmap_cache; + mempool_t *bitmap_pool; + /* Helpers for fast bitmap calculations */ unsigned long *bb_template; unsigned long *bb_aux; -- cgit From e99e802fc640b273cd070a9342a21635aa8a1f77 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 9 Oct 2018 13:11:48 +0200 Subject: lightnvm: pblk: remove unused parameters in pblk_up_rq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parameters nr_ppas and ppa_list are not used, so remove them. Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 3 +-- drivers/lightnvm/pblk-write.c | 5 ++--- drivers/lightnvm/pblk.h | 3 +-- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index e1207a4f9d54..bb1a7cc24cbb 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1929,8 +1929,7 @@ void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas) up(&rlun->wr_sem); } -void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, - unsigned long *lun_bitmap) +void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index c20bb7f6d703..cd579b440b56 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -81,8 +81,7 @@ static void pblk_complete_write(struct pblk *pblk, struct nvm_rq *rqd, #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_sub(c_ctx->nr_valid, &pblk->inflight_writes); #endif - - pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap); + pblk_up_rq(pblk, c_ctx->lun_bitmap); pos = pblk_rb_sync_init(&pblk->rwb, &flags); if (pos == c_ctx->sentry) { @@ -215,7 +214,7 @@ static void pblk_submit_rec(struct work_struct *work) pblk_map_remaining(pblk, ppa_list); pblk_queue_resubmit(pblk, c_ctx); - pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap); + pblk_up_rq(pblk, c_ctx->lun_bitmap); if (c_ctx->nr_padded) pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid, c_ctx->nr_padded); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 9068b158de22..0ca67e8f99d5 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -827,8 +827,7 @@ void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas); void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, unsigned long *lun_bitmap); void pblk_down_page(struct pblk *pblk, struct ppa_addr 
*ppa_list, int nr_ppas); -void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, - unsigned long *lun_bitmap); +void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap); int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, int nr_pages); void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, -- cgit From ea1d24bc3ac0a0a544db0686d1ff8bb0c31ad683 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 9 Oct 2018 13:11:49 +0200 Subject: lightnvm: pblk: fix up prints in pblk_read_check_rand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prefix when printing ppas in pblk_read_check_rand should be "rnd" not "seq", so fix this so we can differentiate between lba mismatches in random and sequential reads. Also change the print order so we align with pblk_read_check_seq, printing the read lba first. Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-read.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index d3ff8c3e9010..829e92857289 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -149,10 +149,10 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd, #ifdef CONFIG_NVM_PBLK_DEBUG struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - print_ppa(pblk, &ppa_list[j], "seq", j); + print_ppa(pblk, &ppa_list[j], "rnd", j); #endif pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n", - lba, meta_lba); + meta_lba, lba); WARN_ON(1); } -- cgit From 765462fa4c4d0fd3eb718f2ba14cb04c35219854 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 9 Oct 2018 13:11:50 +0200 Subject: lightnvm: pblk: fix write amplification calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the user data counter exceeds 32 bits, the write amplification calculation does not provide the right value. Fix this by using div64_u64 instead of div_u64. Fixes: 76758390f83e ("lightnvm: pblk: export write amplification counters to sysfs") Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-sysfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index 9fc3dfa168b4..e56eeeeb914e 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -337,7 +337,6 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad, { int sz; - sz = snprintf(page, PAGE_SIZE, "user:%lld gc:%lld pad:%lld WA:", user, gc, pad); @@ -349,7 +348,7 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad, u32 wa_frac; wa_int = (user + gc + pad) * 100000; - wa_int = div_u64(wa_int, user); + wa_int = div64_u64(wa_int, user); wa_int = div_u64_rem(wa_int, 100000, &wa_frac); sz += snprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n", -- cgit From 43241cfe470850a590913a86e590fd4ad9939d59 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Tue, 9 Oct 2018 13:11:51 +0200 Subject: lightnvm: pblk: remove debug from pblk_[down/up]_page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the debug-only iteration within __pblk_down_page, which then allows us to reduce the number of arguments down to pblk and the parallel unit in the functions that call it, simplifying the callers' logic considerably.
Also, rename the functions pblk_[down/up]_page to pblk_[down/up]_chunk, to communicate that it manages the write pointer of the chunk. Note that it also protects the parallel unit such that at most one chunk is active per parallel unit. Signed-off-by: Matias Bjørling Reviewed-by: Javier González Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 34 +++++++++------------------------- drivers/lightnvm/pblk-map.c | 2 +- drivers/lightnvm/pblk-recovery.c | 6 +++--- drivers/lightnvm/pblk-write.c | 6 +++--- drivers/lightnvm/pblk.h | 6 +++--- 5 files changed, 19 insertions(+), 35 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index bb1a7cc24cbb..968597d10cc2 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1861,8 +1861,7 @@ void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, queue_work(wq, &line_ws->ws); } -static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, - int nr_ppas, int pos) +static void __pblk_down_chunk(struct pblk *pblk, int pos) { struct pblk_lun *rlun = &pblk->luns[pos]; int ret; @@ -1871,13 +1870,6 @@ static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, * Only send one inflight I/O per LUN. Since we map at a page * granurality, all ppas in the I/O will map to the same LUN */ -#ifdef CONFIG_NVM_PBLK_DEBUG - int i; - - for (i = 1; i < nr_ppas; i++) - WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun || - ppa_list[0].a.ch != ppa_list[i].a.ch); -#endif ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000)); if (ret == -ETIME || ret == -EINTR) @@ -1885,21 +1877,21 @@ static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, -ret); } -void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas) +void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - int pos = pblk_ppa_to_pos(geo, ppa_list[0]); + int pos = pblk_ppa_to_pos(geo, ppa); - __pblk_down_page(pblk, ppa_list, nr_ppas, pos); + __pblk_down_chunk(pblk, pos); } -void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, +void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa, unsigned long *lun_bitmap) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - int pos = pblk_ppa_to_pos(geo, ppa_list[0]); + int pos = pblk_ppa_to_pos(geo, ppa); /* If the LUN has been locked for this same request, do no attempt to * lock it again @@ -1907,23 +1899,15 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, if (test_and_set_bit(pos, lun_bitmap)) return; - __pblk_down_page(pblk, ppa_list, nr_ppas, pos); + __pblk_down_chunk(pblk, pos); } -void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas) +void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; struct pblk_lun *rlun; - int pos = pblk_ppa_to_pos(geo, ppa_list[0]); - -#ifdef CONFIG_NVM_PBLK_DEBUG - int i; - - for (i = 1; i < nr_ppas; i++) - WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun || - ppa_list[0].a.ch != ppa_list[i].a.ch); -#endif + int pos = pblk_ppa_to_pos(geo, ppa); rlun = &pblk->luns[pos]; up(&rlun->wr_sem); diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c index dc0efb852475..ff677ca6e4e1 100644 --- a/drivers/lightnvm/pblk-map.c +++ b/drivers/lightnvm/pblk-map.c @@ -79,7 +79,7 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int 
sentry, } } - pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap); + pblk_down_rq(pblk, ppa_list[0], lun_bitmap); return 0; } diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index eea901d7cebc..cbcc0828517e 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -227,7 +227,7 @@ static void pblk_end_io_recov(struct nvm_rq *rqd) struct pblk_pad_rq *pad_rq = rqd->private; struct pblk *pblk = pad_rq->pblk; - pblk_up_page(pblk, ppa_list, rqd->nr_ppas); + pblk_up_chunk(pblk, ppa_list[0]); pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); @@ -339,12 +339,12 @@ next_pad_rq: } kref_get(&pad_rq->ref); - pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_down_chunk(pblk, rqd->ppa_list[0]); ret = pblk_submit_io(pblk, rqd); if (ret) { pblk_err(pblk, "I/O submission failed: %d\n", ret); - pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas); + pblk_up_chunk(pblk, rqd->ppa_list[0]); goto fail_free_bio; } diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index cd579b440b56..674ba4d1a9f4 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -270,7 +270,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); int sync; - pblk_up_page(pblk, ppa_list, rqd->nr_ppas); + pblk_up_chunk(pblk, ppa_list[0]); if (rqd->error) { pblk_log_write_err(pblk, rqd); @@ -420,7 +420,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) list_del(&meta_line->list); spin_unlock(&l_mg->close_lock); - pblk_down_page(pblk, ppa_list, rqd->nr_ppas); + pblk_down_chunk(pblk, ppa_list[0]); ret = pblk_submit_io(pblk, rqd); if (ret) { @@ -431,7 +431,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) return NVM_IO_OK; fail_rollback: - pblk_up_page(pblk, ppa_list, rqd->nr_ppas); + pblk_up_chunk(pblk, ppa_list[0]); spin_lock(&l_mg->close_lock); pblk_dealloc_page(pblk, meta_line, rq_ppas); list_add(&meta_line->list, &meta_line->list); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 0ca67e8f99d5..429347bcd1fa 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -823,10 +823,10 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, unsigned long secs_to_flush); -void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas); -void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, +void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa, unsigned long *lun_bitmap); -void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas); +void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa); +void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa); void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap); int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, int nr_pages); -- cgit From 4c44abf43d00d81f5c648f376c436a9405980efc Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 9 Oct 2018 13:11:52 +0200 Subject: lightnvm: pblk: add trace events for chunk states MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce trace points for tracking chunk states in pblk - this is useful for inspection of the entire state of the drive, and real handy for both fw and pblk debugging. 
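The sketch below shows, in plain user-space C, the shape of the instrumentation: derive OPEN/CLOSED from the write position after a successful write and emit an event, guarded so the work is only done when someone is listening. Names and the write-pointer conditions are simplified stand-ins for the pblk code, not the ftrace API:

#include <stdbool.h>
#include <stdio.h>

static bool chunk_trace_enabled;    /* toggled by the trace framework */

static void trace_toy_chunk_state(const char *dev, int chk, const char *st)
{
    printf("dev=%s chk=%d state=%s\n", dev, chk, st);
}

/* A write landing at address 0 of a chunk has just opened it; one
 * landing at the chunk's capacity has just closed it. Anything in
 * between produces no event. */
static void toy_check_chunk_state_update(const char *dev, int chk,
                                         int caddr, int chunk_len)
{
    if (caddr == 0)
        trace_toy_chunk_state(dev, chk, "OPEN");
    else if (caddr == chunk_len)
        trace_toy_chunk_state(dev, chk, "CLOSED");
}

int main(void)
{
    chunk_trace_enabled = true;

    /* mirrors the trace_..._enabled() guard: skip the state
     * derivation entirely when tracing is off */
    if (chunk_trace_enabled)
        toy_check_chunk_state_update("pblk0", 3, 0, 4096);
    return 0;
}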
Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 35 +++++++++++++++++++++++++++- drivers/lightnvm/pblk-init.c | 4 ++++ drivers/lightnvm/pblk-trace.h | 53 +++++++++++++++++++++++++++++++++++++++++++ drivers/lightnvm/pblk-write.c | 10 ++++++-- drivers/lightnvm/pblk.h | 8 +++++++ 5 files changed, 107 insertions(+), 3 deletions(-) create mode 100644 drivers/lightnvm/pblk-trace.h diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 968597d10cc2..8a5158607467 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -16,7 +16,10 @@ * */ +#define CREATE_TRACE_POINTS + #include "pblk.h" +#include "pblk-trace.h" static void pblk_line_mark_bb(struct work_struct *work) { @@ -93,6 +96,9 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) chunk->state = NVM_CHK_ST_FREE; } + trace_pblk_chunk_state(pblk_disk_name(pblk), &rqd->ppa_addr, + chunk->state); + atomic_dec(&pblk->inflight_io); } @@ -477,9 +483,30 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd) return nvm_submit_io(dev, rqd); } +void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); + + int i; + + for (i = 0; i < rqd->nr_ppas; i++) { + struct ppa_addr *ppa = &ppa_list[i]; + struct nvm_chk_meta *chunk = pblk_dev_ppa_to_chunk(pblk, *ppa); + u64 caddr = pblk_dev_ppa_to_chunk_addr(pblk, *ppa); + + if (caddr == 0) + trace_pblk_chunk_state(pblk_disk_name(pblk), + ppa, NVM_CHK_ST_OPEN); + else if (caddr == chunk->cnlb) + trace_pblk_chunk_state(pblk_disk_name(pblk), + ppa, NVM_CHK_ST_CLOSED); + } +} + int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd) { struct nvm_tgt_dev *dev = pblk->dev; + int ret; atomic_inc(&pblk->inflight_io); @@ -488,7 +515,13 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd) return NVM_IO_ERR; #endif - return nvm_submit_io_sync(dev, rqd); + ret = nvm_submit_io_sync(dev, rqd); + + if (trace_pblk_chunk_state_enabled() && !ret && + rqd->opcode == NVM_OP_PWRITE) + pblk_check_chunk_state_update(pblk, rqd); + + return ret; } static void pblk_bio_map_addr_endio(struct bio *bio) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 76a4a271b9cf..4f2d9b502028 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -19,6 +19,7 @@ */ #include "pblk.h" +#include "pblk-trace.h" static unsigned int write_buffer_size; @@ -664,6 +665,9 @@ static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line, chunk->cnlb = chunk_meta->cnlb; chunk->wp = chunk_meta->wp; + trace_pblk_chunk_state(pblk_disk_name(pblk), &ppa, + chunk->state); + if (chunk->type & NVM_CHK_TP_SZ_SPEC) { WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n"); continue; diff --git a/drivers/lightnvm/pblk-trace.h b/drivers/lightnvm/pblk-trace.h new file mode 100644 index 000000000000..d985b729428f --- /dev/null +++ b/drivers/lightnvm/pblk-trace.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM pblk + +#if !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PBLK_H + +#include + +struct ppa_addr; + +#define show_chunk_flags(state) __print_flags(state, "", \ + { NVM_CHK_ST_FREE, "FREE", }, \ + { NVM_CHK_ST_CLOSED, "CLOSED", }, \ + { NVM_CHK_ST_OPEN, "OPEN", }, \ + { NVM_CHK_ST_OFFLINE, "OFFLINE", }) + +TRACE_EVENT(pblk_chunk_state, + + TP_PROTO(const char *name, struct ppa_addr *ppa, int 
state), + + TP_ARGS(name, ppa, state), + + TP_STRUCT__entry( + __string(name, name) + __field(u64, ppa) + __field(int, state); + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->ppa = ppa->ppa; + __entry->state = state; + ), + + TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk), + show_chunk_flags((int)__entry->state)) + +); + + +#endif /* !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) */ + +/* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../../drivers/lightnvm +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE pblk-trace +#include diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 674ba4d1a9f4..61fe22ccc7a1 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -16,6 +16,7 @@ */ #include "pblk.h" +#include "pblk-trace.h" static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_c_ctx *c_ctx) @@ -251,11 +252,13 @@ static void pblk_end_io_write(struct nvm_rq *rqd) if (rqd->error) { pblk_end_w_fail(pblk, rqd); return; - } + } else { + if (trace_pblk_chunk_state_enabled()) + pblk_check_chunk_state_update(pblk, rqd); #ifdef CONFIG_NVM_PBLK_DEBUG - else WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n"); #endif + } pblk_complete_write(pblk, rqd, c_ctx); atomic_dec(&pblk->inflight_io); @@ -276,6 +279,9 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) pblk_log_write_err(pblk, rqd); pblk_err(pblk, "metadata I/O failed. Line %d\n", line->id); line->w_err_gc->has_write_err = 1; + } else { + if (trace_pblk_chunk_state_enabled()) + pblk_check_chunk_state_update(pblk, rqd); } sync = atomic_add_return(rqd->nr_ppas, &emeta->sync); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 429347bcd1fa..b2746099ca1d 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -785,6 +785,7 @@ void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line); +void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd); struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, unsigned int nr_secs, unsigned int len, int alloc_type, gfp_t gfp_mask); @@ -1427,4 +1428,11 @@ static inline void pblk_setup_uuid(struct pblk *pblk) uuid_le_gen(&uuid); memcpy(pblk->instance_uuid, uuid.b, 16); } + +static inline char *pblk_disk_name(struct pblk *pblk) +{ + struct gendisk *disk = pblk->disk; + + return disk->disk_name; +} #endif /* PBLK_H_ */ -- cgit From f29372322e415999f429d3e3883b865c87860b46 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 9 Oct 2018 13:11:53 +0200 Subject: lightnvm: pblk: add trace events for line state changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add trace events for logging for line state changes. 
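A line state change in pblk happens under the line lock, and the new events are emitted next to the assignment. A minimal user-space sketch of that lock/assign/trace pattern, with a pthread mutex standing in for the kernel spinlock and invented names:

#include <pthread.h>
#include <stdio.h>

enum toy_state { TOY_FREE, TOY_OPEN, TOY_CLOSED };

struct toy_line {
    pthread_mutex_t lock;
    int id;
    enum toy_state state;
};

static void trace_toy_line_state(const char *dev, int id, enum toy_state s)
{
    static const char *names[] = { "FREE", "OPEN", "CLOSED" };

    printf("dev=%s line=%d state=%s\n", dev, id, names[s]);
}

/* transition under the lock, trace right where the state changes */
static void toy_line_close(struct toy_line *line)
{
    pthread_mutex_lock(&line->lock);
    line->state = TOY_CLOSED;
    trace_toy_line_state("pblk0", line->id, line->state);
    pthread_mutex_unlock(&line->lock);
}

int main(void)
{
    struct toy_line line = { PTHREAD_MUTEX_INITIALIZER, 7, TOY_OPEN };

    toy_line_close(&line);
    return 0;
}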
Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 19 ++++++++++++++++++- drivers/lightnvm/pblk-gc.c | 8 ++++++++ drivers/lightnvm/pblk-init.c | 3 +++ drivers/lightnvm/pblk-recovery.c | 6 ++++++ drivers/lightnvm/pblk-trace.h | 33 +++++++++++++++++++++++++++++++++ 5 files changed, 68 insertions(+), 1 deletion(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 8a5158607467..ef52c1647f88 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -413,6 +413,9 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line) } } else { line->state = PBLK_LINESTATE_CORRUPT; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); + line->gc_group = PBLK_LINEGC_NONE; move_list = &l_mg->corrupt_list; pblk_err(pblk, "corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n", @@ -1019,6 +1022,8 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line, spin_lock(&l_mg->free_lock); spin_lock(&line->lock); line->state = PBLK_LINESTATE_BAD; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); spin_unlock(&line->lock); list_add_tail(&line->list, &l_mg->bad_list); @@ -1166,6 +1171,8 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) { spin_lock(&line->lock); line->state = PBLK_LINESTATE_BAD; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); spin_unlock(&line->lock); list_add_tail(&line->list, &l_mg->bad_list); @@ -1218,6 +1225,8 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) if (line->state == PBLK_LINESTATE_NEW) { blk_to_erase = pblk_prepare_new_line(pblk, line); line->state = PBLK_LINESTATE_FREE; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); } else { blk_to_erase = blk_in_line; } @@ -1235,6 +1244,8 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) } line->state = PBLK_LINESTATE_OPEN; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); atomic_set(&line->left_eblks, blk_to_erase); atomic_set(&line->left_seblks, blk_to_erase); @@ -1331,6 +1342,8 @@ retry: if (unlikely(bit >= lm->blk_per_line)) { spin_lock(&line->lock); line->state = PBLK_LINESTATE_BAD; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); spin_unlock(&line->lock); list_add_tail(&line->list, &l_mg->bad_list); @@ -1650,6 +1663,8 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line) spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_GC); line->state = PBLK_LINESTATE_FREE; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); line->gc_group = PBLK_LINEGC_NONE; pblk_line_free(line); @@ -1779,7 +1794,6 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line) WARN_ON(line->state != PBLK_LINESTATE_OPEN); line->state = PBLK_LINESTATE_CLOSED; move_list = pblk_line_gc_list(pblk, line); - list_add_tail(&line->list, move_list); mempool_free(line->map_bitmap, l_mg->bitmap_pool); @@ -1798,6 +1812,9 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line) spin_unlock(&line->lock); spin_unlock(&l_mg->gc_lock); + + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); } void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index 157c2567c9e8..b841d84c4342 100644 --- a/drivers/lightnvm/pblk-gc.c 
+++ b/drivers/lightnvm/pblk-gc.c @@ -16,8 +16,10 @@ */ #include "pblk.h" +#include "pblk-trace.h" #include + static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq) { if (gc_rq->data) @@ -64,6 +66,8 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line) spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_GC); line->state = PBLK_LINESTATE_CLOSED; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); move_list = pblk_line_gc_list(pblk, line); spin_unlock(&line->lock); @@ -405,6 +409,8 @@ void pblk_gc_free_full_lines(struct pblk *pblk) spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_CLOSED); line->state = PBLK_LINESTATE_GC; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); spin_unlock(&line->lock); list_del(&line->list); @@ -451,6 +457,8 @@ next_gc_group: spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_CLOSED); line->state = PBLK_LINESTATE_GC; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); spin_unlock(&line->lock); list_del(&line->list); diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 4f2d9b502028..fca37da16266 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -985,6 +985,9 @@ static int pblk_lines_init(struct pblk *pblk) goto fail_free_lines; nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i); + + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); } if (!nr_free_chks) { diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index cbcc0828517e..0322ab915ddc 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -15,6 +15,7 @@ */ #include "pblk.h" +#include "pblk-trace.h" int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf) { @@ -932,6 +933,8 @@ next: spin_lock(&line->lock); line->state = PBLK_LINESTATE_CLOSED; + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); move_list = pblk_line_gc_list(pblk, line); spin_unlock(&line->lock); @@ -947,6 +950,9 @@ next: if (open_lines > 1) pblk_err(pblk, "failed to recover L2P\n"); + trace_pblk_line_state(pblk_disk_name(pblk), line->id, + line->state); + open_lines++; line->meta_line = meta_line; data_line = line; diff --git a/drivers/lightnvm/pblk-trace.h b/drivers/lightnvm/pblk-trace.h index d985b729428f..f9aa9e25e428 100644 --- a/drivers/lightnvm/pblk-trace.h +++ b/drivers/lightnvm/pblk-trace.h @@ -15,6 +15,16 @@ struct ppa_addr; { NVM_CHK_ST_OPEN, "OPEN", }, \ { NVM_CHK_ST_OFFLINE, "OFFLINE", }) +#define show_line_state(state) __print_symbolic(state, \ + { PBLK_LINESTATE_NEW, "NEW", }, \ + { PBLK_LINESTATE_FREE, "FREE", }, \ + { PBLK_LINESTATE_OPEN, "OPEN", }, \ + { PBLK_LINESTATE_CLOSED, "CLOSED", }, \ + { PBLK_LINESTATE_GC, "GC", }, \ + { PBLK_LINESTATE_BAD, "BAD", }, \ + { PBLK_LINESTATE_CORRUPT, "CORRUPT" }) + + TRACE_EVENT(pblk_chunk_state, TP_PROTO(const char *name, struct ppa_addr *ppa, int state), @@ -41,6 +51,29 @@ TRACE_EVENT(pblk_chunk_state, ); +TRACE_EVENT(pblk_line_state, + + TP_PROTO(const char *name, int line, int state), + + TP_ARGS(name, line, state), + + TP_STRUCT__entry( + __string(name, name) + __field(int, line) + __field(int, state); + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->line = line; + __entry->state = state; + ), + + TP_printk("dev=%s line=%d state=%s", __get_str(name), + (int)__entry->line, + show_line_state((int)__entry->state)) + +); #endif /* !defined(_TRACE_PBLK_H) || 
defined(TRACE_HEADER_MULTI_READ) */ -- cgit From 1b0dd0bf3dc8e9fb974d61139c182be15e43caf9 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 9 Oct 2018 13:11:54 +0200 Subject: lightnvm: pblk: add trace events for pblk state changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add trace events for tracking pblk state changes. Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 3 +++ drivers/lightnvm/pblk-init.c | 1 + drivers/lightnvm/pblk-trace.h | 28 ++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index ef52c1647f88..c482ef7dd4b5 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1503,6 +1503,7 @@ static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line) pblk_set_space_limit(pblk); pblk->state = PBLK_STATE_STOPPING; + trace_pblk_state(pblk_disk_name(pblk), pblk->state); } static void pblk_line_close_meta_sync(struct pblk *pblk) @@ -1552,6 +1553,7 @@ void __pblk_pipeline_flush(struct pblk *pblk) return; } pblk->state = PBLK_STATE_RECOVERING; + trace_pblk_state(pblk_disk_name(pblk), pblk->state); spin_unlock(&l_mg->free_lock); pblk_flush_writer(pblk); @@ -1573,6 +1575,7 @@ void __pblk_pipeline_stop(struct pblk *pblk) spin_lock(&l_mg->free_lock); pblk->state = PBLK_STATE_STOPPED; + trace_pblk_state(pblk_disk_name(pblk), pblk->state); l_mg->data_line = NULL; l_mg->data_next = NULL; spin_unlock(&l_mg->free_lock); diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index fca37da16266..9aebdee8e4c9 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1109,6 +1109,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, pblk->dev = dev; pblk->disk = tdisk; pblk->state = PBLK_STATE_RUNNING; + trace_pblk_state(pblk_disk_name(pblk), pblk->state); pblk->gc.gc_enabled = 0; if (!(geo->version == NVM_OCSSD_SPEC_12 || diff --git a/drivers/lightnvm/pblk-trace.h b/drivers/lightnvm/pblk-trace.h index f9aa9e25e428..c171d0450c07 100644 --- a/drivers/lightnvm/pblk-trace.h +++ b/drivers/lightnvm/pblk-trace.h @@ -25,6 +25,13 @@ struct ppa_addr; { PBLK_LINESTATE_CORRUPT, "CORRUPT" }) +#define show_pblk_state(state) __print_symbolic(state, \ + { PBLK_STATE_RUNNING, "RUNNING", }, \ + { PBLK_STATE_STOPPING, "STOPPING", }, \ + { PBLK_STATE_RECOVERING, "RECOVERING", }, \ + { PBLK_STATE_STOPPED, "STOPPED" }) + + TRACE_EVENT(pblk_chunk_state, TP_PROTO(const char *name, struct ppa_addr *ppa, int state), @@ -75,6 +82,27 @@ TRACE_EVENT(pblk_line_state, ); +TRACE_EVENT(pblk_state, + + TP_PROTO(const char *name, int state), + + TP_ARGS(name, state), + + TP_STRUCT__entry( + __string(name, name) + __field(int, state); + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->state = state; + ), + + TP_printk("dev=%s state=%s", __get_str(name), + show_pblk_state((int)__entry->state)) + +); + #endif /* !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) */ /* This part must be outside protection */ -- cgit From 4209c31c0c5cc0787e63b977dacc30e37378eb28 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 9 Oct 2018 13:11:55 +0200 Subject: lightnvm: pblk: add tracing for chunk resets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trace state of chunk resets. 
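The reset life cycle traced here has three points: START when the erase is issued, then OK or FAILED on completion. A compact sketch of that flow, with invented names and a bool standing in for the device's completion status:

#include <stdbool.h>
#include <stdio.h>

enum { RESET_START, RESET_DONE, RESET_FAILED };

static void trace_toy_chunk_reset(const char *dev, int chk, int state)
{
    static const char *names[] = { "START", "OK", "FAILED" };

    printf("dev=%s chk=%d reset=%s\n", dev, chk, names[state]);
}

static int toy_erase_chunk(const char *dev, int chk, bool will_fail)
{
    trace_toy_chunk_reset(dev, chk, RESET_START);  /* at submission */
    if (will_fail) {                    /* completion path on error */
        trace_toy_chunk_reset(dev, chk, RESET_FAILED);
        return -1;
    }
    trace_toy_chunk_reset(dev, chk, RESET_DONE);
    return 0;
}

int main(void)
{
    toy_erase_chunk("pblk0", 11, false);
    toy_erase_chunk("pblk0", 12, true);
    return 0;
}

Pairing START events with their OK/FAILED completions in the trace makes stuck or failing erases directly visible.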
Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 12 ++++++++++++ drivers/lightnvm/pblk-trace.h | 31 +++++++++++++++++++++++++++++++ drivers/lightnvm/pblk.h | 6 ++++++ 3 files changed, 49 insertions(+) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index c482ef7dd4b5..84f3b4912b92 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -90,9 +90,15 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) atomic_dec(&line->left_seblks); if (rqd->error) { + trace_pblk_chunk_reset(pblk_disk_name(pblk), + &rqd->ppa_addr, PBLK_CHUNK_RESET_FAILED); + chunk->state = NVM_CHK_ST_OFFLINE; pblk_mark_bb(pblk, line, rqd->ppa_addr); } else { + trace_pblk_chunk_reset(pblk_disk_name(pblk), + &rqd->ppa_addr, PBLK_CHUNK_RESET_DONE); + chunk->state = NVM_CHK_ST_FREE; } @@ -923,6 +929,9 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) struct nvm_rq rqd = {NULL}; int ret; + trace_pblk_chunk_reset(pblk_disk_name(pblk), &ppa, + PBLK_CHUNK_RESET_START); + pblk_setup_e_rq(pblk, &rqd, ppa); /* The write thread schedules erases so that it minimizes disturbances @@ -1736,6 +1745,9 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa) rqd->end_io = pblk_end_io_erase; rqd->private = pblk; + trace_pblk_chunk_reset(pblk_disk_name(pblk), + &ppa, PBLK_CHUNK_RESET_START); + /* The write thread schedules erases so that it minimizes disturbances * with writes. Thus, there is no need to take the LUN semaphore. */ diff --git a/drivers/lightnvm/pblk-trace.h b/drivers/lightnvm/pblk-trace.h index c171d0450c07..679e5c458ca6 100644 --- a/drivers/lightnvm/pblk-trace.h +++ b/drivers/lightnvm/pblk-trace.h @@ -31,6 +31,37 @@ struct ppa_addr; { PBLK_STATE_RECOVERING, "RECOVERING", }, \ { PBLK_STATE_STOPPED, "STOPPED" }) +#define show_chunk_erase_state(state) __print_symbolic(state, \ + { PBLK_CHUNK_RESET_START, "START", }, \ + { PBLK_CHUNK_RESET_DONE, "OK", }, \ + { PBLK_CHUNK_RESET_FAILED, "FAILED" }) + + +TRACE_EVENT(pblk_chunk_reset, + + TP_PROTO(const char *name, struct ppa_addr *ppa, int state), + + TP_ARGS(name, ppa, state), + + TP_STRUCT__entry( + __string(name, name) + __field(u64, ppa) + __field(int, state); + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->ppa = ppa->ppa; + __entry->state = state; + ), + + TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu), + (u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk), + show_chunk_erase_state((int)__entry->state)) + +); TRACE_EVENT(pblk_chunk_state, diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index b2746099ca1d..7f4e46dfb0d7 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -79,6 +79,12 @@ enum { PBLK_BLK_ST_CLOSED = 0x2, }; +enum { + PBLK_CHUNK_RESET_START, + PBLK_CHUNK_RESET_DONE, + PBLK_CHUNK_RESET_FAILED, +}; + struct pblk_sec_meta { u64 reserved; __le64 lba; -- cgit From 7f985f9a691dc25ddcfc9dc7ff21199f5ee1569c Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:11:56 +0200 Subject: lightnvm: move ppa transformations to core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continuing the effort of moving 1.2 and 2.0 specific code to core, move 64_to_32 and 32_to_64 ppa helpers from pblk to core. 
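As a quick illustration of the new core API (not part of this patch; example_ppa_round_trips() is hypothetical), a valid device address is expected to survive a round trip through the two helpers, given the dev/addrf pair the target already keeps:

        /* Hedged sketch: 64-bit <-> 32-bit ppa round trip through the
         * helpers moved to core below. */
        static bool example_ppa_round_trips(struct nvm_dev *dev, void *addrf,
                                            struct ppa_addr ppa64)
        {
                u32 ppa32 = nvm_ppa64_to_ppa32(dev, addrf, ppa64);

                return nvm_ppa32_to_ppa64(dev, addrf, ppa32).ppa == ppa64.ppa;
        }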
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk.h | 78 +++------------------------------------------ include/linux/lightnvm.h | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 74 deletions(-) diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 7f4e46dfb0d7..a2cc581951ef 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -1102,86 +1102,16 @@ static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32) { - struct ppa_addr ppa64; - - ppa64.ppa = 0; - - if (ppa32 == -1) { - ppa64.ppa = ADDR_EMPTY; - } else if (ppa32 & (1U << 31)) { - ppa64.c.line = ppa32 & ((~0U) >> 1); - ppa64.c.is_cached = 1; - } else { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = - (struct nvm_addrf_12 *)&pblk->addrf; - - ppa64.g.ch = (ppa32 & ppaf->ch_mask) >> - ppaf->ch_offset; - ppa64.g.lun = (ppa32 & ppaf->lun_mask) >> - ppaf->lun_offset; - ppa64.g.blk = (ppa32 & ppaf->blk_mask) >> - ppaf->blk_offset; - ppa64.g.pg = (ppa32 & ppaf->pg_mask) >> - ppaf->pg_offset; - ppa64.g.pl = (ppa32 & ppaf->pln_mask) >> - ppaf->pln_offset; - ppa64.g.sec = (ppa32 & ppaf->sec_mask) >> - ppaf->sec_offset; - } else { - struct nvm_addrf *lbaf = &pblk->addrf; - - ppa64.m.grp = (ppa32 & lbaf->ch_mask) >> - lbaf->ch_offset; - ppa64.m.pu = (ppa32 & lbaf->lun_mask) >> - lbaf->lun_offset; - ppa64.m.chk = (ppa32 & lbaf->chk_mask) >> - lbaf->chk_offset; - ppa64.m.sec = (ppa32 & lbaf->sec_mask) >> - lbaf->sec_offset; - } - } + struct nvm_tgt_dev *dev = pblk->dev; - return ppa64; + return nvm_ppa32_to_ppa64(dev->parent, &pblk->addrf, ppa32); } static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64) { - u32 ppa32 = 0; - - if (ppa64.ppa == ADDR_EMPTY) { - ppa32 = ~0U; - } else if (ppa64.c.is_cached) { - ppa32 |= ppa64.c.line; - ppa32 |= 1U << 31; - } else { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = - (struct nvm_addrf_12 *)&pblk->addrf; - - ppa32 |= ppa64.g.ch << ppaf->ch_offset; - ppa32 |= ppa64.g.lun << ppaf->lun_offset; - ppa32 |= ppa64.g.blk << ppaf->blk_offset; - ppa32 |= ppa64.g.pg << ppaf->pg_offset; - ppa32 |= ppa64.g.pl << ppaf->pln_offset; - ppa32 |= ppa64.g.sec << ppaf->sec_offset; - } else { - struct nvm_addrf *lbaf = &pblk->addrf; - - ppa32 |= ppa64.m.grp << lbaf->ch_offset; - ppa32 |= ppa64.m.pu << lbaf->lun_offset; - ppa32 |= ppa64.m.chk << lbaf->chk_offset; - ppa32 |= ppa64.m.sec << lbaf->sec_offset; - } - } + struct nvm_tgt_dev *dev = pblk->dev; - return ppa32; + return nvm_ppa64_to_ppa32(dev->parent, &pblk->addrf, ppa64); } static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk, diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 50ac5b21297c..eb7300c20f24 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -511,6 +511,89 @@ static inline u64 dev_to_chunk_addr(struct nvm_dev *dev, void *addrf, return caddr; } +static inline struct ppa_addr nvm_ppa32_to_ppa64(struct nvm_dev *dev, + void *addrf, u32 ppa32) +{ + struct ppa_addr ppa64; + + ppa64.ppa = 0; + + if (ppa32 == -1) { + ppa64.ppa = ADDR_EMPTY; + } else if (ppa32 & (1U << 31)) { + ppa64.c.line = ppa32 & ((~0U) >> 1); + ppa64.c.is_cached = 1; + } else { + struct nvm_geo *geo = 
&dev->geo; + + if (geo->version == NVM_OCSSD_SPEC_12) { + struct nvm_addrf_12 *ppaf = addrf; + + ppa64.g.ch = (ppa32 & ppaf->ch_mask) >> + ppaf->ch_offset; + ppa64.g.lun = (ppa32 & ppaf->lun_mask) >> + ppaf->lun_offset; + ppa64.g.blk = (ppa32 & ppaf->blk_mask) >> + ppaf->blk_offset; + ppa64.g.pg = (ppa32 & ppaf->pg_mask) >> + ppaf->pg_offset; + ppa64.g.pl = (ppa32 & ppaf->pln_mask) >> + ppaf->pln_offset; + ppa64.g.sec = (ppa32 & ppaf->sec_mask) >> + ppaf->sec_offset; + } else { + struct nvm_addrf *lbaf = addrf; + + ppa64.m.grp = (ppa32 & lbaf->ch_mask) >> + lbaf->ch_offset; + ppa64.m.pu = (ppa32 & lbaf->lun_mask) >> + lbaf->lun_offset; + ppa64.m.chk = (ppa32 & lbaf->chk_mask) >> + lbaf->chk_offset; + ppa64.m.sec = (ppa32 & lbaf->sec_mask) >> + lbaf->sec_offset; + } + } + + return ppa64; +} + +static inline u32 nvm_ppa64_to_ppa32(struct nvm_dev *dev, + void *addrf, struct ppa_addr ppa64) +{ + u32 ppa32 = 0; + + if (ppa64.ppa == ADDR_EMPTY) { + ppa32 = ~0U; + } else if (ppa64.c.is_cached) { + ppa32 |= ppa64.c.line; + ppa32 |= 1U << 31; + } else { + struct nvm_geo *geo = &dev->geo; + + if (geo->version == NVM_OCSSD_SPEC_12) { + struct nvm_addrf_12 *ppaf = addrf; + + ppa32 |= ppa64.g.ch << ppaf->ch_offset; + ppa32 |= ppa64.g.lun << ppaf->lun_offset; + ppa32 |= ppa64.g.blk << ppaf->blk_offset; + ppa32 |= ppa64.g.pg << ppaf->pg_offset; + ppa32 |= ppa64.g.pl << ppaf->pln_offset; + ppa32 |= ppa64.g.sec << ppaf->sec_offset; + } else { + struct nvm_addrf *lbaf = addrf; + + ppa32 |= ppa64.m.grp << lbaf->ch_offset; + ppa32 |= ppa64.m.pu << lbaf->lun_offset; + ppa32 |= ppa64.m.chk << lbaf->chk_offset; + ppa32 |= ppa64.m.sec << lbaf->sec_offset; + } + } + + return ppa32; +} + + typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, -- cgit From 63dee3a6c39a5bfa5f299ebb314f2e3e0273092b Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:11:57 +0200 Subject: lightnvm: pblk: calculate line pad distance in helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a line is padded, calculate the pad distance directly on the helper being used for this purpose. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-recovery.c | 13 ++++++++++--- drivers/lightnvm/pblk.h | 8 -------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 0322ab915ddc..8036c3eb6372 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -380,6 +380,15 @@ fail_free_pad: return ret; } +static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + int distance = geo->mw_cunits * geo->all_luns * geo->ws_opt; + + return (distance > line->left_msecs) ? line->left_msecs : distance; +} + /* When this function is called, it means that not all upper pages have been * written in a page that contains valid data. 
In order to recover this data, we * first find the write pointer on the device, then we pad all necessary @@ -495,9 +504,7 @@ next_rq: line->left_msecs += nr_error_bits; bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits); - pad_secs = pblk_pad_distance(pblk); - if (pad_secs > line->left_msecs) - pad_secs = line->left_msecs; + pad_secs = pblk_pad_distance(pblk, line); ret = pblk_recov_pad_oob(pblk, line, pad_secs); if (ret) diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index a2cc581951ef..d123cff82589 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -986,14 +986,6 @@ static inline int pblk_line_vsc(struct pblk_line *line) return le32_to_cpu(*line->vsc); } -static inline int pblk_pad_distance(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - return geo->mw_cunits * geo->all_luns * geo->ws_opt; -} - static inline int pblk_ppa_to_line_id(struct ppa_addr p) { return p.a.blk; -- cgit From 1864de94ec9d6e1ae4d481212847374df9b1be41 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 9 Oct 2018 13:11:58 +0200 Subject: lightnvm: pblk: stop recreating global caches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pblk should not create a set of global caches every time a pblk instance is created. The global caches should be made available only when there is one or more pblk instances. This patch bundles the global caches together with a kref keeping track of whether the caches should be available or not. Also, turn the global pblk lock into a mutex that explicitly protects the caches (as this was the only purpose of the lock). Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 132 ++++++++++++++++++++++++++++--------------- 1 file changed, 86 insertions(+), 46 deletions(-) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 9aebdee8e4c9..fb66bc84d5ca 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -26,9 +26,24 @@ static unsigned int write_buffer_size; module_param(write_buffer_size, uint, 0644); MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer"); -static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache, - *pblk_w_rq_cache; -static DECLARE_RWSEM(pblk_lock); +struct pblk_global_caches { + struct kmem_cache *ws; + struct kmem_cache *rec; + struct kmem_cache *g_rq; + struct kmem_cache *w_rq; + + struct kref kref; + + struct mutex mutex; /* Ensures consistency between + * caches and kref + */ +}; + +static struct pblk_global_caches pblk_caches = { + .mutex = __MUTEX_INITIALIZER(pblk_caches.mutex), + .kref = KREF_INIT(0), +}; + struct bio_set pblk_bio_set; static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, @@ -307,53 +322,80 @@ static int pblk_set_addrf(struct pblk *pblk) return 0; } -static int pblk_init_global_caches(struct pblk *pblk) +static int pblk_create_global_caches(void) { - down_write(&pblk_lock); - pblk_ws_cache = kmem_cache_create("pblk_blk_ws", + + pblk_caches.ws = kmem_cache_create("pblk_blk_ws", sizeof(struct pblk_line_ws), 0, 0, NULL); - if (!pblk_ws_cache) { - up_write(&pblk_lock); + if (!pblk_caches.ws) return -ENOMEM; - } - pblk_rec_cache = kmem_cache_create("pblk_rec", + pblk_caches.rec = kmem_cache_create("pblk_rec", sizeof(struct pblk_rec_ctx), 0, 0, NULL); - if (!pblk_rec_cache) { - kmem_cache_destroy(pblk_ws_cache); - up_write(&pblk_lock); - return -ENOMEM; - 
} + if (!pblk_caches.rec) + goto fail_destroy_ws; - pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size, + pblk_caches.g_rq = kmem_cache_create("pblk_g_rq", pblk_g_rq_size, 0, 0, NULL); - if (!pblk_g_rq_cache) { - kmem_cache_destroy(pblk_ws_cache); - kmem_cache_destroy(pblk_rec_cache); - up_write(&pblk_lock); - return -ENOMEM; - } + if (!pblk_caches.g_rq) + goto fail_destroy_rec; - pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, + pblk_caches.w_rq = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, 0, 0, NULL); - if (!pblk_w_rq_cache) { - kmem_cache_destroy(pblk_ws_cache); - kmem_cache_destroy(pblk_rec_cache); - kmem_cache_destroy(pblk_g_rq_cache); - up_write(&pblk_lock); - return -ENOMEM; - } - up_write(&pblk_lock); + if (!pblk_caches.w_rq) + goto fail_destroy_g_rq; return 0; + +fail_destroy_g_rq: + kmem_cache_destroy(pblk_caches.g_rq); +fail_destroy_rec: + kmem_cache_destroy(pblk_caches.rec); +fail_destroy_ws: + kmem_cache_destroy(pblk_caches.ws); + + return -ENOMEM; +} + +static int pblk_get_global_caches(void) +{ + int ret; + + mutex_lock(&pblk_caches.mutex); + + if (kref_read(&pblk_caches.kref) > 0) { + kref_get(&pblk_caches.kref); + mutex_unlock(&pblk_caches.mutex); + return 0; + } + + ret = pblk_create_global_caches(); + + if (!ret) + kref_get(&pblk_caches.kref); + + mutex_unlock(&pblk_caches.mutex); + + return ret; +} + +static void pblk_destroy_global_caches(struct kref *ref) +{ + struct pblk_global_caches *c; + + c = container_of(ref, struct pblk_global_caches, kref); + + kmem_cache_destroy(c->ws); + kmem_cache_destroy(c->rec); + kmem_cache_destroy(c->g_rq); + kmem_cache_destroy(c->w_rq); } -static void pblk_free_global_caches(struct pblk *pblk) +static void pblk_put_global_caches(void) { - kmem_cache_destroy(pblk_ws_cache); - kmem_cache_destroy(pblk_rec_cache); - kmem_cache_destroy(pblk_g_rq_cache); - kmem_cache_destroy(pblk_w_rq_cache); + mutex_lock(&pblk_caches.mutex); + kref_put(&pblk_caches.kref, pblk_destroy_global_caches); + mutex_unlock(&pblk_caches.mutex); } static int pblk_core_init(struct pblk *pblk) @@ -382,7 +424,7 @@ static int pblk_core_init(struct pblk *pblk) if (!pblk->pad_dist) return -ENOMEM; - if (pblk_init_global_caches(pblk)) + if (pblk_get_global_caches()) goto fail_free_pad_dist; /* Internal bios can be at most the sectors signaled by the device. 
*/ @@ -391,27 +433,27 @@ static int pblk_core_init(struct pblk *pblk) goto free_global_caches; ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE, - pblk_ws_cache); + pblk_caches.ws); if (ret) goto free_page_bio_pool; ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns, - pblk_rec_cache); + pblk_caches.rec); if (ret) goto free_gen_ws_pool; ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns, - pblk_g_rq_cache); + pblk_caches.g_rq); if (ret) goto free_rec_pool; ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns, - pblk_g_rq_cache); + pblk_caches.g_rq); if (ret) goto free_r_rq_pool; ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns, - pblk_w_rq_cache); + pblk_caches.w_rq); if (ret) goto free_e_rq_pool; @@ -457,7 +499,7 @@ free_gen_ws_pool: free_page_bio_pool: mempool_exit(&pblk->page_bio_pool); free_global_caches: - pblk_free_global_caches(pblk); + pblk_put_global_caches(); fail_free_pad_dist: kfree(pblk->pad_dist); return -ENOMEM; @@ -481,7 +523,7 @@ static void pblk_core_free(struct pblk *pblk) mempool_exit(&pblk->e_rq_pool); mempool_exit(&pblk->w_rq_pool); - pblk_free_global_caches(pblk); + pblk_put_global_caches(); kfree(pblk->pad_dist); } @@ -1074,7 +1116,6 @@ static void pblk_exit(void *private, bool graceful) { struct pblk *pblk = private; - down_write(&pblk_lock); pblk_gc_exit(pblk, graceful); pblk_tear_down(pblk, graceful); @@ -1083,7 +1124,6 @@ static void pblk_exit(void *private, bool graceful) #endif pblk_free(pblk); - up_write(&pblk_lock); } static sector_t pblk_capacity(void *private) -- cgit From bf82fa2f584f1c6e12df8c04fca715d53e7f32d5 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 9 Oct 2018 13:11:59 +0200 Subject: lightnvm: pblk: fix mapping issue on failed writes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 1.2-devices, the mapping-out of remaining sectors in the failed-write's block can result in an infinite loop, stalling the write pipeline. Fix this.
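In outline (hypothetical example_map_out_chunk(); the real logic lives in pblk_map_remaining() below), the loop now terminates because the new helper reports when the chunk boundary is reached on both 1.2 and 2.0 geometries:

        /* Hedged sketch: walk the remaining sectors of a chunk; 'done'
         * becomes 1 at the chunk boundary, so the loop cannot spin forever
         * the way the open-coded 1.2 increment could. */
        static void example_map_out_chunk(struct pblk *pblk, struct ppa_addr ppa)
        {
                int done = 0;

                while (!done) {
                        /* invalidate/map out the sector at 'ppa' here */
                        done = nvm_next_ppa_in_chk(pblk->dev, &ppa);
                }
        }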
Fixes: 6a3abf5beef6 ("lightnvm: pblk: rework write error recovery path") Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-write.c | 12 +----------- include/linux/lightnvm.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 61fe22ccc7a1..9554febee480 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -106,8 +106,6 @@ retry: /* Map remaining sectors in chunk, starting from ppa */ static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; struct pblk_line *line; struct ppa_addr map_ppa = *ppa; u64 paddr; @@ -125,15 +123,7 @@ static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa) if (!test_and_set_bit(paddr, line->invalid_bitmap)) le32_add_cpu(line->vsc, -1); - if (geo->version == NVM_OCSSD_SPEC_12) { - map_ppa.ppa++; - if (map_ppa.g.pg == geo->num_pg) - done = 1; - } else { - map_ppa.m.sec++; - if (map_ppa.m.sec == geo->clba) - done = 1; - } + done = nvm_next_ppa_in_chk(pblk->dev, &map_ppa); } line->w_err_gc->has_write_err = 1; diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index eb7300c20f24..2fdeac1a420d 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -593,6 +593,42 @@ static inline u32 nvm_ppa64_to_ppa32(struct nvm_dev *dev, return ppa32; } +static inline int nvm_next_ppa_in_chk(struct nvm_tgt_dev *dev, + struct ppa_addr *ppa) +{ + struct nvm_geo *geo = &dev->geo; + int last = 0; + + if (geo->version == NVM_OCSSD_SPEC_12) { + int sec = ppa->g.sec; + + sec++; + if (sec == geo->ws_min) { + int pg = ppa->g.pg; + + sec = 0; + pg++; + if (pg == geo->num_pg) { + int pl = ppa->g.pl; + + pg = 0; + pl++; + if (pl == geo->num_pln) + last = 1; + + ppa->g.pl = pl; + } + ppa->g.pg = pg; + } + ppa->g.sec = sec; + } else { + ppa->m.sec++; + if (ppa->m.sec == geo->clba) + last = 1; + } + + return last; +} typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); -- cgit From 7325b4bbe5952e3e939f15de812f2ee0c0d33ca9 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 9 Oct 2018 13:12:00 +0200 Subject: lightnvm: pblk: fix two sleep-in-atomic-context bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver may sleep with holding a spinlock. The function call paths (from bottom to top) in Linux-4.16 are: [FUNC] nvm_dev_dma_alloc(GFP_KERNEL) drivers/lightnvm/pblk-core.c, 754: nvm_dev_dma_alloc in pblk_line_submit_smeta_io drivers/lightnvm/pblk-core.c, 1048: pblk_line_submit_smeta_io in pblk_line_init_bb drivers/lightnvm/pblk-core.c, 1434: pblk_line_init_bb in pblk_line_replace_data drivers/lightnvm/pblk-recovery.c, 980: pblk_line_replace_data in pblk_recov_l2p drivers/lightnvm/pblk-recovery.c, 976: spin_lock in pblk_recov_l2p [FUNC] bio_map_kern(GFP_KERNEL) drivers/lightnvm/pblk-core.c, 762: bio_map_kern in pblk_line_submit_smeta_io drivers/lightnvm/pblk-core.c, 1048: pblk_line_submit_smeta_io in pblk_line_init_bb drivers/lightnvm/pblk-core.c, 1434: pblk_line_init_bb in pblk_line_replace_data drivers/lightnvm/pblk-recovery.c, 980: pblk_line_replace_data in pblk_recov_l2p drivers/lightnvm/pblk-recovery.c, 976: spin_lock in pblk_recov_l2p To fix these bugs, the call to pblk_line_replace_data() is moved out of the spinlock protection. 
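In outline, the corrected shape keeps only the bitmap update under the lock and lets the sleeping call run afterwards (hedged sketch simplified from the hunk below; example_replace_line() is hypothetical):

        static void example_replace_line(struct pblk *pblk, int meta_line)
        {
                struct pblk_line_mgmt *l_mg = &pblk->l_mg;

                /* Critical section covers only the bitmap manipulation. */
                spin_lock(&l_mg->free_lock);
                WARN_ON_ONCE(!test_and_clear_bit(meta_line, &l_mg->meta_bitmap));
                spin_unlock(&l_mg->free_lock);

                pblk_line_replace_data(pblk);   /* may allocate with GFP_KERNEL */
        }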
These bugs are found by my static analysis tool DSAC. Signed-off-by: Jia-Ju Bai Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-recovery.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 8036c3eb6372..2526722304bb 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -966,12 +966,14 @@ next: } } - spin_lock(&l_mg->free_lock); if (!open_lines) { + spin_lock(&l_mg->free_lock); WARN_ON_ONCE(!test_and_clear_bit(meta_line, &l_mg->meta_bitmap)); + spin_unlock(&l_mg->free_lock); pblk_line_replace_data(pblk); } else { + spin_lock(&l_mg->free_lock); /* Allocate next line for preparation */ l_mg->data_next = pblk_line_get(pblk); if (l_mg->data_next) { @@ -979,8 +981,8 @@ next: l_mg->data_next->type = PBLK_LINETYPE_DATA; is_next = 1; } + spin_unlock(&l_mg->free_lock); } - spin_unlock(&l_mg->free_lock); if (is_next) pblk_line_erase(pblk, l_mg->data_next); -- cgit From 090ee26fd51270cc3bd54a0efbc716ede320ad27 Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:12:01 +0200 Subject: lightnvm: use internal allocation for chunk log page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lightnvm subsystem provides helpers to retrieve chunk metadata, where the target needs to provide a buffer to store the metadata. An implicit assumption is that this buffer is contiguous and can be used to retrieve the data from the device. If the device exposes too many chunks, then kmalloc might fail, thus failing instance creation. This patch removes this assumption by implementing an internal buffer in the lightnvm subsystem to retrieve chunk metadata. Targets can then use virtual memory allocations. Since this is a target API change, adapt pblk accordingly. Signed-off-by: Javier González Reviewed-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 4 ++-- drivers/lightnvm/pblk-init.c | 2 +- drivers/nvme/host/lightnvm.c | 25 +++++++++++++++++-------- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 84f3b4912b92..875f3cf615ac 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -120,7 +120,7 @@ static void pblk_end_io_erase(struct nvm_rq *rqd) /* * Get information for all chunks from the device. 
* - * The caller is responsible for freeing the returned structure + * The caller is responsible for freeing (vmalloc) the returned structure */ struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk) { @@ -134,7 +134,7 @@ struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk) ppa.ppa = 0; len = geo->all_chunks * sizeof(*meta); - meta = kzalloc(len, GFP_KERNEL); + meta = vzalloc(len); if (!meta) return ERR_PTR(-ENOMEM); diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index fb66bc84d5ca..7ef8249108f0 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1039,7 +1039,7 @@ static int pblk_lines_init(struct pblk *pblk) pblk_set_provision(pblk, nr_free_chks); - kfree(chunk_meta); + vfree(chunk_meta); return 0; fail_free_lines: diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index e42af7771fe5..7d0a4d3b0a48 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -573,7 +573,7 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, struct nvm_geo *geo = &ndev->geo; struct nvme_ns *ns = ndev->q->queuedata; struct nvme_ctrl *ctrl = ns->ctrl; - struct nvme_nvm_chk_meta *dev_meta = (struct nvme_nvm_chk_meta *)meta; + struct nvme_nvm_chk_meta *dev_meta, *dev_meta_off; struct ppa_addr ppa; size_t left = nchks * sizeof(struct nvme_nvm_chk_meta); size_t log_pos, offset, len; @@ -585,6 +585,10 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, */ max_len = min_t(unsigned int, ctrl->max_hw_sectors << 9, 256 * 1024); + dev_meta = kmalloc(max_len, GFP_KERNEL); + if (!dev_meta) + return -ENOMEM; + /* Normalize lba address space to obtain log offset */ ppa.ppa = slba; ppa = dev_to_generic_addr(ndev, ppa); @@ -598,6 +602,9 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, while (left) { len = min_t(unsigned int, left, max_len); + memset(dev_meta, 0, max_len); + dev_meta_off = dev_meta; + ret = nvme_get_log(ctrl, ns->head->ns_id, NVME_NVM_LOG_REPORT_CHUNK, 0, dev_meta, len, offset); @@ -607,21 +614,23 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, } for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) { - meta->state = dev_meta->state; - meta->type = dev_meta->type; - meta->wi = dev_meta->wi; - meta->slba = le64_to_cpu(dev_meta->slba); - meta->cnlb = le64_to_cpu(dev_meta->cnlb); - meta->wp = le64_to_cpu(dev_meta->wp); + meta->state = dev_meta_off->state; + meta->type = dev_meta_off->type; + meta->wi = dev_meta_off->wi; + meta->slba = le64_to_cpu(dev_meta_off->slba); + meta->cnlb = le64_to_cpu(dev_meta_off->cnlb); + meta->wp = le64_to_cpu(dev_meta_off->wp); meta++; - dev_meta++; + dev_meta_off++; } offset += len; left -= len; } + kfree(dev_meta); + return ret; } -- cgit From 45dcf29b98377bbdc40aa4a23a79ade60295dbaf Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:12:02 +0200 Subject: lightnvm: pblk: encapsulate rqd dma allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dma allocations for ppa_list and meta_list in rqd are replicated in several places across the pblk codebase. Make helpers to encapsulate creation and deletion to simplify the code. 
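Usage then follows a simple allocate/use/free pattern. A minimal sketch (hypothetical example_sync_rq(); the converted call sites are in the hunks below):

        static int example_sync_rq(struct pblk *pblk)
        {
                struct nvm_rq rqd;
                int ret;

                memset(&rqd, 0, sizeof(struct nvm_rq));

                /* Sets up meta_list/ppa_list and their DMA addresses. */
                ret = pblk_alloc_rqd_meta(pblk, &rqd);
                if (ret)
                        return ret;

                /* ... fill in ppa_list/meta_list and submit rqd ... */

                pblk_free_rqd_meta(pblk, &rqd);
                return 0;
        }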
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 51 +++++++++++++++++++++++++++------------- drivers/lightnvm/pblk-read.c | 31 ++++++++---------------- drivers/lightnvm/pblk-recovery.c | 30 ++++++++--------------- drivers/lightnvm/pblk-write.c | 15 ++---------- drivers/lightnvm/pblk.h | 2 ++ 5 files changed, 59 insertions(+), 70 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 875f3cf615ac..8ae40855d4c9 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -237,6 +237,33 @@ static void pblk_invalidate_range(struct pblk *pblk, sector_t slba, spin_unlock(&pblk->trans_lock); } +int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct nvm_tgt_dev *dev = pblk->dev; + + rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, + &rqd->dma_meta_list); + if (!rqd->meta_list) + return -ENOMEM; + + if (rqd->nr_ppas == 1) + return 0; + + rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; + rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; + + return 0; +} + +void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct nvm_tgt_dev *dev = pblk->dev; + + if (rqd->meta_list) + nvm_dev_dma_free(dev->parent, rqd->meta_list, + rqd->dma_meta_list); +} + /* Caller must guarantee that the request is a valid type */ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type) { @@ -268,7 +295,6 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type) /* Typically used on completion path. Cannot guarantee request consistency */ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type) { - struct nvm_tgt_dev *dev = pblk->dev; mempool_t *pool; switch (type) { @@ -289,9 +315,7 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type) return; } - if (rqd->meta_list) - nvm_dev_dma_free(dev->parent, rqd->meta_list, - rqd->dma_meta_list); + pblk_free_rqd_meta(pblk, rqd); mempool_free(rqd, pool); } @@ -838,18 +862,14 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, memset(&rqd, 0, sizeof(struct nvm_rq)); - rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &rqd.dma_meta_list); - if (!rqd.meta_list) - return -ENOMEM; - - rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size; - rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size; + ret = pblk_alloc_rqd_meta(pblk, &rqd); + if (ret) + return ret; bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - goto free_ppa_list; + goto clear_rqd; } bio->bi_iter.bi_sector = 0; /* internal bio */ @@ -881,7 +901,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, if (ret) { pblk_err(pblk, "smeta I/O submission failed: %d\n", ret); bio_put(bio); - goto free_ppa_list; + goto clear_rqd; } atomic_dec(&pblk->inflight_io); @@ -894,9 +914,8 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, pblk_log_read_err(pblk, &rqd); } -free_ppa_list: - nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); - +clear_rqd: + pblk_free_rqd_meta(pblk, &rqd); return ret; } diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 829e92857289..f5fe01d3a07f 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -453,21 +453,13 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) */ bio_init_idx = pblk_get_bi_idx(bio); - rqd->meta_list = nvm_dev_dma_alloc(dev->parent, 
GFP_KERNEL, - &rqd->dma_meta_list); - if (!rqd->meta_list) { - pblk_err(pblk, "not able to allocate ppa list\n"); + if (pblk_alloc_rqd_meta(pblk, rqd)) goto fail_rqd_free; - } - - if (nr_secs > 1) { - rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; - rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; + if (nr_secs > 1) pblk_read_ppalist_rq(pblk, rqd, bio, blba, read_bitmap); - } else { + else pblk_read_rq(pblk, rqd, bio, blba, read_bitmap); - } if (bitmap_full(read_bitmap, nr_secs)) { atomic_inc(&pblk->inflight_io); @@ -594,15 +586,11 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) memset(&rqd, 0, sizeof(struct nvm_rq)); - rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &rqd.dma_meta_list); - if (!rqd.meta_list) - return -ENOMEM; + ret = pblk_alloc_rqd_meta(pblk, &rqd); + if (ret) + return ret; if (gc_rq->nr_secs > 1) { - rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size; - rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size; - gc_rq->secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, gc_rq->line, gc_rq->lba_list, gc_rq->paddr_list, @@ -623,7 +611,8 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) PBLK_VMALLOC_META, GFP_KERNEL); if (IS_ERR(bio)) { pblk_err(pblk, "could not allocate GC bio (%lu)\n", - PTR_ERR(bio)); + PTR_ERR(bio)); + ret = PTR_ERR(bio); goto err_free_dma; } @@ -658,12 +647,12 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) #endif out: - nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); + pblk_free_rqd_meta(pblk, &rqd); return ret; err_free_bio: bio_put(bio); err_free_dma: - nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); + pblk_free_rqd_meta(pblk, &rqd); return ret; } diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 2526722304bb..218292979953 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -241,13 +241,11 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line, { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct ppa_addr *ppa_list; struct pblk_sec_meta *meta_list; struct pblk_pad_rq *pad_rq; struct nvm_rq *rqd; struct bio *bio; void *data; - dma_addr_t dma_ppa_list, dma_meta_list; __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); u64 w_ptr = line->cur_sec; int left_line_ppas, rq_ppas, rq_len; @@ -281,20 +279,11 @@ next_pad_rq: rq_len = rq_ppas * geo->csecs; - meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); - if (!meta_list) { - ret = -ENOMEM; - goto fail_free_pad; - } - - ppa_list = (void *)(meta_list) + pblk_dma_meta_size; - dma_ppa_list = dma_meta_list + pblk_dma_meta_size; - bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len, PBLK_VMALLOC_META, GFP_KERNEL); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - goto fail_free_meta; + goto fail_free_pad; } bio->bi_iter.bi_sector = 0; /* internal bio */ @@ -302,17 +291,19 @@ next_pad_rq: rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT); + ret = pblk_alloc_rqd_meta(pblk, rqd); + if (ret) + goto fail_free_rqd; + rqd->bio = bio; rqd->opcode = NVM_OP_PWRITE; rqd->is_seq = 1; - rqd->meta_list = meta_list; rqd->nr_ppas = rq_ppas; - rqd->ppa_list = ppa_list; - rqd->dma_ppa_list = dma_ppa_list; - rqd->dma_meta_list = dma_meta_list; rqd->end_io = pblk_end_io_recov; rqd->private = pad_rq; + meta_list = rqd->meta_list; + for (i = 0; i < rqd->nr_ppas; ) { struct ppa_addr ppa; int pos; @@ -346,7 +337,7 @@ next_pad_rq: if (ret) { pblk_err(pblk, "I/O submission failed: 
%d\n", ret); pblk_up_chunk(pblk, rqd->ppa_list[0]); - goto fail_free_bio; + goto fail_free_rqd; } left_line_ppas -= rq_ppas; @@ -370,10 +361,9 @@ free_rq: kfree(pad_rq); return ret; -fail_free_bio: +fail_free_rqd: + pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); bio_put(bio); -fail_free_meta: - nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); fail_free_pad: kfree(pad_rq); vfree(data); diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 9554febee480..a276f25d3931 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -285,11 +285,8 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) } static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, - unsigned int nr_secs, - nvm_end_io_fn(*end_io)) + unsigned int nr_secs, nvm_end_io_fn(*end_io)) { - struct nvm_tgt_dev *dev = pblk->dev; - /* Setup write request */ rqd->opcode = NVM_OP_PWRITE; rqd->nr_ppas = nr_secs; @@ -297,15 +294,7 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, rqd->private = pblk; rqd->end_io = end_io; - rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &rqd->dma_meta_list); - if (!rqd->meta_list) - return -ENOMEM; - - rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; - rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; - - return 0; + return pblk_alloc_rqd_meta(pblk, rqd); } static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd, diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index d123cff82589..b06ab0edab69 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -778,6 +778,8 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf); */ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type); void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type); +int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd); +void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd); void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write); int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_c_ctx *c_ctx); -- cgit From af3fac1664b978f70a838571f3f35298ce1786da Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:12:03 +0200 Subject: lightnvm: pblk: refactor metadata paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk maintains two different metadata paths for smeta and emeta, which store metadata at the start of the line and at the end of the line, respectively. Until now, these paths have been common for writing and retrieving metadata; however, as these paths diverge, the common code becomes less clear and unnecessarily complicated. In preparation for further changes to the metadata write path, this patch separates the write and read paths for smeta and emeta and removes the synchronous emeta path as it is not used anymore (emeta is scheduled asynchronously to prevent jittering due to internal I/Os).
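After the split, a recovery-style caller reads the two metadata regions through dedicated entry points. A minimal sketch (hypothetical example_read_line_meta(); the real prototypes land in pblk.h below):

        static int example_read_line_meta(struct pblk *pblk,
                                          struct pblk_line *line,
                                          void *emeta_buf)
        {
                int ret;

                /* Start-of-line metadata first, then end-of-line metadata. */
                ret = pblk_line_smeta_read(pblk, line);
                if (ret)
                        return ret;

                return pblk_line_emeta_read(pblk, line, emeta_buf);
        }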
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 307 ++++++++++++++++++--------------------- drivers/lightnvm/pblk-gc.c | 2 +- drivers/lightnvm/pblk-recovery.c | 4 +- drivers/lightnvm/pblk.h | 4 +- 4 files changed, 146 insertions(+), 171 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 8ae40855d4c9..49cef93e328e 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -685,180 +685,80 @@ u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line) return paddr; } -/* - * Submit emeta to one LUN in the raid line at the time to avoid a deadlock when - * taking the per LUN semaphore. - */ -static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line, - void *emeta_buf, u64 paddr, int dir) +u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; - void *ppa_list, *meta_list; - struct bio *bio; - struct nvm_rq rqd; - dma_addr_t dma_ppa_list, dma_meta_list; - int min = pblk->min_write_pgs; - int left_ppas = lm->emeta_sec[0]; - int id = line->id; - int rq_ppas, rq_len; - int cmd_op, bio_op; - int i, j; - int ret; + int bit; - if (dir == PBLK_WRITE) { - bio_op = REQ_OP_WRITE; - cmd_op = NVM_OP_PWRITE; - } else if (dir == PBLK_READ) { - bio_op = REQ_OP_READ; - cmd_op = NVM_OP_PREAD; - } else - return -EINVAL; + /* This usually only happens on bad lines */ + bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); + if (bit >= lm->blk_per_line) + return -1; - meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &dma_meta_list); - if (!meta_list) - return -ENOMEM; + return bit * geo->ws_opt; +} - ppa_list = meta_list + pblk_dma_meta_size; - dma_ppa_list = dma_meta_list + pblk_dma_meta_size; +int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct pblk_line_meta *lm = &pblk->lm; + struct bio *bio; + struct nvm_rq rqd; + u64 paddr = pblk_line_smeta_start(pblk, line); + int i, ret; -next_rq: memset(&rqd, 0, sizeof(struct nvm_rq)); - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); - rq_len = rq_ppas * geo->csecs; + ret = pblk_alloc_rqd_meta(pblk, &rqd); + if (ret) + return ret; - bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len, - l_mg->emeta_alloc_type, GFP_KERNEL); + bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - goto free_rqd_dma; + goto clear_rqd; } bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, bio_op, 0); + bio_set_op_attrs(bio, REQ_OP_READ, 0); rqd.bio = bio; - rqd.meta_list = meta_list; - rqd.ppa_list = ppa_list; - rqd.dma_meta_list = dma_meta_list; - rqd.dma_ppa_list = dma_ppa_list; - rqd.opcode = cmd_op; - rqd.nr_ppas = rq_ppas; - - if (dir == PBLK_WRITE) { - struct pblk_sec_meta *meta_list = rqd.meta_list; - - rqd.is_seq = 1; - for (i = 0; i < rqd.nr_ppas; ) { - spin_lock(&line->lock); - paddr = __pblk_alloc_page(pblk, line, min); - spin_unlock(&line->lock); - for (j = 0; j < min; j++, i++, paddr++) { - meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); - rqd.ppa_list[i] = - addr_to_gen_ppa(pblk, paddr, id); - } - } - } else { - for (i = 0; i < rqd.nr_ppas; ) { - struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id); - int pos = pblk_ppa_to_pos(geo, ppa); - - if (pblk_io_aligned(pblk, rq_ppas)) - rqd.is_seq = 1; - 
- while (test_bit(pos, line->blk_bitmap)) { - paddr += min; - if (pblk_boundary_paddr_checks(pblk, paddr)) { - pblk_err(pblk, "corrupt emeta line:%d\n", - line->id); - bio_put(bio); - ret = -EINTR; - goto free_rqd_dma; - } - - ppa = addr_to_gen_ppa(pblk, paddr, id); - pos = pblk_ppa_to_pos(geo, ppa); - } - - if (pblk_boundary_paddr_checks(pblk, paddr + min)) { - pblk_err(pblk, "corrupt emeta line:%d\n", - line->id); - bio_put(bio); - ret = -EINTR; - goto free_rqd_dma; - } + rqd.opcode = NVM_OP_PREAD; + rqd.nr_ppas = lm->smeta_sec; + rqd.is_seq = 1; - for (j = 0; j < min; j++, i++, paddr++) - rqd.ppa_list[i] = - addr_to_gen_ppa(pblk, paddr, line->id); - } - } + for (i = 0; i < lm->smeta_sec; i++, paddr++) + rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); ret = pblk_submit_io_sync(pblk, &rqd); if (ret) { - pblk_err(pblk, "emeta I/O submission failed: %d\n", ret); + pblk_err(pblk, "smeta I/O submission failed: %d\n", ret); bio_put(bio); - goto free_rqd_dma; + goto clear_rqd; } atomic_dec(&pblk->inflight_io); - if (rqd.error) { - if (dir == PBLK_WRITE) - pblk_log_write_err(pblk, &rqd); - else - pblk_log_read_err(pblk, &rqd); - } + if (rqd.error) + pblk_log_read_err(pblk, &rqd); - emeta_buf += rq_len; - left_ppas -= rq_ppas; - if (left_ppas) - goto next_rq; -free_rqd_dma: - nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); +clear_rqd: + pblk_free_rqd_meta(pblk, &rqd); return ret; } -u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - int bit; - - /* This usually only happens on bad lines */ - bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); - if (bit >= lm->blk_per_line) - return -1; - - return bit * geo->ws_opt; -} - -static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, - u64 paddr, int dir) +static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line, + u64 paddr) { struct nvm_tgt_dev *dev = pblk->dev; struct pblk_line_meta *lm = &pblk->lm; struct bio *bio; struct nvm_rq rqd; - __le64 *lba_list = NULL; + __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); + __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); int i, ret; - int cmd_op, bio_op; - - if (dir == PBLK_WRITE) { - bio_op = REQ_OP_WRITE; - cmd_op = NVM_OP_PWRITE; - lba_list = emeta_to_lbas(pblk, line->emeta->buf); - } else if (dir == PBLK_READ_RECOV || dir == PBLK_READ) { - bio_op = REQ_OP_READ; - cmd_op = NVM_OP_PREAD; - } else - return -EINVAL; memset(&rqd, 0, sizeof(struct nvm_rq)); @@ -873,30 +773,20 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, } bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, bio_op, 0); + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); rqd.bio = bio; - rqd.opcode = cmd_op; - rqd.is_seq = 1; + rqd.opcode = NVM_OP_PWRITE; rqd.nr_ppas = lm->smeta_sec; + rqd.is_seq = 1; for (i = 0; i < lm->smeta_sec; i++, paddr++) { struct pblk_sec_meta *meta_list = rqd.meta_list; rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); - - if (dir == PBLK_WRITE) { - __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - - meta_list[i].lba = lba_list[paddr] = addr_empty; - } + meta_list[i].lba = lba_list[paddr] = addr_empty; } - /* - * This I/O is sent by the write thread when a line is replace. Since - * the write thread is the only one sending write and erase commands, - * there is no need to take the LUN semaphore. 
- */ ret = pblk_submit_io_sync(pblk, &rqd); if (ret) { pblk_err(pblk, "smeta I/O submission failed: %d\n", ret); @@ -907,11 +797,8 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, atomic_dec(&pblk->inflight_io); if (rqd.error) { - if (dir == PBLK_WRITE) { - pblk_log_write_err(pblk, &rqd); - ret = 1; - } else if (dir == PBLK_READ) - pblk_log_read_err(pblk, &rqd); + pblk_log_write_err(pblk, &rqd); + ret = -EIO; } clear_rqd: @@ -919,18 +806,106 @@ clear_rqd: return ret; } -int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line) +int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line, + void *emeta_buf) { - u64 bpaddr = pblk_line_smeta_start(pblk, line); + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line_meta *lm = &pblk->lm; + void *ppa_list, *meta_list; + struct bio *bio; + struct nvm_rq rqd; + u64 paddr = line->emeta_ssec; + dma_addr_t dma_ppa_list, dma_meta_list; + int min = pblk->min_write_pgs; + int left_ppas = lm->emeta_sec[0]; + int line_id = line->id; + int rq_ppas, rq_len; + int i, j; + int ret; - return pblk_line_submit_smeta_io(pblk, line, bpaddr, PBLK_READ_RECOV); -} + meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, + &dma_meta_list); + if (!meta_list) + return -ENOMEM; -int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line, - void *emeta_buf) -{ - return pblk_line_submit_emeta_io(pblk, line, emeta_buf, - line->emeta_ssec, PBLK_READ); + ppa_list = meta_list + pblk_dma_meta_size; + dma_ppa_list = dma_meta_list + pblk_dma_meta_size; + +next_rq: + memset(&rqd, 0, sizeof(struct nvm_rq)); + + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + rq_len = rq_ppas * geo->csecs; + + bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len, + l_mg->emeta_alloc_type, GFP_KERNEL); + if (IS_ERR(bio)) { + ret = PTR_ERR(bio); + goto free_rqd_dma; + } + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_READ, 0); + + rqd.bio = bio; + rqd.meta_list = meta_list; + rqd.ppa_list = ppa_list; + rqd.dma_meta_list = dma_meta_list; + rqd.dma_ppa_list = dma_ppa_list; + rqd.opcode = NVM_OP_PREAD; + rqd.nr_ppas = rq_ppas; + + for (i = 0; i < rqd.nr_ppas; ) { + struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id); + int pos = pblk_ppa_to_pos(geo, ppa); + + if (pblk_io_aligned(pblk, rq_ppas)) + rqd.is_seq = 1; + + while (test_bit(pos, line->blk_bitmap)) { + paddr += min; + if (pblk_boundary_paddr_checks(pblk, paddr)) { + bio_put(bio); + ret = -EINTR; + goto free_rqd_dma; + } + + ppa = addr_to_gen_ppa(pblk, paddr, line_id); + pos = pblk_ppa_to_pos(geo, ppa); + } + + if (pblk_boundary_paddr_checks(pblk, paddr + min)) { + bio_put(bio); + ret = -EINTR; + goto free_rqd_dma; + } + + for (j = 0; j < min; j++, i++, paddr++) + rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id); + } + + ret = pblk_submit_io_sync(pblk, &rqd); + if (ret) { + pblk_err(pblk, "emeta I/O submission failed: %d\n", ret); + bio_put(bio); + goto free_rqd_dma; + } + + atomic_dec(&pblk->inflight_io); + + if (rqd.error) + pblk_log_read_err(pblk, &rqd); + + emeta_buf += rq_len; + left_ppas -= rq_ppas; + if (left_ppas) + goto next_rq; + +free_rqd_dma: + nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); + return ret; } static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd, @@ -1169,7 +1144,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, line->smeta_ssec = off; line->cur_sec = off + lm->smeta_sec; - 
if (init && pblk_line_submit_smeta_io(pblk, line, off, PBLK_WRITE)) { + if (init && pblk_line_smeta_write(pblk, line, off)) { pblk_debug(pblk, "line smeta I/O failed. Retry\n"); return 0; } diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index b841d84c4342..e05d06bd5b83 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -148,7 +148,7 @@ static __le64 *get_lba_list_from_emeta(struct pblk *pblk, if (!emeta_buf) return NULL; - ret = pblk_line_read_emeta(pblk, line, emeta_buf); + ret = pblk_line_emeta_read(pblk, line, emeta_buf); if (ret) { pblk_err(pblk, "line %d read emeta failed (%d)\n", line->id, ret); diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 218292979953..6c57eb00a7f1 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -836,7 +836,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) continue; /* Lines that cannot be read are assumed as not written here */ - if (pblk_line_read_smeta(pblk, line)) + if (pblk_line_smeta_read(pblk, line)) continue; crc = pblk_calc_smeta_crc(pblk, smeta_buf); @@ -906,7 +906,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) line->emeta = emeta; memset(line->emeta->buf, 0, lm->emeta_len[0]); - if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) { + if (pblk_line_emeta_read(pblk, line, line->emeta->buf)) { pblk_recov_l2p_from_oob(pblk, line); goto next; } diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index b06ab0edab69..02e2c02b0cf4 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -819,8 +819,8 @@ void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, void (*work)(struct work_struct *), gfp_t gfp_mask, struct workqueue_struct *wq); u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line); -int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line); -int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line, +int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line); +int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line, void *emeta_buf); int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa); void pblk_line_put(struct kref *ref); -- cgit From 253babc3f677461a9f73b707bbbd56d2962e48c0 Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:12:04 +0200 Subject: lightnvm: pblk: take write semaphore on metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk guarantees write ordering at a chunk level through a per open chunk semaphore. At this point, since we only have an open I/O stream for both user and GC data, the semaphore is per parallel unit. For the metadata I/O that is synchronous, the semaphore is not needed as ordering is guaranteed. However, if the metadata scheme changes or multiple streams are open, this guarantee might not be preserved. This patch makes sure that all writes go through the semaphore, even for synchronous I/O. This is consistent with pblk's write I/O model. It also simplifies maintenance since changes in the metadata scheme could cause ordering issues. 
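Illustratively (hypothetical example_submit_sync(); the new helper is in the hunk below), any synchronous writer keeps chunk-level write ordering simply by picking the semaphore-taking submit variant, while reads stay on the plain path:

        static int example_submit_sync(struct pblk *pblk, struct nvm_rq *rqd)
        {
                /* Writes go through the per-chunk write semaphore. */
                if (rqd->opcode == NVM_OP_PWRITE)
                        return pblk_submit_io_sync_sem(pblk, rqd);

                /* Reads need no ordering guarantee. */
                return pblk_submit_io_sync(pblk, rqd);
        }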
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 16 +++++++++++++++- drivers/lightnvm/pblk.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 49cef93e328e..a3ce4a36dd33 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -557,6 +557,20 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd) return ret; } +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct ppa_addr *ppa_list; + int ret; + + ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; + + pblk_down_chunk(pblk, ppa_list[0]); + ret = pblk_submit_io_sync(pblk, rqd); + pblk_up_chunk(pblk, ppa_list[0]); + + return ret; +} + static void pblk_bio_map_addr_endio(struct bio *bio) { bio_put(bio); @@ -787,7 +801,7 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line, meta_list[i].lba = lba_list[paddr] = addr_empty; } - ret = pblk_submit_io_sync(pblk, &rqd); + ret = pblk_submit_io_sync_sem(pblk, &rqd); if (ret) { pblk_err(pblk, "smeta I/O submission failed: %d\n", ret); bio_put(bio); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 02e2c02b0cf4..4c015c457197 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -792,6 +792,7 @@ void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd); +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line); void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd); struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, -- cgit From 6ad2f619b2b278467242e7a084c4f77d79c31a63 Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:12:05 +0200 Subject: lightnvm: pblk: recover open lines on 2.0 devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the OCSSD 2.0 spec, each chunk reports its write pointer. This means that pblk does not need to scan open lines to find the write pointer, but instead, it can retrieve it directly (and verify it). This patch uses the write pointer on open lines to (i) recover the line up until the last written lba and (ii) reconstruct the map bitmap and rest of line metadata so that the line can be used for new data. Since the 1.2 path in lightnvm core has been re-implemented to populate the chunk structure and thus recover the write pointer on initialization, this patch removes 1.2 specific recovery, as the 2.0 path can be reused. 
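The core of the approach can be sketched as follows (hypothetical example_open_line_written_secs(); compare pblk_sec_in_open_line() in the diff below): with per-chunk write pointers reported by the device, the sectors written to an open line are simply the sum of the write pointers of its usable chunks:

        static u64 example_open_line_written_secs(struct pblk *pblk,
                                                  struct pblk_line *line)
        {
                struct pblk_line_meta *lm = &pblk->lm;
                u64 written = 0;
                int i;

                for (i = 0; i < lm->blk_per_line; i++) {
                        struct nvm_chk_meta *chunk = &line->chks[i];

                        /* Offline chunks contribute no written sectors. */
                        if (!(chunk->state & NVM_CHK_ST_OFFLINE))
                                written += chunk->wp;
                }

                return written;
        }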
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-recovery.c | 400 ++++++++++++--------------------------- 1 file changed, 121 insertions(+), 279 deletions(-) diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 6c57eb00a7f1..fccf65bc70b3 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -86,15 +86,39 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) return 0; } -static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line) +static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line, + u64 written_secs) +{ + int i; + + for (i = 0; i < written_secs; i += pblk->min_write_pgs) + pblk_alloc_page(pblk, line, pblk->min_write_pgs); +} + +static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; struct pblk_line_meta *lm = &pblk->lm; int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); + u64 written_secs = 0; + int valid_chunks = 0; + int i; + + for (i = 0; i < lm->blk_per_line; i++) { + struct nvm_chk_meta *chunk = &line->chks[i]; - return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] - - nr_bb * geo->clba; + if (chunk->state & NVM_CHK_ST_OFFLINE) + continue; + + written_secs += chunk->wp; + valid_chunks++; + } + + if (lm->blk_per_line - nr_bb != valid_chunks) + pblk_err(pblk, "recovery line %d is bad\n", line->id); + + pblk_update_line_wp(pblk, line, written_secs - lm->smeta_sec); + + return written_secs; } struct pblk_recov_alloc { @@ -106,115 +130,6 @@ struct pblk_recov_alloc { dma_addr_t dma_meta_list; }; -static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line, - struct pblk_recov_alloc p, u64 r_ptr) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct ppa_addr *ppa_list; - struct pblk_sec_meta *meta_list; - struct nvm_rq *rqd; - struct bio *bio; - void *data; - dma_addr_t dma_ppa_list, dma_meta_list; - u64 r_ptr_int; - int left_ppas; - int rq_ppas, rq_len; - int i, j; - int ret = 0; - - ppa_list = p.ppa_list; - meta_list = p.meta_list; - rqd = p.rqd; - data = p.data; - dma_ppa_list = p.dma_ppa_list; - dma_meta_list = p.dma_meta_list; - - left_ppas = line->cur_sec - r_ptr; - if (!left_ppas) - return 0; - - r_ptr_int = r_ptr; - -next_read_rq: - memset(rqd, 0, pblk_g_rq_size); - - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); - if (!rq_ppas) - rq_ppas = pblk->min_write_pgs; - rq_len = rq_ppas * geo->csecs; - - bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); - if (IS_ERR(bio)) - return PTR_ERR(bio); - - bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, REQ_OP_READ, 0); - - rqd->bio = bio; - rqd->opcode = NVM_OP_PREAD; - rqd->meta_list = meta_list; - rqd->nr_ppas = rq_ppas; - rqd->ppa_list = ppa_list; - rqd->dma_ppa_list = dma_ppa_list; - rqd->dma_meta_list = dma_meta_list; - - if (pblk_io_aligned(pblk, rq_ppas)) - rqd->is_seq = 1; - - ppa_list = nvm_rq_to_ppa_list(rqd); - - for (i = 0; i < rqd->nr_ppas; ) { - struct ppa_addr ppa; - int pos; - - ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - - while (test_bit(pos, line->blk_bitmap)) { - r_ptr_int += pblk->min_write_pgs; - ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - } - - for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++) - ppa_list[i] = - addr_to_gen_ppa(pblk, r_ptr_int, 
line->id); - } - - /* If read fails, more padding is needed */ - ret = pblk_submit_io_sync(pblk, rqd); - if (ret) { - pblk_err(pblk, "I/O submission failed: %d\n", ret); - return ret; - } - - atomic_dec(&pblk->inflight_io); - - /* At this point, the read should not fail. If it does, it is a problem - * we cannot recover from here. Need FTL log. - */ - if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) { - pblk_err(pblk, "L2P recovery failed (%d)\n", rqd->error); - return -EINTR; - } - - for (i = 0; i < rqd->nr_ppas; i++) { - u64 lba = le64_to_cpu(meta_list[i].lba); - - if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) - continue; - - pblk_update_map(pblk, lba, ppa_list[i]); - } - - left_ppas -= rq_ppas; - if (left_ppas > 0) - goto next_read_rq; - - return 0; -} - static void pblk_recov_complete(struct kref *ref) { struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref); @@ -236,8 +151,9 @@ static void pblk_end_io_recov(struct nvm_rq *rqd) kref_put(&pad_rq->ref, pblk_recov_complete); } -static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line, - int left_ppas) +/* pad line using line bitmap. */ +static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line, + int left_ppas) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; @@ -379,143 +295,42 @@ static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line) return (distance > line->left_msecs) ? line->left_msecs : distance; } -/* When this function is called, it means that not all upper pages have been - * written in a page that contains valid data. In order to recover this data, we - * first find the write pointer on the device, then we pad all necessary - * sectors, and finally attempt to read the valid data - */ -static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line, - struct pblk_recov_alloc p) +static int pblk_line_wp_is_unbalanced(struct pblk *pblk, + struct pblk_line *line) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct ppa_addr *ppa_list; - struct pblk_sec_meta *meta_list; - struct nvm_rq *rqd; - struct bio *bio; - void *data; - dma_addr_t dma_ppa_list, dma_meta_list; - u64 w_ptr = 0, r_ptr; - int rq_ppas, rq_len; - int i, j; - int ret = 0; - int rec_round; - int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec; - - ppa_list = p.ppa_list; - meta_list = p.meta_list; - rqd = p.rqd; - data = p.data; - dma_ppa_list = p.dma_ppa_list; - dma_meta_list = p.dma_meta_list; - - /* we could recover up until the line write pointer */ - r_ptr = line->cur_sec; - rec_round = 0; - -next_rq: - memset(rqd, 0, pblk_g_rq_size); - - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); - if (!rq_ppas) - rq_ppas = pblk->min_write_pgs; - rq_len = rq_ppas * geo->csecs; - - bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); - if (IS_ERR(bio)) - return PTR_ERR(bio); - - bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, REQ_OP_READ, 0); - - rqd->bio = bio; - rqd->opcode = NVM_OP_PREAD; - rqd->meta_list = meta_list; - rqd->nr_ppas = rq_ppas; - rqd->ppa_list = ppa_list; - rqd->dma_ppa_list = dma_ppa_list; - rqd->dma_meta_list = dma_meta_list; + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_lun *rlun; + struct nvm_chk_meta *chunk; + struct ppa_addr ppa; + u64 line_wp; + int pos, i; - if (pblk_io_aligned(pblk, rq_ppas)) - rqd->is_seq = 1; + rlun = &pblk->luns[0]; + ppa = rlun->bppa; + pos = pblk_ppa_to_pos(geo, ppa); + chunk = &line->chks[pos]; - for (i = 0; i < rqd->nr_ppas; ) { - struct ppa_addr 
ppa; - int pos; + line_wp = chunk->wp; - w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); - ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); + for (i = 1; i < lm->blk_per_line; i++) { + rlun = &pblk->luns[i]; + ppa = rlun->bppa; pos = pblk_ppa_to_pos(geo, ppa); + chunk = &line->chks[pos]; - while (test_bit(pos, line->blk_bitmap)) { - w_ptr += pblk->min_write_pgs; - ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - } - - for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) - rqd->ppa_list[i] = - addr_to_gen_ppa(pblk, w_ptr, line->id); - } - - ret = pblk_submit_io_sync(pblk, rqd); - if (ret) { - pblk_err(pblk, "I/O submission failed: %d\n", ret); - return ret; - } - - atomic_dec(&pblk->inflight_io); - - /* This should not happen since the read failed during normal recovery, - * but the media works funny sometimes... - */ - if (!rec_round++ && !rqd->error) { - rec_round = 0; - for (i = 0; i < rqd->nr_ppas; i++, r_ptr++) { - u64 lba = le64_to_cpu(meta_list[i].lba); - - if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) - continue; - - pblk_update_map(pblk, lba, rqd->ppa_list[i]); - } - } - - /* Reached the end of the written line */ - if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) { - int pad_secs, nr_error_bits, bit; - int ret; - - bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas); - nr_error_bits = rqd->nr_ppas - bit; - - /* Roll back failed sectors */ - line->cur_sec -= nr_error_bits; - line->left_msecs += nr_error_bits; - bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits); - - pad_secs = pblk_pad_distance(pblk, line); - - ret = pblk_recov_pad_oob(pblk, line, pad_secs); - if (ret) - pblk_err(pblk, "OOB padding failed (err:%d)\n", ret); - - ret = pblk_recov_read_oob(pblk, line, p, r_ptr); - if (ret) - pblk_err(pblk, "OOB read failed (err:%d)\n", ret); - - left_ppas = 0; + if (chunk->wp > line_wp) + return 1; + else if (chunk->wp < line_wp) + line_wp = chunk->wp; } - left_ppas -= rq_ppas; - if (left_ppas > 0) - goto next_rq; - - return ret; + return 0; } static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, - struct pblk_recov_alloc p, int *done) + struct pblk_recov_alloc p) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; @@ -525,11 +340,16 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, struct bio *bio; void *data; dma_addr_t dma_ppa_list, dma_meta_list; - u64 paddr; + __le64 *lba_list; + u64 paddr = 0; + bool padded = false; int rq_ppas, rq_len; int i, j; - int ret = 0; - int left_ppas = pblk_calc_sec_in_line(pblk, line); + int ret; + u64 left_ppas = pblk_sec_in_open_line(pblk, line); + + if (pblk_line_wp_is_unbalanced(pblk, line)) + pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id); ppa_list = p.ppa_list; meta_list = p.meta_list; @@ -538,7 +358,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, dma_ppa_list = p.dma_ppa_list; dma_meta_list = p.dma_meta_list; - *done = 1; + lba_list = emeta_to_lbas(pblk, line->emeta->buf); next_rq: memset(rqd, 0, pblk_g_rq_size); @@ -566,11 +386,11 @@ next_rq: if (pblk_io_aligned(pblk, rq_ppas)) rqd->is_seq = 1; +retry_rq: for (i = 0; i < rqd->nr_ppas; ) { struct ppa_addr ppa; int pos; - paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); ppa = addr_to_gen_ppa(pblk, paddr, line->id); pos = pblk_ppa_to_pos(geo, ppa); @@ -580,9 +400,9 @@ next_rq: pos = pblk_ppa_to_pos(geo, ppa); } - for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++) + for (j = 0; j < pblk->min_write_pgs; j++, i++) 
rqd->ppa_list[i] = - addr_to_gen_ppa(pblk, paddr, line->id); + addr_to_gen_ppa(pblk, paddr + j, line->id); } ret = pblk_submit_io_sync(pblk, rqd); @@ -594,31 +414,33 @@ next_rq: atomic_dec(&pblk->inflight_io); - /* Reached the end of the written line */ + /* If a read fails, do a best effort by padding the line and retrying */ if (rqd->error) { - int nr_error_bits, bit; - - bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas); - nr_error_bits = rqd->nr_ppas - bit; + int pad_distance, ret; - /* Roll back failed sectors */ - line->cur_sec -= nr_error_bits; - line->left_msecs += nr_error_bits; - bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits); + if (padded) { + pblk_log_read_err(pblk, rqd); + return -EINTR; + } - left_ppas = 0; - rqd->nr_ppas = bit; + pad_distance = pblk_pad_distance(pblk, line); + ret = pblk_recov_pad_line(pblk, line, pad_distance); + if (ret) + return ret; - if (rqd->error != NVM_RSP_ERR_EMPTYPAGE) - *done = 0; + padded = true; + goto retry_rq; } for (i = 0; i < rqd->nr_ppas; i++) { u64 lba = le64_to_cpu(meta_list[i].lba); + lba_list[paddr++] = cpu_to_le64(lba); + if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) continue; + line->nr_valid_lbas++; pblk_update_map(pblk, lba, rqd->ppa_list[i]); } @@ -626,7 +448,11 @@ next_rq: if (left_ppas > 0) goto next_rq; - return ret; +#ifdef CONFIG_NVM_PBLK_DEBUG + WARN_ON(padded && !pblk_line_is_full(line)); +#endif + + return 0; } /* Scan line for lbas on out of bound area */ @@ -640,7 +466,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) struct pblk_recov_alloc p; void *data; dma_addr_t dma_ppa_list, dma_meta_list; - int done, ret = 0; + int ret = 0; meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); if (!meta_list) @@ -655,7 +481,8 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) goto free_meta_list; } - rqd = pblk_alloc_rqd(pblk, PBLK_READ); + rqd = mempool_alloc(&pblk->r_rq_pool, GFP_KERNEL); + memset(rqd, 0, pblk_g_rq_size); p.ppa_list = ppa_list; p.meta_list = meta_list; @@ -664,24 +491,17 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) p.dma_ppa_list = dma_ppa_list; p.dma_meta_list = dma_meta_list; - ret = pblk_recov_scan_oob(pblk, line, p, &done); + ret = pblk_recov_scan_oob(pblk, line, p); if (ret) { - pblk_err(pblk, "could not recover L2P from OOB\n"); + pblk_err(pblk, "could not recover L2P form OOB\n"); goto out; } - if (!done) { - ret = pblk_recov_scan_all_oob(pblk, line, p); - if (ret) { - pblk_err(pblk, "could not recover L2P from OOB\n"); - goto out; - } - } - if (pblk_line_is_full(line)) pblk_line_recov_close(pblk, line); out: + mempool_free(rqd, &pblk->r_rq_pool); kfree(data); free_meta_list: nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); @@ -770,7 +590,7 @@ static void pblk_recov_wa_counters(struct pblk *pblk, } static int pblk_line_was_written(struct pblk_line *line, - struct pblk *pblk) + struct pblk *pblk) { struct pblk_line_meta *lm = &pblk->lm; @@ -796,6 +616,18 @@ static int pblk_line_was_written(struct pblk_line *line, return 1; } +static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_meta *lm = &pblk->lm; + int i; + + for (i = 0; i < lm->blk_per_line; i++) + if (line->chks[i].state & NVM_CHK_ST_OPEN) + return true; + + return false; +} + struct pblk_line *pblk_recov_l2p(struct pblk *pblk) { struct pblk_line_meta *lm = &pblk->lm; @@ -906,6 +738,11 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) line->emeta = emeta; 
memset(line->emeta->buf, 0, lm->emeta_len[0]); + if (pblk_line_is_open(pblk, line)) { + pblk_recov_l2p_from_oob(pblk, line); + goto next; + } + if (pblk_line_emeta_read(pblk, line, line->emeta->buf)) { pblk_recov_l2p_from_oob(pblk, line); goto next; @@ -944,15 +781,20 @@ next: line->smeta = NULL; line->emeta = NULL; } else { - if (open_lines > 1) - pblk_err(pblk, "failed to recover L2P\n"); + spin_lock(&line->lock); + line->state = PBLK_LINESTATE_OPEN; + spin_unlock(&line->lock); + + line->emeta->mem = 0; + atomic_set(&line->emeta->sync, 0); trace_pblk_line_state(pblk_disk_name(pblk), line->id, line->state); - open_lines++; - line->meta_line = meta_line; data_line = line; + line->meta_line = meta_line; + + open_lines++; } } @@ -1000,7 +842,7 @@ int pblk_recov_pad(struct pblk *pblk) left_msecs = line->left_msecs; spin_unlock(&l_mg->free_lock); - ret = pblk_recov_pad_oob(pblk, line, left_msecs); + ret = pblk_recov_pad_line(pblk, line, left_msecs); if (ret) { pblk_err(pblk, "tear down padding failed (%d)\n", ret); return ret; -- cgit From 02a1520d56d11982ccc8eab56e4c562fd05d2c86 Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:12:06 +0200 Subject: lightnvm: pblk: add SPDX license tag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add GPL-2.0 SPDX license tag to all pblk files Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-cache.c | 1 + drivers/lightnvm/pblk-core.c | 1 + drivers/lightnvm/pblk-gc.c | 1 + drivers/lightnvm/pblk-init.c | 1 + drivers/lightnvm/pblk-map.c | 1 + drivers/lightnvm/pblk-rb.c | 1 + drivers/lightnvm/pblk-read.c | 1 + drivers/lightnvm/pblk-recovery.c | 1 + drivers/lightnvm/pblk-rl.c | 1 + drivers/lightnvm/pblk-sysfs.c | 1 + drivers/lightnvm/pblk-write.c | 1 + drivers/lightnvm/pblk.h | 1 + 12 files changed, 12 insertions(+) diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c index f565a56b898a..c9fa26f95659 100644 --- a/drivers/lightnvm/pblk-cache.c +++ b/drivers/lightnvm/pblk-cache.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index a3ce4a36dd33..4045a9b1ee74 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index e05d06bd5b83..2fa118c8eb71 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 7ef8249108f0..f84c428a76f1 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015 IT University of Copenhagen (rrpc.c) * Copyright (C) 2016 CNEX Labs diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c index ff677ca6e4e1..6dcbd44e3acb 100644 --- a/drivers/lightnvm/pblk-map.c +++ b/drivers/lightnvm/pblk-map.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index a7648e12f54f..c26eab2ba8bd 100644 ---
a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index f5fe01d3a07f..9fba614adeeb 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index fccf65bc70b3..5740b7509bd8 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial: Javier Gonzalez diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c index a32790f7b7fc..db55a1c89997 100644 --- a/drivers/lightnvm/pblk-rl.c +++ b/drivers/lightnvm/pblk-rl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index e56eeeeb914e..cba83ac43e62 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index a276f25d3931..277abc8633f7 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 CNEX Labs * Initial release: Javier Gonzalez diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 4c015c457197..e710f6db2a35 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2015 IT University of Copenhagen (rrpc.h) * Copyright (C) 2016 CNEX Labs -- cgit From 44cdbdc657b23f75736eca3e88b781f009104363 Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:12:07 +0200 Subject: lightnvm: pblk: fix race on sysfs line state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk exposes a sysfs interface that represents its internal state. Part of this state is the map bitmap for the current open line, which should be protected by the line lock to avoid a race when freeing the line metadata. Currently, it is not. This patch makes sure that the line state is consistent and NULL bitmap pointers are not dereferenced. 
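The shape of the fix, sketched outside the kernel (a minimal pthread model; it assumes the freeing path and the sysfs reader share one lock, and all names are illustrative rather than the pblk API):

    #include <pthread.h>
    #include <stdlib.h>

    struct line {
            pthread_mutex_t lock;
            unsigned long *map_bitmap;      /* set to NULL once freed */
            size_t nwords;
    };

    /* Reader (the sysfs show path): take the lock and tolerate NULL. */
    static size_t line_map_weight(struct line *l)
    {
            size_t weight = 0, i;

            pthread_mutex_lock(&l->lock);
            if (l->map_bitmap)
                    for (i = 0; i < l->nwords; i++)
                            weight += __builtin_popcountl(l->map_bitmap[i]);
            pthread_mutex_unlock(&l->lock);
            return weight;
    }

    /* Freeing path: clear the pointer under the same lock so the
     * reader can never dereference freed memory.
     */
    static void line_free_map(struct line *l)
    {
            pthread_mutex_lock(&l->lock);
            free(l->map_bitmap);
            l->map_bitmap = NULL;
            pthread_mutex_unlock(&l->lock);
    }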
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 5 +++-- drivers/lightnvm/pblk-sysfs.c | 8 +++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 4045a9b1ee74..6944aac43b01 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1611,13 +1611,14 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk) struct pblk_line *cur, *new = NULL; unsigned int left_seblks; - cur = l_mg->data_line; new = l_mg->data_next; if (!new) goto out; - l_mg->data_line = new; spin_lock(&l_mg->free_lock); + cur = l_mg->data_line; + l_mg->data_line = new; + pblk_line_setup_metadata(new, l_mg, &pblk->lm); spin_unlock(&l_mg->free_lock); diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index cba83ac43e62..2d2818155aa8 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -263,8 +263,14 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) sec_in_line = l_mg->data_line->sec_in_line; meta_weight = bitmap_weight(&l_mg->meta_bitmap, PBLK_DATA_LINES); - map_weight = bitmap_weight(l_mg->data_line->map_bitmap, + + spin_lock(&l_mg->data_line->lock); + if (l_mg->data_line->map_bitmap) + map_weight = bitmap_weight(l_mg->data_line->map_bitmap, lm->sec_per_line); + else + map_weight = 0; + spin_unlock(&l_mg->data_line->lock); } spin_unlock(&l_mg->free_lock); -- cgit From dde4aac20babee040875f76842a31b14b76dd369 Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:12:08 +0200 Subject: lightnvm: pblk: remove unused function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removed unused function in pblk-rb.c Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-rb.c | 26 -------------------------- drivers/lightnvm/pblk.h | 2 -- 2 files changed, 28 deletions(-) diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index c26eab2ba8bd..82829e8151db 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -729,32 +729,6 @@ unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb) return (submitted < to_flush) ? (to_flush - submitted) : 0; } -/* - * Scan from the current position of the sync pointer to find the entry that - * corresponds to the given ppa. This is necessary since write requests can be - * completed out of order. The assumption is that the ppa is close to the sync - * pointer thus the search will not take long. - * - * The caller of this function must guarantee that the sync pointer will no - * reach the entry while it is using the metadata associated with it. With this - * assumption in mind, there is no need to take the sync lock. 
- */ -struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb, - struct ppa_addr *ppa) -{ - unsigned int sync, subm, count; - unsigned int i; - - sync = READ_ONCE(rb->sync); - subm = READ_ONCE(rb->subm); - count = pblk_rb_ring_count(subm, sync, rb->nr_entries); - - for (i = 0; i < count; i++) - sync = (sync + 1) & (rb->nr_entries - 1); - - return NULL; -} - int pblk_rb_tear_down_check(struct pblk_rb *rb) { struct pblk_rb_entry *entry; diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index e710f6db2a35..22586b78198a 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -760,8 +760,6 @@ unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags); unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries); -struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb, - struct ppa_addr *ppa); void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags); unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb); -- cgit From 40b8657dcc0bbd4a293cac1acd4b0b01c0359416 Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:12:09 +0200 Subject: lightnvm: pblk: encapsulate rb pointer operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk's read/write buffer is always a power-of-2, thus wrapping up the buffer can be done with a bit mask. Since this is an implementation detail internal to the write buffer, make a helper that hides pointer increment + wrap, and allows to transparently relax this assumption in the future. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-rb.c | 21 +++++++++++++-------- drivers/lightnvm/pblk-write.c | 7 ++----- drivers/lightnvm/pblk.h | 2 ++ 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index 82829e8151db..e46d8cb9d28b 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -169,6 +169,12 @@ static unsigned int pblk_rb_space(struct pblk_rb *rb) return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries); } +unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p, + unsigned int nr_entries) +{ + return (p + nr_entries) & (rb->nr_entries - 1); +} + /* * Buffer count is calculated with respect to the submission entry signaling the * entries that are available to send to the media @@ -195,8 +201,7 @@ unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries) subm = READ_ONCE(rb->subm); /* Commit read means updating submission pointer */ - smp_store_release(&rb->subm, - (subm + nr_entries) & (rb->nr_entries - 1)); + smp_store_release(&rb->subm, pblk_rb_ptr_wrap(rb, subm, nr_entries)); return subm; } @@ -229,7 +234,7 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update) line = pblk_ppa_to_line(pblk, w_ctx->ppa); kref_put(&line->ref, pblk_line_put); clean_wctx(w_ctx); - rb->l2p_update = (rb->l2p_update + 1) & (rb->nr_entries - 1); + rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1); } pblk_rl_out(&pblk->rl, user_io, gc_io); @@ -408,7 +413,7 @@ static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries, return 0; /* Protect from read count */ - smp_store_release(&rb->mem, (*pos + nr_entries) & (rb->nr_entries - 1)); + smp_store_release(&rb->mem, pblk_rb_ptr_wrap(rb, *pos, nr_entries)); return 1; } @@ -432,7 +437,7 @@ static int 
pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, if (!__pblk_rb_may_write(rb, nr_entries, pos)) return 0; - mem = (*pos + nr_entries) & (rb->nr_entries - 1); + mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries); *io_ret = NVM_IO_DONE; if (bio->bi_opf & REQ_PREFLUSH) { @@ -572,7 +577,7 @@ try: /* Release flags on context. Protect from writes */ smp_store_release(&entry->w_ctx.flags, flags); - pos = (pos + 1) & (rb->nr_entries - 1); + pos = pblk_rb_ptr_wrap(rb, pos, 1); } if (pad) { @@ -652,7 +657,7 @@ out: struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos) { - unsigned int entry = pos & (rb->nr_entries - 1); + unsigned int entry = pblk_rb_ptr_wrap(rb, pos, 0); return &rb->entries[entry].w_ctx; } @@ -698,7 +703,7 @@ unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries) } } - sync = (sync + nr_entries) & (rb->nr_entries - 1); + sync = pblk_rb_ptr_wrap(rb, sync, nr_entries); /* Protect from counts */ smp_store_release(&rb->sync, sync); diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 277abc8633f7..fa8726493b39 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -140,12 +140,11 @@ static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry, struct pblk_w_ctx *w_ctx; struct ppa_addr ppa_l2p; int flags; - unsigned int pos, i; + unsigned int i; spin_lock(&pblk->trans_lock); - pos = sentry; for (i = 0; i < nr_entries; i++) { - entry = &rb->entries[pos]; + entry = &rb->entries[pblk_rb_ptr_wrap(rb, sentry, i)]; w_ctx = &entry->w_ctx; /* Check if the lba has been overwritten */ @@ -164,8 +163,6 @@ static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry, */ line = pblk_ppa_to_line(pblk, w_ctx->ppa); kref_put(&line->ref, pblk_line_put); - - pos = (pos + 1) & (rb->nr_entries - 1); } spin_unlock(&pblk->trans_lock); } diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 22586b78198a..7660811aa8af 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -760,6 +760,8 @@ unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags); unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries); +unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p, + unsigned int nr_entries); void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags); unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb); -- cgit From 9bd1f875c047a8a619a3e5233c1eb23eace02f31 Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:12:10 +0200 Subject: lightnvm: pblk: move ring buffer alloc/free rb init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk's read/write buffer currently takes a buffer and its size and uses it to create the metadata around it to use it as a ring buffer. This puts the responsibility of allocating/freeing ring buffer memory on the ring buffer user. Instead, move it inside of the ring buffer helpers (pblk-rb.c). This simplifies creation/destruction routines. 
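The resulting ownership model looks roughly like this (a hedged sketch with illustrative names, not the pblk_rb API itself): the buffer allocates its own entry array on init, rounds the requested size up to a power of two so that wrap-around stays a cheap bit mask, and releases the array on teardown.

    #include <stdlib.h>

    struct rb_entry { void *data; };

    struct ring {
            struct rb_entry *entries;       /* owned by the ring buffer */
            unsigned int nr_entries;        /* always a power of two */
    };

    static int ring_init(struct ring *rb, unsigned int size)
    {
            unsigned int nr = 1;

            while (nr < size)               /* round up to a power of two */
                    nr <<= 1;

            rb->entries = calloc(nr, sizeof(*rb->entries));
            if (!rb->entries)
                    return -1;
            rb->nr_entries = nr;
            return 0;
    }

    static void ring_free(struct ring *rb)
    {
            free(rb->entries);              /* the ring frees what it allocated */
            rb->entries = NULL;
    }

With this split, callers pass sizes instead of memory, and teardown cannot leak or double-free the entry array.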
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 18 +++------------ drivers/lightnvm/pblk-rb.c | 53 +++++++++++++++++++++++++++----------------- drivers/lightnvm/pblk.h | 7 ++---- 3 files changed, 38 insertions(+), 40 deletions(-) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index f84c428a76f1..b2c49fc006c9 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -185,17 +185,14 @@ static void pblk_rwb_free(struct pblk *pblk) if (pblk_rb_tear_down_check(&pblk->rwb)) pblk_err(pblk, "write buffer error on tear down\n"); - pblk_rb_data_free(&pblk->rwb); - vfree(pblk_rb_entries_ref(&pblk->rwb)); + pblk_rb_free(&pblk->rwb); } static int pblk_rwb_init(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct pblk_rb_entry *entries; - unsigned long nr_entries, buffer_size; - unsigned int power_size, power_seg_sz; + unsigned long buffer_size; int pgs_in_buffer; pgs_in_buffer = max(geo->mw_cunits, geo->ws_opt) * geo->all_luns; @@ -205,16 +202,7 @@ static int pblk_rwb_init(struct pblk *pblk) else buffer_size = pgs_in_buffer; - nr_entries = pblk_rb_calculate_size(buffer_size); - - entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry))); - if (!entries) - return -ENOMEM; - - power_size = get_count_order(nr_entries); - power_seg_sz = get_count_order(geo->csecs); - - return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz); + return pblk_rb_init(&pblk->rwb, buffer_size, geo->csecs); } /* Minimum pages needed within a lun */ diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index e46d8cb9d28b..f653faa6a9ed 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -23,7 +23,7 @@ static DECLARE_RWSEM(pblk_rb_lock); -void pblk_rb_data_free(struct pblk_rb *rb) +static void pblk_rb_data_free(struct pblk_rb *rb) { struct pblk_rb_pages *p, *t; @@ -36,22 +36,46 @@ void pblk_rb_data_free(struct pblk_rb *rb) up_write(&pblk_rb_lock); } +void pblk_rb_free(struct pblk_rb *rb) +{ + pblk_rb_data_free(rb); + vfree(rb->entries); +} + +/* + * pblk_rb_calculate_size -- calculate the size of the write buffer + */ +static unsigned int pblk_rb_calculate_size(unsigned int nr_entries) +{ + /* Alloc a write buffer that can at least fit 128 entries */ + return (1 << max(get_count_order(nr_entries), 7)); +} + /* * Initialize ring buffer. 
The data and metadata buffers must be previously * allocated and their size must be a power of two * (Documentation/core-api/circular-buffers.rst) */ -int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, - unsigned int power_size, unsigned int power_seg_sz) +int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int seg_size) { struct pblk *pblk = container_of(rb, struct pblk, rwb); + struct pblk_rb_entry *entries; unsigned int init_entry = 0; - unsigned int alloc_order = power_size; unsigned int max_order = MAX_ORDER - 1; - unsigned int order, iter; + unsigned int power_size, power_seg_sz; + unsigned int alloc_order, order, iter; + unsigned int nr_entries; + + nr_entries = pblk_rb_calculate_size(size); + entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry))); + if (!entries) + return -ENOMEM; + + power_size = get_count_order(size); + power_seg_sz = get_count_order(seg_size); down_write(&pblk_rb_lock); - rb->entries = rb_entry_base; + rb->entries = entries; rb->seg_size = (1 << power_seg_sz); rb->nr_entries = (1 << power_size); rb->mem = rb->subm = rb->sync = rb->l2p_update = 0; @@ -62,6 +86,7 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, INIT_LIST_HEAD(&rb->pages); + alloc_order = power_size; if (alloc_order >= max_order) { order = max_order; iter = (1 << (alloc_order - max_order)); @@ -80,6 +105,7 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL); if (!page_set) { up_write(&pblk_rb_lock); + vfree(entries); return -ENOMEM; } @@ -89,6 +115,7 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, kfree(page_set); pblk_rb_data_free(rb); up_write(&pblk_rb_lock); + vfree(entries); return -ENOMEM; } kaddr = page_address(page_set->pages); @@ -125,20 +152,6 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, return 0; } -/* - * pblk_rb_calculate_size -- calculate the size of the write buffer - */ -unsigned int pblk_rb_calculate_size(unsigned int nr_entries) -{ - /* Alloc a write buffer that can at least fit 128 entries */ - return (1 << max(get_count_order(nr_entries), 7)); -} - -void *pblk_rb_entries_ref(struct pblk_rb *rb) -{ - return rb->entries; -} - static void clean_wctx(struct pblk_w_ctx *w_ctx) { int flags; diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 7660811aa8af..0f98ea24ee59 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -734,10 +734,7 @@ struct pblk_line_ws { /* * pblk ring buffer operations */ -int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, - unsigned int power_size, unsigned int power_seg_sz); -unsigned int pblk_rb_calculate_size(unsigned int nr_entries); -void *pblk_rb_entries_ref(struct pblk_rb *rb); +int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int seg_sz); int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio, unsigned int nr_entries, unsigned int *pos); int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, @@ -771,7 +768,7 @@ unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos); int pblk_rb_tear_down_check(struct pblk_rb *rb); int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos); -void pblk_rb_data_free(struct pblk_rb *rb); +void pblk_rb_free(struct pblk_rb *rb); ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf); /* -- cgit From d672d92d9c433c365fd6cdb4da1c02562b5f1178 Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 
13:12:11 +0200 Subject: lightnvm: pblk: guarantee mw_cunits on read buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OCSSD 2.0 defines the amount of data that the host must buffer per chunk to guarantee reads through the geometry field mw_cunits. This value is the base that pblk uses to determine the size of its read buffer. Currently, this size is set to be the closest power-of-2 to mw_cunits times the number of parallel units available to the pblk instance for each open line (currently one). When an entry (4KB) is put in the buffer, the L2P table points to it. As the buffer wraps up, the L2P is updated to point to addresses on the device, thus guaranteeing mw_cunits at a chunk level. However, given that pblk cannot write to the device under ws_min (normally ws_opt), there might be a window in which the buffer starts wrapping up and updating L2P entries before the mw_cunits value in a chunk has been surpassed. In order not to violate the mw_cunits constraint in this case, account for ws_opt on the read buffer creation. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index b2c49fc006c9..e0db0cb3122d 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -195,7 +195,8 @@ static int pblk_rwb_init(struct pblk *pblk) unsigned long buffer_size; int pgs_in_buffer; - pgs_in_buffer = max(geo->mw_cunits, geo->ws_opt) * geo->all_luns; + pgs_in_buffer = (max(geo->mw_cunits, geo->ws_opt) + geo->ws_opt) + * geo->all_luns; if (write_buffer_size && (write_buffer_size > pgs_in_buffer)) buffer_size = write_buffer_size; -- cgit From 6fd05cad5ee1290b276dd8ed90a1e019b1fa577a Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:12:12 +0200 Subject: lightnvm: do not update csecs and sos on 1.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1.2 devices expose their data and metadata size through the separate identify command. Make sure that the NVMe LBA format does not override these values. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/nvme/host/lightnvm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 7d0a4d3b0a48..a4f3b263cd6c 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -977,6 +977,9 @@ void nvme_nvm_update_nvm_info(struct nvme_ns *ns) struct nvm_dev *ndev = ns->ndev; struct nvm_geo *geo = &ndev->geo; + if (geo->version == NVM_OCSSD_SPEC_12) + return; + geo->csecs = 1 << ns->lba_shift; geo->sos = ns->ms; } -- cgit From a70985f83c625a5eaf618be81621e5e4521a66c6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 9 Oct 2018 13:12:13 +0200 Subject: lightnvm: pblk: fix error handling of pblk_lines_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the too many bad blocks error handling case, we should release all the allocated resources, otherwise it will cause a memory leak. Fixes: 2deeefc02dff ("lightnvm: pblk: fail gracefully on line alloc.
failure") Signed-off-by: Wei Yongjun Reviewed-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index e0db0cb3122d..e3573880dbda 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1024,7 +1024,8 @@ static int pblk_lines_init(struct pblk *pblk) if (!nr_free_chks) { pblk_err(pblk, "too many bad blocks prevent for sane instance\n"); - return -EINTR; + ret = -EINTR; + goto fail_free_lines; } pblk_set_provision(pblk, nr_free_chks); -- cgit From 8a57fc3823d08edb1661a06d9e0a8c2365ac561e Mon Sep 17 00:00:00 2001 From: Zhoujie Wu Date: Tue, 9 Oct 2018 13:12:14 +0200 Subject: lightnvm: pblk: consider max hw sectors supported for max_write_pgs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When do GC, the number of read/write sectors are determined by max_write_pgs(see gc_rq preparation in pblk_gc_line_prepare_ws). Due to max_write_pgs doesn't consider max hw sectors supported by nvme controller(128K), which leads to GC tries to read 64 * 4K in one command, and see below error caused by pblk_bio_map_addr in function pblk_submit_read_gc. [ 2923.005376] pblk: could not add page to bio [ 2923.005377] pblk: could not allocate GC bio (18446744073709551604) Signed-off-by: Zhoujie Wu Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index e3573880dbda..e5239aba806b 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -407,6 +407,8 @@ static int pblk_core_init(struct pblk *pblk) pblk->min_write_pgs = geo->ws_opt; max_write_ppas = pblk->min_write_pgs * geo->all_luns; pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA); + pblk->max_write_pgs = min_t(int, pblk->max_write_pgs, + queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT)); pblk_set_sec_per_write(pblk, pblk->min_write_pgs); pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t), -- cgit From 766c8ceb16fce904d6b8985ca2c0a547e053d1d5 Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:12:15 +0200 Subject: lightnvm: pblk: guarantee that backpointer is respected on writer stall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk's write buffer must guarantee that it respects the device's constrains for reads (i.e., mw_cunits). This is done by maintaining a backpointer that updates the L2P table as entries wrap up, making them point to the media instead of pointing to the write buffer. This mechanism can race in case that the write thread stalls, as the write pointer will protect the last written entry, thus disregarding the read constrains. This patch adds an extra check on wrap up, making sure that the threshold is respected at all times, preventing new entries to overwrite committed data, also in case of write thread stall. 
Reported-by: Heiner Litz Signed-off-by: Javier González Reviewed-by: Heiner Litz Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 5 +++-- drivers/lightnvm/pblk-rb.c | 9 +++++++-- drivers/lightnvm/pblk.h | 8 +++++++- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index e5239aba806b..13822594647c 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -193,8 +193,9 @@ static int pblk_rwb_init(struct pblk *pblk) struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; unsigned long buffer_size; - int pgs_in_buffer; + int pgs_in_buffer, threshold; + threshold = geo->mw_cunits * geo->all_luns; pgs_in_buffer = (max(geo->mw_cunits, geo->ws_opt) + geo->ws_opt) * geo->all_luns; @@ -203,7 +204,7 @@ static int pblk_rwb_init(struct pblk *pblk) else buffer_size = pgs_in_buffer; - return pblk_rb_init(&pblk->rwb, buffer_size, geo->csecs); + return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs); } /* Minimum pages needed within a lun */ diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index f653faa6a9ed..b1f4b51783f4 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -56,7 +56,8 @@ static unsigned int pblk_rb_calculate_size(unsigned int nr_entries) * allocated and their size must be a power of two * (Documentation/core-api/circular-buffers.rst) */ -int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int seg_size) +int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold, + unsigned int seg_size) { struct pblk *pblk = container_of(rb, struct pblk, rwb); struct pblk_rb_entry *entries; @@ -79,6 +80,7 @@ int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int seg_size) rb->seg_size = (1 << power_seg_sz); rb->nr_entries = (1 << power_size); rb->mem = rb->subm = rb->sync = rb->l2p_update = 0; + rb->back_thres = threshold; rb->flush_point = EMPTY_ENTRY; spin_lock_init(&rb->w_lock); @@ -404,11 +406,14 @@ static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries, { unsigned int mem; unsigned int sync; + unsigned int threshold; sync = READ_ONCE(rb->sync); mem = READ_ONCE(rb->mem); - if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < nr_entries) + threshold = nr_entries + rb->back_thres; + + if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < threshold) return 0; if (pblk_rb_update_l2p(rb, nr_entries, mem, sync)) diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 0f98ea24ee59..02bb2e98f8a9 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -203,6 +203,11 @@ struct pblk_rb { * will be 4KB */ + unsigned int back_thres; /* Threshold that shall be maintained by + * the backpointer in order to respect + * geo->mw_cunits on a per chunk basis + */ + struct list_head pages; /* List of data pages */ spinlock_t w_lock; /* Write lock */ @@ -734,7 +739,8 @@ struct pblk_line_ws { /* * pblk ring buffer operations */ -int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int seg_sz); +int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold, + unsigned int seg_sz); int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio, unsigned int nr_entries, unsigned int *pos); int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, -- cgit From 1306ad4e60de57022a90b1904870763a39adcb42 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 9 Oct 2018 
16:32:54 +0200 Subject: block: remove redundant 'default n' from Kconfig-s 'default n' is the default value for any bool or tristate Kconfig setting so there is no need to write it explicitly. Also since commit f467c5640c29 ("kconfig: only write '# CONFIG_FOO is not set' for visible symbols") the Kconfig behavior is the same regardless of 'default n' being present or not: ... One side effect of (and the main motivation for) this change is making the following two definitions behave exactly the same: config FOO bool config FOO bool default n With this change, neither of these will generate a '# CONFIG_FOO is not set' line (assuming FOO isn't selected/implied). That might make it clearer to people that a bare 'default n' is redundant. ... Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- block/Kconfig | 7 ------- block/Kconfig.iosched | 3 --- 2 files changed, 10 deletions(-) diff --git a/block/Kconfig b/block/Kconfig index 85263e7bded6..f7045aa47edb 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -74,7 +74,6 @@ config BLK_DEV_BSG config BLK_DEV_BSGLIB bool "Block layer SG support v4 helper lib" - default n select BLK_DEV_BSG select BLK_SCSI_REQUEST help @@ -107,7 +106,6 @@ config BLK_DEV_ZONED config BLK_DEV_THROTTLING bool "Block layer bio throttling support" depends on BLK_CGROUP=y - default n ---help--- Block layer bio throttling support. It can be used to limit the IO rate to a device. IO rate policies are per cgroup and @@ -119,7 +117,6 @@ config BLK_DEV_THROTTLING config BLK_DEV_THROTTLING_LOW bool "Block throttling .low limit interface support (EXPERIMENTAL)" depends on BLK_DEV_THROTTLING - default n ---help--- Add .low limit interface for block throttling. The low limit is a best effort limit to prioritize cgroups. Depending on the setting, the limit @@ -130,7 +127,6 @@ config BLK_DEV_THROTTLING_LOW config BLK_CMDLINE_PARSER bool "Block device command line partition parser" - default n ---help--- Enabling this option allows you to specify the partition layout from the kernel boot args. This is typically of use for embedded devices @@ -141,7 +137,6 @@ config BLK_CMDLINE_PARSER config BLK_WBT bool "Enable support for block device writeback throttling" - default n ---help--- Enabling this option enables the block layer to throttle buffered background writeback from the VM, making it more smooth and having @@ -152,7 +147,6 @@ config BLK_WBT config BLK_CGROUP_IOLATENCY bool "Enable support for latency based cgroup IO protection" depends on BLK_CGROUP=y - default n ---help--- Enabling this option enables the .latency interface for IO throttling. The IO controller will attempt to maintain average IO latencies below @@ -163,7 +157,6 @@ config BLK_CGROUP_IOLATENCY config BLK_WBT_SQ bool "Single queue writeback throttling" - default n depends on BLK_WBT ---help--- Enable writeback throttling by default on legacy single queue devices diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index a4a8914bf7a4..f95a48b0d7b2 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -36,7 +36,6 @@ config IOSCHED_CFQ config CFQ_GROUP_IOSCHED bool "CFQ Group Scheduling support" depends on IOSCHED_CFQ && BLK_CGROUP - default n ---help--- Enable group IO scheduling in CFQ. @@ -82,7 +81,6 @@ config MQ_IOSCHED_KYBER config IOSCHED_BFQ tristate "BFQ I/O scheduler" - default n ---help--- BFQ I/O scheduler for BLK-MQ. 
BFQ distributes the bandwidth of of the device among all processes according to their weights, @@ -94,7 +92,6 @@ config BFQ_GROUP_IOSCHED bool "BFQ hierarchical scheduling support" depends on IOSCHED_BFQ && BLK_CGROUP - default n ---help--- Enable hierarchical scheduling in BFQ, using the blkio -- cgit From 486c6fba90f64d8d6e09f5a36aa722af6b52828c Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 9 Oct 2018 16:41:01 +0200 Subject: drivers/block: remove redundant 'default n' from Kconfig-s 'default n' is the default value for any bool or tristate Kconfig setting so there is no need to write it explicitly. Also since commit f467c5640c29 ("kconfig: only write '# CONFIG_FOO is not set' for visible symbols") the Kconfig behavior is the same regardless of 'default n' being present or not: ... One side effect of (and the main motivation for) this change is making the following two definitions behave exactly the same: config FOO bool config FOO bool default n With this change, neither of these will generate a '# CONFIG_FOO is not set' line (assuming FOO isn't selected/implied). That might make it clearer to people that a bare 'default n' is redundant. ... Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 1 - drivers/block/drbd/Kconfig | 1 - drivers/block/zram/Kconfig | 2 -- 3 files changed, 4 deletions(-) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index d4913516823f..8aec33277fda 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -461,7 +461,6 @@ config BLK_DEV_RBD select LIBCRC32C select CRYPTO_AES select CRYPTO - default n help Say Y here if you want include the Rados block device, which stripes a block device over objects stored in the Ceph distributed object diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig index 87aab6910d2d..52d885cdccb5 100644 --- a/drivers/block/drbd/Kconfig +++ b/drivers/block/drbd/Kconfig @@ -11,7 +11,6 @@ config BLK_DEV_DRBD depends on PROC_FS && INET select LRU_CACHE select LIBCRC32C - default n help NOTE: In order to authenticate connections you have to select diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 635235759a0a..fcd055457364 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -3,7 +3,6 @@ config ZRAM tristate "Compressed RAM block device support" depends on BLOCK && SYSFS && ZSMALLOC && CRYPTO select CRYPTO_LZO - default n help Creates virtual block devices called /dev/zramX (X = 0, 1, ...). Pages written to these disks are compressed and stored in memory @@ -18,7 +17,6 @@ config ZRAM config ZRAM_WRITEBACK bool "Write back incompressible page to backing device" depends on ZRAM - default n help With incompressible page, there is no memory saving to keep it in memory. Instead, write it out to backing device. -- cgit From 4822e902f9bdffaea2817471365e000966f0d1a1 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 11 Oct 2018 10:07:06 +0300 Subject: block: describe difference between flags IO_STAT and STATS This adds reasonable comments, but they definitely need better names.
Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dee46c20701b..61207560e826 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -108,7 +108,7 @@ typedef __u32 __bitwise req_flags_t; #define RQF_QUIET ((__force req_flags_t)(1 << 11)) /* elevator private data attached */ #define RQF_ELVPRIV ((__force req_flags_t)(1 << 12)) -/* account I/O stat */ +/* account into disk and partition IO statistics */ #define RQF_IO_STAT ((__force req_flags_t)(1 << 13)) /* request came from our alloc pool */ #define RQF_ALLOCED ((__force req_flags_t)(1 << 14)) @@ -116,7 +116,7 @@ typedef __u32 __bitwise req_flags_t; #define RQF_PM ((__force req_flags_t)(1 << 15)) /* on IO scheduler merge hash */ #define RQF_HASHED ((__force req_flags_t)(1 << 16)) -/* IO stats tracking on */ +/* track IO completion time */ #define RQF_STATS ((__force req_flags_t)(1 << 17)) /* Look at ->special_vec for the actual data payload instead of the bio chain. */ @@ -685,7 +685,7 @@ struct request_queue { #define QUEUE_FLAG_FAIL_IO 7 /* fake timeout */ #define QUEUE_FLAG_NONROT 9 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ -#define QUEUE_FLAG_IO_STAT 10 /* do IO stats */ +#define QUEUE_FLAG_IO_STAT 10 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_DISCARD 11 /* supports DISCARD */ #define QUEUE_FLAG_NOXMERGES 12 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 13 /* Contributes to random pool */ @@ -699,7 +699,7 @@ struct request_queue { #define QUEUE_FLAG_FUA 21 /* device supports FUA writes */ #define QUEUE_FLAG_FLUSH_NQ 22 /* flush not queueuable */ #define QUEUE_FLAG_DAX 23 /* device supports DAX */ -#define QUEUE_FLAG_STATS 24 /* track rq completion times */ +#define QUEUE_FLAG_STATS 24 /* track IO start and completion times */ #define QUEUE_FLAG_POLL_STATS 25 /* collecting stats for hybrid polling */ #define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */ #define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */ -- cgit From a2fa8a19b75b5a649db2a6bec892ff5e03a23e76 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Wed, 10 Oct 2018 23:16:50 +0200 Subject: cfq: clear queue pointers from cfqg after unpinning them in cfq_pd_offline BFQ is already doing a similar thing in its .pd_offline_fn() method implementation. While it seems that, after commit 4c6994806f70 ("blk-throttle: fix race between blkcg_bio_issue_check() and cgroup_rmdir()") was reverted, leaving these pointers intact no longer causes crashes, clearing them is still a sensible thing to do to make the code more robust. Signed-off-by: Maciej S.
Szmigiero Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index d219e9a1af65..6a3d87dd3c1a 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1644,14 +1644,20 @@ static void cfq_pd_offline(struct blkg_policy_data *pd) int i; for (i = 0; i < IOPRIO_BE_NR; i++) { - if (cfqg->async_cfqq[0][i]) + if (cfqg->async_cfqq[0][i]) { cfq_put_queue(cfqg->async_cfqq[0][i]); - if (cfqg->async_cfqq[1][i]) + cfqg->async_cfqq[0][i] = NULL; + } + if (cfqg->async_cfqq[1][i]) { cfq_put_queue(cfqg->async_cfqq[1][i]); + cfqg->async_cfqq[1][i] = NULL; + } } - if (cfqg->async_idle_cfqq) + if (cfqg->async_idle_cfqq) { cfq_put_queue(cfqg->async_idle_cfqq); + cfqg->async_idle_cfqq = NULL; + } /* * @blkg is going offline and will be ignored by -- cgit From 2d29c9f89fcd9bf408fcdaaf515c90a169f22ecd Mon Sep 17 00:00:00 2001 From: Federico Motta Date: Fri, 12 Oct 2018 11:55:57 +0200 Subject: block, bfq: improve asymmetric scenarios detection bfq defines as asymmetric a scenario where an active entity, say E (representing either a single bfq_queue or a group of other entities), has a higher weight than some other entities. If the entity E does sync I/O in such a scenario, then bfq plugs the dispatch of the I/O of the other entities in the following situation: E is in service but temporarily has no pending I/O request. In fact, without this plugging, all the times that E stops being temporarily idle, it may find the internal queues of the storage device already filled with an out-of-control number of extra requests, from other entities. So E may have to wait for the service of these extra requests, before finally having its own requests served. This may easily break service guarantees, with E getting less than its fair share of the device throughput. Usually, the end result is that E gets the same fraction of the throughput as the other entities, instead of getting more, according to its higher weight. Yet there are two other more subtle cases where E, even if its weight is actually equal to or even lower than the weight of any other active entities, may get less than its fair share of the throughput in case the above I/O plugging is not performed: 1. other entities issue larger requests than E; 2. other entities contain more active child entities than E (or in general tend to have more backlog than E). In the first case, other entities may get more service than E because they get larger requests, than those of E, served during the temporary idle periods of E. In the second case, other entities get more service because, by having many child entities, they have many requests ready for dispatching while E is temporarily idle. This commit addresses this issue by extending the definition of asymmetric scenario: a scenario is asymmetric when - active entities representing bfq_queues have differentiated weights, as in the original definition or (inclusive) - one or more entities representing groups of entities are active. This broader definition makes sure that I/O plugging will be performed in all the above cases, provided that there is at least one active group. Of course, this definition is very coarse, so it will trigger I/O plugging also in cases where it is not needed, such as, e.g., multiple active entities with just one child each, and all with the same I/O-request size. The reason for this coarse definition is just that a finer-grained definition would be rather heavy to compute. 
On the opposite end, even this new definition does not trigger I/O plugging in all cases where there is no active group, and all bfq_queues have the same weight. So, in these cases some unfairness may occur if there are asymmetries in I/O-request sizes. We made this choice because I/O plugging may lower throughput, and probably a user that has not created any group cares more about throughput than about perfect fairness. At any rate, as for possible applications that may care about service guarantees, bfq already guarantees a high responsiveness and a low latency to soft real-time applications automatically. Signed-off-by: Federico Motta Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 223 +++++++++++++++++++++++++++++----------------------- block/bfq-iosched.h | 27 +++---- block/bfq-wf2q.c | 36 +++++---- 3 files changed, 155 insertions(+), 131 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 1a1b80dfd69d..6075100f03a5 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -624,12 +624,13 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) } /* - * Tell whether there are active queues or groups with differentiated weights. + * Tell whether there are active queues with different weights or + * active groups. */ -static bool bfq_differentiated_weights(struct bfq_data *bfqd) +static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd) { /* - * For weights to differ, at least one of the trees must contain + * For queue weights to differ, queue_weights_tree must contain * at least two nodes. */ return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) && @@ -637,9 +638,7 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd) bfqd->queue_weights_tree.rb_node->rb_right) #ifdef CONFIG_BFQ_GROUP_IOSCHED ) || - (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) && - (bfqd->group_weights_tree.rb_node->rb_left || - bfqd->group_weights_tree.rb_node->rb_right) + (bfqd->num_active_groups > 0 #endif ); } @@ -657,26 +656,25 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd) * 3) all active groups at the same level in the groups tree have the same * number of children. * - * Unfortunately, keeping the necessary state for evaluating exactly the - * above symmetry conditions would be quite complex and time-consuming. - * Therefore this function evaluates, instead, the following stronger - * sub-conditions, for which it is much easier to maintain the needed - * state: + * Unfortunately, keeping the necessary state for evaluating exactly + * the last two symmetry sub-conditions above would be quite complex + * and time consuming. Therefore this function evaluates, instead, + * only the following stronger two sub-conditions, for which it is + * much easier to maintain the needed state: * 1) all active queues have the same weight, - * 2) all active groups have the same weight, - * 3) all active groups have at most one active child each. - * In particular, the last two conditions are always true if hierarchical - * support and the cgroups interface are not enabled, thus no state needs - * to be maintained in this case. + * 2) there are no active groups. + * In particular, the last condition is always true if hierarchical + * support or the cgroups interface are not enabled, thus no state + * needs to be maintained in this case. 
*/ static bool bfq_symmetric_scenario(struct bfq_data *bfqd) { - return !bfq_differentiated_weights(bfqd); + return !bfq_varied_queue_weights_or_active_groups(bfqd); } /* * If the weight-counter tree passed as input contains no counter for - * the weight of the input entity, then add that counter; otherwise just + * the weight of the input queue, then add that counter; otherwise just * increment the existing counter. * * Note that weight-counter trees contain few nodes in mostly symmetric @@ -687,25 +685,25 @@ static bool bfq_symmetric_scenario(struct bfq_data *bfqd) * In most scenarios, the rate at which nodes are created/destroyed * should be low too. */ -void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity, +void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct rb_root *root) { + struct bfq_entity *entity = &bfqq->entity; struct rb_node **new = &(root->rb_node), *parent = NULL; /* - * Do not insert if the entity is already associated with a + * Do not insert if the queue is already associated with a * counter, which happens if: - * 1) the entity is associated with a queue, - * 2) a request arrival has caused the queue to become both + * 1) a request arrival has caused the queue to become both * non-weight-raised, and hence change its weight, and * backlogged; in this respect, each of the two events * causes an invocation of this function, - * 3) this is the invocation of this function caused by the + * 2) this is the invocation of this function caused by the * second event. This second invocation is actually useless, * and we handle this fact by exiting immediately. More * efficient or clearer solutions might possibly be adopted. */ - if (entity->weight_counter) + if (bfqq->weight_counter) return; while (*new) { @@ -715,7 +713,7 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity, parent = *new; if (entity->weight == __counter->weight) { - entity->weight_counter = __counter; + bfqq->weight_counter = __counter; goto inc_counter; } if (entity->weight < __counter->weight) @@ -724,66 +722,67 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity, new = &((*new)->rb_right); } - entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), - GFP_ATOMIC); + bfqq->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), + GFP_ATOMIC); /* * In the unlucky event of an allocation failure, we just - * exit. This will cause the weight of entity to not be - * considered in bfq_differentiated_weights, which, in its - * turn, causes the scenario to be deemed wrongly symmetric in - * case entity's weight would have been the only weight making - * the scenario asymmetric. On the bright side, no unbalance - * will however occur when entity becomes inactive again (the - * invocation of this function is triggered by an activation - * of entity). In fact, bfq_weights_tree_remove does nothing - * if !entity->weight_counter. + * exit. This will cause the weight of queue to not be + * considered in bfq_varied_queue_weights_or_active_groups, + * which, in its turn, causes the scenario to be deemed + * wrongly symmetric in case bfqq's weight would have been + * the only weight making the scenario asymmetric. On the + * bright side, no unbalance will however occur when bfqq + * becomes inactive again (the invocation of this function + * is triggered by an activation of queue). In fact, + * bfq_weights_tree_remove does nothing if + * !bfqq->weight_counter. 
*/ - if (unlikely(!entity->weight_counter)) + if (unlikely(!bfqq->weight_counter)) return; - entity->weight_counter->weight = entity->weight; - rb_link_node(&entity->weight_counter->weights_node, parent, new); - rb_insert_color(&entity->weight_counter->weights_node, root); + bfqq->weight_counter->weight = entity->weight; + rb_link_node(&bfqq->weight_counter->weights_node, parent, new); + rb_insert_color(&bfqq->weight_counter->weights_node, root); inc_counter: - entity->weight_counter->num_active++; + bfqq->weight_counter->num_active++; } /* - * Decrement the weight counter associated with the entity, and, if the + * Decrement the weight counter associated with the queue, and, if the * counter reaches 0, remove the counter from the tree. * See the comments to the function bfq_weights_tree_add() for considerations * about overhead. */ void __bfq_weights_tree_remove(struct bfq_data *bfqd, - struct bfq_entity *entity, + struct bfq_queue *bfqq, struct rb_root *root) { - if (!entity->weight_counter) + if (!bfqq->weight_counter) return; - entity->weight_counter->num_active--; - if (entity->weight_counter->num_active > 0) + bfqq->weight_counter->num_active--; + if (bfqq->weight_counter->num_active > 0) goto reset_entity_pointer; - rb_erase(&entity->weight_counter->weights_node, root); - kfree(entity->weight_counter); + rb_erase(&bfqq->weight_counter->weights_node, root); + kfree(bfqq->weight_counter); reset_entity_pointer: - entity->weight_counter = NULL; + bfqq->weight_counter = NULL; } /* - * Invoke __bfq_weights_tree_remove on bfqq and all its inactive - * parent entities. + * Invoke __bfq_weights_tree_remove on bfqq and decrement the number + * of active groups for each queue's inactive parent entity. */ void bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_queue *bfqq) { struct bfq_entity *entity = bfqq->entity.parent; - __bfq_weights_tree_remove(bfqd, &bfqq->entity, + __bfq_weights_tree_remove(bfqd, bfqq, &bfqd->queue_weights_tree); for_each_entity(entity) { @@ -797,17 +796,13 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd, * next_in_service for details on why * in_service_entity must be checked too). * - * As a consequence, the weight of entity is - * not to be removed. In addition, if entity - * is active, then its parent entities are - * active as well, and thus their weights are - * not to be removed either. In the end, this - * loop must stop here. + * As a consequence, its parent entities are + * active as well, and thus this loop must + * stop here. */ break; } - __bfq_weights_tree_remove(bfqd, entity, - &bfqd->group_weights_tree); + bfqd->num_active_groups--; } } @@ -3506,9 +3501,11 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * symmetric scenario where: * (i) each of these processes must get the same throughput as * the others; - * (ii) all these processes have the same I/O pattern - (either sequential or random). - * In fact, in such a scenario, the drive will tend to treat + * (ii) the I/O of each process has the same properties, in + * terms of locality (sequential or random), direction + * (reads or writes), request sizes, greediness + * (from I/O-bound to sporadic), and so on. 
+ * In fact, in such a scenario, the drive tends to treat * the requests of each of these processes in about the same * way as the requests of the others, and thus to provide * each of these processes with about the same throughput @@ -3517,18 +3514,50 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * certainly needed to guarantee that bfqq receives its * assigned fraction of the device throughput (see [1] for * details). + * The problem is that idling may significantly reduce + * throughput with certain combinations of types of I/O and + * devices. An important example is sync random I/O, on flash + * storage with command queueing. So, unless bfqq falls in the + * above cases where idling also boosts throughput, it would + * be important to check conditions (i) and (ii) accurately, + * so as to avoid idling when not strictly needed for service + * guarantees. + * + * Unfortunately, it is extremely difficult to thoroughly + * check condition (ii). And, in case there are active groups, + * it becomes very difficult to check condition (i) too. In + * fact, if there are active groups, then, for condition (i) + * to become false, it is enough that an active group contains + * more active processes or sub-groups than some other active + * group. We address this issue with the following bi-modal + * behavior, implemented in the function + * bfq_symmetric_scenario(). * - * We address this issue by controlling, actually, only the - * symmetry sub-condition (i), i.e., provided that - * sub-condition (i) holds, idling is not performed, - * regardless of whether sub-condition (ii) holds. In other - * words, only if sub-condition (i) holds, then idling is + * If there are active groups, then the scenario is tagged as + * asymmetric, conservatively, without checking any of the + * conditions (i) and (ii). So the device is idled for bfqq. + * This behavior matches also the fact that groups are created + * exactly if controlling I/O (to preserve bandwidth and + * latency guarantees) is a primary concern. + * + * On the opposite end, if there are no active groups, then + * only condition (i) is actually controlled, i.e., provided + * that condition (i) holds, idling is not performed, + * regardless of whether condition (ii) holds. In other words, + * only if condition (i) does not hold, then idling is * allowed, and the device tends to be prevented from queueing - * many requests, possibly of several processes. The reason - * for not controlling also sub-condition (ii) is that we - * exploit preemption to preserve guarantees in case of - * symmetric scenarios, even if (ii) does not hold, as - * explained in the next two paragraphs. + * many requests, possibly of several processes. Since there + * are no active groups, then, to control condition (i) it is + * enough to check whether all active queues have the same + * weight. + * + * Not checking condition (ii) evidently exposes bfqq to the + * risk of getting less throughput than its fair share. + * However, for queues with the same weight, a further + * mechanism, preemption, mitigates or even eliminates this + * problem. And it does so without consequences on overall + * throughput. This mechanism and its benefits are explained + * in the next three paragraphs. 
* * Even if a queue, say Q, is expired when it remains idle, Q * can still preempt the new in-service queue if the next @@ -3542,11 +3571,7 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * idling allows the internal queues of the device to contain * many requests, and thus to reorder requests, we can rather * safely assume that the internal scheduler still preserves a - * minimum of mid-term fairness. The motivation for using - * preemption instead of idling is that, by not idling, - * service guarantees are preserved without minimally - * sacrificing throughput. In other words, both a high - * throughput and its desired distribution are obtained. + * minimum of mid-term fairness. * * More precisely, this preemption-based, idleless approach * provides fairness in terms of IOPS, and not sectors per @@ -3565,27 +3590,27 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * 1024/8 times as high as the service received by the other * queue. * - * On the other hand, device idling is performed, and thus - * pure sector-domain guarantees are provided, for the - * following queues, which are likely to need stronger - * throughput guarantees: weight-raised queues, and queues - * with a higher weight than other queues. When such queues - * are active, sub-condition (i) is false, which triggers - * device idling. + * The motivation for using preemption instead of idling (for + * queues with the same weight) is that, by not idling, + * service guarantees are preserved (completely or at least in + * part) without minimally sacrificing throughput. And, if + * there is no active group, then the primary expectation for + * this device is probably a high throughput. * - * According to the above considerations, the next variable is - * true (only) if sub-condition (i) holds. To compute the - * value of this variable, we not only use the return value of - * the function bfq_symmetric_scenario(), but also check - * whether bfqq is being weight-raised, because - * bfq_symmetric_scenario() does not take into account also - * weight-raised queues (see comments on - * bfq_weights_tree_add()). In particular, if bfqq is being - * weight-raised, it is important to idle only if there are - * other, non-weight-raised queues that may steal throughput - * to bfqq. Actually, we should be even more precise, and - * differentiate between interactive weight raising and - * soft real-time weight raising. + * We are now left only with explaining the additional + * compound condition that is checked below for deciding + * whether the scenario is asymmetric. To explain this + * compound condition, we need to add that the function + * bfq_symmetric_scenario checks the weights of only + * non-weight-raised queues, for efficiency reasons (see + * comments on bfq_weights_tree_add()). Then the fact that + * bfqq is weight-raised is checked explicitly here. More + * precisely, the compound condition below takes into account + * also the fact that, even if bfqq is being weight-raised, + * the scenario is still symmetric if all active queues happen + * to be weight-raised. Actually, we should be even more + * precise here, and differentiate between interactive weight + * raising and soft real-time weight raising. 
* * As a side note, it is worth considering that the above * device-idling countermeasures may however fail in the @@ -5392,7 +5417,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) bfqd->idle_slice_timer.function = bfq_idle_slice_timer; bfqd->queue_weights_tree = RB_ROOT; - bfqd->group_weights_tree = RB_ROOT; + bfqd->num_active_groups = 0; INIT_LIST_HEAD(&bfqd->active_list); INIT_LIST_HEAD(&bfqd->idle_list); diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 37d627afdc2e..77651d817ecd 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -108,15 +108,14 @@ struct bfq_sched_data { }; /** - * struct bfq_weight_counter - counter of the number of all active entities + * struct bfq_weight_counter - counter of the number of all active queues * with a given weight. */ struct bfq_weight_counter { - unsigned int weight; /* weight of the entities this counter refers to */ - unsigned int num_active; /* nr of active entities with this weight */ + unsigned int weight; /* weight of the queues this counter refers to */ + unsigned int num_active; /* nr of active queues with this weight */ /* - * Weights tree member (see bfq_data's @queue_weights_tree and - * @group_weights_tree) + * Weights tree member (see bfq_data's @queue_weights_tree) */ struct rb_node weights_node; }; @@ -151,8 +150,6 @@ struct bfq_weight_counter { struct bfq_entity { /* service_tree member */ struct rb_node rb_node; - /* pointer to the weight counter associated with this entity */ - struct bfq_weight_counter *weight_counter; /* * Flag, true if the entity is on a tree (either the active or @@ -266,6 +263,9 @@ struct bfq_queue { /* entity representing this queue in the scheduler */ struct bfq_entity entity; + /* pointer to the weight counter associated with this entity */ + struct bfq_weight_counter *weight_counter; + /* maximum budget allowed from the feedback mechanism */ int max_budget; /* budget expiration (in jiffies) */ @@ -449,14 +449,9 @@ struct bfq_data { */ struct rb_root queue_weights_tree; /* - * rbtree of non-queue @bfq_entity weight counters, sorted by - * weight. Used to keep track of whether all @bfq_groups have - * the same weight. The tree contains one counter for each - * distinct weight associated to some active @bfq_group (see - * the comments to the functions bfq_weights_tree_[add|remove] - * for further details). + * number of groups with requests still waiting for completion */ - struct rb_root group_weights_tree; + unsigned int num_active_groups; /* * Number of bfq_queues containing requests (including the @@ -851,10 +846,10 @@ struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync); void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync); struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic); void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq); -void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity, +void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct rb_root *root); void __bfq_weights_tree_remove(struct bfq_data *bfqd, - struct bfq_entity *entity, + struct bfq_queue *bfqq, struct rb_root *root); void bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_queue *bfqq); diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index ff7c2d470bb8..476b5a90a5a4 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -788,25 +788,29 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, new_weight = entity->orig_weight * (bfqq ? 
bfqq->wr_coeff : 1); /* - * If the weight of the entity changes, remove the entity - * from its old weight counter (if there is a counter - * associated with the entity), and add it to the counter - * associated with its new weight. + * If the weight of the entity changes, and the entity is a + * queue, remove the entity from its old weight counter (if + * there is a counter associated with the entity). */ if (prev_weight != new_weight) { - root = bfqq ? &bfqd->queue_weights_tree : - &bfqd->group_weights_tree; - __bfq_weights_tree_remove(bfqd, entity, root); + if (bfqq) { + root = &bfqd->queue_weights_tree; + __bfq_weights_tree_remove(bfqd, bfqq, root); + } else + bfqd->num_active_groups--; } entity->weight = new_weight; /* - * Add the entity to its weights tree only if it is - * not associated with a weight-raised queue. + * Add the entity, if it is not a weight-raised queue, + * to the counter associated with its new weight. */ - if (prev_weight != new_weight && - (bfqq ? bfqq->wr_coeff == 1 : 1)) - /* If we get here, root has been initialized. */ - bfq_weights_tree_add(bfqd, entity, root); + if (prev_weight != new_weight) { + if (bfqq && bfqq->wr_coeff == 1) { + /* If we get here, root has been initialized. */ + bfq_weights_tree_add(bfqd, bfqq, root); + } else + bfqd->num_active_groups++; + } new_st->wsum += entity->weight; @@ -1012,9 +1016,9 @@ static void __bfq_activate_entity(struct bfq_entity *entity, if (!bfq_entity_to_bfqq(entity)) { /* bfq_group */ struct bfq_group *bfqg = container_of(entity, struct bfq_group, entity); + struct bfq_data *bfqd = bfqg->bfqd; - bfq_weights_tree_add(bfqg->bfqd, entity, - &bfqd->group_weights_tree); + bfqd->num_active_groups++; } #endif @@ -1692,7 +1696,7 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) if (!bfqq->dispatched) if (bfqq->wr_coeff == 1) - bfq_weights_tree_add(bfqd, &bfqq->entity, + bfq_weights_tree_add(bfqd, bfqq, &bfqd->queue_weights_tree); if (bfqq->wr_coeff > 1) -- cgit From 477e19dedc9d3e1f4443a1d4ae00572a988120ea Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Fri, 12 Oct 2018 18:07:25 +0800 Subject: blk-mq: adjust debugfs and sysfs register when updating nr_hw_queues blk-mq debugfs and sysfs entries need to be removed before updating the queue map; otherwise we get wrong results there. This patch fixes it and removes the redundant debugfs and sysfs register/unregister operations during __blk_mq_update_nr_hw_queues.
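Schematically, the ordering this patch establishes is the following minimal sketch (the wrapper name is illustrative; the helpers are the ones used in the hunks below, inside __blk_mq_update_nr_hw_queues()):

/* Sketch only: unregister, remap, then re-register. */
static void update_nr_hw_queues_ordered(struct blk_mq_tag_set *set,
					int nr_hw_queues)
{
	struct request_queue *q;

	/* 1) drop stale debugfs/sysfs entries before the map changes */
	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		blk_mq_debugfs_unregister_hctxs(q);
		blk_mq_sysfs_unregister(q);
	}

	/* 2) change the count, rebuild the map, remap sw queues */
	set->nr_hw_queues = nr_hw_queues;
	blk_mq_update_queue_map(set);
	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		blk_mq_realloc_hw_ctxs(set, q);
		blk_mq_map_swqueue(q);
	}

	/* 3) only now re-register against the new mapping */
	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		blk_mq_sysfs_register(q);
		blk_mq_debugfs_register_hctxs(q);
	}
}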
Signed-off-by: Jianchao Wang Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 39 ++++++++++++--------------------------- 1 file changed, 12 insertions(+), 27 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 89bd9cb9defc..99a72c650728 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2154,8 +2154,6 @@ static void blk_mq_exit_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { - blk_mq_debugfs_unregister_hctx(hctx); - if (blk_mq_hw_queue_mapped(hctx)) blk_mq_tag_idle(hctx); @@ -2182,6 +2180,7 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, queue_for_each_hw_ctx(q, hctx, i) { if (i == nr_queue) break; + blk_mq_debugfs_unregister_hctx(hctx); blk_mq_exit_hctx(q, set, hctx, i); } } @@ -2239,8 +2238,6 @@ static int blk_mq_init_hctx(struct request_queue *q, if (hctx->flags & BLK_MQ_F_BLOCKING) init_srcu_struct(hctx->srcu); - blk_mq_debugfs_register_hctx(q, hctx); - return 0; free_fq: @@ -2529,8 +2526,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, int i, j; struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx; - blk_mq_sysfs_unregister(q); - /* protect against switching io scheduler */ mutex_lock(&q->sysfs_lock); for (i = 0; i < set->nr_hw_queues; i++) { @@ -2578,7 +2573,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, } q->nr_hw_queues = i; mutex_unlock(&q->sysfs_lock); - blk_mq_sysfs_register(q); } struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, @@ -2676,25 +2670,6 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); } -/* Basically redo blk_mq_init_queue with queue frozen */ -static void blk_mq_queue_reinit(struct request_queue *q) -{ - WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)); - - blk_mq_debugfs_unregister_hctxs(q); - blk_mq_sysfs_unregister(q); - - /* - * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe - * we should change hctx numa_node according to the new topology (this - * involves freeing and re-allocating memory, worth doing?) - */ - blk_mq_map_swqueue(q); - - blk_mq_sysfs_register(q); - blk_mq_debugfs_register_hctxs(q); -} - static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) { int i; @@ -3004,11 +2979,21 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (!blk_mq_elv_switch_none(&head, q)) goto switch_back; + list_for_each_entry(q, &set->tag_list, tag_set_list) { + blk_mq_debugfs_unregister_hctxs(q); + blk_mq_sysfs_unregister(q); + } + set->nr_hw_queues = nr_hw_queues; blk_mq_update_queue_map(set); list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_realloc_hw_ctxs(set, q); - blk_mq_queue_reinit(q); + blk_mq_map_swqueue(q); + } + + list_for_each_entry(q, &set->tag_list, tag_set_list) { + blk_mq_sysfs_register(q); + blk_mq_debugfs_register_hctxs(q); } switch_back: -- cgit From 5b202853ffbc54b29f23c4b1b5f3948efab489a2 Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Fri, 12 Oct 2018 18:07:26 +0800 Subject: blk-mq: change gfp flags to GFP_NOIO in blk_mq_realloc_hw_ctxs blk_mq_realloc_hw_ctxs could be invoked while updating hw queues. At that moment, IO is blocked. Change the gfp flags from GFP_KERNEL to GFP_NOIO to avoid hanging forever on memory allocation in blk_mq_realloc_hw_ctxs.
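The hazard with GFP_KERNEL here is that the allocation may enter direct reclaim and wait on I/O that cannot complete while the queues are frozen. A minimal sketch of the allocation pattern the hunks below adopt (flag combination as in the patch):

	/*
	 * Sketch only: allocate per-hctx memory while I/O is blocked.
	 * GFP_NOIO keeps reclaim from issuing I/O; __GFP_NOWARN and
	 * __GFP_NORETRY make failure quiet and cheap so callers can
	 * fall back.
	 */
	hctx = kzalloc_node(blk_mq_hw_ctx_size(set),
			    GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node);
	if (!hctx)
		return NULL;

An alternative would be bracketing the whole section with memalloc_noio_save()/memalloc_noio_restore(); this patch opts for explicit flags at each call site.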
Signed-off-by: Jianchao Wang Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-flush.c | 6 +++--- block/blk-mq.c | 17 ++++++++++------- block/blk.h | 2 +- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index f12d2b65e5a5..c5539eed0202 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1163,7 +1163,7 @@ int blk_init_allocated_queue(struct request_queue *q) { WARN_ON_ONCE(q->mq_ops); - q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size); + q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size, GFP_KERNEL); if (!q->fq) return -ENOMEM; diff --git a/block/blk-flush.c b/block/blk-flush.c index ce41f666de3e..8b44b86779da 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -566,12 +566,12 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, EXPORT_SYMBOL(blkdev_issue_flush); struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, - int node, int cmd_size) + int node, int cmd_size, gfp_t flags) { struct blk_flush_queue *fq; int rq_sz = sizeof(struct request); - fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node); + fq = kzalloc_node(sizeof(*fq), flags, node); if (!fq) goto fail; @@ -579,7 +579,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, spin_lock_init(&fq->mq_flush_lock); rq_sz = round_up(rq_sz + cmd_size, cache_line_size()); - fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node); + fq->flush_rq = kzalloc_node(rq_sz, flags, node); if (!fq->flush_rq) goto fail_rq; diff --git a/block/blk-mq.c b/block/blk-mq.c index 99a72c650728..6b734461fd39 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2210,12 +2210,12 @@ static int blk_mq_init_hctx(struct request_queue *q, * runtime */ hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *), - GFP_KERNEL, node); + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node); if (!hctx->ctxs) goto unregister_cpu_notifier; - if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), GFP_KERNEL, - node)) + if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node)) goto free_ctxs; hctx->nr_ctx = 0; @@ -2228,7 +2228,8 @@ static int blk_mq_init_hctx(struct request_queue *q, set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) goto free_bitmap; - hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size); + hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size, + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY); if (!hctx->fq) goto exit_hctx; @@ -2536,12 +2537,14 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, node = blk_mq_hw_queue_to_node(q->mq_map, i); hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set), - GFP_KERNEL, node); + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, + node); if (!hctxs[i]) break; - if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL, - node)) { + if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, + node)) { kfree(hctxs[i]); hctxs[i] = NULL; break; diff --git a/block/blk.h b/block/blk.h index 58c030f727e9..3d2aecba96a4 100644 --- a/block/blk.h +++ b/block/blk.h @@ -125,7 +125,7 @@ static inline void __blk_get_queue(struct request_queue *q) } struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, - int node, int cmd_size); + int node, int cmd_size, gfp_t flags); void blk_free_flush_queue(struct blk_flush_queue *q); int blk_init_rl(struct request_list *rl, struct request_queue *q, -- cgit From 34d11ffac1f56c3895dad32153abd6814452dc77 Mon Sep 17 00:00:00 2001 From: 
Jianchao Wang Date: Fri, 12 Oct 2018 18:07:27 +0800 Subject: blk-mq: realloc hctx when hw queue is mapped to another node When the hw queues and mq_map are updated, an hctx could be mapped to a different numa node. At that point we need to realloc the hctx. If that fails, keep using the previous hctx. Signed-off-by: Jianchao Wang Signed-off-by: Jens Axboe --- block/blk-mq.c | 82 +++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 26 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 6b734461fd39..941f51380077 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2521,6 +2521,39 @@ static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set) return hw_ctx_size; } +static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx( + struct blk_mq_tag_set *set, struct request_queue *q, + int hctx_idx, int node) +{ + struct blk_mq_hw_ctx *hctx; + + hctx = kzalloc_node(blk_mq_hw_ctx_size(set), + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, + node); + if (!hctx) + return NULL; + + if (!zalloc_cpumask_var_node(&hctx->cpumask, + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, + node)) { + kfree(hctx); + return NULL; + } + + atomic_set(&hctx->nr_active, 0); + hctx->numa_node = node; + hctx->queue_num = hctx_idx; + + if (blk_mq_init_hctx(q, set, hctx, hctx_idx)) { + free_cpumask_var(hctx->cpumask); + kfree(hctx); + return NULL; + } + blk_mq_hctx_kobj_init(hctx); + + return hctx; +} + static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, struct request_queue *q) { @@ -2531,37 +2564,34 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, mutex_lock(&q->sysfs_lock); for (i = 0; i < set->nr_hw_queues; i++) { int node; - - if (hctxs[i]) - continue; + struct blk_mq_hw_ctx *hctx; node = blk_mq_hw_queue_to_node(q->mq_map, i); - hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set), - GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, - node); - if (!hctxs[i]) - break; - - if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, - GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, - node)) { - kfree(hctxs[i]); - hctxs[i] = NULL; - break; - } - - atomic_set(&hctxs[i]->nr_active, 0); - hctxs[i]->numa_node = node; - hctxs[i]->queue_num = i; + /* + * If the hw queue has been mapped to another numa node, + * we need to realloc the hctx. If allocation fails, fallback + * to use the previous one. + */ + if (hctxs[i] && (hctxs[i]->numa_node == node)) + continue; - if (blk_mq_init_hctx(q, set, hctxs[i], i)) { - free_cpumask_var(hctxs[i]->cpumask); - kfree(hctxs[i]); - hctxs[i] = NULL; - break; + hctx = blk_mq_alloc_and_init_hctx(set, q, i, node); + if (hctx) { + if (hctxs[i]) { + blk_mq_exit_hctx(q, set, hctxs[i], i); + kobject_put(&hctxs[i]->kobj); + } + hctxs[i] = hctx; + } else { + if (hctxs[i]) + pr_warn("Allocate new hctx on node %d fails,\ + fallback to previous one on node %d\n", + node, hctxs[i]->numa_node); + else + break; } - blk_mq_hctx_kobj_init(hctxs[i]); } + for (j = i; j < q->nr_hw_queues; j++) { struct blk_mq_hw_ctx *hctx = hctxs[j]; -- cgit From e01ad46d53b59720c6ae69963ee1756506954c85 Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Fri, 12 Oct 2018 18:07:28 +0800 Subject: blk-mq: fallback to previous nr_hw_queues when updating fails When we try to increase nr_hw_queues, we may fail due to a shortage of memory or some other reason; blk_mq_realloc_hw_ctxs then stops, and some entries in q->queue_hw_ctx are left NULL.
However, because the queue map has already been updated with the new nr_hw_queues, some cpus may have been mapped to a hw queue that just hit the allocation failure, so blk_mq_map_queue could return NULL. This causes a panic in the subsequent blk_mq_map_swqueue. To fix it, when increasing nr_hw_queues fails, fall back to the previous nr_hw_queues and post a warning. At the same time, a driver's .map_queues usually uses completion irq affinity to map hw queues and cpus; after falling back to the previous nr_hw_queues, some cpus would be left without a mapping to any hw queue, so use the default blk_mq_map_queues instead. Reported-by: syzbot+83e8cbe702263932d9d4@syzkaller.appspotmail.com Signed-off-by: Jianchao Wang Signed-off-by: Jens Axboe --- block/blk-mq.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 941f51380077..c2ecd64a2403 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2557,7 +2557,7 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx( static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, struct request_queue *q) { - int i, j; + int i, j, end; struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx; /* protect against switching io scheduler */ @@ -2591,8 +2591,20 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, break; } } + /* + * Increasing nr_hw_queues fails. Free the newly allocated + * hctxs and keep the previous q->nr_hw_queues. + */ + if (i != set->nr_hw_queues) { + j = q->nr_hw_queues; + end = i; + } else { + j = i; + end = q->nr_hw_queues; + q->nr_hw_queues = set->nr_hw_queues; + } - for (j = i; j < q->nr_hw_queues; j++) { + for (; j < end; j++) { struct blk_mq_hw_ctx *hctx = hctxs[j]; if (hctx) { @@ -2604,7 +2616,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, } } - q->nr_hw_queues = i; mutex_unlock(&q->sysfs_lock); } @@ -2989,6 +3000,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, { struct request_queue *q; LIST_HEAD(head); + int prev_nr_hw_queues; lockdep_assert_held(&set->tag_list_lock); @@ -3017,10 +3029,19 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, blk_mq_sysfs_unregister(q); } + prev_nr_hw_queues = set->nr_hw_queues; set->nr_hw_queues = nr_hw_queues; blk_mq_update_queue_map(set); +fallback: list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_realloc_hw_ctxs(set, q); + if (q->nr_hw_queues != set->nr_hw_queues) { + pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n", + nr_hw_queues, prev_nr_hw_queues); + set->nr_hw_queues = prev_nr_hw_queues; + blk_mq_map_queues(set); + goto fallback; + } blk_mq_map_swqueue(q); } -- cgit From 3582dd291788e9441c3ba9047e55089edb98da5c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 12 Oct 2018 10:03:14 -0600 Subject: aoe: convert aoeblk to blk-mq Straightforward conversion - instead of rewriting the internal buffer retrieval logic, just replace the previous elevator peeking with an internal list of requests. Reviewed-by: "Ed L. Cashin" Signed-off-by: Jens Axboe --- drivers/block/aoe/aoe.h | 4 ++++ drivers/block/aoe/aoeblk.c | 49 ++++++++++++++++++++++++++++++++++------------ drivers/block/aoe/aoecmd.c | 19 ++++++++++++------ drivers/block/aoe/aoedev.c | 14 +++++++------ 4 files changed, 61 insertions(+), 25 deletions(-) diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 015c68017a1c..7ca76ed2e71a 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -1,4 +1,6 @@ /* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms.
*/ +#include + #define VERSION "85" #define AOE_MAJOR 152 #define DEVICE_NAME "aoe" @@ -164,6 +166,8 @@ struct aoedev { struct gendisk *gd; struct dentry *debugfs; struct request_queue *blkq; + struct list_head rq_list; + struct blk_mq_tag_set tag_set; struct hd_geometry geo; sector_t ssize; struct timer_list timer; diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index ff770e7d9e52..ed26b7287256 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include @@ -268,23 +268,25 @@ aoeblk_release(struct gendisk *disk, fmode_t mode) spin_unlock_irqrestore(&d->lock, flags); } -static void -aoeblk_request(struct request_queue *q) +static blk_status_t aoeblk_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct aoedev *d; - struct request *rq; + struct aoedev *d = hctx->queue->queuedata; + + spin_lock_irq(&d->lock); - d = q->queuedata; if ((d->flags & DEVFL_UP) == 0) { pr_info_ratelimited("aoe: device %ld.%d is not up\n", d->aoemajor, d->aoeminor); - while ((rq = blk_peek_request(q))) { - blk_start_request(rq); - aoe_end_request(d, rq, 1); - } - return; + spin_unlock_irq(&d->lock); + blk_mq_start_request(bd->rq); + return BLK_STS_IOERR; } + + list_add_tail(&bd->rq->queuelist, &d->rq_list); aoecmd_work(d); + spin_unlock_irq(&d->lock); + return BLK_STS_OK; } static int @@ -339,6 +341,10 @@ static const struct block_device_operations aoe_bdops = { .owner = THIS_MODULE, }; +static const struct blk_mq_ops aoeblk_mq_ops = { + .queue_rq = aoeblk_queue_rq, +}; + /* alloc_disk and add_disk can sleep */ void aoeblk_gdalloc(void *vp) @@ -347,9 +353,11 @@ aoeblk_gdalloc(void *vp) struct gendisk *gd; mempool_t *mp; struct request_queue *q; + struct blk_mq_tag_set *set; enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, }; ulong flags; int late = 0; + int err; spin_lock_irqsave(&d->lock, flags); if (d->flags & DEVFL_GDALLOC @@ -376,10 +384,25 @@ aoeblk_gdalloc(void *vp) d->aoemajor, d->aoeminor); goto err_disk; } - q = blk_init_queue(aoeblk_request, &d->lock); - if (q == NULL) { + + set = &d->tag_set; + set->ops = &aoeblk_mq_ops; + set->nr_hw_queues = 1; + set->queue_depth = 128; + set->numa_node = NUMA_NO_NODE; + set->flags = BLK_MQ_F_SHOULD_MERGE; + err = blk_mq_alloc_tag_set(set); + if (err) { + pr_err("aoe: cannot allocate tag set for %ld.%d\n", + d->aoemajor, d->aoeminor); + goto err_mempool; + } + + q = blk_mq_init_queue(set); + if (IS_ERR(q)) { pr_err("aoe: cannot allocate block queue for %ld.%d\n", d->aoemajor, d->aoeminor); + blk_mq_free_tag_set(set); goto err_mempool; } diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 136dc507d020..bb2fba651bd2 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -813,7 +813,7 @@ rexmit_timer(struct timer_list *timer) out: if ((d->flags & DEVFL_KICKME) && d->blkq) { d->flags &= ~DEVFL_KICKME; - d->blkq->request_fn(d->blkq); + blk_mq_run_hw_queues(d->blkq, true); } d->timer.expires = jiffies + TIMERTICK; @@ -857,10 +857,12 @@ nextbuf(struct aoedev *d) return d->ip.buf; rq = d->ip.rq; if (rq == NULL) { - rq = blk_peek_request(q); + rq = list_first_entry_or_null(&d->rq_list, struct request, + queuelist); if (rq == NULL) return NULL; - blk_start_request(rq); + list_del_init(&rq->queuelist); + blk_mq_start_request(rq); d->ip.rq = rq; d->ip.nxbio = rq->bio; rq->special = (void *) rqbiocnt(rq); 
@@ -1045,6 +1047,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail) struct bio *bio; int bok; struct request_queue *q; + blk_status_t err = BLK_STS_OK; q = d->blkq; if (rq == d->ip.rq) @@ -1052,11 +1055,15 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail) do { bio = rq->bio; bok = !fastfail && !bio->bi_status; - } while (__blk_end_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size)); + if (!bok) + err = BLK_STS_IOERR; + } while (blk_update_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size)); + + __blk_mq_end_request(rq, err); /* cf. http://lkml.org/lkml/2006/10/31/28 */ if (!fastfail) - __blk_run_queue(q); + blk_mq_run_hw_queues(q, true); } static void diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index f29a140cdbc1..9063f8efbd3b 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -5,7 +5,7 @@ */ #include -#include +#include #include #include #include @@ -197,7 +197,6 @@ aoedev_downdev(struct aoedev *d) { struct aoetgt *t, **tt, **te; struct list_head *head, *pos, *nx; - struct request *rq; int i; d->flags &= ~DEVFL_UP; @@ -225,10 +224,11 @@ aoedev_downdev(struct aoedev *d) /* fast fail all pending I/O */ if (d->blkq) { - while ((rq = blk_peek_request(d->blkq))) { - blk_start_request(rq); - aoe_end_request(d, rq, 1); - } + /* UP is cleared, freeze+quiesce to insure all are errored */ + blk_mq_freeze_queue(d->blkq); + blk_mq_quiesce_queue(d->blkq); + blk_mq_unquiesce_queue(d->blkq); + blk_mq_unfreeze_queue(d->blkq); } if (d->gd) @@ -277,6 +277,7 @@ freedev(struct aoedev *d) aoedisk_rm_debugfs(d); del_gendisk(d->gd); put_disk(d->gd); + blk_mq_free_tag_set(&d->tag_set); blk_cleanup_queue(d->blkq); } t = d->targets; @@ -463,6 +464,7 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) d->ntargets = NTARGETS; INIT_WORK(&d->work, aoecmd_sleepwork); spin_lock_init(&d->lock); + INIT_LIST_HEAD(&d->rq_list); skb_queue_head_init(&d->skbpool); timer_setup(&d->timer, dummy_timer, 0); d->timer.expires = jiffies + HZ; -- cgit From 6d1f9dfde7343c4ebfb8f84dcb333af571bb3b22 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 11 Oct 2018 14:56:14 -0600 Subject: skd: fixup usage of legacy IO API We need to be using the mq variant of request requeue here. 
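For context, a minimal sketch of the API difference (the two hunks below make exactly this swap):

	/* legacy API: queue-centric, expects q->queue_lock to be held */
	blk_requeue_request(skdev->queue, req);

	/* blk-mq API: request-centric; 'true' kicks the requeue list */
	blk_mq_requeue_request(req, true);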
Fixes: ca33dd92968b ("skd: Convert to blk-mq") Signed-off-by: Jens Axboe --- drivers/block/skd_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index a85c9a622c41..80a5806ede03 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -1416,7 +1416,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev, case SKD_CHECK_STATUS_BUSY_IMMINENT: skd_log_skreq(skdev, skreq, "retry(busy)"); - blk_requeue_request(skdev->queue, req); + blk_mq_requeue_request(req, true); dev_info(&skdev->pdev->dev, "drive BUSY imminent\n"); skdev->state = SKD_DRVR_STATE_BUSY_IMMINENT; skdev->timer_countdown = SKD_TIMER_MINUTES(20); @@ -1426,7 +1426,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev, case SKD_CHECK_STATUS_REQUEUE_REQUEST: if ((unsigned long) ++req->special < SKD_MAX_RETRIES) { skd_log_skreq(skdev, skreq, "retry"); - blk_requeue_request(skdev->queue, req); + blk_mq_requeue_request(req, true); break; } /* fall through */ -- cgit From 4e6da0fe8058df9bfa937902fcd9cb2b7b89b2df Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 26 Nov 2017 13:33:11 +0100 Subject: um: Convert ubd driver to blk-mq Convert the driver to the modern blk-mq framework. As a byproduct we get rid of our open-coded restart logic and let blk-mq handle it. Signed-off-by: Richard Weinberger Signed-off-by: Jens Axboe --- arch/um/drivers/ubd_kern.c | 178 +++++++++++++++++++++++---------------------- 1 file changed, 93 insertions(+), 85 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 6ee4c56032f7..9cb0cabb4e02 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -142,7 +143,6 @@ struct cow { #define MAX_SG 64 struct ubd { - struct list_head restart; /* name (and fd, below) of the file opened for writing, either the * backing or the cow file. */ char *file; @@ -156,9 +156,12 @@ struct ubd { struct cow cow; struct platform_device pdev; struct request_queue *queue; + struct blk_mq_tag_set tag_set; spinlock_t lock; +}; + +struct ubd_pdu { struct scatterlist sg[MAX_SG]; - struct request *request; int start_sg, end_sg; sector_t rq_pos; }; @@ -182,10 +185,6 @@ struct ubd { .shared = 0, \ .cow = DEFAULT_COW, \ .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \ - .request = NULL, \ - .start_sg = 0, \ - .end_sg = 0, \ - .rq_pos = 0, \ } /* Protected by ubd_lock */ @@ -196,6 +195,12 @@ static int fake_ide = 0; static struct proc_dir_entry *proc_ide_root = NULL; static struct proc_dir_entry *proc_ide = NULL; +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd); +static int ubd_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node); + static void make_proc_ide(void) { proc_ide_root = proc_mkdir("ide", NULL); @@ -436,11 +441,8 @@ __uml_help(udb_setup, " in the boot output.\n\n" ); -static void do_ubd_request(struct request_queue * q); - /* Only changed by ubd_init, which is an initcall. */ static int thread_fd = -1; -static LIST_HEAD(restart); /* Function to read several request pointers at a time * handling fractional reads if (and as) needed */ static int bulk_req_safe_read( /* Called without dev->lock held, and only in interrupt context.
*/ static void ubd_handler(void) { - struct ubd *ubd; - struct list_head *list, *next_ele; - unsigned long flags; int n; int count; @@ -520,23 +519,17 @@ static void ubd_handler(void) return; } for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { - blk_end_request( - (*irq_req_buffer)[count]->req, - BLK_STS_OK, - (*irq_req_buffer)[count]->length - ); - kfree((*irq_req_buffer)[count]); + struct io_thread_req *io_req = (*irq_req_buffer)[count]; + int err = io_req->error ? BLK_STS_IOERR : BLK_STS_OK; + + if (!blk_update_request(io_req->req, err, io_req->length)) + __blk_mq_end_request(io_req->req, err); + + kfree(io_req); } } - reactivate_fd(thread_fd, UBD_IRQ); - list_for_each_safe(list, next_ele, &restart){ - ubd = container_of(list, struct ubd, restart); - list_del_init(&ubd->restart); - spin_lock_irqsave(&ubd->lock, flags); - do_ubd_request(ubd->queue); - spin_unlock_irqrestore(&ubd->lock, flags); - } + reactivate_fd(thread_fd, UBD_IRQ); } static irqreturn_t ubd_intr(int irq, void *dev) @@ -857,6 +850,7 @@ static void ubd_device_release(struct device *dev) struct ubd *ubd_dev = dev_get_drvdata(dev); blk_cleanup_queue(ubd_dev->queue); + blk_mq_free_tag_set(&ubd_dev->tag_set); *ubd_dev = ((struct ubd) DEFAULT_UBD); } @@ -899,6 +893,11 @@ static int ubd_disk_register(int major, u64 size, int unit, #define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9)) +static const struct blk_mq_ops ubd_mq_ops = { + .queue_rq = ubd_queue_rq, + .init_request = ubd_init_request, +}; + static int ubd_add(int n, char **error_out) { struct ubd *ubd_dev = &ubd_devs[n]; @@ -915,15 +914,24 @@ static int ubd_add(int n, char **error_out) ubd_dev->size = ROUND_BLOCK(ubd_dev->size); - INIT_LIST_HEAD(&ubd_dev->restart); - sg_init_table(ubd_dev->sg, MAX_SG); + ubd_dev->tag_set.ops = &ubd_mq_ops; + ubd_dev->tag_set.queue_depth = 64; + ubd_dev->tag_set.numa_node = NUMA_NO_NODE; + ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ubd_dev->tag_set.cmd_size = sizeof(struct ubd_pdu); + ubd_dev->tag_set.driver_data = ubd_dev; + ubd_dev->tag_set.nr_hw_queues = 1; - err = -ENOMEM; - ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock); - if (ubd_dev->queue == NULL) { - *error_out = "Failed to initialize device queue"; + err = blk_mq_alloc_tag_set(&ubd_dev->tag_set); + if (err) goto out; + + ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set); + if (IS_ERR(ubd_dev->queue)) { + err = PTR_ERR(ubd_dev->queue); + goto out_cleanup; } + ubd_dev->queue->queuedata = ubd_dev; blk_queue_write_cache(ubd_dev->queue, true, false); @@ -931,7 +939,7 @@ static int ubd_add(int n, char **error_out) err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); if(err){ *error_out = "Failed to register device"; - goto out_cleanup; + goto out_cleanup_tags; } if (fake_major != UBD_MAJOR) @@ -949,6 +957,8 @@ static int ubd_add(int n, char **error_out) out: return err; +out_cleanup_tags: + blk_mq_free_tag_set(&ubd_dev->tag_set); out_cleanup: blk_cleanup_queue(ubd_dev->queue); goto out; @@ -1333,80 +1343,78 @@ static void prepare_flush_request(struct request *req, io_req->op = UBD_FLUSH; } -static bool submit_request(struct io_thread_req *io_req, struct ubd *dev) +static void submit_request(struct io_thread_req *io_req, struct ubd *dev) { int n = os_write_file(thread_fd, &io_req, sizeof(io_req)); + if (n != sizeof(io_req)) { if (n != -EAGAIN) - printk("write to io thread failed, " - "errno = %d\n", -n); - else if (list_empty(&dev->restart)) - list_add(&dev->restart, &restart); + pr_err("write to io thread failed: 
%d\n", -n); + blk_mq_requeue_request(io_req->req, true); kfree(io_req); - return false; } - return true; } -/* Called with dev->lock held */ -static void do_ubd_request(struct request_queue *q) +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { + struct request *req = bd->rq; + struct ubd *dev = hctx->queue->queuedata; + struct ubd_pdu *pdu = blk_mq_rq_to_pdu(req); struct io_thread_req *io_req; - struct request *req; - while(1){ - struct ubd *dev = q->queuedata; - if(dev->request == NULL){ - struct request *req = blk_fetch_request(q); - if(req == NULL) - return; + blk_mq_start_request(req); + + pdu->rq_pos = blk_rq_pos(req); + pdu->start_sg = 0; + pdu->end_sg = blk_rq_map_sg(req->q, req, pdu->sg); - dev->request = req; - dev->rq_pos = blk_rq_pos(req); - dev->start_sg = 0; - dev->end_sg = blk_rq_map_sg(q, req, dev->sg); + if (req_op(req) == REQ_OP_FLUSH) { + io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); + if (io_req == NULL) { + blk_mq_requeue_request(req, true); + goto done; } + prepare_flush_request(req, io_req); + submit_request(io_req, dev); - req = dev->request; + goto done; + } - if (req_op(req) == REQ_OP_FLUSH) { - io_req = kmalloc(sizeof(struct io_thread_req), - GFP_ATOMIC); - if (io_req == NULL) { - if (list_empty(&dev->restart)) - list_add(&dev->restart, &restart); - return; - } - prepare_flush_request(req, io_req); - if (submit_request(io_req, dev) == false) - return; + while (pdu->start_sg < pdu->end_sg) { + struct scatterlist *sg = &pdu->sg[pdu->start_sg]; + + io_req = kmalloc(sizeof(struct io_thread_req), + GFP_ATOMIC); + if (io_req == NULL) { + blk_mq_requeue_request(req, true); + goto done; } + prepare_request(req, io_req, + (unsigned long long)pdu->rq_pos << 9, + sg->offset, sg->length, sg_page(sg)); - while(dev->start_sg < dev->end_sg){ - struct scatterlist *sg = &dev->sg[dev->start_sg]; + submit_request(io_req, dev); - io_req = kmalloc(sizeof(struct io_thread_req), - GFP_ATOMIC); - if(io_req == NULL){ - if(list_empty(&dev->restart)) - list_add(&dev->restart, &restart); - return; - } - prepare_request(req, io_req, - (unsigned long long)dev->rq_pos << 9, - sg->offset, sg->length, sg_page(sg)); + pdu->rq_pos += sg->length >> 9; + pdu->start_sg++; + } - if (submit_request(io_req, dev) == false) - return; +done: + return BLK_STS_OK; +} - dev->rq_pos += sg->length >> 9; - dev->start_sg++; - } - dev->end_sg = 0; - dev->request = NULL; - } +static int ubd_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) +{ + struct ubd_pdu *pdu = blk_mq_rq_to_pdu(req); + + sg_init_table(pdu->sg, MAX_SG); + + return 0; } static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) -- cgit From e50b1e327aeb4b224364aa6f85c1713ff8b3654b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 11 Oct 2018 17:58:17 -0600 Subject: null_blk: remove legacy IO path We're planning on removing this code completely, kill the old path. 
Signed-off-by: Jens Axboe --- drivers/block/null_blk_main.c | 107 +++--------------------------------------- 1 file changed, 6 insertions(+), 101 deletions(-) diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 093b614d6524..0b302d707095 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -616,10 +616,6 @@ static void end_cmd(struct nullb_cmd *cmd) case NULL_Q_MQ: blk_mq_end_request(cmd->rq, cmd->error); return; - case NULL_Q_RQ: - INIT_LIST_HEAD(&cmd->rq->queuelist); - blk_end_request_all(cmd->rq, cmd->error); - break; case NULL_Q_BIO: cmd->bio->bi_status = cmd->error; bio_endio(cmd->bio); @@ -627,15 +623,6 @@ static void end_cmd(struct nullb_cmd *cmd) } free_cmd(cmd); - - /* Restart queue if needed, as we are freeing a tag */ - if (queue_mode == NULL_Q_RQ && blk_queue_stopped(q)) { - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queue_async(q); - spin_unlock_irqrestore(q->queue_lock, flags); - } } static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) @@ -1136,25 +1123,14 @@ static void null_stop_queue(struct nullb *nullb) if (nullb->dev->queue_mode == NULL_Q_MQ) blk_mq_stop_hw_queues(q); - else { - spin_lock_irq(q->queue_lock); - blk_stop_queue(q); - spin_unlock_irq(q->queue_lock); - } } static void null_restart_queue_async(struct nullb *nullb) { struct request_queue *q = nullb->q; - unsigned long flags; if (nullb->dev->queue_mode == NULL_Q_MQ) blk_mq_start_stopped_hw_queues(q, true); - else { - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queue_async(q); - spin_unlock_irqrestore(q->queue_lock, flags); - } } static bool cmd_report_zone(struct nullb *nullb, struct nullb_cmd *cmd) @@ -1197,17 +1173,8 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) /* race with timer */ if (atomic_long_read(&nullb->cur_bytes) > 0) null_restart_queue_async(nullb); - if (dev->queue_mode == NULL_Q_RQ) { - struct request_queue *q = nullb->q; - - spin_lock_irq(q->queue_lock); - rq->rq_flags |= RQF_DONTPREP; - blk_requeue_request(q, rq); - spin_unlock_irq(q->queue_lock); - return BLK_STS_OK; - } else - /* requeue request */ - return BLK_STS_DEV_RESOURCE; + /* requeue request */ + return BLK_STS_DEV_RESOURCE; } } @@ -1278,9 +1245,6 @@ out: case NULL_Q_MQ: blk_mq_complete_request(cmd->rq); break; - case NULL_Q_RQ: - blk_complete_request(cmd->rq); - break; case NULL_Q_BIO: /* * XXX: no proper submitting cpu information available. 
@@ -1349,30 +1313,6 @@ static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } -static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq) -{ - pr_info("null: rq %p timed out\n", rq); - __blk_complete_request(rq); - return BLK_EH_DONE; -} - -static int null_rq_prep_fn(struct request_queue *q, struct request *req) -{ - struct nullb *nullb = q->queuedata; - struct nullb_queue *nq = nullb_to_queue(nullb); - struct nullb_cmd *cmd; - - cmd = alloc_cmd(nq, 0); - if (cmd) { - cmd->rq = req; - req->special = cmd; - return BLKPREP_OK; - } - blk_stop_queue(q); - - return BLKPREP_DEFER; -} - static bool should_timeout_request(struct request *rq) { #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION @@ -1391,27 +1331,6 @@ static bool should_requeue_request(struct request *rq) return false; } -static void null_request_fn(struct request_queue *q) -{ - struct request *rq; - - while ((rq = blk_fetch_request(q)) != NULL) { - struct nullb_cmd *cmd = rq->special; - - /* just ignore the request */ - if (should_timeout_request(rq)) - continue; - if (should_requeue_request(rq)) { - blk_requeue_request(q, rq); - continue; - } - - spin_unlock_irq(q->queue_lock); - null_handle_cmd(cmd); - spin_lock_irq(q->queue_lock); - } -} - static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res) { pr_info("null: rq %p timed out\n", rq); @@ -1766,24 +1685,6 @@ static int null_add_dev(struct nullb_device *dev) rv = init_driver_queues(nullb); if (rv) goto out_cleanup_blk_queue; - } else { - nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, - dev->home_node); - if (!nullb->q) { - rv = -ENOMEM; - goto out_cleanup_queues; - } - - if (!null_setup_fault()) - goto out_cleanup_blk_queue; - - blk_queue_prep_rq(nullb->q, null_rq_prep_fn); - blk_queue_softirq_done(nullb->q, null_softirq_done_fn); - blk_queue_rq_timed_out(nullb->q, null_rq_timed_out_fn); - nullb->q->rq_timeout = 5 * HZ; - rv = init_driver_queues(nullb); - if (rv) - goto out_cleanup_blk_queue; } if (dev->mbps) { @@ -1865,6 +1766,10 @@ static int __init null_init(void) return -EINVAL; } + if (g_queue_mode == NULL_Q_RQ) { + pr_err("null_blk: legacy IO path no longer available\n"); + return -EINVAL; + } if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) { if (g_submit_queues != nr_online_nodes) { pr_warn("null_blk: submit_queues param is set to %u.\n", -- cgit From 5e27891e88555fecd8262e110e1a29feca4b0166 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 12 Oct 2018 09:24:57 -0600 Subject: block: remove bogus check for queue_lock assignment We just allocated the queue and haven't even set it up yet, hence we know that checking if ->mq_ops is NULL is always going to be true. In fact we do need to assign a lock to ->queue_lock always, as we need it for the queue flags modifications. Signed-off-by: Jens Axboe --- block/blk-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index c5539eed0202..cdfabc5646da 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1054,8 +1054,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, mutex_init(&q->sysfs_lock); spin_lock_init(&q->__queue_lock); - if (!q->mq_ops) - q->queue_lock = lock ? : &q->__queue_lock; + q->queue_lock = lock ? 
: &q->__queue_lock; /* * A queue starts its life with bypass turned on to avoid -- cgit From 8f94004e2a51a3ea195cf3447eb5d5906f36d8b3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 14 Oct 2018 13:20:48 -0600 Subject: cdrom: don't attempt to fiddle with cdo->capability We can't modify cdo->capability as it is defined as const. Change the modification hack to just WARN_ON_ONCE() if we hit any of the invalid combinations. This fixes a regression for pcd, which doesn't work after the constify patch. Fixes: 853fe1bf7554 ("cdrom: Make device operations read-only") Tested-by: Ondrej Zary Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 10802d1fc554..614ecdbb4ab7 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -410,10 +410,10 @@ static int cdrom_get_disc_info(struct cdrom_device_info *cdi, * hack to have the capability flags defined const, while we can still * change it here without gcc complaining at every line. */ -#define ENSURE(call, bits) \ -do { \ - if (cdo->call == NULL) \ - *change_capability &= ~(bits); \ +#define ENSURE(cdo, call, bits) \ +do { \ + if (cdo->call == NULL) \ + WARN_ON_ONCE((cdo)->capability & (bits)); \ } while (0) /* @@ -589,7 +589,6 @@ int register_cdrom(struct cdrom_device_info *cdi) { static char banner_printed; const struct cdrom_device_ops *cdo = cdi->ops; - int *change_capability = (int *)&cdo->capability; /* hack */ cd_dbg(CD_OPEN, "entering register_cdrom\n"); @@ -601,16 +600,16 @@ int register_cdrom(struct cdrom_device_info *cdi) cdrom_sysctl_register(); } - ENSURE(drive_status, CDC_DRIVE_STATUS); + ENSURE(cdo, drive_status, CDC_DRIVE_STATUS); if (cdo->check_events == NULL && cdo->media_changed == NULL) - *change_capability = ~(CDC_MEDIA_CHANGED | CDC_SELECT_DISC); - ENSURE(tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY); - ENSURE(lock_door, CDC_LOCK); - ENSURE(select_speed, CDC_SELECT_SPEED); - ENSURE(get_last_session, CDC_MULTI_SESSION); - ENSURE(get_mcn, CDC_MCN); - ENSURE(reset, CDC_RESET); - ENSURE(generic_packet, CDC_GENERIC_PACKET); + WARN_ON_ONCE(cdo->capability & (CDC_MEDIA_CHANGED | CDC_SELECT_DISC)); + ENSURE(cdo, tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY); + ENSURE(cdo, lock_door, CDC_LOCK); + ENSURE(cdo, select_speed, CDC_SELECT_SPEED); + ENSURE(cdo, get_last_session, CDC_MULTI_SESSION); + ENSURE(cdo, get_mcn, CDC_MCN); + ENSURE(cdo, reset, CDC_RESET); + ENSURE(cdo, generic_packet, CDC_GENERIC_PACKET); cdi->mc_flags = 0; cdi->options = CDO_USE_FFLAGS; -- cgit From de038597be8809619c4f17d70e86dab936bb1a10 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 16 Oct 2018 01:45:26 +0000 Subject: null_blk: remove set but not used variable 'q' Fixes gcc '-Wunused-but-set-variable' warning: drivers/block/null_blk_main.c: In function 'end_cmd': drivers/block/null_blk_main.c:609:24: warning: variable 'q' set but not used [-Wunused-but-set-variable] It is not used any more after commit e50b1e327aeb ("null_blk: remove legacy IO path") Signed-off-by: YueHaibing Signed-off-by: Jens Axboe --- drivers/block/null_blk_main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 0b302d707095..e94591021682 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -606,12 +606,8 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) static void end_cmd(struct nullb_cmd *cmd) { - struct
request_queue *q = NULL; int queue_mode = cmd->nq->dev->queue_mode; - if (cmd->rq) - q = cmd->rq->q; - switch (queue_mode) { case NULL_Q_MQ: blk_mq_end_request(cmd->rq, cmd->error); -- cgit From 9316a9ed6895c4ad2f0cde171d486f80c55d8283 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 15 Oct 2018 08:40:37 -0600 Subject: blk-mq: provide helper for setting up an SQ queue and tag set This pattern is repeated throughout all the blk-mq conversions. Provide a basic helper to get it done. Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 33 +++++++++++++++++++++++++++++++++ include/linux/blk-mq.h | 4 ++++ 2 files changed, 37 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index c2ecd64a2403..dcf10e39995a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2507,6 +2507,39 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) } EXPORT_SYMBOL(blk_mq_init_queue); +/* + * Helper for setting up a queue with mq ops, given queue depth, and + * the passed in mq ops flags. + */ +struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, + unsigned int queue_depth, + unsigned int set_flags) +{ + struct request_queue *q; + int ret; + + memset(set, 0, sizeof(*set)); + set->ops = ops; + set->nr_hw_queues = 1; + set->queue_depth = queue_depth; + set->numa_node = NUMA_NO_NODE; + set->flags = set_flags; + + ret = blk_mq_alloc_tag_set(set); + if (ret) + return ERR_PTR(ret); + + q = blk_mq_init_queue(set); + if (IS_ERR(q)) { + blk_mq_free_tag_set(set); + return q; + } + + return q; +} +EXPORT_SYMBOL(blk_mq_init_sq_queue); + static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set) { int hw_ctx_size = sizeof(struct blk_mq_hw_ctx); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 1da59c16f637..2286dc12c6bc 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -203,6 +203,10 @@ enum { struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); +struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, + unsigned int queue_depth, + unsigned int set_flags); int blk_mq_register_dev(struct device *, struct request_queue *); void blk_mq_unregister_dev(struct device *, struct request_queue *); -- cgit From fab1adcf9503ccf191aec9ee42580f7dc1eb6237 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 15 Oct 2018 13:32:01 -0600 Subject: ps3disk: convert to blk-mq Convert from the old request_fn style driver to blk-mq. 
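The conversion leans on the blk_mq_init_sq_queue() helper added by the previous patch; as a rough sketch of the calling convention (the "mydev" driver state and "my_" names here are hypothetical, and the depth and flags are illustrative only):

	static const struct blk_mq_ops my_mq_ops = {
		.queue_rq	= my_queue_rq,	/* hypothetical ->queue_rq handler */
	};

	q = blk_mq_init_sq_queue(&mydev->tag_set, &my_mq_ops, 64,
				 BLK_MQ_F_SHOULD_MERGE);
	if (IS_ERR(q))
		return PTR_ERR(q);

	/* teardown mirrors setup */
	blk_cleanup_queue(q);
	blk_mq_free_tag_set(&mydev->tag_set);

Note the helper already frees the tag set itself when queue allocation fails, so callers only unwind both on the teardown path.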
Cc: Benjamin Herrenschmidt Tested-by: Geoff Levand Signed-off-by: Jens Axboe --- drivers/block/ps3disk.c | 86 +++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 29a4419e8ba3..4e1d9b31f60c 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include @@ -42,6 +42,7 @@ struct ps3disk_private { spinlock_t lock; /* Request queue spinlock */ struct request_queue *queue; + struct blk_mq_tag_set tag_set; struct gendisk *gendisk; unsigned int blocking_factor; struct request *req; @@ -118,8 +119,8 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev, } } -static int ps3disk_submit_request_sg(struct ps3_storage_device *dev, - struct request *req) +static blk_status_t ps3disk_submit_request_sg(struct ps3_storage_device *dev, + struct request *req) { struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); int write = rq_data_dir(req), res; @@ -158,16 +159,15 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev, if (res) { dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__, __LINE__, op, res); - __blk_end_request_all(req, BLK_STS_IOERR); - return 0; + return BLK_STS_IOERR; } priv->req = req; - return 1; + return BLK_STS_OK; } -static int ps3disk_submit_flush_request(struct ps3_storage_device *dev, - struct request *req) +static blk_status_t ps3disk_submit_flush_request(struct ps3_storage_device *dev, + struct request *req) { struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); u64 res; @@ -180,50 +180,45 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev, if (res) { dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n", __func__, __LINE__, res); - __blk_end_request_all(req, BLK_STS_IOERR); - return 0; + return BLK_STS_IOERR; } priv->req = req; - return 1; + return BLK_STS_OK; } -static void ps3disk_do_request(struct ps3_storage_device *dev, - struct request_queue *q) +static blk_status_t ps3disk_do_request(struct ps3_storage_device *dev, + struct request *req) { - struct request *req; - dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); - while ((req = blk_fetch_request(q))) { - switch (req_op(req)) { - case REQ_OP_FLUSH: - if (ps3disk_submit_flush_request(dev, req)) - return; - break; - case REQ_OP_READ: - case REQ_OP_WRITE: - if (ps3disk_submit_request_sg(dev, req)) - return; - break; - default: - blk_dump_rq_flags(req, DEVICE_NAME " bad request"); - __blk_end_request_all(req, BLK_STS_IOERR); - } + switch (req_op(req)) { + case REQ_OP_FLUSH: + return ps3disk_submit_flush_request(dev, req); + case REQ_OP_READ: + case REQ_OP_WRITE: + return ps3disk_submit_request_sg(dev, req); + default: + blk_dump_rq_flags(req, DEVICE_NAME " bad request"); + return BLK_STS_IOERR; } } -static void ps3disk_request(struct request_queue *q) +static blk_status_t ps3disk_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { + struct request_queue *q = hctx->queue; struct ps3_storage_device *dev = q->queuedata; struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd); + blk_status_t ret; - if (priv->req) { - dev_dbg(&dev->sbd.core, "%s:%u busy\n", __func__, __LINE__); - return; - } + blk_mq_start_request(bd->rq); + + spin_lock_irq(&priv->lock); + ret = ps3disk_do_request(dev, bd->rq); + spin_unlock_irq(&priv->lock); - ps3disk_do_request(dev, q); + return ret; } static irqreturn_t 
ps3disk_interrupt(int irq, void *data) @@ -280,11 +275,11 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data) } spin_lock(&priv->lock); - __blk_end_request_all(req, error); priv->req = NULL; - ps3disk_do_request(dev, priv->queue); + blk_mq_end_request(req, error); spin_unlock(&priv->lock); + blk_mq_run_hw_queues(priv->queue, true); return IRQ_HANDLED; } @@ -404,6 +399,10 @@ static unsigned long ps3disk_mask; static DEFINE_MUTEX(ps3disk_mask_mutex); +static const struct blk_mq_ops ps3disk_mq_ops = { + .queue_rq = ps3disk_queue_rq, +}; + static int ps3disk_probe(struct ps3_system_bus_device *_dev) { struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core); @@ -454,11 +453,12 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) ps3disk_identify(dev); - queue = blk_init_queue(ps3disk_request, &priv->lock); - if (!queue) { - dev_err(&dev->sbd.core, "%s:%u: blk_init_queue failed\n", + queue = blk_mq_init_sq_queue(&priv->tag_set, &ps3disk_mq_ops, 1, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(queue)) { + dev_err(&dev->sbd.core, "%s:%u: blk_mq_init_queue failed\n", __func__, __LINE__); - error = -ENOMEM; + error = PTR_ERR(queue); goto fail_teardown; } @@ -505,6 +505,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) fail_cleanup_queue: blk_cleanup_queue(queue); + blk_mq_free_tag_set(&priv->tag_set); fail_teardown: ps3stor_teardown(dev); fail_free_bounce: @@ -530,6 +531,7 @@ static int ps3disk_remove(struct ps3_system_bus_device *_dev) mutex_unlock(&ps3disk_mask_mutex); del_gendisk(priv->gendisk); blk_cleanup_queue(priv->queue); + blk_mq_free_tag_set(&priv->tag_set); put_disk(priv->gendisk); dev_notice(&dev->sbd.core, "Synchronizing disk cache\n"); ps3disk_sync_cache(dev); -- cgit From 89c6b16509693332fe13d4bb6b812134de56c16f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 15 Oct 2018 08:38:52 -0600 Subject: paride: convert pcd to blk-mq Tested-by: Ondrej Zary Signed-off-by: Jens Axboe --- drivers/block/paride/pcd.c | 88 +++++++++++++++++++++++++++++----------------- 1 file changed, 56 insertions(+), 32 deletions(-) diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index a026211afb51..96670eefaeb2 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -137,7 +137,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY}; #include #include #include -#include +#include #include #include @@ -186,7 +186,8 @@ static int pcd_packet(struct cdrom_device_info *cdi, static int pcd_detect(void); static void pcd_probe_capabilities(void); static void do_pcd_read_drq(void); -static void do_pcd_request(struct request_queue * q); +static blk_status_t pcd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd); static void do_pcd_read(void); struct pcd_unit { @@ -199,6 +200,8 @@ struct pcd_unit { char *name; /* pcd0, pcd1, etc */ struct cdrom_device_info info; /* uniform cdrom interface */ struct gendisk *disk; + struct blk_mq_tag_set tag_set; + struct list_head rq_list; }; static struct pcd_unit pcd[PCD_UNITS]; @@ -292,6 +295,10 @@ static const struct cdrom_device_ops pcd_dops = { CDC_CD_RW, }; +static const struct blk_mq_ops pcd_mq_ops = { + .queue_rq = pcd_queue_rq, +}; + static void pcd_init_units(void) { struct pcd_unit *cd; @@ -300,13 +307,19 @@ static void pcd_init_units(void) pcd_drive_count = 0; for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { struct gendisk *disk = alloc_disk(1); + if (!disk) continue; - disk->queue = blk_init_queue(do_pcd_request, &pcd_lock); - if (!disk->queue) { - 
put_disk(disk); + + disk->queue = blk_mq_init_sq_queue(&cd->tag_set, &pcd_mq_ops, + 1, BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(disk->queue)) { + disk->queue = NULL; continue; } + + INIT_LIST_HEAD(&cd->rq_list); + disk->queue->queuedata = cd; blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); cd->disk = disk; cd->pi = &cd->pia; @@ -748,18 +761,18 @@ static int pcd_queue; static int set_next_request(void) { struct pcd_unit *cd; - struct request_queue *q; int old_pos = pcd_queue; do { cd = &pcd[pcd_queue]; - q = cd->present ? cd->disk->queue : NULL; if (++pcd_queue == PCD_UNITS) pcd_queue = 0; - if (q) { - pcd_req = blk_fetch_request(q); - if (pcd_req) - break; + if (cd->present && !list_empty(&cd->rq_list)) { + pcd_req = list_first_entry(&cd->rq_list, struct request, + queuelist); + list_del_init(&pcd_req->queuelist); + blk_mq_start_request(pcd_req); + break; } } while (pcd_queue != old_pos); @@ -768,33 +781,41 @@ static int set_next_request(void) static void pcd_request(void) { + struct pcd_unit *cd; + if (pcd_busy) return; - while (1) { - if (!pcd_req && !set_next_request()) - return; - if (rq_data_dir(pcd_req) == READ) { - struct pcd_unit *cd = pcd_req->rq_disk->private_data; - if (cd != pcd_current) - pcd_bufblk = -1; - pcd_current = cd; - pcd_sector = blk_rq_pos(pcd_req); - pcd_count = blk_rq_cur_sectors(pcd_req); - pcd_buf = bio_data(pcd_req->bio); - pcd_busy = 1; - ps_set_intr(do_pcd_read, NULL, 0, nice); - return; - } else { - __blk_end_request_all(pcd_req, BLK_STS_IOERR); - pcd_req = NULL; - } - } + if (!pcd_req && !set_next_request()) + return; + + cd = pcd_req->rq_disk->private_data; + if (cd != pcd_current) + pcd_bufblk = -1; + pcd_current = cd; + pcd_sector = blk_rq_pos(pcd_req); + pcd_count = blk_rq_cur_sectors(pcd_req); + pcd_buf = bio_data(pcd_req->bio); + pcd_busy = 1; + ps_set_intr(do_pcd_read, NULL, 0, nice); } -static void do_pcd_request(struct request_queue *q) +static blk_status_t pcd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { + struct pcd_unit *cd = hctx->queue->queuedata; + + if (rq_data_dir(bd->rq) != READ) { + blk_mq_start_request(bd->rq); + return BLK_STS_IOERR; + } + + spin_lock_irq(&pcd_lock); + list_add_tail(&bd->rq->queuelist, &cd->rq_list); pcd_request(); + spin_unlock_irq(&pcd_lock); + + return BLK_STS_OK; } static inline void next_request(blk_status_t err) @@ -802,8 +823,10 @@ static inline void next_request(blk_status_t err) unsigned long saved_flags; spin_lock_irqsave(&pcd_lock, saved_flags); - if (!__blk_end_request_cur(pcd_req, err)) + if (!blk_update_request(pcd_req, err, blk_rq_cur_bytes(pcd_req))) { + __blk_mq_end_request(pcd_req, err); pcd_req = NULL; + } pcd_busy = 0; pcd_request(); spin_unlock_irqrestore(&pcd_lock, saved_flags); @@ -1011,6 +1034,7 @@ static void __exit pcd_exit(void) unregister_cdrom(&cd->info); } blk_cleanup_queue(cd->disk->queue); + blk_mq_free_tag_set(&cd->tag_set); put_disk(cd->disk); } unregister_blkdev(major, name); -- cgit From 99fe8b02a82597a8fa8add6455d0a3037ca16e06 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 15 Oct 2018 13:53:50 -0600 Subject: paride: convert pd to blk-mq Tested-by: Ondrej Zary Signed-off-by: Jens Axboe --- drivers/block/paride/pd.c | 94 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 28 deletions(-) diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 7cf947586fe4..ae4971e5d9a8 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -151,7 +151,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, 
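/* indexes of pd's per-drive setup parameters: port, protocol, unit, mode, geometry, standby, delay, slave */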
D_SBY, D_DLY, D_SLV}; #include #include #include /* for the eject ioctl */ -#include +#include #include #include #include @@ -236,6 +236,8 @@ struct pd_unit { int alt_geom; char name[PD_NAMELEN]; /* pda, pdb, etc ... */ struct gendisk *gd; + struct blk_mq_tag_set tag_set; + struct list_head rq_list; }; static struct pd_unit pd[PD_UNITS]; @@ -399,9 +401,17 @@ static int set_next_request(void) if (++pd_queue == PD_UNITS) pd_queue = 0; if (q) { - pd_req = blk_fetch_request(q); - if (pd_req) - break; + struct pd_unit *disk = q->queuedata; + + if (list_empty(&disk->rq_list)) + continue; + + pd_req = list_first_entry(&disk->rq_list, + struct request, + queuelist); + list_del_init(&pd_req->queuelist); + blk_mq_start_request(pd_req); + break; } } while (pd_queue != old_pos); @@ -412,7 +422,6 @@ static void run_fsm(void) { while (1) { enum action res; - unsigned long saved_flags; int stop = 0; if (!phase) { @@ -433,19 +442,24 @@ static void run_fsm(void) } switch(res = phase()) { - case Ok: case Fail: + case Ok: case Fail: { + blk_status_t err; + + err = res == Ok ? 0 : BLK_STS_IOERR; pi_disconnect(pi_current); pd_claimed = 0; phase = NULL; - spin_lock_irqsave(&pd_lock, saved_flags); - if (!__blk_end_request_cur(pd_req, - res == Ok ? 0 : BLK_STS_IOERR)) { - if (!set_next_request()) - stop = 1; + spin_lock_irq(&pd_lock); + if (!blk_update_request(pd_req, err, + blk_rq_cur_bytes(pd_req))) { + __blk_mq_end_request(pd_req, err); + pd_req = NULL; + stop = !set_next_request(); } - spin_unlock_irqrestore(&pd_lock, saved_flags); + spin_unlock_irq(&pd_lock); if (stop) return; + } /* fall through */ case Hold: schedule_fsm(); @@ -505,11 +519,17 @@ static int pd_next_buf(void) if (pd_count) return 0; spin_lock_irqsave(&pd_lock, saved_flags); - __blk_end_request_cur(pd_req, 0); - pd_count = blk_rq_cur_sectors(pd_req); - pd_buf = bio_data(pd_req->bio); + if (!blk_update_request(pd_req, 0, blk_rq_cur_bytes(pd_req))) { + __blk_mq_end_request(pd_req, 0); + pd_req = NULL; + pd_count = 0; + pd_buf = NULL; + } else { + pd_count = blk_rq_cur_sectors(pd_req); + pd_buf = bio_data(pd_req->bio); + } spin_unlock_irqrestore(&pd_lock, saved_flags); - return 0; + return !pd_count; } static unsigned long pd_timeout; @@ -726,15 +746,21 @@ static enum action pd_identify(struct pd_unit *disk) /* end of io request engine */ -static void do_pd_request(struct request_queue * q) +static blk_status_t pd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - if (pd_req) - return; - pd_req = blk_fetch_request(q); - if (!pd_req) - return; + struct pd_unit *disk = hctx->queue->queuedata; + + spin_lock_irq(&pd_lock); + if (!pd_req) { + pd_req = bd->rq; + blk_mq_start_request(pd_req); + } else + list_add_tail(&bd->rq->queuelist, &disk->rq_list); + spin_unlock_irq(&pd_lock); - schedule_fsm(); + run_fsm(); + return BLK_STS_OK; } static int pd_special_command(struct pd_unit *disk, @@ -847,23 +873,33 @@ static const struct block_device_operations pd_fops = { /* probing */ +static const struct blk_mq_ops pd_mq_ops = { + .queue_rq = pd_queue_rq, +}; + static void pd_probe_drive(struct pd_unit *disk) { - struct gendisk *p = alloc_disk(1 << PD_BITS); + struct gendisk *p; + + p = alloc_disk(1 << PD_BITS); if (!p) return; + strcpy(p->disk_name, disk->name); p->fops = &pd_fops; p->major = major; p->first_minor = (disk - pd) << PD_BITS; disk->gd = p; p->private_data = disk; - p->queue = blk_init_queue(do_pd_request, &pd_lock); - if (!p->queue) { - disk->gd = NULL; - put_disk(p); + + p->queue = 
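/* single-queue blk-mq setup at depth 2; BLK_MQ_F_BLOCKING marks ->queue_rq as allowed to block */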
blk_mq_init_sq_queue(&disk->tag_set, &pd_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING); + if (IS_ERR(p->queue)) { + p->queue = NULL; return; } + + p->queue->queuedata = disk; blk_queue_max_hw_sectors(p->queue, cluster); blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH); @@ -895,6 +931,7 @@ static int pd_detect(void) disk->standby = parm[D_SBY]; if (parm[D_PRT]) pd_drive_count++; + INIT_LIST_HEAD(&disk->rq_list); } par_drv = pi_register_driver(name); @@ -972,6 +1009,7 @@ static void __exit pd_exit(void) disk->gd = NULL; del_gendisk(p); blk_cleanup_queue(p->queue); + blk_mq_free_tag_set(&disk->tag_set); put_disk(p); pi_release(disk->pi); } -- cgit From 77218ddf46d8d71161776a37739cd1723706ec95 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 15 Oct 2018 08:38:08 -0600 Subject: paride: convert pf to blk-mq Tested-by: Ondrej Zary Signed-off-by: Jens Axboe --- drivers/block/paride/pf.c | 56 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index eef7a91f667d..e92e7a8eeeb2 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -152,7 +152,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_LUN, D_DLY}; #include #include #include -#include +#include #include #include #include @@ -206,7 +206,8 @@ module_param_array(drive3, int, NULL, 0); #define ATAPI_WRITE_10 0x2a static int pf_open(struct block_device *bdev, fmode_t mode); -static void do_pf_request(struct request_queue * q); +static blk_status_t pf_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd); static int pf_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo); @@ -238,6 +239,8 @@ struct pf_unit { int present; /* device present ? */ char name[PF_NAMELEN]; /* pf0, pf1, ... */ struct gendisk *disk; + struct blk_mq_tag_set tag_set; + struct list_head rq_list; }; static struct pf_unit units[PF_UNITS]; @@ -277,6 +280,10 @@ static const struct block_device_operations pf_fops = { .check_events = pf_check_events, }; +static const struct blk_mq_ops pf_mq_ops = { + .queue_rq = pf_queue_rq, +}; + static void __init pf_init_units(void) { struct pf_unit *pf; @@ -284,14 +291,22 @@ static void __init pf_init_units(void) pf_drive_count = 0; for (unit = 0, pf = units; unit < PF_UNITS; unit++, pf++) { - struct gendisk *disk = alloc_disk(1); + struct gendisk *disk; + + disk = alloc_disk(1); if (!disk) continue; - disk->queue = blk_init_queue(do_pf_request, &pf_spin_lock); - if (!disk->queue) { + + disk->queue = blk_mq_init_sq_queue(&pf->tag_set, &pf_mq_ops, + 1, BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(disk->queue)) { put_disk(disk); - return; + disk->queue = NULL; + continue; } + + INIT_LIST_HEAD(&pf->rq_list); + disk->queue->queuedata = pf; blk_queue_max_segments(disk->queue, cluster); blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); pf->disk = disk; @@ -784,18 +799,18 @@ static int pf_queue; static int set_next_request(void) { struct pf_unit *pf; - struct request_queue *q; int old_pos = pf_queue; do { pf = &units[pf_queue]; - q = pf->present ? 
pf->disk->queue : NULL; if (++pf_queue == PF_UNITS) pf_queue = 0; - if (q) { - pf_req = blk_fetch_request(q); - if (pf_req) - break; + if (pf->present && !list_empty(&pf->rq_list)) { + pf_req = list_first_entry(&pf->rq_list, struct request, + queuelist); + list_del_init(&pf_req->queuelist); + blk_mq_start_request(pf_req); + break; } } while (pf_queue != old_pos); @@ -804,8 +819,12 @@ static int set_next_request(void) static void pf_end_request(blk_status_t err) { - if (pf_req && !__blk_end_request_cur(pf_req, err)) + if (!pf_req) + return; + if (!blk_update_request(pf_req, err, blk_rq_cur_bytes(pf_req))) { + __blk_mq_end_request(pf_req, err); pf_req = NULL; + } } static void pf_request(void) @@ -842,9 +861,17 @@ repeat: } } -static void do_pf_request(struct request_queue *q) +static blk_status_t pf_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { + struct pf_unit *pf = hctx->queue->queuedata; + + spin_lock_irq(&pf_spin_lock); + list_add_tail(&bd->rq->queuelist, &pf->rq_list); pf_request(); + spin_unlock_irq(&pf_spin_lock); + + return BLK_STS_OK; } static int pf_next_buf(void) @@ -1024,6 +1051,7 @@ static void __exit pf_exit(void) continue; del_gendisk(pf->disk); blk_cleanup_queue(pf->disk->queue); + blk_mq_free_tag_set(&pf->tag_set); put_disk(pf->disk); pi_release(pf->pi); } -- cgit From 804186fa95c871aba621da280284d3bc46746d50 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 15 Oct 2018 09:05:59 -0600 Subject: xsysace: convert to blk-mq Straightforward conversion, using an internal list to enable the driver to pull requests at will. Acked-by: Michal Simek Signed-off-by: Jens Axboe --- drivers/block/xsysace.c | 80 +++++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 25 deletions(-) diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index c24589414c75..87ccef4bd69e 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -88,7 +88,7 @@ #include #include #include -#include +#include #include #include #include @@ -209,6 +209,8 @@ struct ace_device { struct device *dev; struct request_queue *queue; struct gendisk *gd; + struct blk_mq_tag_set tag_set; + struct list_head rq_list; /* Inserted CF card parameters */ u16 cf_id[ATA_ID_WORDS]; @@ -462,18 +464,26 @@ static inline void ace_fsm_yieldirq(struct ace_device *ace) ace->fsm_continue_flag = 0; } +static bool ace_has_next_request(struct request_queue *q) +{ + struct ace_device *ace = q->queuedata; + + return !list_empty(&ace->rq_list); +} + /* Get the next read/write request; ending requests that we don't handle */ static struct request *ace_get_next_request(struct request_queue *q) { - struct request *req; + struct ace_device *ace = q->queuedata; + struct request *rq; - while ((req = blk_peek_request(q)) != NULL) { - if (!blk_rq_is_passthrough(req)) - break; - blk_start_request(req); - __blk_end_request_all(req, BLK_STS_IOERR); + rq = list_first_entry_or_null(&ace->rq_list, struct request, queuelist); + if (rq) { + list_del_init(&rq->queuelist); + blk_mq_start_request(rq); } - return req; + + return rq; } static void ace_fsm_dostate(struct ace_device *ace) @@ -499,11 +509,11 @@ static void ace_fsm_dostate(struct ace_device *ace) /* Drop all in-flight and pending requests */ if (ace->req) { - __blk_end_request_all(ace->req, BLK_STS_IOERR); + blk_mq_end_request(ace->req, BLK_STS_IOERR); ace->req = NULL; } - while ((req = blk_fetch_request(ace->queue)) != NULL) - __blk_end_request_all(req, BLK_STS_IOERR); + while ((req = ace_get_next_request(ace->queue)) !=
NULL) + blk_mq_end_request(req, BLK_STS_IOERR); /* Drop back to IDLE state and notify waiters */ ace->fsm_state = ACE_FSM_STATE_IDLE; @@ -517,7 +527,7 @@ static void ace_fsm_dostate(struct ace_device *ace) switch (ace->fsm_state) { case ACE_FSM_STATE_IDLE: /* See if there is anything to do */ - if (ace->id_req_count || ace_get_next_request(ace->queue)) { + if (ace->id_req_count || ace_has_next_request(ace->queue)) { ace->fsm_iter_num++; ace->fsm_state = ACE_FSM_STATE_REQ_LOCK; mod_timer(&ace->stall_timer, jiffies + HZ); @@ -651,7 +661,6 @@ static void ace_fsm_dostate(struct ace_device *ace) ace->fsm_state = ACE_FSM_STATE_IDLE; break; } - blk_start_request(req); /* Okay, it's a data request, set it up for transfer */ dev_dbg(ace->dev, @@ -728,7 +737,8 @@ static void ace_fsm_dostate(struct ace_device *ace) } /* bio finished; is there another one? */ - if (__blk_end_request_cur(ace->req, BLK_STS_OK)) { + if (blk_update_request(ace->req, BLK_STS_OK, + blk_rq_cur_bytes(ace->req))) { /* dev_dbg(ace->dev, "next block; h=%u c=%u\n", * blk_rq_sectors(ace->req), * blk_rq_cur_sectors(ace->req)); @@ -854,17 +864,23 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) /* --------------------------------------------------------------------- * Block ops */ -static void ace_request(struct request_queue * q) +static blk_status_t ace_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct request *req; - struct ace_device *ace; - - req = ace_get_next_request(q); + struct ace_device *ace = hctx->queue->queuedata; + struct request *req = bd->rq; - if (req) { - ace = req->rq_disk->private_data; - tasklet_schedule(&ace->fsm_tasklet); + if (blk_rq_is_passthrough(req)) { + blk_mq_start_request(req); + return BLK_STS_IOERR; } + + spin_lock_irq(&ace->lock); + list_add_tail(&req->queuelist, &ace->rq_list); + spin_unlock_irq(&ace->lock); + + tasklet_schedule(&ace->fsm_tasklet); + return BLK_STS_OK; } static unsigned int ace_check_events(struct gendisk *gd, unsigned int clearing) @@ -957,6 +973,10 @@ static const struct block_device_operations ace_fops = { .getgeo = ace_getgeo, }; +static const struct blk_mq_ops ace_mq_ops = { + .queue_rq = ace_queue_rq, +}; + /* -------------------------------------------------------------------- * SystemACE device setup/teardown code */ @@ -972,6 +992,7 @@ static int ace_setup(struct ace_device *ace) spin_lock_init(&ace->lock); init_completion(&ace->id_completion); + INIT_LIST_HEAD(&ace->rq_list); /* * Map the device @@ -989,9 +1010,15 @@ static int ace_setup(struct ace_device *ace) /* * Initialize the request queue */ - ace->queue = blk_init_queue(ace_request, &ace->lock); - if (ace->queue == NULL) + ace->queue = blk_mq_init_sq_queue(&ace->tag_set, &ace_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(ace->queue)) { + rc = PTR_ERR(ace->queue); + ace->queue = NULL; goto err_blk_initq; + } + ace->queue->queuedata = ace; + blk_queue_logical_block_size(ace->queue, 512); blk_queue_bounce_limit(ace->queue, BLK_BOUNCE_HIGH); @@ -1066,6 +1093,7 @@ err_read: put_disk(ace->gd); err_alloc_disk: blk_cleanup_queue(ace->queue); + blk_mq_free_tag_set(&ace->tag_set); err_blk_initq: iounmap(ace->baseaddr); err_ioremap: @@ -1081,8 +1109,10 @@ static void ace_teardown(struct ace_device *ace) put_disk(ace->gd); } - if (ace->queue) + if (ace->queue) { blk_cleanup_queue(ace->queue); + blk_mq_free_tag_set(&ace->tag_set); + } tasklet_kill(&ace->fsm_tasklet); -- cgit From 891b7c5fbf619d2a314e424775b3c232eb227e90 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 16 
Oct 2018 08:09:58 -0600 Subject: mtd_blkdevs: convert to blk-mq Straightforward conversion, using an internal list to enable the driver to pull requests at will. Dynamically allocate the tag set to avoid having to pull in the block headers for blktrans.h, since various mtd drivers use names for defines and functions that conflict with those block headers. Cc: David Woodhouse Cc: linux-mtd@lists.infradead.org Tested-by: Richard Weinberger Signed-off-by: Jens Axboe --- drivers/mtd/mtd_blkdevs.c | 100 +++++++++++++++++++++++++++---------------- include/linux/mtd/blktrans.h | 5 +-- 2 files changed, 65 insertions(+), 40 deletions(-) diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 6a41dfa3c36b..b0d44f9214b0 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +46,8 @@ static void blktrans_dev_release(struct kref *kref) dev->disk->private_data = NULL; blk_cleanup_queue(dev->rq); + blk_mq_free_tag_set(dev->tag_set); + kfree(dev->tag_set); put_disk(dev->disk); list_del(&dev->list); kfree(dev); @@ -134,28 +137,39 @@ int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev) } EXPORT_SYMBOL_GPL(mtd_blktrans_cease_background); -static void mtd_blktrans_work(struct work_struct *work) +static struct request *mtd_next_request(struct mtd_blktrans_dev *dev) +{ + struct request *rq; + + rq = list_first_entry_or_null(&dev->rq_list, struct request, queuelist); + if (rq) { + list_del_init(&rq->queuelist); + blk_mq_start_request(rq); + return rq; + } + + return NULL; +} + +static void mtd_blktrans_work(struct mtd_blktrans_dev *dev) + __releases(&dev->queue_lock) + __acquires(&dev->queue_lock) { - struct mtd_blktrans_dev *dev = - container_of(work, struct mtd_blktrans_dev, work); struct mtd_blktrans_ops *tr = dev->tr; - struct request_queue *rq = dev->rq; struct request *req = NULL; int background_done = 0; - spin_lock_irq(rq->queue_lock); - while (1) { blk_status_t res; dev->bg_stop = false; - if (!req && !(req = blk_fetch_request(rq))) { + if (!req && !(req = mtd_next_request(dev))) { if (tr->background && !background_done) { - spin_unlock_irq(rq->queue_lock); + spin_unlock_irq(&dev->queue_lock); mutex_lock(&dev->lock); tr->background(dev); mutex_unlock(&dev->lock); - spin_lock_irq(rq->queue_lock); + spin_lock_irq(&dev->queue_lock); /* * Do background processing just once per idle * period.
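The __releases/__acquires annotations above record the locking contract: mtd_blktrans_work() is entered and exited with dev->queue_lock held, dropping it only around the calls that can sleep. A simplified view of the loop (partial-completion and background details elided):

	/* caller (mtd_queue_rq) already holds dev->queue_lock */
	while ((req = mtd_next_request(dev)) != NULL) {
		spin_unlock_irq(&dev->queue_lock);
		mutex_lock(&dev->lock);
		res = do_blktrans_request(dev->tr, dev, req);	/* may sleep */
		mutex_unlock(&dev->lock);
		spin_lock_irq(&dev->queue_lock);
	}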
@@ -166,35 +180,39 @@ static void mtd_blktrans_work(struct work_struct *work) break; } - spin_unlock_irq(rq->queue_lock); + spin_unlock_irq(&dev->queue_lock); mutex_lock(&dev->lock); res = do_blktrans_request(dev->tr, dev, req); mutex_unlock(&dev->lock); - spin_lock_irq(rq->queue_lock); - - if (!__blk_end_request_cur(req, res)) + if (!blk_update_request(req, res, blk_rq_cur_bytes(req))) { + __blk_mq_end_request(req, res); req = NULL; + } background_done = 0; + spin_lock_irq(&dev->queue_lock); } - - spin_unlock_irq(rq->queue_lock); } -static void mtd_blktrans_request(struct request_queue *rq) +static blk_status_t mtd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { struct mtd_blktrans_dev *dev; - struct request *req = NULL; - dev = rq->queuedata; + dev = hctx->queue->queuedata; + if (!dev) { + blk_mq_start_request(bd->rq); + return BLK_STS_IOERR; + } + + spin_lock_irq(&dev->queue_lock); + list_add_tail(&bd->rq->queuelist, &dev->rq_list); + mtd_blktrans_work(dev); + spin_unlock_irq(&dev->queue_lock); - if (!dev) - while ((req = blk_fetch_request(rq)) != NULL) - __blk_end_request_all(req, BLK_STS_IOERR); - else - queue_work(dev->wq, &dev->work); + return BLK_STS_OK; } static int blktrans_open(struct block_device *bdev, fmode_t mode) @@ -329,6 +347,10 @@ static const struct block_device_operations mtd_block_ops = { .getgeo = blktrans_getgeo, }; +static const struct blk_mq_ops mtd_mq_ops = { + .queue_rq = mtd_queue_rq, +}; + int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) { struct mtd_blktrans_ops *tr = new->tr; @@ -416,11 +438,20 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) /* Create the request queue */ spin_lock_init(&new->queue_lock); - new->rq = blk_init_queue(mtd_blktrans_request, &new->queue_lock); + INIT_LIST_HEAD(&new->rq_list); - if (!new->rq) + new->tag_set = kzalloc(sizeof(*new->tag_set), GFP_KERNEL); + if (!new->tag_set) goto error3; + new->rq = blk_mq_init_sq_queue(new->tag_set, &mtd_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING); + if (IS_ERR(new->rq)) { + ret = PTR_ERR(new->rq); + new->rq = NULL; + goto error4; + } + if (tr->flush) blk_queue_write_cache(new->rq, true, false); @@ -437,13 +468,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) gd->queue = new->rq; - /* Create processing workqueue */ - new->wq = alloc_workqueue("%s%d", 0, 0, - tr->name, new->mtd->index); - if (!new->wq) - goto error4; - INIT_WORK(&new->work, mtd_blktrans_work); - if (new->readonly) set_disk_ro(gd, 1); @@ -456,7 +480,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) } return 0; error4: - blk_cleanup_queue(new->rq); + kfree(new->tag_set); error3: put_disk(new->disk); error2: @@ -481,15 +505,17 @@ int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old) /* Stop new requests to arrive */ del_gendisk(old->disk); - /* Stop workqueue. This will perform any pending request. 
*/ - destroy_workqueue(old->wq); - /* Kill current requests */ spin_lock_irqsave(&old->queue_lock, flags); old->rq->queuedata = NULL; - blk_start_queue(old->rq); spin_unlock_irqrestore(&old->queue_lock, flags); + /* freeze+quiesce queue to ensure all requests are flushed */ + blk_mq_freeze_queue(old->rq); + blk_mq_quiesce_queue(old->rq); + blk_mq_unquiesce_queue(old->rq); + blk_mq_unfreeze_queue(old->rq); + /* If the device is currently open, tell trans driver to close it, then put mtd device, and don't touch it again */ mutex_lock(&old->lock); diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h index e93837f647de..1d3ade69d39a 100644 --- a/include/linux/mtd/blktrans.h +++ b/include/linux/mtd/blktrans.h @@ -23,7 +23,6 @@ #include #include #include -#include struct hd_geometry; struct mtd_info; @@ -44,9 +43,9 @@ struct mtd_blktrans_dev { struct kref ref; struct gendisk *disk; struct attribute_group *disk_attributes; - struct workqueue_struct *wq; - struct work_struct work; struct request_queue *rq; + struct list_head rq_list; + struct blk_mq_tag_set *tag_set; spinlock_t queue_lock; void *priv; fmode_t file_mode; -- cgit From 1448a2a5360ae06f25e2edc61ae070dff5c0beb4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 11 Oct 2018 12:20:41 -0700 Subject: swim: fix cleanup on setup error If we fail to allocate the request queue for a disk, we still need to free that disk, not just the previous ones. Additionally, we need to cleanup the previous request queues. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- drivers/block/swim.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 0e31884a9519..cbe909c51847 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -887,8 +887,17 @@ static int swim_floppy_init(struct swim_priv *swd) exit_put_disks: unregister_blkdev(FLOPPY_MAJOR, "fd"); - while (drive--) - put_disk(swd->unit[drive].disk); + do { + struct gendisk *disk = swd->unit[drive].disk; + + if (disk) { + if (disk->queue) { + blk_cleanup_queue(disk->queue); + disk->queue = NULL; + } + put_disk(disk); + } + } while (drive--); return err; } -- cgit From e3896d77b7025c39150eb5ada33ec2c88f4ad445 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 15 Oct 2018 09:12:12 -0600 Subject: swim: convert to blk-mq The only interesting thing here is that there may be two floppies (i.e., request queues) sharing the same controller, so we use the global struct swim_priv->lock to check whether the controller is busy. Compile-tested only. 
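Since both drives funnel through the one controller, ->queue_rq reduces to a trylock on swd->lock; in outline (mirroring the diff below):

	if (!spin_trylock_irq(&swd->lock))
		return BLK_STS_DEV_RESOURCE;	/* controller busy, blk-mq will retry */

	blk_mq_start_request(req);
	/* ... the read runs to completion under swd->lock ... */
	spin_unlock_irq(&swd->lock);
	return BLK_STS_OK;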
Tested-by: Finn Thain Acked-by: Laurent Vivier Signed-off-by: Omar Sandoval Converted to blk_mq_init_sq_queue() Signed-off-by: Jens Axboe --- drivers/block/swim.c | 93 +++++++++++++++++++++++----------------------------- 1 file changed, 41 insertions(+), 52 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index cbe909c51847..3fa6fcc34790 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -190,6 +190,7 @@ struct floppy_state { int ref_count; struct gendisk *disk; + struct blk_mq_tag_set tag_set; /* parent controller */ @@ -211,7 +212,6 @@ enum head { struct swim_priv { struct swim __iomem *base; spinlock_t lock; - int fdc_queue; int floppy_count; struct floppy_state unit[FD_MAX_UNIT]; }; @@ -525,58 +525,36 @@ static blk_status_t floppy_read_sectors(struct floppy_state *fs, return 0; } -static struct request *swim_next_request(struct swim_priv *swd) +static blk_status_t swim_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct request_queue *q; - struct request *rq; - int old_pos = swd->fdc_queue; + struct floppy_state *fs = hctx->queue->queuedata; + struct swim_priv *swd = fs->swd; + struct request *req = bd->rq; + blk_status_t err; - do { - q = swd->unit[swd->fdc_queue].disk->queue; - if (++swd->fdc_queue == swd->floppy_count) - swd->fdc_queue = 0; - if (q) { - rq = blk_fetch_request(q); - if (rq) - return rq; - } - } while (swd->fdc_queue != old_pos); + if (!spin_trylock_irq(&swd->lock)) + return BLK_STS_DEV_RESOURCE; - return NULL; -} + blk_mq_start_request(req); -static void do_fd_request(struct request_queue *q) -{ - struct swim_priv *swd = q->queuedata; - struct request *req; - struct floppy_state *fs; + if (!fs->disk_in || rq_data_dir(req) == WRITE) { + err = BLK_STS_IOERR; + goto out; + } - req = swim_next_request(swd); - while (req) { - blk_status_t err = BLK_STS_IOERR; + do { + err = floppy_read_sectors(fs, blk_rq_pos(req), + blk_rq_cur_sectors(req), + bio_data(req->bio)); + } while (blk_update_request(req, err, blk_rq_cur_bytes(req))); + __blk_mq_end_request(req, err); - fs = req->rq_disk->private_data; - if (blk_rq_pos(req) >= fs->total_secs) - goto done; - if (!fs->disk_in) - goto done; - if (rq_data_dir(req) == WRITE && fs->write_protected) - goto done; + err = BLK_STS_OK; +out: + spin_unlock_irq(&swd->lock); + return err; - switch (rq_data_dir(req)) { - case WRITE: - /* NOT IMPLEMENTED */ - break; - case READ: - err = floppy_read_sectors(fs, blk_rq_pos(req), - blk_rq_cur_sectors(req), - bio_data(req->bio)); - break; - } - done: - if (!__blk_end_request_cur(req, err)) - req = swim_next_request(swd); - } } static struct floppy_struct floppy_type[4] = { @@ -823,6 +801,10 @@ static int swim_add_floppy(struct swim_priv *swd, enum drive_location location) return 0; } +static const struct blk_mq_ops swim_mq_ops = { + .queue_rq = swim_queue_rq, +}; + static int swim_floppy_init(struct swim_priv *swd) { int err; @@ -852,20 +834,25 @@ static int swim_floppy_init(struct swim_priv *swd) spin_lock_init(&swd->lock); for (drive = 0; drive < swd->floppy_count; drive++) { + struct request_queue *q; + swd->unit[drive].disk = alloc_disk(1); if (swd->unit[drive].disk == NULL) { err = -ENOMEM; goto exit_put_disks; } - swd->unit[drive].disk->queue = blk_init_queue(do_fd_request, - &swd->lock); - if (!swd->unit[drive].disk->queue) { - err = -ENOMEM; + + q = blk_mq_init_sq_queue(&swd->unit[drive].tag_set, &swim_mq_ops, + 2, 
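/* queue depth 2 - one request in flight plus one parked, an assumption; the patch does not state the rationale */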
BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(q)) { + err = PTR_ERR(q); goto exit_put_disks; } + + swd->unit[drive].disk->queue = q; blk_queue_bounce_limit(swd->unit[drive].disk->queue, BLK_BOUNCE_HIGH); - swd->unit[drive].disk->queue->queuedata = swd; + swd->unit[drive].disk->queue->queuedata = &swd->unit[drive]; swd->unit[drive].swd = swd; } @@ -895,6 +882,7 @@ exit_put_disks: blk_cleanup_queue(disk->queue); disk->queue = NULL; } + blk_mq_free_tag_set(&swd->unit[drive].tag_set); put_disk(disk); } } while (drive--); @@ -970,6 +958,7 @@ static int swim_remove(struct platform_device *dev) for (drive = 0; drive < swd->floppy_count; drive++) { del_gendisk(swd->unit[drive].disk); blk_cleanup_queue(swd->unit[drive].disk->queue); + blk_mq_free_tag_set(&swd->unit[drive].tag_set); put_disk(swd->unit[drive].disk); } -- cgit From dbaa54b65e7a06e6ff2192f8fe92ecd51accc766 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 11 Oct 2018 12:20:43 -0700 Subject: swim3: add real error handling in setup The driver doesn't have support for removing a device that has already been configured, but with more careful ordering we can avoid the need for that and make sure that we don't leak generic resources. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- drivers/block/swim3.c | 60 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 469541c1e51e..df7ebe016e2c 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1202,47 +1202,59 @@ static int swim3_add_device(struct macio_dev *mdev, int index) static int swim3_attach(struct macio_dev *mdev, const struct of_device_id *match) { + struct floppy_state *fs; struct gendisk *disk; - int index, rc; + int rc; - index = floppy_count++; - if (index >= MAX_FLOPPIES) + if (floppy_count >= MAX_FLOPPIES) return -ENXIO; - /* Add the drive */ - rc = swim3_add_device(mdev, index); - if (rc) - return rc; - /* Now register that disk. 
Same comment about failure handling */ - disk = disks[index] = alloc_disk(1); - if (disk == NULL) - return -ENOMEM; + if (floppy_count == 0) { + rc = register_blkdev(FLOPPY_MAJOR, "fd"); + if (rc) + return rc; + } + + fs = &floppy_states[floppy_count]; + + disk = alloc_disk(1); + if (disk == NULL) { + rc = -ENOMEM; + goto out_unregister; + } disk->queue = blk_init_queue(do_fd_request, &swim3_lock); if (disk->queue == NULL) { - put_disk(disk); - return -ENOMEM; + rc = -ENOMEM; + goto out_put_disk; } blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); - disk->queue->queuedata = &floppy_states[index]; + disk->queue->queuedata = fs; - if (index == 0) { - /* If we failed, there isn't much we can do as the driver is still - * too dumb to remove the device, just bail out - */ - if (register_blkdev(FLOPPY_MAJOR, "fd")) - return 0; - } + rc = swim3_add_device(mdev, floppy_count); + if (rc) + goto out_cleanup_queue; disk->major = FLOPPY_MAJOR; - disk->first_minor = index; + disk->first_minor = floppy_count; disk->fops = &floppy_fops; - disk->private_data = &floppy_states[index]; + disk->private_data = fs; disk->flags |= GENHD_FL_REMOVABLE; - sprintf(disk->disk_name, "fd%d", index); + sprintf(disk->disk_name, "fd%d", floppy_count); set_capacity(disk, 2880); add_disk(disk); + disks[floppy_count++] = disk; return 0; + +out_cleanup_queue: + blk_cleanup_queue(disk->queue); + disk->queue = NULL; +out_put_disk: + put_disk(disk); +out_unregister: + if (floppy_count == 0) + unregister_blkdev(FLOPPY_MAJOR, "fd"); + return rc; } static const struct of_device_id swim3_match[] = -- cgit From 8ccb8cb1892b27655247ff42867665763793ffe8 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 15 Oct 2018 09:14:46 -0600 Subject: swim3: convert to blk-mq Pretty simple conversion. grab_drive() could probably be replaced by some freeze/quiesce incantation, but I left it alone, and just used freeze/quiesce for eject. Compile-tested only. Cc: Benjamin Herrenschmidt Signed-off-by: Omar Sandoval Converted to blk_mq_init_sq_queue(). Signed-off-by: Jens Axboe --- drivers/block/swim3.c | 153 +++++++++++++++++++++----------------------------- 1 file changed, 65 insertions(+), 88 deletions(-) diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index df7ebe016e2c..c1c676a33e4a 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -206,6 +206,7 @@ struct floppy_state { char dbdma_cmd_space[5 * sizeof(struct dbdma_cmd)]; int index; struct request *cur_req; + struct blk_mq_tag_set tag_set; }; #define swim3_err(fmt, arg...) 
dev_err(&fs->mdev->ofdev.dev, "[fd%d] " fmt, fs->index, arg) @@ -260,16 +261,15 @@ static int floppy_revalidate(struct gendisk *disk); static bool swim3_end_request(struct floppy_state *fs, blk_status_t err, unsigned int nr_bytes) { struct request *req = fs->cur_req; - int rc; swim3_dbg(" end request, err=%d nr_bytes=%d, cur_req=%p\n", err, nr_bytes, req); if (err) nr_bytes = blk_rq_cur_bytes(req); - rc = __blk_end_request(req, err, nr_bytes); - if (rc) + if (blk_update_request(req, err, nr_bytes)) return true; + __blk_mq_end_request(req, err); fs->cur_req = NULL; return false; } @@ -309,86 +309,58 @@ static int swim3_readbit(struct floppy_state *fs, int bit) return (stat & DATA) == 0; } -static void start_request(struct floppy_state *fs) +static blk_status_t swim3_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct request *req; + struct floppy_state *fs = hctx->queue->queuedata; + struct request *req = bd->rq; unsigned long x; - swim3_dbg("start request, initial state=%d\n", fs->state); - - if (fs->state == idle && fs->wanted) { - fs->state = available; - wake_up(&fs->wait); - return; + spin_lock_irq(&swim3_lock); + if (fs->cur_req || fs->state != idle) { + spin_unlock_irq(&swim3_lock); + return BLK_STS_DEV_RESOURCE; } - while (fs->state == idle) { - swim3_dbg("start request, idle loop, cur_req=%p\n", fs->cur_req); - if (!fs->cur_req) { - fs->cur_req = blk_fetch_request(disks[fs->index]->queue); - swim3_dbg(" fetched request %p\n", fs->cur_req); - if (!fs->cur_req) - break; - } - req = fs->cur_req; - - if (fs->mdev->media_bay && - check_media_bay(fs->mdev->media_bay) != MB_FD) { - swim3_dbg("%s", " media bay absent, dropping req\n"); - swim3_end_request(fs, BLK_STS_IOERR, 0); - continue; - } - -#if 0 /* This is really too verbose */ - swim3_dbg("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n", - req->rq_disk->disk_name, req->cmd, - (long)blk_rq_pos(req), blk_rq_sectors(req), - bio_data(req->bio)); - swim3_dbg(" current_nr_sectors=%u\n", - blk_rq_cur_sectors(req)); -#endif - - if (blk_rq_pos(req) >= fs->total_secs) { - swim3_dbg(" pos out of bounds (%ld, max is %ld)\n", - (long)blk_rq_pos(req), (long)fs->total_secs); - swim3_end_request(fs, BLK_STS_IOERR, 0); - continue; - } - if (fs->ejected) { - swim3_dbg("%s", " disk ejected\n"); + blk_mq_start_request(req); + fs->cur_req = req; + if (fs->mdev->media_bay && + check_media_bay(fs->mdev->media_bay) != MB_FD) { + swim3_dbg("%s", " media bay absent, dropping req\n"); + swim3_end_request(fs, BLK_STS_IOERR, 0); + goto out; + } + if (fs->ejected) { + swim3_dbg("%s", " disk ejected\n"); + swim3_end_request(fs, BLK_STS_IOERR, 0); + goto out; + } + if (rq_data_dir(req) == WRITE) { + if (fs->write_prot < 0) + fs->write_prot = swim3_readbit(fs, WRITE_PROT); + if (fs->write_prot) { + swim3_dbg("%s", " try to write, disk write protected\n"); swim3_end_request(fs, BLK_STS_IOERR, 0); - continue; - } - - if (rq_data_dir(req) == WRITE) { - if (fs->write_prot < 0) - fs->write_prot = swim3_readbit(fs, WRITE_PROT); - if (fs->write_prot) { - swim3_dbg("%s", " try to write, disk write protected\n"); - swim3_end_request(fs, BLK_STS_IOERR, 0); - continue; - } + goto out; } - - /* Do not remove the cast. 
blk_rq_pos(req) is now a - * sector_t and can be 64 bits, but it will never go - * past 32 bits for this driver anyway, so we can - * safely cast it down and not have to do a 64/32 - * division - */ - fs->req_cyl = ((long)blk_rq_pos(req)) / fs->secpercyl; - x = ((long)blk_rq_pos(req)) % fs->secpercyl; - fs->head = x / fs->secpertrack; - fs->req_sector = x % fs->secpertrack + 1; - fs->state = do_transfer; - fs->retries = 0; - - act(fs); } -} -static void do_fd_request(struct request_queue * q) -{ - start_request(q->queuedata); + /* + * Do not remove the cast. blk_rq_pos(req) is now a sector_t and can be + * 64 bits, but it will never go past 32 bits for this driver anyway, so + * we can safely cast it down and not have to do a 64/32 division + */ + fs->req_cyl = ((long)blk_rq_pos(req)) / fs->secpercyl; + x = ((long)blk_rq_pos(req)) % fs->secpercyl; + fs->head = x / fs->secpertrack; + fs->req_sector = x % fs->secpertrack + 1; + fs->state = do_transfer; + fs->retries = 0; + + act(fs); + +out: + spin_unlock_irq(&swim3_lock); + return BLK_STS_OK; } static void set_timeout(struct floppy_state *fs, int nticks, @@ -585,7 +557,6 @@ static void scan_timeout(struct timer_list *t) if (fs->retries > 5) { swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; - start_request(fs); } else { fs->state = jogging; act(fs); @@ -609,7 +580,6 @@ static void seek_timeout(struct timer_list *t) swim3_err("%s", "Seek timeout\n"); swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; - start_request(fs); spin_unlock_irqrestore(&swim3_lock, flags); } @@ -638,7 +608,6 @@ static void settle_timeout(struct timer_list *t) swim3_err("%s", "Seek settle timeout\n"); swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; - start_request(fs); unlock: spin_unlock_irqrestore(&swim3_lock, flags); } @@ -667,7 +636,6 @@ static void xfer_timeout(struct timer_list *t) (long)blk_rq_pos(fs->cur_req)); swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; - start_request(fs); spin_unlock_irqrestore(&swim3_lock, flags); } @@ -704,7 +672,6 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) if (fs->retries > 5) { swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; - start_request(fs); } else { fs->state = jogging; act(fs); @@ -796,7 +763,6 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) fs->state, rq_data_dir(req), intr, err); swim3_end_request(fs, BLK_STS_IOERR, 0); fs->state = idle; - start_request(fs); break; } fs->retries = 0; @@ -813,8 +779,6 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) } else fs->state = idle; } - if (fs->state == idle) - start_request(fs); break; default: swim3_err("Don't know what to do in state %d\n", fs->state); @@ -862,14 +826,19 @@ static int grab_drive(struct floppy_state *fs, enum swim_state state, static void release_drive(struct floppy_state *fs) { + struct request_queue *q = disks[fs->index]->queue; unsigned long flags; swim3_dbg("%s", "-> release drive\n"); spin_lock_irqsave(&swim3_lock, flags); fs->state = idle; - start_request(fs); spin_unlock_irqrestore(&swim3_lock, flags); + + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); + blk_mq_unquiesce_queue(q); + blk_mq_unfreeze_queue(q); } static int fd_eject(struct floppy_state *fs) @@ -1089,6 +1058,10 @@ static const struct block_device_operations floppy_fops = { .revalidate_disk= floppy_revalidate, }; +static const struct blk_mq_ops swim3_mq_ops = { + .queue_rq = swim3_queue_rq, +}; + static void swim3_mb_event(struct macio_dev* mdev, int mb_state) { struct floppy_state *fs = 
macio_get_drvdata(mdev); @@ -1222,9 +1195,12 @@ static int swim3_attach(struct macio_dev *mdev, rc = -ENOMEM; goto out_unregister; } - disk->queue = blk_init_queue(do_fd_request, &swim3_lock); - if (disk->queue == NULL) { - rc = -ENOMEM; + + disk->queue = blk_mq_init_sq_queue(&fs->tag_set, &swim3_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(disk->queue)) { + rc = PTR_ERR(disk->queue); + disk->queue = NULL; goto out_put_disk; } blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); @@ -1249,6 +1225,7 @@ static int swim3_attach(struct macio_dev *mdev, out_cleanup_queue: blk_cleanup_queue(disk->queue); disk->queue = NULL; + blk_mq_free_tag_set(&fs->tag_set); out_put_disk: put_disk(disk); out_unregister: -- cgit From c87228f16f0acdf242dee62f139013212208473f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 11 Oct 2018 12:20:45 -0700 Subject: amiflop: fold headers into C file amifd.h and amifdreg.h are only used from amiflop.c, and they're pretty small, so move the contents to amiflop.c and get rid of the .h files. This is preparation for adding a struct blk_mq_tag_set to struct amiga_floppy_struct. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- drivers/block/amiflop.c | 123 +++++++++++++++++++++++++++++++++++++++++++++-- include/linux/amifd.h | 63 ------------------------ include/linux/amifdreg.h | 82 ------------------------------- 3 files changed, 120 insertions(+), 148 deletions(-) delete mode 100644 include/linux/amifd.h delete mode 100644 include/linux/amifdreg.h diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 3aaf6af3ec23..a7d6e6a9b12f 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -61,10 +61,8 @@ #include #include #include -#include -#include #include -#include +#include #include #include #include @@ -86,6 +84,125 @@ * Defines */ +/* + * CIAAPRA bits (read only) + */ + +#define DSKRDY (0x1<<5) /* disk ready when low */ +#define DSKTRACK0 (0x1<<4) /* head at track zero when low */ +#define DSKPROT (0x1<<3) /* disk protected when low */ +#define DSKCHANGE (0x1<<2) /* low when disk removed */ + +/* + * CIAAPRB bits (read/write) + */ + +#define DSKMOTOR (0x1<<7) /* motor on when low */ +#define DSKSEL3 (0x1<<6) /* select drive 3 when low */ +#define DSKSEL2 (0x1<<5) /* select drive 2 when low */ +#define DSKSEL1 (0x1<<4) /* select drive 1 when low */ +#define DSKSEL0 (0x1<<3) /* select drive 0 when low */ +#define DSKSIDE (0x1<<2) /* side selection: 0 = upper, 1 = lower */ +#define DSKDIREC (0x1<<1) /* step direction: 0=in, 1=out (to trk 0) */ +#define DSKSTEP (0x1) /* pulse low to step head 1 track */ + +/* + * DSKBYTR bits (read only) + */ + +#define DSKBYT (1<<15) /* register contains valid byte when set */ +#define DMAON (1<<14) /* disk DMA enabled */ +#define DISKWRITE (1<<13) /* disk write bit in DSKLEN enabled */ +#define WORDEQUAL (1<<12) /* DSKSYNC register match when true */ +/* bits 7-0 are data */ + +/* + * ADKCON/ADKCONR bits + */ + +#ifndef SETCLR +#define ADK_SETCLR (1<<15) /* control bit */ +#endif +#define ADK_PRECOMP1 (1<<14) /* precompensation selection */ +#define ADK_PRECOMP0 (1<<13) /* 00=none, 01=140ns, 10=280ns, 11=500ns */ +#define ADK_MFMPREC (1<<12) /* 0=GCR precomp., 1=MFM precomp. 
*/ +#define ADK_WORDSYNC (1<<10) /* enable DSKSYNC auto DMA */ +#define ADK_MSBSYNC (1<<9) /* when 1, enable sync on MSbit (for GCR) */ +#define ADK_FAST (1<<8) /* bit cell: 0=2us (GCR), 1=1us (MFM) */ + +/* + * DSKLEN bits + */ + +#define DSKLEN_DMAEN (1<<15) +#define DSKLEN_WRITE (1<<14) + +/* + * INTENA/INTREQ bits + */ + +#define DSKINDEX (0x1<<4) /* DSKINDEX bit */ + +/* + * Misc + */ + +#define MFM_SYNC 0x4489 /* standard MFM sync value */ + +/* Values for FD_COMMAND */ +#define FD_RECALIBRATE 0x07 /* move to track 0 */ +#define FD_SEEK 0x0F /* seek track */ +#define FD_READ 0xE6 /* read with MT, MFM, SKip deleted */ +#define FD_WRITE 0xC5 /* write with MT, MFM */ +#define FD_SENSEI 0x08 /* Sense Interrupt Status */ +#define FD_SPECIFY 0x03 /* specify HUT etc */ +#define FD_FORMAT 0x4D /* format one track */ +#define FD_VERSION 0x10 /* get version code */ +#define FD_CONFIGURE 0x13 /* configure FIFO operation */ +#define FD_PERPENDICULAR 0x12 /* perpendicular r/w mode */ + +#define FD_MAX_UNITS 4 /* Max. Number of drives */ +#define FLOPPY_MAX_SECTORS 22 /* Max. Number of sectors per track */ + +struct fd_data_type { + char *name; /* description of data type */ + int sects; /* sectors per track */ + int (*read_fkt)(int); /* read whole track */ + void (*write_fkt)(int); /* write whole track */ +}; + +struct fd_drive_type { + unsigned long code; /* code returned from drive */ + char *name; /* description of drive */ + unsigned int tracks; /* number of tracks */ + unsigned int heads; /* number of heads */ + unsigned int read_size; /* raw read size for one track */ + unsigned int write_size; /* raw write size for one track */ + unsigned int sect_mult; /* sectors and gap multiplier (HD = 2) */ + unsigned int precomp1; /* start track for precomp 1 */ + unsigned int precomp2; /* start track for precomp 2 */ + unsigned int step_delay; /* time (in ms) for delay after step */ + unsigned int settle_time; /* time to settle after dir change */ + unsigned int side_time; /* time needed to change sides */ +}; + +struct amiga_floppy_struct { + struct fd_drive_type *type; /* type of floppy for this unit */ + struct fd_data_type *dtype; /* type of floppy for this unit */ + int track; /* current track (-1 == unknown) */ + unsigned char *trackbuf; /* current track (kmaloc()'d */ + + int blocks; /* total # blocks on disk */ + + int changed; /* true when not known */ + int disk; /* disk in drive (-1 == unknown) */ + int motor; /* true when motor is at speed */ + int busy; /* true when drive is active */ + int dirty; /* true when trackbuf is not on disk */ + int status; /* current error code for unit */ + struct gendisk *gendisk; +}; + /* * Error codes */ diff --git a/include/linux/amifd.h b/include/linux/amifd.h deleted file mode 100644 index 202a77dbe46d..000000000000 --- a/include/linux/amifd.h +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _AMIFD_H -#define _AMIFD_H - -/* Definitions for the Amiga floppy driver */ - -#include - -#define FD_MAX_UNITS 4 /* Max. Number of drives */ -#define FLOPPY_MAX_SECTORS 22 /* Max. 
Number of sectors per track */ - -#ifndef ASSEMBLER - -struct fd_data_type { - char *name; /* description of data type */ - int sects; /* sectors per track */ -#ifdef __STDC__ - int (*read_fkt)(int); - void (*write_fkt)(int); -#else - int (*read_fkt)(); /* read whole track */ - void (*write_fkt)(); /* write whole track */ -#endif -}; - -/* -** Floppy type descriptions -*/ - -struct fd_drive_type { - unsigned long code; /* code returned from drive */ - char *name; /* description of drive */ - unsigned int tracks; /* number of tracks */ - unsigned int heads; /* number of heads */ - unsigned int read_size; /* raw read size for one track */ - unsigned int write_size; /* raw write size for one track */ - unsigned int sect_mult; /* sectors and gap multiplier (HD = 2) */ - unsigned int precomp1; /* start track for precomp 1 */ - unsigned int precomp2; /* start track for precomp 2 */ - unsigned int step_delay; /* time (in ms) for delay after step */ - unsigned int settle_time; /* time to settle after dir change */ - unsigned int side_time; /* time needed to change sides */ -}; - -struct amiga_floppy_struct { - struct fd_drive_type *type; /* type of floppy for this unit */ - struct fd_data_type *dtype; /* type of floppy for this unit */ - int track; /* current track (-1 == unknown) */ - unsigned char *trackbuf; /* current track (kmaloc()'d */ - - int blocks; /* total # blocks on disk */ - - int changed; /* true when not known */ - int disk; /* disk in drive (-1 == unknown) */ - int motor; /* true when motor is at speed */ - int busy; /* true when drive is active */ - int dirty; /* true when trackbuf is not on disk */ - int status; /* current error code for unit */ - struct gendisk *gendisk; -}; -#endif - -#endif diff --git a/include/linux/amifdreg.h b/include/linux/amifdreg.h deleted file mode 100644 index 9b514d05ec70..000000000000 --- a/include/linux/amifdreg.h +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_AMIFDREG_H -#define _LINUX_AMIFDREG_H - -/* -** CIAAPRA bits (read only) -*/ - -#define DSKRDY (0x1<<5) /* disk ready when low */ -#define DSKTRACK0 (0x1<<4) /* head at track zero when low */ -#define DSKPROT (0x1<<3) /* disk protected when low */ -#define DSKCHANGE (0x1<<2) /* low when disk removed */ - -/* -** CIAAPRB bits (read/write) -*/ - -#define DSKMOTOR (0x1<<7) /* motor on when low */ -#define DSKSEL3 (0x1<<6) /* select drive 3 when low */ -#define DSKSEL2 (0x1<<5) /* select drive 2 when low */ -#define DSKSEL1 (0x1<<4) /* select drive 1 when low */ -#define DSKSEL0 (0x1<<3) /* select drive 0 when low */ -#define DSKSIDE (0x1<<2) /* side selection: 0 = upper, 1 = lower */ -#define DSKDIREC (0x1<<1) /* step direction: 0=in, 1=out (to trk 0) */ -#define DSKSTEP (0x1) /* pulse low to step head 1 track */ - -/* -** DSKBYTR bits (read only) -*/ - -#define DSKBYT (1<<15) /* register contains valid byte when set */ -#define DMAON (1<<14) /* disk DMA enabled */ -#define DISKWRITE (1<<13) /* disk write bit in DSKLEN enabled */ -#define WORDEQUAL (1<<12) /* DSKSYNC register match when true */ -/* bits 7-0 are data */ - -/* -** ADKCON/ADKCONR bits -*/ - -#ifndef SETCLR -#define ADK_SETCLR (1<<15) /* control bit */ -#endif -#define ADK_PRECOMP1 (1<<14) /* precompensation selection */ -#define ADK_PRECOMP0 (1<<13) /* 00=none, 01=140ns, 10=280ns, 11=500ns */ -#define ADK_MFMPREC (1<<12) /* 0=GCR precomp., 1=MFM precomp. 
*/ -#define ADK_WORDSYNC (1<<10) /* enable DSKSYNC auto DMA */ -#define ADK_MSBSYNC (1<<9) /* when 1, enable sync on MSbit (for GCR) */ -#define ADK_FAST (1<<8) /* bit cell: 0=2us (GCR), 1=1us (MFM) */ - -/* -** DSKLEN bits -*/ - -#define DSKLEN_DMAEN (1<<15) -#define DSKLEN_WRITE (1<<14) - -/* -** INTENA/INTREQ bits -*/ - -#define DSKINDEX (0x1<<4) /* DSKINDEX bit */ - -/* -** Misc -*/ - -#define MFM_SYNC 0x4489 /* standard MFM sync value */ - -/* Values for FD_COMMAND */ -#define FD_RECALIBRATE 0x07 /* move to track 0 */ -#define FD_SEEK 0x0F /* seek track */ -#define FD_READ 0xE6 /* read with MT, MFM, SKip deleted */ -#define FD_WRITE 0xC5 /* write with MT, MFM */ -#define FD_SENSEI 0x08 /* Sense Interrupt Status */ -#define FD_SPECIFY 0x03 /* specify HUT etc */ -#define FD_FORMAT 0x4D /* format one track */ -#define FD_VERSION 0x10 /* get version code */ -#define FD_CONFIGURE 0x13 /* configure FIFO operation */ -#define FD_PERPENDICULAR 0x12 /* perpendicular r/w mode */ - -#endif /* _LINUX_AMIFDREG_H */ -- cgit From 53d0f8dbde89cf6c862c7a62e00c6123e02cba41 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 11 Oct 2018 12:20:46 -0700 Subject: amiflop: clean up on errors during setup The error handling in fd_probe_drives() doesn't clean up at all. Fix it up in preparation for converting to blk-mq. While we're here, get rid of the commented out amiga_floppy_remove(). Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- drivers/block/amiflop.c | 84 +++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 44 deletions(-) diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index a7d6e6a9b12f..eef3b085e70a 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1818,11 +1818,41 @@ static const struct block_device_operations floppy_fops = { .check_events = amiga_check_events, }; +static struct gendisk *fd_alloc_disk(int drive) +{ + struct gendisk *disk; + + disk = alloc_disk(1); + if (!disk) + goto out; + + disk->queue = blk_init_queue(do_fd_request, &amiflop_lock); + if (IS_ERR(disk->queue)) { + disk->queue = NULL; + goto out_put_disk; + } + + unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL); + if (!unit[drive].trackbuf) + goto out_cleanup_queue; + + return disk; + +out_cleanup_queue: + blk_cleanup_queue(disk->queue); + disk->queue = NULL; +out_put_disk: + put_disk(disk); +out: + unit[drive].type->code = FD_NODRIVE; + return NULL; +} + static int __init fd_probe_drives(void) { int drive,drives,nomem; - printk(KERN_INFO "FD: probing units\nfound "); + pr_info("FD: probing units\nfound"); drives=0; nomem=0; for(drive=0;drivecode == FD_NODRIVE) continue; - disk = alloc_disk(1); + + disk = fd_alloc_disk(drive); if (!disk) { - unit[drive].type->code = FD_NODRIVE; + pr_cont(" no mem for fd%d", drive); + nomem = 1; continue; } unit[drive].gendisk = disk; - - disk->queue = blk_init_queue(do_fd_request, &amiflop_lock); - if (!disk->queue) { - unit[drive].type->code = FD_NODRIVE; - continue; - } - drives++; - if ((unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL)) == NULL) { - printk("no mem for "); - unit[drive].type = &drive_types[num_dr_types - 1]; /* FD_NODRIVE */ - drives--; - nomem = 1; - } - printk("fd%d ",drive); + + pr_cont(" fd%d",drive); disk->major = FLOPPY_MAJOR; disk->first_minor = drive; disk->fops = &floppy_fops; @@ -1861,11 +1881,11 @@ static int __init fd_probe_drives(void) } if ((drives > 0) || (nomem == 0)) { if (drives == 0) - printk("no drives"); - printk("\n"); + pr_cont(" no 
drives"); + pr_cont("\n"); return drives; } - printk("\n"); + pr_cont("\n"); return -ENOMEM; } @@ -1948,30 +1968,6 @@ out_blkdev: return ret; } -#if 0 /* not safe to unload */ -static int __exit amiga_floppy_remove(struct platform_device *pdev) -{ - int i; - - for( i = 0; i < FD_MAX_UNITS; i++) { - if (unit[i].type->code != FD_NODRIVE) { - struct request_queue *q = unit[i].gendisk->queue; - del_gendisk(unit[i].gendisk); - put_disk(unit[i].gendisk); - kfree(unit[i].trackbuf); - if (q) - blk_cleanup_queue(q); - } - } - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); - free_irq(IRQ_AMIGA_CIAA_TB, NULL); - free_irq(IRQ_AMIGA_DSKBLK, NULL); - custom.dmacon = DMAF_DISK; /* disable DMA */ - amiga_chip_free(raw_buf); - unregister_blkdev(FLOPPY_MAJOR, "fd"); -} -#endif - static struct platform_driver amiga_floppy_driver = { .driver = { .name = "amiga-floppy", -- cgit From 21b07f35544af5e2c11f079057e8fb4263d35dd3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 15 Oct 2018 09:16:37 -0600 Subject: amiflop: convert to blk-mq Straightforward conversion, just use the existing amiflop_lock to serialize access to the controller. Compile-tested only. Cc: Laurent Vivier Signed-off-by: Omar Sandoval Converted to blk_mq_init_sq_queue() Signed-off-by: Jens Axboe --- drivers/block/amiflop.c | 113 +++++++++++++++--------------------------------- 1 file changed, 35 insertions(+), 78 deletions(-) diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index eef3b085e70a..bf996bd44cfc 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -201,6 +201,7 @@ struct amiga_floppy_struct { int dirty; /* true when trackbuf is not on disk */ int status; /* current error code for unit */ struct gendisk *gendisk; + struct blk_mq_tag_set tag_set; }; /* @@ -281,7 +282,6 @@ static volatile int selected = -1; /* currently selected drive */ static int writepending; static int writefromint; static char *raw_buf; -static int fdc_queue; static DEFINE_SPINLOCK(amiflop_lock); @@ -1454,76 +1454,20 @@ static int get_track(int drive, int track) return -1; } -/* - * Round-robin between our available drives, doing one request from each - */ -static struct request *set_next_request(void) +static blk_status_t amiflop_rw_cur_segment(struct amiga_floppy_struct *floppy, + struct request *rq) { - struct request_queue *q; - int cnt = FD_MAX_UNITS; - struct request *rq = NULL; - - /* Find next queue we can dispatch from */ - fdc_queue = fdc_queue + 1; - if (fdc_queue == FD_MAX_UNITS) - fdc_queue = 0; - - for(cnt = FD_MAX_UNITS; cnt > 0; cnt--) { - - if (unit[fdc_queue].type->code == FD_NODRIVE) { - if (++fdc_queue == FD_MAX_UNITS) - fdc_queue = 0; - continue; - } - - q = unit[fdc_queue].gendisk->queue; - if (q) { - rq = blk_fetch_request(q); - if (rq) - break; - } - - if (++fdc_queue == FD_MAX_UNITS) - fdc_queue = 0; - } - - return rq; -} - -static void redo_fd_request(void) -{ - struct request *rq; + int drive = floppy - unit; unsigned int cnt, block, track, sector; - int drive; - struct amiga_floppy_struct *floppy; char *data; - unsigned long flags; - blk_status_t err; -next_req: - rq = set_next_request(); - if (!rq) { - /* Nothing left to do */ - return; - } - - floppy = rq->rq_disk->private_data; - drive = floppy - unit; - -next_segment: - /* Here someone could investigate to be more efficient */ - for (cnt = 0, err = BLK_STS_OK; cnt < blk_rq_cur_sectors(rq); cnt++) { + for (cnt = 0; cnt < blk_rq_cur_sectors(rq); cnt++) { #ifdef DEBUG printk("fd: sector %ld + %d requested for %s\n", blk_rq_pos(rq), cnt, 
(rq_data_dir(rq) == READ) ? "read" : "write"); #endif block = blk_rq_pos(rq) + cnt; - if ((int)block > floppy->blocks) { - err = BLK_STS_IOERR; - break; - } - track = block / (floppy->dtype->sects * floppy->type->sect_mult); sector = block % (floppy->dtype->sects * floppy->type->sect_mult); data = bio_data(rq->bio) + 512 * cnt; @@ -1532,10 +1476,8 @@ next_segment: "0x%08lx\n", track, sector, data); #endif - if (get_track(drive, track) == -1) { - err = BLK_STS_IOERR; - break; - } + if (get_track(drive, track) == -1) + return BLK_STS_IOERR; if (rq_data_dir(rq) == READ) { memcpy(data, floppy->trackbuf + sector * 512, 512); @@ -1543,31 +1485,40 @@ next_segment: memcpy(floppy->trackbuf + sector * 512, data, 512); /* keep the drive spinning while writes are scheduled */ - if (!fd_motor_on(drive)) { - err = BLK_STS_IOERR; - break; - } + if (!fd_motor_on(drive)) + return BLK_STS_IOERR; /* * setup a callback to write the track buffer * after a short (1 tick) delay. */ - local_irq_save(flags); - floppy->dirty = 1; /* reset the timer */ mod_timer (flush_track_timer + drive, jiffies + 1); - local_irq_restore(flags); } } - if (__blk_end_request_cur(rq, err)) - goto next_segment; - goto next_req; + return BLK_STS_OK; } -static void do_fd_request(struct request_queue * q) +static blk_status_t amiflop_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - redo_fd_request(); + struct request *rq = bd->rq; + struct amiga_floppy_struct *floppy = rq->rq_disk->private_data; + blk_status_t err; + + if (!spin_trylock_irq(&amiflop_lock)) + return BLK_STS_DEV_RESOURCE; + + blk_mq_start_request(rq); + + do { + err = amiflop_rw_cur_segment(floppy, rq); + } while (blk_update_request(rq, err, blk_rq_cur_bytes(rq))); + blk_mq_end_request(rq, err); + + spin_unlock_irq(&amiflop_lock); + return BLK_STS_OK; } static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) @@ -1818,6 +1769,10 @@ static const struct block_device_operations floppy_fops = { .check_events = amiga_check_events, }; +static const struct blk_mq_ops amiflop_mq_ops = { + .queue_rq = amiflop_queue_rq, +}; + static struct gendisk *fd_alloc_disk(int drive) { struct gendisk *disk; @@ -1826,7 +1781,8 @@ static struct gendisk *fd_alloc_disk(int drive) if (!disk) goto out; - disk->queue = blk_init_queue(do_fd_request, &amiflop_lock); + disk->queue = blk_mq_init_sq_queue(&unit[drive].tag_set, &amiflop_mq_ops, + 2, BLK_MQ_F_SHOULD_MERGE); if (IS_ERR(disk->queue)) { disk->queue = NULL; goto out_put_disk; @@ -1841,6 +1797,7 @@ static struct gendisk *fd_alloc_disk(int drive) out_cleanup_queue: blk_cleanup_queue(disk->queue); disk->queue = NULL; + blk_mq_free_tag_set(&unit[drive].tag_set); out_put_disk: put_disk(disk); out: -- cgit From 3e6b8c3c4b14f4f0c4a74027156eb31544c0b0da Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 11 Oct 2018 12:20:48 -0700 Subject: ataflop: fold headers into C file atafd.h and atafdreg.h are only used from ataflop.c, so merge them in there. 
Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- arch/m68k/include/asm/atafd.h | 13 ------- arch/m68k/include/asm/atafdreg.h | 80 -------------------------------------- drivers/block/ataflop.c | 83 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 81 insertions(+), 95 deletions(-) delete mode 100644 arch/m68k/include/asm/atafd.h delete mode 100644 arch/m68k/include/asm/atafdreg.h diff --git a/arch/m68k/include/asm/atafd.h b/arch/m68k/include/asm/atafd.h deleted file mode 100644 index ad7014cad633..000000000000 --- a/arch/m68k/include/asm/atafd.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_M68K_FD_H -#define _ASM_M68K_FD_H - -/* Definitions for the Atari Floppy driver */ - -struct atari_format_descr { - int track; /* to be formatted */ - int head; /* "" "" */ - int sect_offset; /* offset of first sector */ -}; - -#endif diff --git a/arch/m68k/include/asm/atafdreg.h b/arch/m68k/include/asm/atafdreg.h deleted file mode 100644 index c31b4919ed2d..000000000000 --- a/arch/m68k/include/asm/atafdreg.h +++ /dev/null @@ -1,80 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_FDREG_H -#define _LINUX_FDREG_H - -/* -** WD1772 stuff - */ - -/* register codes */ - -#define FDCSELREG_STP (0x80) /* command/status register */ -#define FDCSELREG_TRA (0x82) /* track register */ -#define FDCSELREG_SEC (0x84) /* sector register */ -#define FDCSELREG_DTA (0x86) /* data register */ - -/* register names for FDC_READ/WRITE macros */ - -#define FDCREG_CMD 0 -#define FDCREG_STATUS 0 -#define FDCREG_TRACK 2 -#define FDCREG_SECTOR 4 -#define FDCREG_DATA 6 - -/* command opcodes */ - -#define FDCCMD_RESTORE (0x00) /* - */ -#define FDCCMD_SEEK (0x10) /* | */ -#define FDCCMD_STEP (0x20) /* | TYP 1 Commands */ -#define FDCCMD_STIN (0x40) /* | */ -#define FDCCMD_STOT (0x60) /* - */ -#define FDCCMD_RDSEC (0x80) /* - TYP 2 Commands */ -#define FDCCMD_WRSEC (0xa0) /* - " */ -#define FDCCMD_RDADR (0xc0) /* - */ -#define FDCCMD_RDTRA (0xe0) /* | TYP 3 Commands */ -#define FDCCMD_WRTRA (0xf0) /* - */ -#define FDCCMD_FORCI (0xd0) /* - TYP 4 Command */ - -/* command modifier bits */ - -#define FDCCMDADD_SR6 (0x00) /* step rate settings */ -#define FDCCMDADD_SR12 (0x01) -#define FDCCMDADD_SR2 (0x02) -#define FDCCMDADD_SR3 (0x03) -#define FDCCMDADD_V (0x04) /* verify */ -#define FDCCMDADD_H (0x08) /* wait for spin-up */ -#define FDCCMDADD_U (0x10) /* update track register */ -#define FDCCMDADD_M (0x10) /* multiple sector access */ -#define FDCCMDADD_E (0x04) /* head settling flag */ -#define FDCCMDADD_P (0x02) /* precompensation off */ -#define FDCCMDADD_A0 (0x01) /* DAM flag */ - -/* status register bits */ - -#define FDCSTAT_MOTORON (0x80) /* motor on */ -#define FDCSTAT_WPROT (0x40) /* write protected (FDCCMD_WR*) */ -#define FDCSTAT_SPINUP (0x20) /* motor speed stable (Type I) */ -#define FDCSTAT_DELDAM (0x20) /* sector has deleted DAM (Type II+III) */ -#define FDCSTAT_RECNF (0x10) /* record not found */ -#define FDCSTAT_CRC (0x08) /* CRC error */ -#define FDCSTAT_TR00 (0x04) /* Track 00 flag (Type I) */ -#define FDCSTAT_LOST (0x04) /* Lost Data (Type II+III) */ -#define FDCSTAT_IDX (0x02) /* Index status (Type I) */ -#define FDCSTAT_DRQ (0x02) /* DRQ status (Type II+III) */ -#define FDCSTAT_BUSY (0x01) /* FDC is busy */ - - -/* PSG Port A Bit Nr 0 .. Side Sel .. 
0 -> Side 1 1 -> Side 2 */ -#define DSKSIDE (0x01) - -#define DSKDRVNONE (0x06) -#define DSKDRV0 (0x02) -#define DSKDRV1 (0x04) - -/* step rates */ -#define FDCSTEP_6 0x00 -#define FDCSTEP_12 0x01 -#define FDCSTEP_2 0x02 -#define FDCSTEP_3 0x03 - -#endif diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index dfb2c2622e5a..17df631c5d85 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -71,8 +71,6 @@ #include #include -#include -#include #include #include #include @@ -85,6 +83,87 @@ static DEFINE_MUTEX(ataflop_mutex); static struct request *fd_request; static int fdc_queue; +/* + * WD1772 stuff + */ + +/* register codes */ + +#define FDCSELREG_STP (0x80) /* command/status register */ +#define FDCSELREG_TRA (0x82) /* track register */ +#define FDCSELREG_SEC (0x84) /* sector register */ +#define FDCSELREG_DTA (0x86) /* data register */ + +/* register names for FDC_READ/WRITE macros */ + +#define FDCREG_CMD 0 +#define FDCREG_STATUS 0 +#define FDCREG_TRACK 2 +#define FDCREG_SECTOR 4 +#define FDCREG_DATA 6 + +/* command opcodes */ + +#define FDCCMD_RESTORE (0x00) /* - */ +#define FDCCMD_SEEK (0x10) /* | */ +#define FDCCMD_STEP (0x20) /* | TYP 1 Commands */ +#define FDCCMD_STIN (0x40) /* | */ +#define FDCCMD_STOT (0x60) /* - */ +#define FDCCMD_RDSEC (0x80) /* - TYP 2 Commands */ +#define FDCCMD_WRSEC (0xa0) /* - " */ +#define FDCCMD_RDADR (0xc0) /* - */ +#define FDCCMD_RDTRA (0xe0) /* | TYP 3 Commands */ +#define FDCCMD_WRTRA (0xf0) /* - */ +#define FDCCMD_FORCI (0xd0) /* - TYP 4 Command */ + +/* command modifier bits */ + +#define FDCCMDADD_SR6 (0x00) /* step rate settings */ +#define FDCCMDADD_SR12 (0x01) +#define FDCCMDADD_SR2 (0x02) +#define FDCCMDADD_SR3 (0x03) +#define FDCCMDADD_V (0x04) /* verify */ +#define FDCCMDADD_H (0x08) /* wait for spin-up */ +#define FDCCMDADD_U (0x10) /* update track register */ +#define FDCCMDADD_M (0x10) /* multiple sector access */ +#define FDCCMDADD_E (0x04) /* head settling flag */ +#define FDCCMDADD_P (0x02) /* precompensation off */ +#define FDCCMDADD_A0 (0x01) /* DAM flag */ + +/* status register bits */ + +#define FDCSTAT_MOTORON (0x80) /* motor on */ +#define FDCSTAT_WPROT (0x40) /* write protected (FDCCMD_WR*) */ +#define FDCSTAT_SPINUP (0x20) /* motor speed stable (Type I) */ +#define FDCSTAT_DELDAM (0x20) /* sector has deleted DAM (Type II+III) */ +#define FDCSTAT_RECNF (0x10) /* record not found */ +#define FDCSTAT_CRC (0x08) /* CRC error */ +#define FDCSTAT_TR00 (0x04) /* Track 00 flag (Type I) */ +#define FDCSTAT_LOST (0x04) /* Lost Data (Type II+III) */ +#define FDCSTAT_IDX (0x02) /* Index status (Type I) */ +#define FDCSTAT_DRQ (0x02) /* DRQ status (Type II+III) */ +#define FDCSTAT_BUSY (0x01) /* FDC is busy */ + + +/* PSG Port A Bit Nr 0 .. Side Sel .. 
0 -> Side 1 1 -> Side 2 */ +#define DSKSIDE (0x01) + +#define DSKDRVNONE (0x06) +#define DSKDRV0 (0x02) +#define DSKDRV1 (0x04) + +/* step rates */ +#define FDCSTEP_6 0x00 +#define FDCSTEP_12 0x01 +#define FDCSTEP_2 0x02 +#define FDCSTEP_3 0x03 + +struct atari_format_descr { + int track; /* to be formatted */ + int head; /* "" "" */ + int sect_offset; /* offset of first sector */ +}; + /* Disk types: DD, HD, ED */ static struct atari_disk_type { const char *name; -- cgit From 71327f547ee3a46ec5c39fdbbd268401b2578d0e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 11 Oct 2018 12:20:49 -0700 Subject: ataflop: fix error handling during setup Move queue allocation next to disk allocation to fix a couple of issues: - If add_disk() hasn't been called, we should clear disk->queue before calling put_disk(). - If we fail to allocate a request queue, we still need to put all of the disks, not just the ones that we allocated queues for. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- drivers/block/ataflop.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 17df631c5d85..0144d598ac47 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -2014,6 +2014,11 @@ static int __init atari_floppy_init (void) unit[i].disk = alloc_disk(1); if (!unit[i].disk) goto Enomem; + + unit[i].disk->queue = blk_init_queue(do_fd_request, + &ataflop_lock); + if (!unit[i].disk->queue) + goto Enomem; } if (UseTrackbuffer < 0) @@ -2045,10 +2050,6 @@ static int __init atari_floppy_init (void) sprintf(unit[i].disk->disk_name, "fd%d", i); unit[i].disk->fops = &floppy_fops; unit[i].disk->private_data = &unit[i]; - unit[i].disk->queue = blk_init_queue(do_fd_request, - &ataflop_lock); - if (!unit[i].disk->queue) - goto Enomem; set_capacity(unit[i].disk, MAX_DISK_SIZE * 2); add_disk(unit[i].disk); } @@ -2063,13 +2064,17 @@ static int __init atari_floppy_init (void) return 0; Enomem: - while (i--) { - struct request_queue *q = unit[i].disk->queue; + do { + struct gendisk *disk = unit[i].disk; - put_disk(unit[i].disk); - if (q) - blk_cleanup_queue(q); - } + if (disk) { + if (disk->queue) { + blk_cleanup_queue(disk->queue); + disk->queue = NULL; + } + put_disk(unit[i].disk); + } + } while (i--); unregister_blkdev(FLOPPY_MAJOR, "fd"); return -ENOMEM; -- cgit From 6ec3938cff95fe792a05fdf4ba14f92977e5e31d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 15 Oct 2018 09:18:24 -0600 Subject: ataflop: convert to blk-mq This driver is already pretty broken, in that it has two wait_events() (one in stdma_lock()) in request_fn. Get rid of the first one by freezing/quiescing the queue on format, and the second one by replacing it with stdma_try_lock(). The rest is straightforward. Compile-tested only and probably incorrect. 
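For reference, the freeze/quiesce idea that replaces the first wait_event() on the format path boils down to the following sketch (do_low_level_format() is a hypothetical stand-in, not the actual ataflop code):

  #include <linux/blk-mq.h>

  /* Exclude all I/O from a queue while a raw format operation runs. */
  static int format_with_quiesced_queue(struct request_queue *q)
  {
          int ret;

          blk_mq_freeze_queue(q);         /* drain in-flight requests */
          blk_mq_quiesce_queue(q);        /* stop new ->queue_rq() calls */

          ret = do_low_level_format();    /* hypothetical format helper */

          blk_mq_unquiesce_queue(q);
          blk_mq_unfreeze_queue(q);
          return ret;
  }

With the queue frozen and quiesced, the format code owns the hardware and no longer has to sleep inside the request handler.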
Cc: Laurent Vivier Signed-off-by: Omar Sandoval Converted to blk_mq_init_sq_queue() Signed-off-by: Jens Axboe --- drivers/block/ataflop.c | 173 ++++++++++++++++++++---------------------------- 1 file changed, 71 insertions(+), 102 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 0144d598ac47..f88b4c26d422 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -66,7 +66,7 @@ #include #include #include -#include +#include #include #include #include @@ -81,7 +81,6 @@ static DEFINE_MUTEX(ataflop_mutex); static struct request *fd_request; -static int fdc_queue; /* * WD1772 stuff @@ -300,6 +299,7 @@ static struct atari_floppy_struct { struct gendisk *disk; int ref; int type; + struct blk_mq_tag_set tag_set; } unit[FD_MAX_UNITS]; #define UD unit[drive] @@ -379,9 +379,6 @@ static int IsFormatting = 0, FormatError; static int UserSteprate[FD_MAX_UNITS] = { -1, -1 }; module_param_array(UserSteprate, int, NULL, 0); -/* Synchronization of FDC access. */ -static volatile int fdc_busy = 0; -static DECLARE_WAIT_QUEUE_HEAD(fdc_wait); static DECLARE_COMPLETION(format_wait); static unsigned long changed_floppies = 0xff, fake_change = 0; @@ -441,7 +438,6 @@ static void fd_times_out(struct timer_list *unused); static void finish_fdc( void ); static void finish_fdc_done( int dummy ); static void setup_req_params( int drive ); -static void redo_fd_request( void); static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long param); static void fd_probe( int drive ); @@ -459,8 +455,11 @@ static DEFINE_TIMER(fd_timer, check_change); static void fd_end_request_cur(blk_status_t err) { - if (!__blk_end_request_cur(fd_request, err)) + if (!blk_update_request(fd_request, err, + blk_rq_cur_bytes(fd_request))) { + __blk_mq_end_request(fd_request, err); fd_request = NULL; + } } static inline void start_motor_off_timer(void) @@ -706,7 +705,6 @@ static void fd_error( void ) if (SelectedDrive != -1) SUD.track = -1; } - redo_fd_request(); } @@ -724,14 +722,15 @@ static void fd_error( void ) static int do_format(int drive, int type, struct atari_format_descr *desc) { + struct request_queue *q = unit[drive].disk->queue; unsigned char *p; int sect, nsect; unsigned long flags; + int ret; - DPRINT(("do_format( dr=%d tr=%d he=%d offs=%d )\n", - drive, desc->track, desc->head, desc->sect_offset )); + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); - wait_event(fdc_wait, cmpxchg(&fdc_busy, 0, 1) == 0); local_irq_save(flags); stdma_lock(floppy_irq, NULL); atari_turnon_irq( IRQ_MFP_FDC ); /* should be already, just to be sure */ @@ -740,16 +739,16 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) if (type) { if (--type >= NUM_DISK_MINORS || minor2disktype[type].drive_types > DriveType) { - redo_fd_request(); - return -EINVAL; + ret = -EINVAL; + goto out; } type = minor2disktype[type].index; UDT = &atari_disk_type[type]; } if (!UDT || desc->track >= UDT->blocks/UDT->spt/2 || desc->head >= 2) { - redo_fd_request(); - return -EINVAL; + ret = -EINVAL; + goto out; } nsect = UDT->spt; @@ -788,8 +787,11 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) wait_for_completion(&format_wait); - redo_fd_request(); - return( FormatError ? -EIO : 0 ); + ret = FormatError ? 
-EIO : 0; +out: + blk_mq_unquiesce_queue(q); + blk_mq_unfreeze_queue(q); + return ret; } @@ -819,7 +821,6 @@ static void do_fd_action( int drive ) else { /* all sectors finished */ fd_end_request_cur(BLK_STS_OK); - redo_fd_request(); return; } } @@ -1224,7 +1225,6 @@ static void fd_rwsec_done1(int status) else { /* all sectors finished */ fd_end_request_cur(BLK_STS_OK); - redo_fd_request(); } return; @@ -1382,8 +1382,6 @@ static void finish_fdc_done( int dummy ) local_irq_save(flags); stdma_release(); - fdc_busy = 0; - wake_up( &fdc_wait ); local_irq_restore(flags); DPRINT(("finish_fdc() finished\n")); @@ -1473,59 +1471,34 @@ static void setup_req_params( int drive ) ReqTrack, ReqSector, (unsigned long)ReqData )); } -/* - * Round-robin between our available drives, doing one request from each - */ -static struct request *set_next_request(void) +static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct request_queue *q; - int old_pos = fdc_queue; - struct request *rq = NULL; - - do { - q = unit[fdc_queue].disk->queue; - if (++fdc_queue == FD_MAX_UNITS) - fdc_queue = 0; - if (q) { - rq = blk_fetch_request(q); - if (rq) { - rq->error_count = 0; - break; - } - } - } while (fdc_queue != old_pos); - - return rq; -} - + struct atari_floppy_struct *floppy = bd->rq->rq_disk->private_data; + int drive = floppy - unit; + int type = floppy->type; -static void redo_fd_request(void) -{ - int drive, type; - struct atari_floppy_struct *floppy; + spin_lock_irq(&ataflop_lock); + if (fd_request) { + spin_unlock_irq(&ataflop_lock); + return BLK_STS_DEV_RESOURCE; + } + if (!stdma_try_lock(floppy_irq, NULL)) { + spin_unlock_irq(&ataflop_lock); + return BLK_STS_RESOURCE; + } + fd_request = bd->rq; + blk_mq_start_request(fd_request); - DPRINT(("redo_fd_request: fd_request=%p dev=%s fd_request->sector=%ld\n", - fd_request, fd_request ? fd_request->rq_disk->disk_name : "", - fd_request ? 
blk_rq_pos(fd_request) : 0 )); + atari_disable_irq( IRQ_MFP_FDC ); IsFormatting = 0; -repeat: - if (!fd_request) { - fd_request = set_next_request(); - if (!fd_request) - goto the_end; - } - - floppy = fd_request->rq_disk->private_data; - drive = floppy - unit; - type = floppy->type; - if (!UD.connected) { /* drive not connected */ printk(KERN_ERR "Unknown Device: fd%d\n", drive ); fd_end_request_cur(BLK_STS_IOERR); - goto repeat; + goto out; } if (type == 0) { @@ -1541,23 +1514,18 @@ repeat: if (--type >= NUM_DISK_MINORS) { printk(KERN_WARNING "fd%d: invalid disk format", drive ); fd_end_request_cur(BLK_STS_IOERR); - goto repeat; + goto out; } if (minor2disktype[type].drive_types > DriveType) { printk(KERN_WARNING "fd%d: unsupported disk format", drive ); fd_end_request_cur(BLK_STS_IOERR); - goto repeat; + goto out; } type = minor2disktype[type].index; UDT = &atari_disk_type[type]; set_capacity(floppy->disk, UDT->blocks); UD.autoprobe = 0; } - - if (blk_rq_pos(fd_request) + 1 > UDT->blocks) { - fd_end_request_cur(BLK_STS_IOERR); - goto repeat; - } /* stop deselect timer */ del_timer( &motor_off_timer ); @@ -1569,22 +1537,13 @@ repeat: setup_req_params( drive ); do_fd_action( drive ); - return; - - the_end: - finish_fdc(); -} - - -void do_fd_request(struct request_queue * q) -{ - DPRINT(("do_fd_request for pid %d\n",current->pid)); - wait_event(fdc_wait, cmpxchg(&fdc_busy, 0, 1) == 0); - stdma_lock(floppy_irq, NULL); - - atari_disable_irq( IRQ_MFP_FDC ); - redo_fd_request(); + if (bd->last) + finish_fdc(); atari_enable_irq( IRQ_MFP_FDC ); + +out: + spin_unlock_irq(&ataflop_lock); + return BLK_STS_OK; } static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, @@ -1662,7 +1621,6 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, /* what if type > 0 here? Overwrite specified entry ? */ if (type) { /* refuse to re-set a predefined type for now */ - redo_fd_request(); return -EINVAL; } @@ -1730,10 +1688,8 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, /* sanity check */ if (setprm.track != dtp->blocks/dtp->spt/2 || - setprm.head != 2) { - redo_fd_request(); + setprm.head != 2) return -EINVAL; - } UDT = dtp; set_capacity(floppy->disk, UDT->blocks); @@ -1989,6 +1945,10 @@ static const struct block_device_operations floppy_fops = { .revalidate_disk= floppy_revalidate, }; +static const struct blk_mq_ops ataflop_mq_ops = { + .queue_rq = ataflop_queue_rq, +}; + static struct kobject *floppy_find(dev_t dev, int *part, void *data) { int drive = *part & 3; @@ -2002,6 +1962,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data) static int __init atari_floppy_init (void) { int i; + int ret; if (!MACH_IS_ATARI) /* Amiga, Mac, ... 
don't have Atari-compatible floppy :-) */ @@ -2012,13 +1973,19 @@ static int __init atari_floppy_init (void) for (i = 0; i < FD_MAX_UNITS; i++) { unit[i].disk = alloc_disk(1); - if (!unit[i].disk) - goto Enomem; + if (!unit[i].disk) { + ret = -ENOMEM; + goto err; + } - unit[i].disk->queue = blk_init_queue(do_fd_request, - &ataflop_lock); - if (!unit[i].disk->queue) - goto Enomem; + unit[i].disk->queue = blk_mq_init_sq_queue(&unit[i].tag_set, + &ataflop_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(unit[i].disk->queue)) { + ret = PTR_ERR(unit[i].disk->queue); + unit[i].disk->queue = NULL; + goto err; + } } if (UseTrackbuffer < 0) @@ -2035,7 +2002,8 @@ static int __init atari_floppy_init (void) DMABuffer = atari_stram_alloc(BUFFER_SIZE+512, "ataflop"); if (!DMABuffer) { printk(KERN_ERR "atari_floppy_init: cannot get dma buffer\n"); - goto Enomem; + ret = -ENOMEM; + goto err; } TrackBuffer = DMABuffer + 512; PhysDMABuffer = atari_stram_to_phys(DMABuffer); @@ -2063,7 +2031,8 @@ static int __init atari_floppy_init (void) config_types(); return 0; -Enomem: + +err: do { struct gendisk *disk = unit[i].disk; @@ -2072,12 +2041,13 @@ Enomem: blk_cleanup_queue(disk->queue); disk->queue = NULL; } + blk_mq_free_tag_set(&unit[i].tag_set); put_disk(unit[i].disk); } } while (i--); unregister_blkdev(FLOPPY_MAJOR, "fd"); - return -ENOMEM; + return ret; } #ifndef MODULE @@ -2124,11 +2094,10 @@ static void __exit atari_floppy_exit(void) int i; blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); for (i = 0; i < FD_MAX_UNITS; i++) { - struct request_queue *q = unit[i].disk->queue; - del_gendisk(unit[i].disk); + blk_cleanup_queue(unit[i].disk->queue); + blk_mq_free_tag_set(&unit[i].tag_set); put_disk(unit[i].disk); - blk_cleanup_queue(q); } unregister_blkdev(FLOPPY_MAJOR, "fd"); -- cgit From a9f38e1dec107af70d81338332494bf0a1e76597 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 15 Oct 2018 09:21:34 -0600 Subject: floppy: convert to blk-mq This driver likes to fetch requests from all over the place, so make queue_rq put requests on a list so that the logic stays the same. Tested with QEMU. Signed-off-by: Omar Sandoval Converted to blk_mq_init_sq_queue() and fixed a few spots where the tag_set leaked on cleanup. Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 66 +++++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index c494a771c06f..a8cfa011c284 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -252,13 +252,13 @@ static int allowed_drive_mask = 0x33; static int irqdma_allocated; -#include +#include #include #include /* for the compatibility eject ioctl */ #include +static LIST_HEAD(floppy_reqs); static struct request *current_req; -static void do_fd_request(struct request_queue *q); static int set_next_request(void); #ifndef fd_get_dma_residue @@ -414,10 +414,10 @@ static struct floppy_drive_struct drive_state[N_DRIVE]; static struct floppy_write_errors write_errors[N_DRIVE]; static struct timer_list motor_off_timer[N_DRIVE]; static struct gendisk *disks[N_DRIVE]; +static struct blk_mq_tag_set tag_sets[N_DRIVE]; static struct block_device *opened_bdev[N_DRIVE]; static DEFINE_MUTEX(open_lock); static struct floppy_raw_cmd *raw_cmd, default_raw_cmd; -static int fdc_queue; /* * This struct defines the different floppy types. 
@@ -2216,8 +2216,9 @@ static void floppy_end_request(struct request *req, blk_status_t error) /* current_count_sectors can be zero if transfer failed */ if (error) nr_sectors = blk_rq_cur_sectors(req); - if (__blk_end_request(req, error, nr_sectors << 9)) + if (blk_update_request(req, error, nr_sectors << 9)) return; + __blk_mq_end_request(req, error); /* We're done with the request */ floppy_off(drive); @@ -2797,27 +2798,14 @@ static int make_raw_rw_request(void) return 2; } -/* - * Round-robin between our available drives, doing one request from each - */ static int set_next_request(void) { - struct request_queue *q; - int old_pos = fdc_queue; - - do { - q = disks[fdc_queue]->queue; - if (++fdc_queue == N_DRIVE) - fdc_queue = 0; - if (q) { - current_req = blk_fetch_request(q); - if (current_req) { - current_req->error_count = 0; - break; - } - } - } while (fdc_queue != old_pos); - + current_req = list_first_entry_or_null(&floppy_reqs, struct request, + queuelist); + if (current_req) { + current_req->error_count = 0; + list_del_init(¤t_req->queuelist); + } return current_req != NULL; } @@ -2901,29 +2889,38 @@ static void process_fd_request(void) schedule_bh(redo_fd_request); } -static void do_fd_request(struct request_queue *q) +static blk_status_t floppy_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { + blk_mq_start_request(bd->rq); + if (WARN(max_buffer_sectors == 0, "VFS: %s called on non-open device\n", __func__)) - return; + return BLK_STS_IOERR; if (WARN(atomic_read(&usage_count) == 0, "warning: usage count=0, current_req=%p sect=%ld flags=%llx\n", current_req, (long)blk_rq_pos(current_req), (unsigned long long) current_req->cmd_flags)) - return; + return BLK_STS_IOERR; + + spin_lock_irq(&floppy_lock); + list_add_tail(&bd->rq->queuelist, &floppy_reqs); + spin_unlock_irq(&floppy_lock); if (test_and_set_bit(0, &fdc_busy)) { /* fdc busy, this new request will be treated when the current one is done */ is_alive(__func__, "old request running"); - return; + return BLK_STS_OK; } + command_status = FD_COMMAND_NONE; __reschedule_timeout(MAXTIMEOUT, "fd_request"); set_fdc(0); process_fd_request(); is_alive(__func__, ""); + return BLK_STS_OK; } static const struct cont_t poll_cont = { @@ -4486,6 +4483,10 @@ static struct platform_driver floppy_driver = { }, }; +static const struct blk_mq_ops floppy_mq_ops = { + .queue_rq = floppy_queue_rq, +}; + static struct platform_device floppy_device[N_DRIVE]; static bool floppy_available(int drive) @@ -4533,9 +4534,12 @@ static int __init do_floppy_init(void) goto out_put_disk; } - disks[drive]->queue = blk_init_queue(do_fd_request, &floppy_lock); - if (!disks[drive]->queue) { - err = -ENOMEM; + disks[drive]->queue = blk_mq_init_sq_queue(&tag_sets[drive], + &floppy_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(disks[drive]->queue)) { + err = PTR_ERR(disks[drive]->queue); + disks[drive]->queue = NULL; goto out_put_disk; } @@ -4708,6 +4712,7 @@ out_put_disk: del_timer_sync(&motor_off_timer[drive]); blk_cleanup_queue(disks[drive]->queue); disks[drive]->queue = NULL; + blk_mq_free_tag_set(&tag_sets[drive]); } put_disk(disks[drive]); } @@ -4935,6 +4940,7 @@ static void __exit floppy_module_exit(void) platform_device_unregister(&floppy_device[drive]); } blk_cleanup_queue(disks[drive]->queue); + blk_mq_free_tag_set(&tag_sets[drive]); /* * These disks have not called add_disk(). 
Don't put down -- cgit
From ad5fc6bb72214615f300af1f4ed57f71bc3be510 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 15 Oct 2018 09:01:40 -0600 Subject: gdrom: convert to blk-mq Ditch the deferred list, lock, and workqueue handling. Just mark the set as being blocking, so we are invoked from a workqueue already. Signed-off-by: Jens Axboe --- drivers/cdrom/gdrom.c | 174 +++++++++++++++++++++++--------------------------- 1 file changed, 79 insertions(+), 95 deletions(-)
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index ae3a7537cf0f..757e85b81879 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -31,12 +31,11 @@ #include #include #include -#include +#include #include #include #include #include -#include #include #include #include
@@ -102,11 +101,6 @@ static int gdrom_major; static DECLARE_WAIT_QUEUE_HEAD(command_queue); static DECLARE_WAIT_QUEUE_HEAD(request_queue); -static DEFINE_SPINLOCK(gdrom_lock); -static void gdrom_readdisk_dma(struct work_struct *work); -static DECLARE_WORK(work, gdrom_readdisk_dma); -static LIST_HEAD(gdrom_deferred); - struct gdromtoc { unsigned int entry[99]; unsigned int first, last;
@@ -122,6 +116,7 @@ static struct gdrom_unit { char disk_type; struct gdromtoc *toc; struct request_queue *gdrom_rq; + struct blk_mq_tag_set tag_set; } gd; struct gdrom_id {
@@ -584,103 +579,83 @@ static int gdrom_set_interrupt_handlers(void) * 9 -> sectors >> 8 * 10 -> sectors */ -static void gdrom_readdisk_dma(struct work_struct *work) +static blk_status_t gdrom_readdisk_dma(struct request *req) { int block, block_cnt; blk_status_t err; struct packet_command *read_command; - struct list_head *elem, *next; - struct request *req; unsigned long timeout; - if (list_empty(&gdrom_deferred)) - return; read_command = kzalloc(sizeof(struct packet_command), GFP_KERNEL); if (!read_command) - return; /* get more memory later? 
*/ + return BLK_STS_RESOURCE; + read_command->cmd[0] = 0x30; read_command->cmd[1] = 0x20; - spin_lock(&gdrom_lock); - list_for_each_safe(elem, next, &gdrom_deferred) { - req = list_entry(elem, struct request, queuelist); - spin_unlock(&gdrom_lock); - block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET; - block_cnt = blk_rq_sectors(req)/GD_TO_BLK; - __raw_writel(virt_to_phys(bio_data(req->bio)), GDROM_DMA_STARTADDR_REG); - __raw_writel(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG); - __raw_writel(1, GDROM_DMA_DIRECTION_REG); - __raw_writel(1, GDROM_DMA_ENABLE_REG); - read_command->cmd[2] = (block >> 16) & 0xFF; - read_command->cmd[3] = (block >> 8) & 0xFF; - read_command->cmd[4] = block & 0xFF; - read_command->cmd[8] = (block_cnt >> 16) & 0xFF; - read_command->cmd[9] = (block_cnt >> 8) & 0xFF; - read_command->cmd[10] = block_cnt & 0xFF; - /* set for DMA */ - __raw_writeb(1, GDROM_ERROR_REG); - /* other registers */ - __raw_writeb(0, GDROM_SECNUM_REG); - __raw_writeb(0, GDROM_BCL_REG); - __raw_writeb(0, GDROM_BCH_REG); - __raw_writeb(0, GDROM_DSEL_REG); - __raw_writeb(0, GDROM_INTSEC_REG); - /* Wait for registers to reset after any previous activity */ - timeout = jiffies + HZ / 2; - while (gdrom_is_busy() && time_before(jiffies, timeout)) - cpu_relax(); - __raw_writeb(GDROM_COM_PACKET, GDROM_STATUSCOMMAND_REG); - timeout = jiffies + HZ / 2; - /* Wait for packet command to finish */ - while (gdrom_is_busy() && time_before(jiffies, timeout)) - cpu_relax(); - gd.pending = 1; - gd.transfer = 1; - outsw(GDROM_DATA_REG, &read_command->cmd, 6); - timeout = jiffies + HZ / 2; - /* Wait for any pending DMA to finish */ - while (__raw_readb(GDROM_DMA_STATUS_REG) && - time_before(jiffies, timeout)) - cpu_relax(); - /* start transfer */ - __raw_writeb(1, GDROM_DMA_STATUS_REG); - wait_event_interruptible_timeout(request_queue, - gd.transfer == 0, GDROM_DEFAULT_TIMEOUT); - err = gd.transfer ? 
BLK_STS_IOERR : BLK_STS_OK; - gd.transfer = 0; - gd.pending = 0; - /* now seek to take the request spinlock - * before handling ending the request */ - spin_lock(&gdrom_lock); - list_del_init(&req->queuelist); - __blk_end_request_all(req, err); - } - spin_unlock(&gdrom_lock); + block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET; + block_cnt = blk_rq_sectors(req)/GD_TO_BLK; + __raw_writel(virt_to_phys(bio_data(req->bio)), GDROM_DMA_STARTADDR_REG); + __raw_writel(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG); + __raw_writel(1, GDROM_DMA_DIRECTION_REG); + __raw_writel(1, GDROM_DMA_ENABLE_REG); + read_command->cmd[2] = (block >> 16) & 0xFF; + read_command->cmd[3] = (block >> 8) & 0xFF; + read_command->cmd[4] = block & 0xFF; + read_command->cmd[8] = (block_cnt >> 16) & 0xFF; + read_command->cmd[9] = (block_cnt >> 8) & 0xFF; + read_command->cmd[10] = block_cnt & 0xFF; + /* set for DMA */ + __raw_writeb(1, GDROM_ERROR_REG); + /* other registers */ + __raw_writeb(0, GDROM_SECNUM_REG); + __raw_writeb(0, GDROM_BCL_REG); + __raw_writeb(0, GDROM_BCH_REG); + __raw_writeb(0, GDROM_DSEL_REG); + __raw_writeb(0, GDROM_INTSEC_REG); + /* Wait for registers to reset after any previous activity */ + timeout = jiffies + HZ / 2; + while (gdrom_is_busy() && time_before(jiffies, timeout)) + cpu_relax(); + __raw_writeb(GDROM_COM_PACKET, GDROM_STATUSCOMMAND_REG); + timeout = jiffies + HZ / 2; + /* Wait for packet command to finish */ + while (gdrom_is_busy() && time_before(jiffies, timeout)) + cpu_relax(); + gd.pending = 1; + gd.transfer = 1; + outsw(GDROM_DATA_REG, &read_command->cmd, 6); + timeout = jiffies + HZ / 2; + /* Wait for any pending DMA to finish */ + while (__raw_readb(GDROM_DMA_STATUS_REG) && + time_before(jiffies, timeout)) + cpu_relax(); + /* start transfer */ + __raw_writeb(1, GDROM_DMA_STATUS_REG); + wait_event_interruptible_timeout(request_queue, + gd.transfer == 0, GDROM_DEFAULT_TIMEOUT); + err = gd.transfer ? BLK_STS_IOERR : BLK_STS_OK; + gd.transfer = 0; + gd.pending = 0; + + blk_mq_end_request(req, err); kfree(read_command); + return BLK_STS_OK; } -static void gdrom_request(struct request_queue *rq) -{ - struct request *req; - - while ((req = blk_fetch_request(rq)) != NULL) { - switch (req_op(req)) { - case REQ_OP_READ: - /* - * Add to list of deferred work and then schedule - * workqueue. 
- */ - list_add_tail(&req->queuelist, &gdrom_deferred); - schedule_work(&work); - break; - case REQ_OP_WRITE: - pr_notice("Read only device - write request ignored\n"); - __blk_end_request_all(req, BLK_STS_IOERR); - break; - default: - printk(KERN_DEBUG "gdrom: Non-fs request ignored\n"); - __blk_end_request_all(req, BLK_STS_IOERR); - break; - } +static blk_status_t gdrom_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + blk_mq_start_request(bd->rq); + + switch (req_op(bd->rq)) { + case REQ_OP_READ: + return gdrom_readdisk_dma(bd->rq); + case REQ_OP_WRITE: + pr_notice("Read only device - write request ignored\n"); + return BLK_STS_IOERR; + default: + printk(KERN_DEBUG "gdrom: Non-fs request ignored\n"); + return BLK_STS_IOERR; } }
@@ -768,6 +743,10 @@ static int probe_gdrom_setupqueue(void) return gdrom_init_dma_mode(); } +static const struct blk_mq_ops gdrom_mq_ops = { + .queue_rq = gdrom_queue_rq, +}; + /* * register this as a block device and as compliant with the * universal CD Rom driver interface */
@@ -811,11 +790,15 @@ static int probe_gdrom(struct platform_device *devptr) err = gdrom_set_interrupt_handlers(); if (err) goto probe_fail_cmdirq_register; - gd.gdrom_rq = blk_init_queue(gdrom_request, &gdrom_lock); - if (!gd.gdrom_rq) { - err = -ENOMEM; + + gd.gdrom_rq = blk_mq_init_sq_queue(&gd.tag_set, &gdrom_mq_ops, 1, + BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING); + if (IS_ERR(gd.gdrom_rq)) { + rc = PTR_ERR(gd.gdrom_rq); + gd.gdrom_rq = NULL; goto probe_fail_requestq; } + blk_queue_bounce_limit(gd.gdrom_rq, BLK_BOUNCE_HIGH); err = probe_gdrom_setupqueue();
@@ -832,6 +815,7 @@ static int probe_gdrom(struct platform_device *devptr) probe_fail_toc: blk_cleanup_queue(gd.gdrom_rq); + blk_mq_free_tag_set(&gd.tag_set); probe_fail_requestq: free_irq(HW_EVENT_GDROM_DMA, &gd); free_irq(HW_EVENT_GDROM_CMD, &gd);
@@ -849,8 +833,8 @@ probe_fail_no_mem: static int remove_gdrom(struct platform_device *devptr) { - flush_work(&work); blk_cleanup_queue(gd.gdrom_rq); + blk_mq_free_tag_set(&gd.tag_set); free_irq(HW_EVENT_GDROM_CMD, &gd); free_irq(HW_EVENT_GDROM_DMA, &gd); del_gendisk(gd.disk); -- cgit
From 8535fd6f702a4b3e225258a86dcbf5dd93b73d51 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 15 Oct 2018 09:02:45 -0600 Subject: z2ram: convert to blk-mq Straightforward conversion to blk-mq, nothing special about this driver. 
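The conversions in this series all share the same minimal shape; a sketch of it for reference (illustrative names, not the z2ram code itself, which follows in the diff below):

  #include <linux/blk-mq.h>

  static struct blk_mq_tag_set tag_set;

  static blk_status_t sketch_queue_rq(struct blk_mq_hw_ctx *hctx,
                                      const struct blk_mq_queue_data *bd)
  {
          struct request *rq = bd->rq;

          blk_mq_start_request(rq);
          /* ... perform the transfer described by rq ... */
          blk_mq_end_request(rq, BLK_STS_OK);
          return BLK_STS_OK;
  }

  static const struct blk_mq_ops sketch_mq_ops = {
          .queue_rq = sketch_queue_rq,
  };

  static int __init sketch_init(void)
  {
          struct request_queue *q;

          /* One hardware queue, queue depth 16, merging allowed. */
          q = blk_mq_init_sq_queue(&tag_set, &sketch_mq_ops, 16,
                                   BLK_MQ_F_SHOULD_MERGE);
          if (IS_ERR(q))
                  return PTR_ERR(q);
          /* ... attach q to the gendisk, etc. ... */
          return 0;
  }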
Signed-off-by: Jens Axboe --- drivers/block/z2ram.c | 87 +++++++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index d0c5bc4e0703..1106c076fa4b 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include @@ -66,43 +66,44 @@ static DEFINE_SPINLOCK(z2ram_lock); static struct gendisk *z2ram_gendisk; -static void do_z2_request(struct request_queue *q) +static blk_status_t z2_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct request *req; - - req = blk_fetch_request(q); - while (req) { - unsigned long start = blk_rq_pos(req) << 9; - unsigned long len = blk_rq_cur_bytes(req); - blk_status_t err = BLK_STS_OK; - - if (start + len > z2ram_size) { - pr_err(DEVICE_NAME ": bad access: block=%llu, " - "count=%u\n", - (unsigned long long)blk_rq_pos(req), - blk_rq_cur_sectors(req)); - err = BLK_STS_IOERR; - goto done; - } - while (len) { - unsigned long addr = start & Z2RAM_CHUNKMASK; - unsigned long size = Z2RAM_CHUNKSIZE - addr; - void *buffer = bio_data(req->bio); - - if (len < size) - size = len; - addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ]; - if (rq_data_dir(req) == READ) - memcpy(buffer, (char *)addr, size); - else - memcpy((char *)addr, buffer, size); - start += size; - len -= size; - } - done: - if (!__blk_end_request_cur(req, err)) - req = blk_fetch_request(q); + struct request *req = bd->rq; + unsigned long start = blk_rq_pos(req) << 9; + unsigned long len = blk_rq_cur_bytes(req); + + blk_mq_start_request(req); + + if (start + len > z2ram_size) { + pr_err(DEVICE_NAME ": bad access: block=%llu, " + "count=%u\n", + (unsigned long long)blk_rq_pos(req), + blk_rq_cur_sectors(req)); + return BLK_STS_IOERR; + } + + spin_lock_irq(&z2ram_lock); + + while (len) { + unsigned long addr = start & Z2RAM_CHUNKMASK; + unsigned long size = Z2RAM_CHUNKSIZE - addr; + void *buffer = bio_data(req->bio); + + if (len < size) + size = len; + addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ]; + if (rq_data_dir(req) == READ) + memcpy(buffer, (char *)addr, size); + else + memcpy((char *)addr, buffer, size); + start += size; + len -= size; } + + spin_unlock_irq(&z2ram_lock); + blk_mq_end_request(req, BLK_STS_OK); + return BLK_STS_OK; } static void @@ -337,6 +338,11 @@ static struct kobject *z2_find(dev_t dev, int *part, void *data) } static struct request_queue *z2_queue; +static struct blk_mq_tag_set tag_set; + +static const struct blk_mq_ops z2_mq_ops = { + .queue_rq = z2_queue_rq, +}; static int __init z2_init(void) @@ -355,9 +361,13 @@ z2_init(void) if (!z2ram_gendisk) goto out_disk; - z2_queue = blk_init_queue(do_z2_request, &z2ram_lock); - if (!z2_queue) + z2_queue = blk_mq_init_sq_queue(&tag_set, &z2_mq_ops, 16, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(z2_queue)) { + ret = PTR_ERR(z2_queue); + z2_queue = NULL; goto out_queue; + } z2ram_gendisk->major = Z2RAM_MAJOR; z2ram_gendisk->first_minor = 0; @@ -387,6 +397,7 @@ static void __exit z2_exit(void) del_gendisk(z2ram_gendisk); put_disk(z2ram_gendisk); blk_cleanup_queue(z2_queue); + blk_mq_free_tag_set(&tag_set); if ( current_device != -1 ) { -- cgit From 0585b75437d335c6580066c1ab9ea3092139df32 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 15 Oct 2018 08:53:45 -0600 Subject: sx8: convert to blk-mq Convert from the old request_fn style driver to blk-mq. 
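The one non-mechanical part of this conversion is resource back-pressure: when no controller message slot is free, the driver stops the hardware queues and returns BLK_STS_DEV_RESOURCE, then restarts the queues itself once a slot frees up. A rough sketch of that contract (hw_slot_available() and submit_to_hw() are hypothetical stand-ins for the carm helpers):

  static blk_status_t sketch_queue_rq(struct blk_mq_hw_ctx *hctx,
                                      const struct blk_mq_queue_data *bd)
  {
          if (!hw_slot_available()) {
                  /* We promise to restart the queue ourselves. */
                  blk_mq_stop_hw_queues(hctx->queue);
                  return BLK_STS_DEV_RESOURCE;
          }

          blk_mq_start_request(bd->rq);
          submit_to_hw(bd->rq);
          return BLK_STS_OK;
  }

  /* Completion path: a slot is free again, so re-run the queue. */
  static void sketch_complete(struct request *rq)
  {
          blk_mq_end_request(rq, BLK_STS_OK);
          blk_mq_start_hw_queues(rq->q);
  }

Unlike BLK_STS_RESOURCE, BLK_STS_DEV_RESOURCE tells blk-mq not to re-run the queue on its own; the driver must kick it when the resource becomes available, which is what the queue restart in the completion path does.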
Signed-off-by: Jens Axboe --- drivers/block/sx8.c | 113 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 68 insertions(+), 45 deletions(-) diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 4d90e5eba2f5..16f37254bcf4 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -244,6 +244,7 @@ struct carm_port { unsigned int port_no; struct gendisk *disk; struct carm_host *host; + struct blk_mq_tag_set tag_set; /* attached device characteristics */ u64 capacity; @@ -279,6 +280,7 @@ struct carm_host { unsigned int state; u32 fw_ver; + struct blk_mq_tag_set tag_set; struct request_queue *oob_q; unsigned int n_oob; @@ -750,7 +752,7 @@ static inline void carm_end_request_queued(struct carm_host *host, struct request *req = crq->rq; int rc; - __blk_end_request_all(req, error); + blk_mq_end_request(req, error); rc = carm_put_request(host, crq); assert(rc == 0); @@ -760,7 +762,7 @@ static inline void carm_push_q (struct carm_host *host, struct request_queue *q) { unsigned int idx = host->wait_q_prod % CARM_MAX_WAIT_Q; - blk_stop_queue(q); + blk_mq_stop_hw_queues(q); VPRINTK("STOPPED QUEUE %p\n", q); host->wait_q[idx] = q; @@ -785,7 +787,7 @@ static inline void carm_round_robin(struct carm_host *host) { struct request_queue *q = carm_pop_q(host); if (q) { - blk_start_queue(q); + blk_mq_start_hw_queues(q); VPRINTK("STARTED QUEUE %p\n", q); } } @@ -802,62 +804,62 @@ static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq, } } -static void carm_oob_rq_fn(struct request_queue *q) +static blk_status_t carm_oob_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { + struct request_queue *q = hctx->queue; struct carm_host *host = q->queuedata; struct carm_request *crq; - struct request *rq; int rc; - while (1) { - DPRINTK("get req\n"); - rq = blk_fetch_request(q); - if (!rq) - break; + blk_mq_start_request(bd->rq); - crq = rq->special; - assert(crq != NULL); - assert(crq->rq == rq); + spin_lock_irq(&host->lock); - crq->n_elem = 0; + crq = bd->rq->special; + assert(crq != NULL); + assert(crq->rq == bd->rq); - DPRINTK("send req\n"); - rc = carm_send_msg(host, crq); - if (rc) { - blk_requeue_request(q, rq); - carm_push_q(host, q); - return; /* call us again later, eventually */ - } + crq->n_elem = 0; + + DPRINTK("send req\n"); + rc = carm_send_msg(host, crq); + if (rc) { + carm_push_q(host, q); + spin_unlock_irq(&host->lock); + return BLK_STS_DEV_RESOURCE; } + + spin_unlock_irq(&host->lock); + return BLK_STS_OK; } -static void carm_rq_fn(struct request_queue *q) +static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { + struct request_queue *q = hctx->queue; struct carm_port *port = q->queuedata; struct carm_host *host = port->host; struct carm_msg_rw *msg; struct carm_request *crq; - struct request *rq; + struct request *rq = bd->rq; struct scatterlist *sg; int writing = 0, pci_dir, i, n_elem, rc; u32 tmp; unsigned int msg_size; -queue_one_request: - VPRINTK("get req\n"); - rq = blk_peek_request(q); - if (!rq) - return; + blk_mq_start_request(rq); + + spin_lock_irq(&host->lock); crq = carm_get_request(host); if (!crq) { carm_push_q(host, q); - return; /* call us again later, eventually */ + spin_unlock_irq(&host->lock); + return BLK_STS_DEV_RESOURCE; } crq->rq = rq; - blk_start_request(rq); - if (rq_data_dir(rq) == WRITE) { writing = 1; pci_dir = PCI_DMA_TODEVICE; @@ -869,15 +871,19 @@ 
queue_one_request: sg = &crq->sg[0]; n_elem = blk_rq_map_sg(q, rq, sg); if (n_elem <= 0) { + /* request with no s/g entries? */ carm_end_rq(host, crq, BLK_STS_IOERR); - return; /* request with no s/g entries? */ + spin_unlock_irq(&host->lock); + return BLK_STS_IOERR; } /* map scatterlist to PCI bus addresses */ n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir); if (n_elem <= 0) { + /* request with no s/g entries? */ carm_end_rq(host, crq, BLK_STS_IOERR); - return; /* request with no s/g entries? */ + spin_unlock_irq(&host->lock); + return BLK_STS_IOERR; } crq->n_elem = n_elem; crq->port = port; @@ -927,12 +933,13 @@ queue_one_request: rc = carm_send_msg(host, crq); if (rc) { carm_put_request(host, crq); - blk_requeue_request(q, rq); carm_push_q(host, q); - return; /* call us again later, eventually */ + spin_unlock_irq(&host->lock); + return BLK_STS_DEV_RESOURCE; } - goto queue_one_request; + spin_unlock_irq(&host->lock); + return BLK_STS_OK; } static void carm_handle_array_info(struct carm_host *host, @@ -1485,6 +1492,14 @@ static int carm_init_host(struct carm_host *host) return 0; } +static const struct blk_mq_ops carm_oob_mq_ops = { + .queue_rq = carm_oob_queue_rq, +}; + +static const struct blk_mq_ops carm_mq_ops = { + .queue_rq = carm_queue_rq, +}; + static int carm_init_disks(struct carm_host *host) { unsigned int i; @@ -1513,9 +1528,10 @@ static int carm_init_disks(struct carm_host *host) disk->fops = &carm_bd_ops; disk->private_data = port; - q = blk_init_queue(carm_rq_fn, &host->lock); - if (!q) { - rc = -ENOMEM; + q = blk_mq_init_sq_queue(&port->tag_set, &carm_mq_ops, + max_queue, BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(q)) { + rc = PTR_ERR(q); break; } disk->queue = q; @@ -1533,14 +1549,18 @@ static void carm_free_disks(struct carm_host *host) unsigned int i; for (i = 0; i < CARM_MAX_PORTS; i++) { - struct gendisk *disk = host->port[i].disk; + struct carm_port *port = &host->port[i]; + struct gendisk *disk = port->disk; + if (disk) { struct request_queue *q = disk->queue; if (disk->flags & GENHD_FL_UP) del_gendisk(disk); - if (q) + if (q) { + blk_mq_free_tag_set(&port->tag_set); blk_cleanup_queue(q); + } put_disk(disk); } } @@ -1636,11 +1656,12 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_iounmap; } - q = blk_init_queue(carm_oob_rq_fn, &host->lock); - if (!q) { + q = blk_mq_init_sq_queue(&host->tag_set, &carm_oob_mq_ops, 1, + BLK_MQ_F_NO_SCHED); + if (IS_ERR(q)) { printk(KERN_ERR DRV_NAME "(%s): OOB queue alloc failure\n", pci_name(pdev)); - rc = -ENOMEM; + rc = PTR_ERR(q); goto err_out_pci_free; } host->oob_q = q; @@ -1705,6 +1726,7 @@ err_out_free_majors: else if (host->major == 161) clear_bit(1, &carm_major_alloc); blk_cleanup_queue(host->oob_q); + blk_mq_free_tag_set(&host->tag_set); err_out_pci_free: pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma); err_out_iounmap: @@ -1736,6 +1758,7 @@ static void carm_remove_one (struct pci_dev *pdev) else if (host->major == 161) clear_bit(1, &carm_major_alloc); blk_cleanup_queue(host->oob_q); + blk_mq_free_tag_set(&host->tag_set); pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma); iounmap(host->mmio); kfree(host); -- cgit From 886fabf693263e8651c0c4ab84fc626ad6d3a6e7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 5 Oct 2018 09:49:37 -0600 Subject: nvme: update node paths after adding new path The nvme namespace paths were being updated only when the current path was not set or nonoptimized. 
If a new path comes online that is a better path for its NUMA node, the multipath selector may continue using the previously set path on a potentially more distant node. This patch re-runs the path assignment after successfully adding a new optimized path. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 52987052b7fc..5e3cc8c59a39 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -321,6 +321,15 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) device_add_disk(&head->subsys->dev, head->disk, nvme_ns_id_attr_groups); + if (nvme_path_is_optimized(ns)) { + int node, srcu_idx; + + srcu_idx = srcu_read_lock(&head->srcu); + for_each_node(node) + __nvme_find_path(head, node); + srcu_read_unlock(&head->srcu, srcu_idx); + } + kblockd_schedule_work(&ns->head->requeue_work); } -- cgit
From 48440ab6dc275a3144474e8c5f45fab854d6e20f Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 22 Aug 2018 19:58:45 -0700 Subject: nvmet: remove unreachable code Get rid of the unreachable code in nvmet_parse_discovery_cmd(). Keep the error message identical to the ones in admin-cmd.c and io-cmd*.c. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/discovery.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index eae29f493a07..d1954f4ca28d 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -219,12 +219,10 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } default: - pr_err("unsupported cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d\n", cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } - pr_err("unhandled cmd %d\n", cmd->common.opcode); - return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } int __init nvmet_init_discovery(void) -- cgit
From 43a6f8fb619730fe5989e2430669626b2b5e13a0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:49 -0700 Subject: nvmet: use strcmp() instead of strncmp() for subsystem lookup strncmp() stops comparing when either the end of one of the first two arguments is reached or when 'n' characters have been compared, whichever comes first. That means that strncmp(s1, s2, n) is equivalent to strcmp(s1, s2) if n exceeds the length of s1 or the length of s2. Since that is the case in nvmet_find_get_subsys(), change strncmp() into strcmp(). 
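Spelled out with the sizes from the smatch report quoted just below (an illustration of the equivalence, not code from the patch):

  /*
   * NVME_DISC_SUBSYS_NAME is "nqn.2014-08.org.nvmexpress.discovery",
   * i.e. 36 characters plus the terminating NUL (37 bytes), while
   * NVMF_NQN_SIZE is 223.  The NUL is reached long before the length
   * limit, so
   */
  strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn, NVMF_NQN_SIZE);
  /* compares exactly the same characters as */
  strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn);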
This patch avoids that the following warning is reported by smatch: drivers/nvme/target/core.c:940:1 nvmet_find_get_subsys() error: strncmp() '"nqn.2014-08.org.nvmexpress.discovery"' too small (37 vs 223) Signed-off-by: Bart Van Assche Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b5ec96abd048..0acdff9e6842 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1105,8 +1105,7 @@ static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port, if (!port) return NULL; - if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn, - NVMF_NQN_SIZE)) { + if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) { if (!kref_get_unless_zero(&nvmet_disc_subsys->ref)) return NULL; return nvmet_disc_subsys; -- cgit
From 35da77d556c17980f9bd6892828a70d7a1a8a145 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:54 -0700 Subject: nvmet-rdma: check for timeout in nvme_rdma_wait_for_cm() Check whether queue->cm_error holds a value before reading it. This patch addresses Coverity ID 1373774: unchecked return value. Signed-off-by: Bart Van Assche Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index dc042017c293..e7be903041a8 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -233,8 +233,15 @@ static void nvme_rdma_qp_event(struct ib_event *event, void *context) static int nvme_rdma_wait_for_cm(struct nvme_rdma_queue *queue) { - wait_for_completion_interruptible_timeout(&queue->cm_done, + int ret; + + ret = wait_for_completion_interruptible_timeout(&queue->cm_done, msecs_to_jiffies(NVME_RDMA_CONNECT_TIMEOUT_MS) + 1); + if (ret < 0) + return ret; + if (ret == 0) + return -ETIMEDOUT; + WARN_ON_ONCE(queue->cm_error > 0); return queue->cm_error; } -- cgit
From eb090c4c948ccc7a051451261cf1426edf83f3eb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:39 -0700 Subject: nvme-core: declare local symbols static This patch avoids that sparse complains about missing declarations. Signed-off-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2db33a752e2b..63932dea74a1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2729,7 +2729,7 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj, return a->mode; } -const struct attribute_group nvme_ns_id_attr_group = { +static const struct attribute_group nvme_ns_id_attr_group = { .attrs = nvme_ns_id_attrs, .is_visible = nvme_ns_id_attrs_are_visible, }; -- cgit
From bb2a1d4e804aa41eef0003a192a674f844dbca23 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:41 -0700 Subject: nvme-core: rework an NQN copying operation Although it is easy to see that the code in nvme_init_subnqn() guarantees that the subsys->nqn string is '\0'-terminated, apparently Coverity is not smart enough to see this. Make it easier for Coverity to analyze this code by changing the strncpy() call into a strlcpy() call. This patch does not change the behavior of the code but fixes Coverity ID 1423720. 
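To illustrate the difference the analyzer cares about (generic kernel string semantics, not code from this patch):

  char dst[8];

  /* strncpy() copies at most 8 bytes and leaves dst without a
   * terminator when the source fills the buffer: */
  strncpy(dst, "12345678", sizeof(dst));

  /* strlcpy() copies at most 7 bytes and always NUL-terminates,
   * leaving dst as "1234567": */
  strlcpy(dst, "12345678", sizeof(dst));

In nvme_init_subnqn() this path is only taken when nqnlen < NVMF_NQN_SIZE, so the strncpy() result was already terminated; strlcpy() simply makes that property obvious to static analysis.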
Signed-off-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 63932dea74a1..8cecb36b5af1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2076,7 +2076,7 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE); if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) { - strncpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); + strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); return; } -- cgit From 40581d1a91a1527e1e15350e479156810a389a96 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:43 -0700 Subject: nvme-pci: fix nvme_suspend_queue() kernel-doc header This patch avoids that the kernel-doc tool complains about the nvme_suspend_queue() function header when building with W=1. Signed-off-by: Bart Van Assche Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d668682f91df..450481c2fd17 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1249,7 +1249,7 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest) /** * nvme_suspend_queue - put queue into suspended state - * @nvmeq - queue to suspend + * @nvmeq: queue to suspend */ static int nvme_suspend_queue(struct nvme_queue *nvmeq) { -- cgit From 5eadc9cce17100caef88e972abeeeca7ef6d8a92 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:51 -0700 Subject: nvmet: use strlcpy() instead of strcpy() Although the code modified by this patch looks fine to me, this patch avoids that Coverity reports the following complaint (ID 1364971 and ID 1364973): "You might overrun the 256-character fixed-size string id->subnqn". 
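The same fix in miniature (a sketch, not the target code: the 256-byte field mirrors id->subnqn, the helper name is invented, and portable snprintf() stands in for the kernel's strlcpy()):

#include <stdio.h>

/* Mirrors the fixed-size NQN field in the identify data. */
struct id_stub {
	char subnqn[256];
};

static void fill_subnqn(struct id_stub *id, const char *subsysnqn)
{
	/* strcpy(id->subnqn, subsysnqn) would copy until the source's
	 * '\0' with nothing tying the copy to the 256-byte destination,
	 * which is the overrun Coverity reports. Passing the bound via
	 * sizeof() makes the limit explicit, and the result is always
	 * '\0'-terminated. */
	snprintf(id->subnqn, sizeof(id->subnqn), "%s", subsysnqn);
}

int main(void)
{
	struct id_stub id;

	fill_subnqn(&id, "nqn.2014-08.org.example:subsys1");
	printf("%s\n", id.subnqn);
	return 0;
}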
Signed-off-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 2 +- drivers/nvme/target/discovery.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 7a45f4477679..1179f6314323 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -353,7 +353,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) if (req->port->inline_data_size) id->sgls |= cpu_to_le32(1 << 20); - strcpy(id->subnqn, ctrl->subsys->subsysnqn); + strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); /* Max command capsule size is sqe + single page of in-capsule data */ id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) + diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index d1954f4ca28d..bc0aa0bf1543 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -174,7 +174,7 @@ static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req) if (req->port->inline_data_size) id->sgls |= cpu_to_le32(1 << 20); - strcpy(id->subnqn, ctrl->subsys->subsysnqn); + strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); -- cgit From 0d3ebdec9394c984f3aa59ea97541f2243952b55 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:53 -0700 Subject: nvmet-rdma: declare local symbols static This patch avoids that sparse complains about missing declarations. Signed-off-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 5becca88ccbe..bd265aceb90c 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -122,7 +122,7 @@ struct nvmet_rdma_device { int inline_page_count; }; -struct workqueue_struct *nvmet_rdma_delete_wq; +static struct workqueue_struct *nvmet_rdma_delete_wq; static bool nvmet_rdma_use_srq; module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444); MODULE_PARM_DESC(use_srq, "Use shared receive queue."); -- cgit From 8eacd1bd21d6913ec27e6120e9a8733352e191d3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:52 -0700 Subject: nvmet: avoid integer overflow in the discard code Although I'm not sure whether it is a good idea to support large discard commands, I think integer overflow for discard ranges larger than 4 GB should be avoided. This patch avoids that smatch reports the following: drivers/nvme/target/io-cmd-file.c:249:1 nvmet_file_execute_discard() warn: should '((range.nlb)) << req->ns->blksize_shift' be a 64 bit type? 
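The C arithmetic behind that warning, as a stand-alone sketch (the types mirror the 32-bit nlb field and the 64-bit length; the block count and shift are invented): a u32 shifted left is evaluated in 32 bits, so the high bits are lost before the result ever reaches the u64, which is why the patch widens first and shifts afterwards.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t nlb = 0x00800000;	/* 8M blocks, chosen to overflow */
	unsigned int blksize_shift = 9;	/* 512-byte blocks */

	/* The shift is evaluated in 32 bits: 8M << 9 is 2^32, which
	 * wraps to 0 before the widening assignment happens. */
	uint64_t wrong = nlb << blksize_shift;

	/* Assign first, then shift, so the arithmetic is 64-bit. */
	uint64_t len = nlb;

	len <<= blksize_shift;

	printf("wrong = %llu\n", (unsigned long long)wrong);	/* 0 */
	printf("len   = %llu\n", (unsigned long long)len);	/* 4294967296 */
	return 0;
}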
Fixes: d5eff33ee6f8 ("nvmet: add simple file backed ns support") Signed-off-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 81a9dc5290a8..39d972e2595f 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -246,7 +246,8 @@ static void nvmet_file_execute_discard(struct nvmet_req *req) break; offset = le64_to_cpu(range.slba) << req->ns->blksize_shift; - len = le32_to_cpu(range.nlb) << req->ns->blksize_shift; + len = le32_to_cpu(range.nlb); + len <<= req->ns->blksize_shift; if (offset + len > req->ns->size) { ret = NVME_SC_LBA_RANGE | NVME_SC_DNR; break; -- cgit From 76c910c7cf6d2d325c24439855a606cf1d414d29 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:44 -0700 Subject: nvme-fc: fix kernel-doc headers This patch avoids that the kernel-doc tool complains about several multiple function headers when building with W=1. Signed-off-by: Bart Van Assche Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 9d201b35397d..d838987fffe1 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -317,7 +317,7 @@ out_done: * @template: LLDD entrypoints and operational parameters for the port * @dev: physical hardware device node port corresponds to. Will be * used for DMA mappings - * @lport_p: pointer to a local port pointer. Upon success, the routine + * @portptr: pointer to a local port pointer. Upon success, the routine * will allocate a nvme_fc_local_port structure and place its * address in the local port pointer. Upon failure, local port * pointer will be set to 0. @@ -425,8 +425,7 @@ EXPORT_SYMBOL_GPL(nvme_fc_register_localport); * nvme_fc_unregister_localport - transport entry point called by an * LLDD to deregister/remove a previously * registered a NVME host FC port. - * @localport: pointer to the (registered) local port that is to be - * deregistered. + * @portptr: pointer to the (registered) local port that is to be deregistered. * * Returns: * a completion status. Must be 0 upon success; a negative errno @@ -632,7 +631,7 @@ __nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport, * @localport: pointer to the (registered) local port that the remote * subsystem port is connected to. * @pinfo: pointer to information about the port to be registered - * @rport_p: pointer to a remote port pointer. Upon success, the routine + * @portptr: pointer to a remote port pointer. Upon success, the routine * will allocate a nvme_fc_remote_port structure and place its * address in the remote port pointer. Upon failure, remote port * pointer will be set to 0. @@ -809,8 +808,8 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) * nvme_fc_unregister_remoteport - transport entry point called by an * LLDD to deregister/remove a previously * registered a NVME subsystem FC port. - * @remoteport: pointer to the (registered) remote port that is to be - * deregistered. + * @portptr: pointer to the (registered) remote port that is to be + * deregistered. * * Returns: * a completion status. 
Must be 0 upon success; a negative errno -- cgit From d3d0bc78be300098104d9fde9ca1330694a70f45 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:45 -0700 Subject: nvme-fc: introduce struct nvme_fcp_op_w_sgl This patch does not change any functionality but makes the intent of the code more clear. Signed-off-by: Bart Van Assche Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index d838987fffe1..fdadc9464f6f 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "nvme.h" #include "fabrics.h" @@ -104,6 +105,12 @@ struct nvme_fc_fcp_op { struct nvme_fc_ersp_iu rsp_iu; }; +struct nvme_fcp_op_w_sgl { + struct nvme_fc_fcp_op op; + struct scatterlist sgl[SG_CHUNK_SIZE]; + uint8_t priv[0]; +}; + struct nvme_fc_lport { struct nvme_fc_local_port localport; @@ -1686,6 +1693,8 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op, struct request *rq, u32 rqno) { + struct nvme_fcp_op_w_sgl *op_w_sgl = + container_of(op, typeof(*op_w_sgl), op); struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; int ret = 0; @@ -1695,7 +1704,7 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, op->fcp_req.rspaddr = &op->rsp_iu; op->fcp_req.rsplen = sizeof(op->rsp_iu); op->fcp_req.done = nvme_fc_fcpio_done; - op->fcp_req.first_sgl = (struct scatterlist *)&op[1]; + op->fcp_req.first_sgl = &op_w_sgl->sgl[0]; op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE]; op->ctrl = ctrl; op->queue = queue; @@ -1734,12 +1743,12 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { struct nvme_fc_ctrl *ctrl = set->driver_data; - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq); int queue_idx = (set == &ctrl->tag_set) ? 
hctx_idx + 1 : 0; struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; nvme_req(rq)->ctrl = &ctrl->ctrl; - return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); + return __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); } static int @@ -2423,10 +2432,9 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) ctrl->tag_set.reserved_tags = 1; /* fabric connect */ ctrl->tag_set.numa_node = NUMA_NO_NODE; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; + ctrl->tag_set.cmd_size = + struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, + ctrl->lport->ops->fcprqst_priv_sz); ctrl->tag_set.driver_data = ctrl; ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; ctrl->tag_set.timeout = NVME_IO_TIMEOUT; @@ -3028,10 +3036,9 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; + ctrl->admin_tag_set.cmd_size = + struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, + ctrl->lport->ops->fcprqst_priv_sz); ctrl->admin_tag_set.driver_data = ctrl; ctrl->admin_tag_set.nr_hw_queues = 1; ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; -- cgit From 0d2bdf9f4134582bc7c4b82cb516cb27952127d0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:46 -0700 Subject: nvme-fc: rework the request initialization code Instead of setting and then clearing the first_sgl pointer for AEN requests, leave that pointer zero. This patch does not change how requests are initialized but avoids that Coverity reports the following complaint for nvme_fc_init_aen_ops(): CID 1418400 (#1 of 1): Out-of-bounds access (OVERRUN) 4. overrun-buffer-val: Overrunning buffer pointed to by aen_op of 312 bytes by passing it to a function which accesses it at byte offset 312. Signed-off-by: Bart Van Assche Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index fdadc9464f6f..e52b9d3c0bd6 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1704,7 +1704,6 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, op->fcp_req.rspaddr = &op->rsp_iu; op->fcp_req.rsplen = sizeof(op->rsp_iu); op->fcp_req.done = nvme_fc_fcpio_done; - op->fcp_req.first_sgl = &op_w_sgl->sgl[0]; op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE]; op->ctrl = ctrl; op->queue = queue; @@ -1746,9 +1745,14 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq); int queue_idx = (set == &ctrl->tag_set) ? 
hctx_idx + 1 : 0; struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; + int res; nvme_req(rq)->ctrl = &ctrl->ctrl; - return __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); + res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); + if (res) + return res; + op->op.fcp_req.first_sgl = &op->sgl[0]; + return res; } static int @@ -1778,7 +1782,6 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) } aen_op->flags = FCOP_FLAGS_AEN; - aen_op->fcp_req.first_sgl = NULL; /* no sg list */ aen_op->fcp_req.private = private; memset(sqe, 0, sizeof(*sqe)); -- cgit From 1c4665272ca73335a662a0fb6a9604ec76983756 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:47 -0700 Subject: nvmet-fc: fix kernel-doc headers This patch avoids that the kernel-doc tool complains about two function headers when building with W=1. Signed-off-by: Bart Van Assche Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index ef286b72d958..409081a03b24 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1245,8 +1245,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) * nvme_fc_unregister_targetport - transport entry point called by an * LLDD to deregister/remove a previously * registered a local NVME subsystem FC port. - * @tgtport: pointer to the (registered) target port that is to be - * deregistered. + * @target_port: pointer to the (registered) target port that is to be + * deregistered. * * Returns: * a completion status. Must be 0 upon success; a negative errno @@ -1749,7 +1749,7 @@ nvmet_fc_handle_ls_rqst_work(struct work_struct *work) * * If this routine returns error, the LLDD should abort the exchange. * - * @tgtport: pointer to the (registered) target port the LS was + * @target_port: pointer to the (registered) target port the LS was * received on. * @lsreq: pointer to a lsreq request structure to be used to reference * the exchange corresponding to the LS. -- cgit From 202359c007f6b1d6247a062c0682d6d5bcd3e7d7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 10 Oct 2018 08:08:19 -0700 Subject: nvme-core: make implicit seed truncation explicit The nvme_user_io.slba field is 64 bits wide. That value is copied into the 32-bit bio_integrity_payload.bip_iter.bi_sector field. Make that truncation explicit to avoid that Coverity complains about implicit truncation. See also Coverity ID 1056486 on http://scan.coverity.com/projects/linux. Signed-off-by: Bart Van Assche Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8cecb36b5af1..65c42448e904 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1132,7 +1132,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) return nvme_submit_user_cmd(ns->queue, &c, (void __user *)(uintptr_t)io.addr, length, - metadata, meta_len, io.slba, NULL, 0); + metadata, meta_len, lower_32_bits(io.slba), NULL, 0); } static u32 nvme_known_admin_effects(u8 opcode) -- cgit From 1216e9ef18b84f4fb5934792368fb01eb3540520 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 10 Oct 2018 08:08:20 -0700 Subject: nvmet-fcloop: suppress a compiler warning Building with W=1 enables the compiler warning -Wimplicit-fallthrough=3. 
That option does not recognize the fall-through comment in the fcloop driver. Add a fall-through comment that is recognized for -Wimplicit-fallthrough=3. This patch avoids that the compiler reports the following warning when building with W=1: drivers/nvme/target/fcloop.c:647:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (op == NVMET_FCOP_READDATA) ^ Signed-off-by: Bart Van Assche Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 5251689a1d9a..291f4121f516 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -648,6 +648,7 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, break; /* Fall-Thru to RSP handling */ + /* FALLTHRU */ case NVMET_FCOP_RSP: if (fcpreq) { -- cgit From cb4bfda62afa25b4eee3d635d33fccdd9485dd7c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 15 Oct 2018 10:19:06 -0600 Subject: nvme-pci: fix hot removal during error handling A removal waits for the reset_work to complete. If a surprise removal occurs around the same time as an error triggered controller reset, and reset work happened to dispatch a command to the removed controller, the command won't be recovered since the timeout work doesn't do anything during error recovery. We wouldn't want to wait for timeout handling anyway, so this patch fixes this by disabling the controller and killing admin queues prior to syncing with the reset_work. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 450481c2fd17..72737009b82d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2564,13 +2564,12 @@ static void nvme_remove(struct pci_dev *pdev) struct nvme_dev *dev = pci_get_drvdata(pdev); nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); - - cancel_work_sync(&dev->ctrl.reset_work); pci_set_drvdata(pdev, NULL); if (!pci_device_is_present(pdev)) { nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD); nvme_dev_disable(dev, true); + nvme_dev_remove_admin(dev); } flush_work(&dev->ctrl.reset_work); -- cgit From 6956b956934f10c19eca2a1d44f50a3bee860531 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 17 Oct 2018 17:25:13 +0200 Subject: drivers/block: Remove DAC960 driver The DAC960 driver has been obsoleted by the myrb/myrs drivers, so it can be dropped. Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- Documentation/blockdev/README.DAC960 | 756 ---- drivers/block/DAC960.c | 7229 ---------------------------------- drivers/block/DAC960.h | 4414 --------------------- drivers/block/Kconfig | 12 - drivers/block/Makefile | 1 - 5 files changed, 12412 deletions(-) delete mode 100644 Documentation/blockdev/README.DAC960 delete mode 100644 drivers/block/DAC960.c delete mode 100644 drivers/block/DAC960.h diff --git a/Documentation/blockdev/README.DAC960 b/Documentation/blockdev/README.DAC960 deleted file mode 100644 index bd85fb9dc6e5..000000000000 --- a/Documentation/blockdev/README.DAC960 +++ /dev/null @@ -1,756 +0,0 @@ - Linux Driver for Mylex DAC960/AcceleRAID/eXtremeRAID PCI RAID Controllers - - Version 2.2.11 for Linux 2.2.19 - Version 2.4.11 for Linux 2.4.12 - - PRODUCTION RELEASE - - 11 October 2001 - - Leonard N. 
Zubkoff - Dandelion Digital - lnz@dandelion.com - - Copyright 1998-2001 by Leonard N. Zubkoff - - - INTRODUCTION - -Mylex, Inc. designs and manufactures a variety of high performance PCI RAID -controllers. Mylex Corporation is located at 34551 Ardenwood Blvd., Fremont, -California 94555, USA and can be reached at 510.796.6100 or on the World Wide -Web at http://www.mylex.com. Mylex Technical Support can be reached by -electronic mail at mylexsup@us.ibm.com, by voice at 510.608.2400, or by FAX at -510.745.7715. Contact information for offices in Europe and Japan is available -on their Web site. - -The latest information on Linux support for DAC960 PCI RAID Controllers, as -well as the most recent release of this driver, will always be available from -my Linux Home Page at URL "http://www.dandelion.com/Linux/". The Linux DAC960 -driver supports all current Mylex PCI RAID controllers including the new -eXtremeRAID 2000/3000 and AcceleRAID 352/170/160 models which have an entirely -new firmware interface from the older eXtremeRAID 1100, AcceleRAID 150/200/250, -and DAC960PJ/PG/PU/PD/PL. See below for a complete controller list as well as -minimum firmware version requirements. For simplicity, in most places this -documentation refers to DAC960 generically rather than explicitly listing all -the supported models. - -Driver bug reports should be sent via electronic mail to "lnz@dandelion.com". -Please include with the bug report the complete configuration messages reported -by the driver at startup, along with any subsequent system messages relevant to -the controller's operation, and a detailed description of your system's -hardware configuration. Driver bugs are actually quite rare; if you encounter -problems with disks being marked offline, for example, please contact Mylex -Technical Support as the problem is related to the hardware configuration -rather than the Linux driver. - -Please consult the RAID controller documentation for detailed information -regarding installation and configuration of the controllers. This document -primarily provides information specific to the Linux support. - - - DRIVER FEATURES - -The DAC960 RAID controllers are supported solely as high performance RAID -controllers, not as interfaces to arbitrary SCSI devices. The Linux DAC960 -driver operates at the block device level, the same level as the SCSI and IDE -drivers. Unlike other RAID controllers currently supported on Linux, the -DAC960 driver is not dependent on the SCSI subsystem, and hence avoids all the -complexity and unnecessary code that would be associated with an implementation -as a SCSI driver. The DAC960 driver is designed for as high a performance as -possible with no compromises or extra code for compatibility with lower -performance devices. The DAC960 driver includes extensive error logging and -online configuration management capabilities. Except for initial configuration -of the controller and adding new disk drives, most everything can be handled -from Linux while the system is operational. - -The DAC960 driver is architected to support up to 8 controllers per system. -Each DAC960 parallel SCSI controller can support up to 15 disk drives per -channel, for a maximum of 60 drives on a four channel controller; the fibre -channel eXtremeRAID 3000 controller supports up to 125 disk drives per loop for -a total of 250 drives. The drives installed on a controller are divided into -one or more "Drive Groups", and then each Drive Group is subdivided further -into 1 to 32 "Logical Drives". 
Each Logical Drive has a specific RAID Level -and caching policy associated with it, and it appears to Linux as a single -block device. Logical Drives are further subdivided into up to 7 partitions -through the normal Linux and PC disk partitioning schemes. Logical Drives are -also known as "System Drives", and Drive Groups are also called "Packs". Both -terms are in use in the Mylex documentation; I have chosen to standardize on -the more generic "Logical Drive" and "Drive Group". - -DAC960 RAID disk devices are named in the style of the obsolete Device File -System (DEVFS). The device corresponding to Logical Drive D on Controller C -is referred to as /dev/rd/cCdD, and the partitions are called /dev/rd/cCdDp1 -through /dev/rd/cCdDp7. For example, partition 3 of Logical Drive 5 on -Controller 2 is referred to as /dev/rd/c2d5p3. Note that unlike with SCSI -disks the device names will not change in the event of a disk drive failure. -The DAC960 driver is assigned major numbers 48 - 55 with one major number per -controller. The 8 bits of minor number are divided into 5 bits for the Logical -Drive and 3 bits for the partition. - - - SUPPORTED DAC960/AcceleRAID/eXtremeRAID PCI RAID CONTROLLERS - -The following list comprises the supported DAC960, AcceleRAID, and eXtremeRAID -PCI RAID Controllers as of the date of this document. It is recommended that -anyone purchasing a Mylex PCI RAID Controller not in the following table -contact the author beforehand to verify that it is or will be supported. - -eXtremeRAID 3000 - 1 Wide Ultra-2/LVD SCSI channel - 2 External Fibre FC-AL channels - 233MHz StrongARM SA 110 Processor - 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots) - 32MB/64MB ECC SDRAM Memory - -eXtremeRAID 2000 - 4 Wide Ultra-160 LVD SCSI channels - 233MHz StrongARM SA 110 Processor - 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots) - 32MB/64MB ECC SDRAM Memory - -AcceleRAID 352 - 2 Wide Ultra-160 LVD SCSI channels - 100MHz Intel i960RN RISC Processor - 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots) - 32MB/64MB ECC SDRAM Memory - -AcceleRAID 170 - 1 Wide Ultra-160 LVD SCSI channel - 100MHz Intel i960RM RISC Processor - 16MB/32MB/64MB ECC SDRAM Memory - -AcceleRAID 160 (AcceleRAID 170LP) - 1 Wide Ultra-160 LVD SCSI channel - 100MHz Intel i960RS RISC Processor - Built in 16M ECC SDRAM Memory - PCI Low Profile Form Factor - fit for 2U height - -eXtremeRAID 1100 (DAC1164P) - 3 Wide Ultra-2/LVD SCSI channels - 233MHz StrongARM SA 110 Processor - 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots) - 16MB/32MB/64MB Parity SDRAM Memory with Battery Backup - -AcceleRAID 250 (DAC960PTL1) - Uses onboard Symbios SCSI chips on certain motherboards - Also includes one onboard Wide Ultra-2/LVD SCSI Channel - 66MHz Intel i960RD RISC Processor - 4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory - -AcceleRAID 200 (DAC960PTL0) - Uses onboard Symbios SCSI chips on certain motherboards - Includes no onboard SCSI Channels - 66MHz Intel i960RD RISC Processor - 4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory - -AcceleRAID 150 (DAC960PRL) - Uses onboard Symbios SCSI chips on certain motherboards - Also includes one onboard Wide Ultra-2/LVD SCSI Channel - 33MHz Intel i960RP RISC Processor - 4MB Parity EDO Memory - -DAC960PJ 1/2/3 Wide Ultra SCSI-3 Channels - 66MHz Intel i960RD RISC Processor - 4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory - -DAC960PG 1/2/3 Wide Ultra SCSI-3 Channels - 33MHz Intel i960RP RISC Processor - 4MB/8MB ECC EDO Memory - -DAC960PU 1/2/3 Wide Ultra SCSI-3 
Channels - Intel i960CF RISC Processor - 4MB/8MB EDRAM or 2MB/4MB/8MB/16MB/32MB DRAM Memory - -DAC960PD 1/2/3 Wide Fast SCSI-2 Channels - Intel i960CF RISC Processor - 4MB/8MB EDRAM or 2MB/4MB/8MB/16MB/32MB DRAM Memory - -DAC960PL 1/2/3 Wide Fast SCSI-2 Channels - Intel i960 RISC Processor - 2MB/4MB/8MB/16MB/32MB DRAM Memory - -DAC960P 1/2/3 Wide Fast SCSI-2 Channels - Intel i960 RISC Processor - 2MB/4MB/8MB/16MB/32MB DRAM Memory - -For the eXtremeRAID 2000/3000 and AcceleRAID 352/170/160, firmware version -6.00-01 or above is required. - -For the eXtremeRAID 1100, firmware version 5.06-0-52 or above is required. - -For the AcceleRAID 250, 200, and 150, firmware version 4.06-0-57 or above is -required. - -For the DAC960PJ and DAC960PG, firmware version 4.06-0-00 or above is required. - -For the DAC960PU, DAC960PD, DAC960PL, and DAC960P, either firmware version -3.51-0-04 or above is required (for dual Flash ROM controllers), or firmware -version 2.73-0-00 or above is required (for single Flash ROM controllers) - -Please note that not all SCSI disk drives are suitable for use with DAC960 -controllers, and only particular firmware versions of any given model may -actually function correctly. Similarly, not all motherboards have a BIOS that -properly initializes the AcceleRAID 250, AcceleRAID 200, AcceleRAID 150, -DAC960PJ, and DAC960PG because the Intel i960RD/RP is a multi-function device. -If in doubt, contact Mylex RAID Technical Support (mylexsup@us.ibm.com) to -verify compatibility. Mylex makes available a hard disk compatibility list at -http://www.mylex.com/support/hdcomp/hd-lists.html. - - - DRIVER INSTALLATION - -This distribution was prepared for Linux kernel version 2.2.19 or 2.4.12. - -To install the DAC960 RAID driver, you may use the following commands, -replacing "/usr/src" with wherever you keep your Linux kernel source tree: - - cd /usr/src - tar -xvzf DAC960-2.2.11.tar.gz (or DAC960-2.4.11.tar.gz) - mv README.DAC960 linux/Documentation - mv DAC960.[ch] linux/drivers/block - patch -p0 < DAC960.patch (if DAC960.patch is included) - cd linux - make config - make bzImage (or zImage) - -Then install "arch/x86/boot/bzImage" or "arch/x86/boot/zImage" as your -standard kernel, run lilo if appropriate, and reboot. - -To create the necessary devices in /dev, the "make_rd" script included in -"DAC960-Utilities.tar.gz" from http://www.dandelion.com/Linux/ may be used. -LILO 21 and FDISK v2.9 include DAC960 support; also included in this archive -are patches to LILO 20 and FDISK v2.8 that add DAC960 support, along with -statically linked executables of LILO and FDISK. This modified version of LILO -will allow booting from a DAC960 controller and/or mounting the root file -system from a DAC960. - -Red Hat Linux 6.0 and SuSE Linux 6.1 include support for Mylex PCI RAID -controllers. Installing directly onto a DAC960 may be problematic from other -Linux distributions until their installation utilities are updated. - - - INSTALLATION NOTES - -Before installing Linux or adding DAC960 logical drives to an existing Linux -system, the controller must first be configured to provide one or more logical -drives using the BIOS Configuration Utility or DACCF. Please note that since -there are only at most 6 usable partitions on each logical drive, systems -requiring more partitions should subdivide a drive group into multiple logical -drives, each of which can have up to 6 usable partitions. 
Also, note that with -large disk arrays it is advisable to enable the 8GB BIOS Geometry (255/63) -rather than accepting the default 2GB BIOS Geometry (128/32); failing to so do -will cause the logical drive geometry to have more than 65535 cylinders which -will make it impossible for FDISK to be used properly. The 8GB BIOS Geometry -can be enabled by configuring the DAC960 BIOS, which is accessible via Alt-M -during the BIOS initialization sequence. - -For maximum performance and the most efficient E2FSCK performance, it is -recommended that EXT2 file systems be built with a 4KB block size and 16 block -stride to match the DAC960 controller's 64KB default stripe size. The command -"mke2fs -b 4096 -R stride=16 " is appropriate. Unless there will be a -large number of small files on the file systems, it is also beneficial to add -the "-i 16384" option to increase the bytes per inode parameter thereby -reducing the file system metadata. Finally, on systems that will only be run -with Linux 2.2 or later kernels it is beneficial to enable sparse superblocks -with the "-s 1" option. - - - DAC960 ANNOUNCEMENTS MAILING LIST - -The DAC960 Announcements Mailing List provides a forum for informing Linux -users of new driver releases and other announcements regarding Linux support -for DAC960 PCI RAID Controllers. To join the mailing list, send a message to -"dac960-announce-request@dandelion.com" with the line "subscribe" in the -message body. - - - CONTROLLER CONFIGURATION AND STATUS MONITORING - -The DAC960 RAID controllers running firmware 4.06 or above include a Background -Initialization facility so that system downtime is minimized both for initial -installation and subsequent configuration of additional storage. The BIOS -Configuration Utility (accessible via Alt-R during the BIOS initialization -sequence) is used to quickly configure the controller, and then the logical -drives that have been created are available for immediate use even while they -are still being initialized by the controller. The primary need for online -configuration and status monitoring is then to avoid system downtime when disk -drives fail and must be replaced. Mylex's online monitoring and configuration -utilities are being ported to Linux and will become available at some point in -the future. Note that with a SAF-TE (SCSI Accessed Fault-Tolerant Enclosure) -enclosure, the controller is able to rebuild failed drives automatically as -soon as a drive replacement is made available. - -The primary interfaces for controller configuration and status monitoring are -special files created in the /proc/rd/... hierarchy along with the normal -system console logging mechanism. Whenever the system is operating, the DAC960 -driver queries each controller for status information every 10 seconds, and -checks for additional conditions every 60 seconds. The initial status of each -controller is always available for controller N in /proc/rd/cN/initial_status, -and the current status as of the last status monitoring query is available in -/proc/rd/cN/current_status. In addition, status changes are also logged by the -driver to the system console and will appear in the log files maintained by -syslog. The progress of asynchronous rebuild or consistency check operations -is also available in /proc/rd/cN/current_status, and progress messages are -logged to the system console at most every 60 seconds. 
- -Starting with the 2.2.3/2.0.3 versions of the driver, the status information -available in /proc/rd/cN/initial_status and /proc/rd/cN/current_status has been -augmented to include the vendor, model, revision, and serial number (if -available) for each physical device found connected to the controller: - -***** DAC960 RAID Driver Version 2.2.3 of 19 August 1999 ***** -Copyright 1998-1999 by Leonard N. Zubkoff -Configuring Mylex DAC960PRL PCI RAID Controller - Firmware Version: 4.07-0-07, Channels: 1, Memory Size: 16MB - PCI Bus: 1, Device: 4, Function: 1, I/O Address: Unassigned - PCI Address: 0xFE300000 mapped at 0xA0800000, IRQ Channel: 21 - Controller Queue Depth: 128, Maximum Blocks per Command: 128 - Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33 - Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63 - SAF-TE Enclosure Management Enabled - Physical Devices: - 0:0 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 68016775HA - Disk Status: Online, 17928192 blocks - 0:1 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 68004E53HA - Disk Status: Online, 17928192 blocks - 0:2 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 13013935HA - Disk Status: Online, 17928192 blocks - 0:3 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 13016897HA - Disk Status: Online, 17928192 blocks - 0:4 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 68019905HA - Disk Status: Online, 17928192 blocks - 0:5 Vendor: IBM Model: DRVS09D Revision: 0270 - Serial Number: 68012753HA - Disk Status: Online, 17928192 blocks - 0:6 Vendor: ESG-SHV Model: SCA HSBP M6 Revision: 0.61 - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 89640960 blocks, Write Thru - No Rebuild or Consistency Check in Progress - -To simplify the monitoring process for custom software, the special file -/proc/rd/status returns "OK" when all DAC960 controllers in the system are -operating normally and no failures have occurred, or "ALERT" if any logical -drives are offline or critical or any non-standby physical drives are dead. - -Configuration commands for controller N are available via the special file -/proc/rd/cN/user_command. A human readable command can be written to this -special file to initiate a configuration operation, and the results of the -operation can then be read back from the special file in addition to being -logged to the system console. The shell command sequence - - echo "" > /proc/rd/c0/user_command - cat /proc/rd/c0/user_command - -is typically used to execute configuration commands. The configuration -commands are: - - flush-cache - - The "flush-cache" command flushes the controller's cache. The system - automatically flushes the cache at shutdown or if the driver module is - unloaded, so this command is only needed to be certain a write back cache - is flushed to disk before the system is powered off by a command to a UPS. - Note that the flush-cache command also stops an asynchronous rebuild or - consistency check, so it should not be used except when the system is being - halted. - - kill : - - The "kill" command marks the physical drive : as DEAD. - This command is provided primarily for testing, and should not be used - during normal system operation. - - make-online : - - The "make-online" command changes the physical drive : - from status DEAD to status ONLINE. 
In cases where multiple physical drives - have been killed simultaneously, this command may be used to bring all but - one of them back online, after which a rebuild to the final drive is - necessary. - - Warning: make-online should only be used on a dead physical drive that is - an active part of a drive group, never on a standby drive. The command - should never be used on a dead drive that is part of a critical logical - drive; rebuild should be used if only a single drive is dead. - - make-standby : - - The "make-standby" command changes physical drive : - from status DEAD to status STANDBY. It should only be used in cases where - a dead drive was replaced after an automatic rebuild was performed onto a - standby drive. It cannot be used to add a standby drive to the controller - configuration if one was not created initially; the BIOS Configuration - Utility must be used for that currently. - - rebuild : - - The "rebuild" command initiates an asynchronous rebuild onto physical drive - :. It should only be used when a dead drive has been - replaced. - - check-consistency - - The "check-consistency" command initiates an asynchronous consistency check - of with automatic restoration. It can be used - whenever it is desired to verify the consistency of the redundancy - information. - - cancel-rebuild - cancel-consistency-check - - The "cancel-rebuild" and "cancel-consistency-check" commands cancel any - rebuild or consistency check operations previously initiated. - - - EXAMPLE I - DRIVE FAILURE WITHOUT A STANDBY DRIVE - -The following annotated logs demonstrate the controller configuration and and -online status monitoring capabilities of the Linux DAC960 Driver. The test -configuration comprises 6 1GB Quantum Atlas I disk drives on two channels of a -DAC960PJ controller. The physical drives are configured into a single drive -group without a standby drive, and the drive group has been configured into two -logical drives, one RAID-5 and one RAID-6. Note that these logs are from an -earlier version of the driver and the messages have changed somewhat with newer -releases, but the functionality remains similar. First, here is the current -status of the RAID configuration: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status -***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 ***** -Copyright 1998-1999 by Leonard N. Zubkoff -Configuring Mylex DAC960PJ PCI RAID Controller - Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB - PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned - PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9 - Controller Queue Depth: 128, Maximum Blocks per Command: 128 - Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33 - Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63 - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 5498880 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Online, 3305472 blocks, Write Thru - No Rebuild or Consistency Check in Progress - -gwynedd:/u/lnz# cat /proc/rd/status -OK - -The above messages indicate that everything is healthy, and /proc/rd/status -returns "OK" indicating that there are no problems with any DAC960 controller -in the system. For demonstration purposes, while I/O is active Physical Drive -1:1 is now disconnected, simulating a drive failure. 
The failure is noted by -the driver within 10 seconds of the controller's having detected it, and the -driver logs the following console status messages indicating that Logical -Drives 0 and 1 are now CRITICAL as a result of Physical Drive 1:1 being DEAD: - -DAC960#0: Physical Drive 1:2 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02 -DAC960#0: Physical Drive 1:3 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02 -DAC960#0: Physical Drive 1:1 killed because of timeout on SCSI command -DAC960#0: Physical Drive 1:1 is now DEAD -DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now CRITICAL -DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now CRITICAL - -The Sense Keys logged here are just Check Condition / Unit Attention conditions -arising from a SCSI bus reset that is forced by the controller during its error -recovery procedures. Concurrently with the above, the driver status available -from /proc/rd also reflects the drive failure. The status message in -/proc/rd/status has changed from "OK" to "ALERT": - -gwynedd:/u/lnz# cat /proc/rd/status -ALERT - -and /proc/rd/c0/current_status has been updated: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Dead, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru - No Rebuild or Consistency Check in Progress - -Since there are no standby drives configured, the system can continue to access -the logical drives in a performance degraded mode until the failed drive is -replaced and a rebuild operation completed to restore the redundancy of the -logical drives. Once Physical Drive 1:1 is replaced with a properly -functioning drive, or if the physical drive was killed without having failed -(e.g., due to electrical problems on the SCSI bus), the user can instruct the -controller to initiate a rebuild operation onto the newly replaced drive: - -gwynedd:/u/lnz# echo "rebuild 1:1" > /proc/rd/c0/user_command -gwynedd:/u/lnz# cat /proc/rd/c0/user_command -Rebuild of Physical Drive 1:1 Initiated - -The echo command instructs the controller to initiate an asynchronous rebuild -operation onto Physical Drive 1:1, and the status message that results from the -operation is then available for reading from /proc/rd/c0/user_command, as well -as being logged to the console by the driver. - -Within 10 seconds of this command the driver logs the initiation of the -asynchronous rebuild operation: - -DAC960#0: Rebuild of Physical Drive 1:1 Initiated -DAC960#0: Physical Drive 1:1 Error Log: Sense Key = 6, ASC = 29, ASCQ = 01 -DAC960#0: Physical Drive 1:1 is now WRITE-ONLY -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 1% completed - -and /proc/rd/c0/current_status is updated: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... 
- Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Write-Only, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru - Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 6% completed - -As the rebuild progresses, the current status in /proc/rd/c0/current_status is -updated every 10 seconds: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Write-Only, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru - Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 15% completed - -and every minute a progress message is logged to the console by the driver: - -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 32% completed -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 63% completed -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 94% completed -DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 94% completed - -Finally, the rebuild completes successfully. The driver logs the status of the -logical and physical drives and the rebuild completion: - -DAC960#0: Rebuild Completed Successfully -DAC960#0: Physical Drive 1:1 is now ONLINE -DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now ONLINE -DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now ONLINE - -/proc/rd/c0/current_status is updated: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 5498880 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Online, 3305472 blocks, Write Thru - Rebuild Completed Successfully - -and /proc/rd/status indicates that everything is healthy once again: - -gwynedd:/u/lnz# cat /proc/rd/status -OK - - - EXAMPLE II - DRIVE FAILURE WITH A STANDBY DRIVE - -The following annotated logs demonstrate the controller configuration and and -online status monitoring capabilities of the Linux DAC960 Driver. The test -configuration comprises 6 1GB Quantum Atlas I disk drives on two channels of a -DAC960PJ controller. The physical drives are configured into a single drive -group with a standby drive, and the drive group has been configured into two -logical drives, one RAID-5 and one RAID-6. Note that these logs are from an -earlier version of the driver and the messages have changed somewhat with newer -releases, but the functionality remains similar. First, here is the current -status of the RAID configuration: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status -***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 ***** -Copyright 1998-1999 by Leonard N. 
Zubkoff -Configuring Mylex DAC960PJ PCI RAID Controller - Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB - PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned - PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9 - Controller Queue Depth: 128, Maximum Blocks per Command: 128 - Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33 - Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63 - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Online, 2201600 blocks - 1:3 - Disk: Standby, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru - No Rebuild or Consistency Check in Progress - -gwynedd:/u/lnz# cat /proc/rd/status -OK - -The above messages indicate that everything is healthy, and /proc/rd/status -returns "OK" indicating that there are no problems with any DAC960 controller -in the system. For demonstration purposes, while I/O is active Physical Drive -1:2 is now disconnected, simulating a drive failure. The failure is noted by -the driver within 10 seconds of the controller's having detected it, and the -driver logs the following console status messages: - -DAC960#0: Physical Drive 1:1 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02 -DAC960#0: Physical Drive 1:3 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02 -DAC960#0: Physical Drive 1:2 killed because of timeout on SCSI command -DAC960#0: Physical Drive 1:2 is now DEAD -DAC960#0: Physical Drive 1:2 killed because it was removed -DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now CRITICAL -DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now CRITICAL - -Since a standby drive is configured, the controller automatically begins -rebuilding onto the standby drive: - -DAC960#0: Physical Drive 1:3 is now WRITE-ONLY -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 4% completed - -Concurrently with the above, the driver status available from /proc/rd also -reflects the drive failure and automatic rebuild. The status message in -/proc/rd/status has changed from "OK" to "ALERT": - -gwynedd:/u/lnz# cat /proc/rd/status -ALERT - -and /proc/rd/c0/current_status has been updated: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Dead, 2201600 blocks - 1:3 - Disk: Write-Only, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Critical, 4399104 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Critical, 2754560 blocks, Write Thru - Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 4% completed - -As the rebuild progresses, the current status in /proc/rd/c0/current_status is -updated every 10 seconds: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... 
- Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Dead, 2201600 blocks - 1:3 - Disk: Write-Only, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Critical, 4399104 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Critical, 2754560 blocks, Write Thru - Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 40% completed - -and every minute a progress message is logged on the console by the driver: - -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 40% completed -DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 76% completed -DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 66% completed -DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 84% completed - -Finally, the rebuild completes successfully. The driver logs the status of the -logical and physical drives and the rebuild completion: - -DAC960#0: Rebuild Completed Successfully -DAC960#0: Physical Drive 1:3 is now ONLINE -DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now ONLINE -DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now ONLINE - -/proc/rd/c0/current_status is updated: - -***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 ***** -Copyright 1998-1999 by Leonard N. Zubkoff -Configuring Mylex DAC960PJ PCI RAID Controller - Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB - PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned - PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9 - Controller Queue Depth: 128, Maximum Blocks per Command: 128 - Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33 - Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63 - Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Dead, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru - Rebuild Completed Successfully - -and /proc/rd/status indicates that everything is healthy once again: - -gwynedd:/u/lnz# cat /proc/rd/status -OK - -Note that the absence of a viable standby drive does not create an "ALERT" -status. Once dead Physical Drive 1:2 has been replaced, the controller must be -told that this has occurred and that the newly replaced drive should become the -new standby drive: - -gwynedd:/u/lnz# echo "make-standby 1:2" > /proc/rd/c0/user_command -gwynedd:/u/lnz# cat /proc/rd/c0/user_command -Make Standby of Physical Drive 1:2 Succeeded - -The echo command instructs the controller to make Physical Drive 1:2 into a -standby drive, and the status message that results from the operation is then -available for reading from /proc/rd/c0/user_command, as well as being logged to -the console by the driver. Within 60 seconds of this command the driver logs: - -DAC960#0: Physical Drive 1:2 Error Log: Sense Key = 6, ASC = 29, ASCQ = 01 -DAC960#0: Physical Drive 1:2 is now STANDBY -DAC960#0: Make Standby of Physical Drive 1:2 Succeeded - -and /proc/rd/c0/current_status is updated: - -gwynedd:/u/lnz# cat /proc/rd/c0/current_status - ... 
- Physical Devices: - 0:1 - Disk: Online, 2201600 blocks - 0:2 - Disk: Online, 2201600 blocks - 0:3 - Disk: Online, 2201600 blocks - 1:1 - Disk: Online, 2201600 blocks - 1:2 - Disk: Standby, 2201600 blocks - 1:3 - Disk: Online, 2201600 blocks - Logical Drives: - /dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru - /dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru - Rebuild Completed Successfully diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c deleted file mode 100644 index 581312ac375f..000000000000 --- a/drivers/block/DAC960.c +++ /dev/null @@ -1,7229 +0,0 @@ -/* - - Linux Driver for Mylex DAC960/AcceleRAID/eXtremeRAID PCI RAID Controllers - - Copyright 1998-2001 by Leonard N. Zubkoff - Portions Copyright 2002 by Mylex (An IBM Business Unit) - - This program is free software; you may redistribute and/or modify it under - the terms of the GNU General Public License Version 2 as published by the - Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY, without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for complete details. - -*/ - - -#define DAC960_DriverVersion "2.5.49" -#define DAC960_DriverDate "21 Aug 2007" - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "DAC960.h" - -#define DAC960_GAM_MINOR 252 - - -static DEFINE_MUTEX(DAC960_mutex); -static DAC960_Controller_T *DAC960_Controllers[DAC960_MaxControllers]; -static int DAC960_ControllerCount; -static struct proc_dir_entry *DAC960_ProcDirectoryEntry; - -static long disk_size(DAC960_Controller_T *p, int drive_nr) -{ - if (p->FirmwareType == DAC960_V1_Controller) { - if (drive_nr >= p->LogicalDriveCount) - return 0; - return p->V1.LogicalDriveInformation[drive_nr]. - LogicalDriveSize; - } else { - DAC960_V2_LogicalDeviceInfo_T *i = - p->V2.LogicalDeviceInformation[drive_nr]; - if (i == NULL) - return 0; - return i->ConfigurableDeviceSize; - } -} - -static int DAC960_open(struct block_device *bdev, fmode_t mode) -{ - struct gendisk *disk = bdev->bd_disk; - DAC960_Controller_T *p = disk->queue->queuedata; - int drive_nr = (long)disk->private_data; - int ret = -ENXIO; - - mutex_lock(&DAC960_mutex); - if (p->FirmwareType == DAC960_V1_Controller) { - if (p->V1.LogicalDriveInformation[drive_nr]. - LogicalDriveState == DAC960_V1_LogicalDrive_Offline) - goto out; - } else { - DAC960_V2_LogicalDeviceInfo_T *i = - p->V2.LogicalDeviceInformation[drive_nr]; - if (!i || i->LogicalDeviceState == DAC960_V2_LogicalDevice_Offline) - goto out; - } - - check_disk_change(bdev); - - if (!get_capacity(p->disks[drive_nr])) - goto out; - ret = 0; -out: - mutex_unlock(&DAC960_mutex); - return ret; -} - -static int DAC960_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - struct gendisk *disk = bdev->bd_disk; - DAC960_Controller_T *p = disk->queue->queuedata; - int drive_nr = (long)disk->private_data; - - if (p->FirmwareType == DAC960_V1_Controller) { - geo->heads = p->V1.GeometryTranslationHeads; - geo->sectors = p->V1.GeometryTranslationSectors; - geo->cylinders = p->V1.LogicalDriveInformation[drive_nr]. 
-			LogicalDriveSize / (geo->heads * geo->sectors);
-	} else {
-		DAC960_V2_LogicalDeviceInfo_T *i =
-			p->V2.LogicalDeviceInformation[drive_nr];
-		switch (i->DriveGeometry) {
-		case DAC960_V2_Geometry_128_32:
-			geo->heads = 128;
-			geo->sectors = 32;
-			break;
-		case DAC960_V2_Geometry_255_63:
-			geo->heads = 255;
-			geo->sectors = 63;
-			break;
-		default:
-			DAC960_Error("Illegal Logical Device Geometry %d\n",
-					p, i->DriveGeometry);
-			return -EINVAL;
-		}
-
-		geo->cylinders = i->ConfigurableDeviceSize /
-			(geo->heads * geo->sectors);
-	}
-
-	return 0;
-}
-
-static unsigned int DAC960_check_events(struct gendisk *disk,
-					unsigned int clearing)
-{
-	DAC960_Controller_T *p = disk->queue->queuedata;
-	int drive_nr = (long)disk->private_data;
-
-	if (!p->LogicalDriveInitiallyAccessible[drive_nr])
-		return DISK_EVENT_MEDIA_CHANGE;
-	return 0;
-}
-
-static int DAC960_revalidate_disk(struct gendisk *disk)
-{
-	DAC960_Controller_T *p = disk->queue->queuedata;
-	int unit = (long)disk->private_data;
-
-	set_capacity(disk, disk_size(p, unit));
-	return 0;
-}
-
-static const struct block_device_operations DAC960_BlockDeviceOperations = {
-	.owner			= THIS_MODULE,
-	.open			= DAC960_open,
-	.getgeo			= DAC960_getgeo,
-	.check_events		= DAC960_check_events,
-	.revalidate_disk	= DAC960_revalidate_disk,
-};
-
-
-/*
-  DAC960_AnnounceDriver announces the Driver Version and Date, Author's Name,
-  Copyright Notice, and Electronic Mail Address.
-*/
-
-static void DAC960_AnnounceDriver(DAC960_Controller_T *Controller)
-{
-  DAC960_Announce("***** DAC960 RAID Driver Version "
-		  DAC960_DriverVersion " of "
-		  DAC960_DriverDate " *****\n", Controller);
-  DAC960_Announce("Copyright 1998-2001 by Leonard N. Zubkoff "
-		  "<lnz@dandelion.com>\n", Controller);
-}
-
-
-/*
-  DAC960_Failure prints a standardized error message, and then returns false.
-*/
-
-static bool DAC960_Failure(DAC960_Controller_T *Controller,
-			   unsigned char *ErrorMessage)
-{
-  DAC960_Error("While configuring DAC960 PCI RAID Controller at\n",
-	       Controller);
-  if (Controller->IO_Address == 0)
-    DAC960_Error("PCI Bus %d Device %d Function %d I/O Address N/A "
-		 "PCI Address 0x%X\n", Controller,
-		 Controller->Bus, Controller->Device,
-		 Controller->Function, Controller->PCI_Address);
-  else DAC960_Error("PCI Bus %d Device %d Function %d I/O Address "
-		    "0x%X PCI Address 0x%X\n", Controller,
-		    Controller->Bus, Controller->Device,
-		    Controller->Function, Controller->IO_Address,
-		    Controller->PCI_Address);
-  DAC960_Error("%s FAILED - DETACHING\n", Controller, ErrorMessage);
-  return false;
-}
-
-/*
-  init_dma_loaf() and slice_dma_loaf() are helper functions for
-  aggregating the dma-mapped memory for a well-known collection of
-  data structures that are of different lengths.
-
-  These routines don't guarantee any alignment.  The caller must
-  include any space needed for alignment in the sizes of the structures
-  that are passed in.
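  The pattern these helpers support is: size every structure up front, make
  one DMA-coherent allocation, then carve it sequentially. A minimal sketch
  of the same carve-once allocator in plain C (names are hypothetical and
  the real helpers also track a dma_addr_t alongside the CPU pointer):

	#include <stdlib.h>

	struct loaf {
		char *base, *free;
		size_t length;
	};

	static int loaf_init(struct loaf *l, size_t len)
	{
		l->base = l->free = calloc(1, len); /* one allocation backs everything */
		l->length = len;
		return l->base != NULL;
	}

	static void *loaf_slice(struct loaf *l, size_t len)
	{
		void *p = l->free;

		if (l->free + len > l->base + l->length)
			return NULL;    /* the caller under-sized the loaf */
		l->free += len;         /* no alignment padding, as noted above */
		return p;
	}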
- */ - -static bool init_dma_loaf(struct pci_dev *dev, struct dma_loaf *loaf, - size_t len) -{ - void *cpu_addr; - dma_addr_t dma_handle; - - cpu_addr = pci_alloc_consistent(dev, len, &dma_handle); - if (cpu_addr == NULL) - return false; - - loaf->cpu_free = loaf->cpu_base = cpu_addr; - loaf->dma_free =loaf->dma_base = dma_handle; - loaf->length = len; - memset(cpu_addr, 0, len); - return true; -} - -static void *slice_dma_loaf(struct dma_loaf *loaf, size_t len, - dma_addr_t *dma_handle) -{ - void *cpu_end = loaf->cpu_free + len; - void *cpu_addr = loaf->cpu_free; - - BUG_ON(cpu_end > loaf->cpu_base + loaf->length); - *dma_handle = loaf->dma_free; - loaf->cpu_free = cpu_end; - loaf->dma_free += len; - return cpu_addr; -} - -static void free_dma_loaf(struct pci_dev *dev, struct dma_loaf *loaf_handle) -{ - if (loaf_handle->cpu_base != NULL) - pci_free_consistent(dev, loaf_handle->length, - loaf_handle->cpu_base, loaf_handle->dma_base); -} - - -/* - DAC960_CreateAuxiliaryStructures allocates and initializes the auxiliary - data structures for Controller. It returns true on success and false on - failure. -*/ - -static bool DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller) -{ - int CommandAllocationLength, CommandAllocationGroupSize; - int CommandsRemaining = 0, CommandIdentifier, CommandGroupByteCount; - void *AllocationPointer = NULL; - void *ScatterGatherCPU = NULL; - dma_addr_t ScatterGatherDMA; - struct dma_pool *ScatterGatherPool; - void *RequestSenseCPU = NULL; - dma_addr_t RequestSenseDMA; - struct dma_pool *RequestSensePool = NULL; - - if (Controller->FirmwareType == DAC960_V1_Controller) - { - CommandAllocationLength = offsetof(DAC960_Command_T, V1.EndMarker); - CommandAllocationGroupSize = DAC960_V1_CommandAllocationGroupSize; - ScatterGatherPool = dma_pool_create("DAC960_V1_ScatterGather", - &Controller->PCIDevice->dev, - DAC960_V1_ScatterGatherLimit * sizeof(DAC960_V1_ScatterGatherSegment_T), - sizeof(DAC960_V1_ScatterGatherSegment_T), 0); - if (ScatterGatherPool == NULL) - return DAC960_Failure(Controller, - "AUXILIARY STRUCTURE CREATION (SG)"); - Controller->ScatterGatherPool = ScatterGatherPool; - } - else - { - CommandAllocationLength = offsetof(DAC960_Command_T, V2.EndMarker); - CommandAllocationGroupSize = DAC960_V2_CommandAllocationGroupSize; - ScatterGatherPool = dma_pool_create("DAC960_V2_ScatterGather", - &Controller->PCIDevice->dev, - DAC960_V2_ScatterGatherLimit * sizeof(DAC960_V2_ScatterGatherSegment_T), - sizeof(DAC960_V2_ScatterGatherSegment_T), 0); - if (ScatterGatherPool == NULL) - return DAC960_Failure(Controller, - "AUXILIARY STRUCTURE CREATION (SG)"); - RequestSensePool = dma_pool_create("DAC960_V2_RequestSense", - &Controller->PCIDevice->dev, sizeof(DAC960_SCSI_RequestSense_T), - sizeof(int), 0); - if (RequestSensePool == NULL) { - dma_pool_destroy(ScatterGatherPool); - return DAC960_Failure(Controller, - "AUXILIARY STRUCTURE CREATION (SG)"); - } - Controller->ScatterGatherPool = ScatterGatherPool; - Controller->V2.RequestSensePool = RequestSensePool; - } - Controller->CommandAllocationGroupSize = CommandAllocationGroupSize; - Controller->FreeCommands = NULL; - for (CommandIdentifier = 1; - CommandIdentifier <= Controller->DriverQueueDepth; - CommandIdentifier++) - { - DAC960_Command_T *Command; - if (--CommandsRemaining <= 0) - { - CommandsRemaining = - Controller->DriverQueueDepth - CommandIdentifier + 1; - if (CommandsRemaining > CommandAllocationGroupSize) - CommandsRemaining = CommandAllocationGroupSize; - CommandGroupByteCount = - 
CommandsRemaining * CommandAllocationLength; - AllocationPointer = kzalloc(CommandGroupByteCount, GFP_ATOMIC); - if (AllocationPointer == NULL) - return DAC960_Failure(Controller, - "AUXILIARY STRUCTURE CREATION"); - } - Command = (DAC960_Command_T *) AllocationPointer; - AllocationPointer += CommandAllocationLength; - Command->CommandIdentifier = CommandIdentifier; - Command->Controller = Controller; - Command->Next = Controller->FreeCommands; - Controller->FreeCommands = Command; - Controller->Commands[CommandIdentifier-1] = Command; - ScatterGatherCPU = dma_pool_alloc(ScatterGatherPool, GFP_ATOMIC, - &ScatterGatherDMA); - if (ScatterGatherCPU == NULL) - return DAC960_Failure(Controller, "AUXILIARY STRUCTURE CREATION"); - - if (RequestSensePool != NULL) { - RequestSenseCPU = dma_pool_alloc(RequestSensePool, GFP_ATOMIC, - &RequestSenseDMA); - if (RequestSenseCPU == NULL) { - dma_pool_free(ScatterGatherPool, ScatterGatherCPU, - ScatterGatherDMA); - return DAC960_Failure(Controller, - "AUXILIARY STRUCTURE CREATION"); - } - } - if (Controller->FirmwareType == DAC960_V1_Controller) { - Command->cmd_sglist = Command->V1.ScatterList; - Command->V1.ScatterGatherList = - (DAC960_V1_ScatterGatherSegment_T *)ScatterGatherCPU; - Command->V1.ScatterGatherListDMA = ScatterGatherDMA; - sg_init_table(Command->cmd_sglist, DAC960_V1_ScatterGatherLimit); - } else { - Command->cmd_sglist = Command->V2.ScatterList; - Command->V2.ScatterGatherList = - (DAC960_V2_ScatterGatherSegment_T *)ScatterGatherCPU; - Command->V2.ScatterGatherListDMA = ScatterGatherDMA; - Command->V2.RequestSense = - (DAC960_SCSI_RequestSense_T *)RequestSenseCPU; - Command->V2.RequestSenseDMA = RequestSenseDMA; - sg_init_table(Command->cmd_sglist, DAC960_V2_ScatterGatherLimit); - } - } - return true; -} - - -/* - DAC960_DestroyAuxiliaryStructures deallocates the auxiliary data - structures for Controller. -*/ - -static void DAC960_DestroyAuxiliaryStructures(DAC960_Controller_T *Controller) -{ - int i; - struct dma_pool *ScatterGatherPool = Controller->ScatterGatherPool; - struct dma_pool *RequestSensePool = NULL; - void *ScatterGatherCPU; - dma_addr_t ScatterGatherDMA; - void *RequestSenseCPU; - dma_addr_t RequestSenseDMA; - DAC960_Command_T *CommandGroup = NULL; - - - if (Controller->FirmwareType == DAC960_V2_Controller) - RequestSensePool = Controller->V2.RequestSensePool; - - Controller->FreeCommands = NULL; - for (i = 0; i < Controller->DriverQueueDepth; i++) - { - DAC960_Command_T *Command = Controller->Commands[i]; - - if (Command == NULL) - continue; - - if (Controller->FirmwareType == DAC960_V1_Controller) { - ScatterGatherCPU = (void *)Command->V1.ScatterGatherList; - ScatterGatherDMA = Command->V1.ScatterGatherListDMA; - RequestSenseCPU = NULL; - RequestSenseDMA = (dma_addr_t)0; - } else { - ScatterGatherCPU = (void *)Command->V2.ScatterGatherList; - ScatterGatherDMA = Command->V2.ScatterGatherListDMA; - RequestSenseCPU = (void *)Command->V2.RequestSense; - RequestSenseDMA = Command->V2.RequestSenseDMA; - } - if (ScatterGatherCPU != NULL) - dma_pool_free(ScatterGatherPool, ScatterGatherCPU, ScatterGatherDMA); - if (RequestSenseCPU != NULL) - dma_pool_free(RequestSensePool, RequestSenseCPU, RequestSenseDMA); - - if ((Command->CommandIdentifier - % Controller->CommandAllocationGroupSize) == 1) { - /* - * We can't free the group of commands until all of the - * request sense and scatter gather dma structures are free. 
- * Remember the beginning of the group, but don't free it - * until we've reached the beginning of the next group. - */ - kfree(CommandGroup); - CommandGroup = Command; - } - Controller->Commands[i] = NULL; - } - kfree(CommandGroup); - - if (Controller->CombinedStatusBuffer != NULL) - { - kfree(Controller->CombinedStatusBuffer); - Controller->CombinedStatusBuffer = NULL; - Controller->CurrentStatusBuffer = NULL; - } - - dma_pool_destroy(ScatterGatherPool); - if (Controller->FirmwareType == DAC960_V1_Controller) - return; - - dma_pool_destroy(RequestSensePool); - - for (i = 0; i < DAC960_MaxLogicalDrives; i++) { - kfree(Controller->V2.LogicalDeviceInformation[i]); - Controller->V2.LogicalDeviceInformation[i] = NULL; - } - - for (i = 0; i < DAC960_V2_MaxPhysicalDevices; i++) - { - kfree(Controller->V2.PhysicalDeviceInformation[i]); - Controller->V2.PhysicalDeviceInformation[i] = NULL; - kfree(Controller->V2.InquiryUnitSerialNumber[i]); - Controller->V2.InquiryUnitSerialNumber[i] = NULL; - } -} - - -/* - DAC960_V1_ClearCommand clears critical fields of Command for DAC960 V1 - Firmware Controllers. -*/ - -static inline void DAC960_V1_ClearCommand(DAC960_Command_T *Command) -{ - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - memset(CommandMailbox, 0, sizeof(DAC960_V1_CommandMailbox_T)); - Command->V1.CommandStatus = 0; -} - - -/* - DAC960_V2_ClearCommand clears critical fields of Command for DAC960 V2 - Firmware Controllers. -*/ - -static inline void DAC960_V2_ClearCommand(DAC960_Command_T *Command) -{ - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - memset(CommandMailbox, 0, sizeof(DAC960_V2_CommandMailbox_T)); - Command->V2.CommandStatus = 0; -} - - -/* - DAC960_AllocateCommand allocates a Command structure from Controller's - free list. During driver initialization, a special initialization command - has been placed on the free list to guarantee that command allocation can - never fail. -*/ - -static inline DAC960_Command_T *DAC960_AllocateCommand(DAC960_Controller_T - *Controller) -{ - DAC960_Command_T *Command = Controller->FreeCommands; - if (Command == NULL) return NULL; - Controller->FreeCommands = Command->Next; - Command->Next = NULL; - return Command; -} - - -/* - DAC960_DeallocateCommand deallocates Command, returning it to Controller's - free list. -*/ - -static inline void DAC960_DeallocateCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - - Command->Request = NULL; - Command->Next = Controller->FreeCommands; - Controller->FreeCommands = Command; -} - - -/* - DAC960_WaitForCommand waits for a wake_up on Controller's Command Wait Queue. -*/ - -static void DAC960_WaitForCommand(DAC960_Controller_T *Controller) -{ - spin_unlock_irq(&Controller->queue_lock); - __wait_event(Controller->CommandWaitQueue, Controller->FreeCommands); - spin_lock_irq(&Controller->queue_lock); -} - -/* - DAC960_GEM_QueueCommand queues Command for DAC960 GEM Series Controllers. 
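  All of the V2 queueing routines that follow obey the same ring discipline:
  copy the command into the next memory-mailbox slot, ring the hardware
  doorbell only when one of the two most recently used slots has already been
  consumed (its Words[0] is back to zero; if both predecessors are still
  pending, the code evidently assumes the controller is already scanning the
  ring), and wrap the next pointer at the end of the array. A self-contained
  sketch of that bookkeeping, with hypothetical types in place of the real
  mailbox layout:

	#define SLOTS 8                         /* hypothetical ring size */

	struct mbox { unsigned int Words[4]; }; /* Words[0] == 0 once consumed */

	struct ring {
		struct mbox slot[SLOTS];
		struct mbox *next, *prev1, *prev2;
	};

	/* Returns nonzero when the submitter must also ring the doorbell. */
	static int ring_post(struct ring *r, const struct mbox *cmd)
	{
		int kick;

		*r->next = *cmd;                /* publish the command */
		kick = r->prev1->Words[0] == 0 || r->prev2->Words[0] == 0;
		r->prev2 = r->prev1;            /* slide the two-slot history */
		r->prev1 = r->next;
		if (++r->next > &r->slot[SLOTS - 1])
			r->next = &r->slot[0];  /* wrap around */
		return kick;
	}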
-*/ - -static void DAC960_GEM_QueueCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandMailbox_T *NextCommandMailbox = - Controller->V2.NextCommandMailbox; - - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_GEM_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - - if (Controller->V2.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V2.PreviousCommandMailbox2->Words[0] == 0) - DAC960_GEM_MemoryMailboxNewCommand(ControllerBaseAddress); - - Controller->V2.PreviousCommandMailbox2 = - Controller->V2.PreviousCommandMailbox1; - Controller->V2.PreviousCommandMailbox1 = NextCommandMailbox; - - if (++NextCommandMailbox > Controller->V2.LastCommandMailbox) - NextCommandMailbox = Controller->V2.FirstCommandMailbox; - - Controller->V2.NextCommandMailbox = NextCommandMailbox; -} - -/* - DAC960_BA_QueueCommand queues Command for DAC960 BA Series Controllers. -*/ - -static void DAC960_BA_QueueCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandMailbox_T *NextCommandMailbox = - Controller->V2.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_BA_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V2.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V2.PreviousCommandMailbox2->Words[0] == 0) - DAC960_BA_MemoryMailboxNewCommand(ControllerBaseAddress); - Controller->V2.PreviousCommandMailbox2 = - Controller->V2.PreviousCommandMailbox1; - Controller->V2.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V2.LastCommandMailbox) - NextCommandMailbox = Controller->V2.FirstCommandMailbox; - Controller->V2.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_LP_QueueCommand queues Command for DAC960 LP Series Controllers. -*/ - -static void DAC960_LP_QueueCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandMailbox_T *NextCommandMailbox = - Controller->V2.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_LP_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V2.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V2.PreviousCommandMailbox2->Words[0] == 0) - DAC960_LP_MemoryMailboxNewCommand(ControllerBaseAddress); - Controller->V2.PreviousCommandMailbox2 = - Controller->V2.PreviousCommandMailbox1; - Controller->V2.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V2.LastCommandMailbox) - NextCommandMailbox = Controller->V2.FirstCommandMailbox; - Controller->V2.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_LA_QueueCommandDualMode queues Command for DAC960 LA Series - Controllers with Dual Mode Firmware. 
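  The GEM, BA, and LP routines above (and the V1 LA/PG pairs below) are
  line-for-line identical apart from the hardware primitives they call. One
  hedged way the shared bookkeeping could be written once, reusing the ring
  types from the previous sketch and a hypothetical per-family ops table:

	struct mbox_ops {
		void (*write_mailbox)(struct mbox *next, const struct mbox *cmd);
		void (*new_command)(void *base);        /* doorbell */
	};

	static void queue_command(struct ring *r, void *base,
				  const struct mbox_ops *ops, struct mbox *cmd)
	{
		ops->write_mailbox(r->next, cmd);
		if (r->prev1->Words[0] == 0 || r->prev2->Words[0] == 0)
			ops->new_command(base);
		r->prev2 = r->prev1;
		r->prev1 = r->next;
		if (++r->next > &r->slot[SLOTS - 1])
			r->next = &r->slot[0];
	}

  The driver instead open-codes each variant, presumably to keep the
  per-controller submission paths free of indirect calls.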
-*/ - -static void DAC960_LA_QueueCommandDualMode(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandMailbox_T *NextCommandMailbox = - Controller->V1.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_LA_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V1.PreviousCommandMailbox2->Words[0] == 0) - DAC960_LA_MemoryMailboxNewCommand(ControllerBaseAddress); - Controller->V1.PreviousCommandMailbox2 = - Controller->V1.PreviousCommandMailbox1; - Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V1.LastCommandMailbox) - NextCommandMailbox = Controller->V1.FirstCommandMailbox; - Controller->V1.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_LA_QueueCommandSingleMode queues Command for DAC960 LA Series - Controllers with Single Mode Firmware. -*/ - -static void DAC960_LA_QueueCommandSingleMode(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandMailbox_T *NextCommandMailbox = - Controller->V1.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_LA_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V1.PreviousCommandMailbox2->Words[0] == 0) - DAC960_LA_HardwareMailboxNewCommand(ControllerBaseAddress); - Controller->V1.PreviousCommandMailbox2 = - Controller->V1.PreviousCommandMailbox1; - Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V1.LastCommandMailbox) - NextCommandMailbox = Controller->V1.FirstCommandMailbox; - Controller->V1.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_PG_QueueCommandDualMode queues Command for DAC960 PG Series - Controllers with Dual Mode Firmware. -*/ - -static void DAC960_PG_QueueCommandDualMode(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandMailbox_T *NextCommandMailbox = - Controller->V1.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_PG_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V1.PreviousCommandMailbox2->Words[0] == 0) - DAC960_PG_MemoryMailboxNewCommand(ControllerBaseAddress); - Controller->V1.PreviousCommandMailbox2 = - Controller->V1.PreviousCommandMailbox1; - Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V1.LastCommandMailbox) - NextCommandMailbox = Controller->V1.FirstCommandMailbox; - Controller->V1.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_PG_QueueCommandSingleMode queues Command for DAC960 PG Series - Controllers with Single Mode Firmware. 
-*/ - -static void DAC960_PG_QueueCommandSingleMode(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_CommandMailbox_T *NextCommandMailbox = - Controller->V1.NextCommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - DAC960_PG_WriteCommandMailbox(NextCommandMailbox, CommandMailbox); - if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 || - Controller->V1.PreviousCommandMailbox2->Words[0] == 0) - DAC960_PG_HardwareMailboxNewCommand(ControllerBaseAddress); - Controller->V1.PreviousCommandMailbox2 = - Controller->V1.PreviousCommandMailbox1; - Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox; - if (++NextCommandMailbox > Controller->V1.LastCommandMailbox) - NextCommandMailbox = Controller->V1.FirstCommandMailbox; - Controller->V1.NextCommandMailbox = NextCommandMailbox; -} - - -/* - DAC960_PD_QueueCommand queues Command for DAC960 PD Series Controllers. -*/ - -static void DAC960_PD_QueueCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - while (DAC960_PD_MailboxFullP(ControllerBaseAddress)) - udelay(1); - DAC960_PD_WriteCommandMailbox(ControllerBaseAddress, CommandMailbox); - DAC960_PD_NewCommand(ControllerBaseAddress); -} - - -/* - DAC960_P_QueueCommand queues Command for DAC960 P Series Controllers. -*/ - -static void DAC960_P_QueueCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier; - switch (CommandMailbox->Common.CommandOpcode) - { - case DAC960_V1_Enquiry: - CommandMailbox->Common.CommandOpcode = DAC960_V1_Enquiry_Old; - break; - case DAC960_V1_GetDeviceState: - CommandMailbox->Common.CommandOpcode = DAC960_V1_GetDeviceState_Old; - break; - case DAC960_V1_Read: - CommandMailbox->Common.CommandOpcode = DAC960_V1_Read_Old; - DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox); - break; - case DAC960_V1_Write: - CommandMailbox->Common.CommandOpcode = DAC960_V1_Write_Old; - DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox); - break; - case DAC960_V1_ReadWithScatterGather: - CommandMailbox->Common.CommandOpcode = - DAC960_V1_ReadWithScatterGather_Old; - DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox); - break; - case DAC960_V1_WriteWithScatterGather: - CommandMailbox->Common.CommandOpcode = - DAC960_V1_WriteWithScatterGather_Old; - DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox); - break; - default: - break; - } - while (DAC960_PD_MailboxFullP(ControllerBaseAddress)) - udelay(1); - DAC960_PD_WriteCommandMailbox(ControllerBaseAddress, CommandMailbox); - DAC960_PD_NewCommand(ControllerBaseAddress); -} - - -/* - DAC960_ExecuteCommand executes Command and waits for completion. 
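  The function below is the synchronous bridge: it queues the command under
  the queue lock, then sleeps on an on-stack completion that the interrupt
  handler signals. A minimal sketch of that submit-and-wait shape using the
  same kernel completion API (demo names are hypothetical; the
  hardware-specific submission is elided):

	#include <linux/completion.h>

	struct demo_cmd {
		struct completion done;
		int status;
	};

	/* Called from the interrupt handler once the hardware finishes. */
	static void demo_cmd_finished(struct demo_cmd *cmd, int status)
	{
		cmd->status = status;
		complete(&cmd->done);           /* wakes the sleeping submitter */
	}

	static int demo_execute(struct demo_cmd *cmd)
	{
		init_completion(&cmd->done);
		/* submit cmd to the hardware here */
		wait_for_completion(&cmd->done);
		return cmd->status;
	}

  Note the in_interrupt() early return in the real function: when called
  from interrupt context it cannot sleep, so it fires the command and
  returns without waiting for the status.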
-*/
-
-static void DAC960_ExecuteCommand(DAC960_Command_T *Command)
-{
-  DAC960_Controller_T *Controller = Command->Controller;
-  DECLARE_COMPLETION_ONSTACK(Completion);
-  unsigned long flags;
-  Command->Completion = &Completion;
-
-  spin_lock_irqsave(&Controller->queue_lock, flags);
-  DAC960_QueueCommand(Command);
-  spin_unlock_irqrestore(&Controller->queue_lock, flags);
-
-  if (in_interrupt())
-	  return;
-  wait_for_completion(&Completion);
-}
-
-
-/*
-  DAC960_V1_ExecuteType3 executes a DAC960 V1 Firmware Controller Type 3
-  Command and waits for completion.  It returns true on success and false
-  on failure.
-*/
-
-static bool DAC960_V1_ExecuteType3(DAC960_Controller_T *Controller,
-				   DAC960_V1_CommandOpcode_T CommandOpcode,
-				   dma_addr_t DataDMA)
-{
-  DAC960_Command_T *Command = DAC960_AllocateCommand(Controller);
-  DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
-  DAC960_V1_CommandStatus_T CommandStatus;
-  DAC960_V1_ClearCommand(Command);
-  Command->CommandType = DAC960_ImmediateCommand;
-  CommandMailbox->Type3.CommandOpcode = CommandOpcode;
-  CommandMailbox->Type3.BusAddress = DataDMA;
-  DAC960_ExecuteCommand(Command);
-  CommandStatus = Command->V1.CommandStatus;
-  DAC960_DeallocateCommand(Command);
-  return (CommandStatus == DAC960_V1_NormalCompletion);
-}
-
-
-/*
-  DAC960_V1_ExecuteType3B executes a DAC960 V1 Firmware Controller Type 3B
-  Command and waits for completion.  It returns true on success and false
-  on failure.
-*/
-
-static bool DAC960_V1_ExecuteType3B(DAC960_Controller_T *Controller,
-				    DAC960_V1_CommandOpcode_T CommandOpcode,
-				    unsigned char CommandOpcode2,
-				    dma_addr_t DataDMA)
-{
-  DAC960_Command_T *Command = DAC960_AllocateCommand(Controller);
-  DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
-  DAC960_V1_CommandStatus_T CommandStatus;
-  DAC960_V1_ClearCommand(Command);
-  Command->CommandType = DAC960_ImmediateCommand;
-  CommandMailbox->Type3B.CommandOpcode = CommandOpcode;
-  CommandMailbox->Type3B.CommandOpcode2 = CommandOpcode2;
-  CommandMailbox->Type3B.BusAddress = DataDMA;
-  DAC960_ExecuteCommand(Command);
-  CommandStatus = Command->V1.CommandStatus;
-  DAC960_DeallocateCommand(Command);
-  return (CommandStatus == DAC960_V1_NormalCompletion);
-}
-
-
-/*
-  DAC960_V1_ExecuteType3D executes a DAC960 V1 Firmware Controller Type 3D
-  Command and waits for completion.  It returns true on success and false
-  on failure.
-*/
-
-static bool DAC960_V1_ExecuteType3D(DAC960_Controller_T *Controller,
-				    DAC960_V1_CommandOpcode_T CommandOpcode,
-				    unsigned char Channel,
-				    unsigned char TargetID,
-				    dma_addr_t DataDMA)
-{
-  DAC960_Command_T *Command = DAC960_AllocateCommand(Controller);
-  DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
-  DAC960_V1_CommandStatus_T CommandStatus;
-  DAC960_V1_ClearCommand(Command);
-  Command->CommandType = DAC960_ImmediateCommand;
-  CommandMailbox->Type3D.CommandOpcode = CommandOpcode;
-  CommandMailbox->Type3D.Channel = Channel;
-  CommandMailbox->Type3D.TargetID = TargetID;
-  CommandMailbox->Type3D.BusAddress = DataDMA;
-  DAC960_ExecuteCommand(Command);
-  CommandStatus = Command->V1.CommandStatus;
-  DAC960_DeallocateCommand(Command);
-  return (CommandStatus == DAC960_V1_NormalCompletion);
-}
-
-
-/*
-  DAC960_V2_GeneralInfo executes a DAC960 V2 Firmware General Information
-  Reading IOCTL Command and waits for completion.  It returns true on success
-  and false on failure.
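  Each of the V2 information commands in this stretch describes its
  destination buffer the same way: DataTransferControllerToHost marks the
  direction, NoAutoRequestSense suppresses automatic sense fetching, and a
  single scatter/gather segment carries the DMA (bus) address plus a byte
  count equal to DataTransferSize. Reduced to its essentials, with
  hypothetical field names mirroring the real ones:

	struct sg_segment {
		unsigned long long SegmentDataPointer;  /* bus address for the DMA */
		unsigned long long SegmentByteCount;
	};

	static void describe_buffer(struct sg_segment *sg,
				    unsigned long long dma,
				    unsigned long long size)
	{
		sg->SegmentDataPointer = dma;   /* controller writes here */
		sg->SegmentByteCount = size;    /* always the full transfer size */
	}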
- - Return data in The controller's HealthStatusBuffer, which is dma-able memory -*/ - -static bool DAC960_V2_GeneralInfo(DAC960_Controller_T *Controller) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->Common.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->Common.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->Common.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->Common.DataTransferSize = sizeof(DAC960_V2_HealthStatusBuffer_T); - CommandMailbox->Common.IOCTL_Opcode = DAC960_V2_GetHealthStatus; - CommandMailbox->Common.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.HealthStatusBufferDMA; - CommandMailbox->Common.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->Common.DataTransferSize; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V2_ControllerInfo executes a DAC960 V2 Firmware Controller - Information Reading IOCTL Command and waits for completion. It returns - true on success and false on failure. - - Data is returned in the controller's V2.NewControllerInformation dma-able - memory buffer. -*/ - -static bool DAC960_V2_NewControllerInfo(DAC960_Controller_T *Controller) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->ControllerInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->ControllerInfo.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->ControllerInfo.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->ControllerInfo.DataTransferSize = sizeof(DAC960_V2_ControllerInfo_T); - CommandMailbox->ControllerInfo.ControllerNumber = 0; - CommandMailbox->ControllerInfo.IOCTL_Opcode = DAC960_V2_GetControllerInfo; - CommandMailbox->ControllerInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewControllerInformationDMA; - CommandMailbox->ControllerInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->ControllerInfo.DataTransferSize; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V2_LogicalDeviceInfo executes a DAC960 V2 Firmware Controller Logical - Device Information Reading IOCTL Command and waits for completion. It - returns true on success and false on failure. 
- - Data is returned in the controller's V2.NewLogicalDeviceInformation -*/ - -static bool DAC960_V2_NewLogicalDeviceInfo(DAC960_Controller_T *Controller, - unsigned short LogicalDeviceNumber) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->LogicalDeviceInfo.CommandOpcode = - DAC960_V2_IOCTL; - CommandMailbox->LogicalDeviceInfo.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->LogicalDeviceInfo.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->LogicalDeviceInfo.DataTransferSize = - sizeof(DAC960_V2_LogicalDeviceInfo_T); - CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber = - LogicalDeviceNumber; - CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode = DAC960_V2_GetLogicalDeviceInfoValid; - CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewLogicalDeviceInformationDMA; - CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->LogicalDeviceInfo.DataTransferSize; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V2_PhysicalDeviceInfo executes a DAC960 V2 Firmware Controller "Read - Physical Device Information" IOCTL Command and waits for completion. It - returns true on success and false on failure. - - The Channel, TargetID, LogicalUnit arguments should be 0 the first time - this function is called for a given controller. This will return data - for the "first" device on that controller. The returned data includes a - Channel, TargetID, LogicalUnit that can be passed in to this routine to - get data for the NEXT device on that controller. - - Data is stored in the controller's V2.NewPhysicalDeviceInfo dma-able - memory buffer. 
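  In other words, enumeration is cursor-style: seed the first call with
  0/0/0, and each success leaves the address of the device actually found in
  the returned data, which, advanced by one logical unit, seeds the next
  call. A sketch of such a walk, where get_next_device() is a hypothetical
  stand-in for the IOCTL plus the copy-out of the found address:

	/* Hypothetical stand-in: issues the "get next device" query and, on
	 * success, overwrites *ch/*tgt/*lun with the address actually found. */
	int get_next_device(void *ctrl, unsigned char *ch,
			    unsigned char *tgt, unsigned char *lun);

	static void walk_physical_devices(void *ctrl)
	{
		unsigned char ch = 0, tgt = 0, lun = 0; /* before the first device */

		while (get_next_device(ctrl, &ch, &tgt, &lun)) {
			/* consume the device information here */
			lun++;          /* ask for the device after this one */
		}
	}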
- -*/ - -static bool DAC960_V2_NewPhysicalDeviceInfo(DAC960_Controller_T *Controller, - unsigned char Channel, - unsigned char TargetID, - unsigned char LogicalUnit) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->PhysicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->PhysicalDeviceInfo.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->PhysicalDeviceInfo.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->PhysicalDeviceInfo.DataTransferSize = - sizeof(DAC960_V2_PhysicalDeviceInfo_T); - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.LogicalUnit = LogicalUnit; - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.TargetID = TargetID; - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.Channel = Channel; - CommandMailbox->PhysicalDeviceInfo.IOCTL_Opcode = - DAC960_V2_GetPhysicalDeviceInfoValid; - CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewPhysicalDeviceInformationDMA; - CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->PhysicalDeviceInfo.DataTransferSize; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -static void DAC960_V2_ConstructNewUnitSerialNumber( - DAC960_Controller_T *Controller, - DAC960_V2_CommandMailbox_T *CommandMailbox, int Channel, int TargetID, - int LogicalUnit) -{ - CommandMailbox->SCSI_10.CommandOpcode = DAC960_V2_SCSI_10_Passthru; - CommandMailbox->SCSI_10.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->SCSI_10.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->SCSI_10.DataTransferSize = - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - CommandMailbox->SCSI_10.PhysicalDevice.LogicalUnit = LogicalUnit; - CommandMailbox->SCSI_10.PhysicalDevice.TargetID = TargetID; - CommandMailbox->SCSI_10.PhysicalDevice.Channel = Channel; - CommandMailbox->SCSI_10.CDBLength = 6; - CommandMailbox->SCSI_10.SCSI_CDB[0] = 0x12; /* INQUIRY */ - CommandMailbox->SCSI_10.SCSI_CDB[1] = 1; /* EVPD = 1 */ - CommandMailbox->SCSI_10.SCSI_CDB[2] = 0x80; /* Page Code */ - CommandMailbox->SCSI_10.SCSI_CDB[3] = 0; /* Reserved */ - CommandMailbox->SCSI_10.SCSI_CDB[4] = - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - CommandMailbox->SCSI_10.SCSI_CDB[5] = 0; /* Control */ - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewInquiryUnitSerialNumberDMA; - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->SCSI_10.DataTransferSize; -} - - -/* - DAC960_V2_NewUnitSerialNumber executes an SCSI pass-through - Inquiry command to a SCSI device identified by Channel number, - Target id, Logical Unit Number. This function Waits for completion - of the command. - - The return data includes Unit Serial Number information for the - specified device. - - Data is stored in the controller's V2.NewPhysicalDeviceInfo dma-able - memory buffer. 
-*/ - -static bool DAC960_V2_NewInquiryUnitSerialNumber(DAC960_Controller_T *Controller, - int Channel, int TargetID, int LogicalUnit) -{ - DAC960_Command_T *Command; - DAC960_V2_CommandMailbox_T *CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - - Command = DAC960_AllocateCommand(Controller); - CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - - DAC960_V2_ConstructNewUnitSerialNumber(Controller, CommandMailbox, - Channel, TargetID, LogicalUnit); - - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V2_DeviceOperation executes a DAC960 V2 Firmware Controller Device - Operation IOCTL Command and waits for completion. It returns true on - success and false on failure. -*/ - -static bool DAC960_V2_DeviceOperation(DAC960_Controller_T *Controller, - DAC960_V2_IOCTL_Opcode_T IOCTL_Opcode, - DAC960_V2_OperationDevice_T - OperationDevice) -{ - DAC960_Command_T *Command = DAC960_AllocateCommand(Controller); - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_CommandStatus_T CommandStatus; - DAC960_V2_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - CommandMailbox->DeviceOperation.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->DeviceOperation.CommandControlBits - .DataTransferControllerToHost = true; - CommandMailbox->DeviceOperation.CommandControlBits - .NoAutoRequestSense = true; - CommandMailbox->DeviceOperation.IOCTL_Opcode = IOCTL_Opcode; - CommandMailbox->DeviceOperation.OperationDevice = OperationDevice; - DAC960_ExecuteCommand(Command); - CommandStatus = Command->V2.CommandStatus; - DAC960_DeallocateCommand(Command); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V1_EnableMemoryMailboxInterface enables the Memory Mailbox Interface - for DAC960 V1 Firmware Controllers. - - PD and P controller types have no memory mailbox, but still need the - other dma mapped memory. 
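  One detail worth calling out in the function below: enabling the mailbox
  interface is attempted twice. The first pass sets CommandOpcode2 to 0x14
  (dual mode); if the controller rejects it, DualModeMemoryMailboxInterface
  is cleared, CommandOpcode2 drops to 0x10 (single mode), and the
  for (i = 0; i < 2; i++) loop retries. The shape, stripped of the hardware
  pokes and with a hypothetical try_enable() standing in for one complete
  handshake:

	int try_enable(void *ctrl, unsigned char opcode2);  /* hypothetical */

	static int enable_mailbox_interface(void *ctrl)
	{
		unsigned char opcode2 = 0x14;           /* prefer dual mode */
		int attempt;

		for (attempt = 0; attempt < 2; attempt++) {
			if (try_enable(ctrl, opcode2))
				return 1;
			opcode2 = 0x10;                 /* fall back to single mode */
		}
		return 0;
	}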
-*/ - -static bool DAC960_V1_EnableMemoryMailboxInterface(DAC960_Controller_T - *Controller) -{ - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - DAC960_HardwareType_T hw_type = Controller->HardwareType; - struct pci_dev *PCI_Device = Controller->PCIDevice; - struct dma_loaf *DmaPages = &Controller->DmaPages; - size_t DmaPagesSize; - size_t CommandMailboxesSize; - size_t StatusMailboxesSize; - - DAC960_V1_CommandMailbox_T *CommandMailboxesMemory; - dma_addr_t CommandMailboxesMemoryDMA; - - DAC960_V1_StatusMailbox_T *StatusMailboxesMemory; - dma_addr_t StatusMailboxesMemoryDMA; - - DAC960_V1_CommandMailbox_T CommandMailbox; - DAC960_V1_CommandStatus_T CommandStatus; - int TimeoutCounter; - int i; - - memset(&CommandMailbox, 0, sizeof(DAC960_V1_CommandMailbox_T)); - - if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32))) - return DAC960_Failure(Controller, "DMA mask out of range"); - - if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) { - CommandMailboxesSize = 0; - StatusMailboxesSize = 0; - } else { - CommandMailboxesSize = DAC960_V1_CommandMailboxCount * sizeof(DAC960_V1_CommandMailbox_T); - StatusMailboxesSize = DAC960_V1_StatusMailboxCount * sizeof(DAC960_V1_StatusMailbox_T); - } - DmaPagesSize = CommandMailboxesSize + StatusMailboxesSize + - sizeof(DAC960_V1_DCDB_T) + sizeof(DAC960_V1_Enquiry_T) + - sizeof(DAC960_V1_ErrorTable_T) + sizeof(DAC960_V1_EventLogEntry_T) + - sizeof(DAC960_V1_RebuildProgress_T) + - sizeof(DAC960_V1_LogicalDriveInformationArray_T) + - sizeof(DAC960_V1_BackgroundInitializationStatus_T) + - sizeof(DAC960_V1_DeviceState_T) + sizeof(DAC960_SCSI_Inquiry_T) + - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - - if (!init_dma_loaf(PCI_Device, DmaPages, DmaPagesSize)) - return false; - - - if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) - goto skip_mailboxes; - - CommandMailboxesMemory = slice_dma_loaf(DmaPages, - CommandMailboxesSize, &CommandMailboxesMemoryDMA); - - /* These are the base addresses for the command memory mailbox array */ - Controller->V1.FirstCommandMailbox = CommandMailboxesMemory; - Controller->V1.FirstCommandMailboxDMA = CommandMailboxesMemoryDMA; - - CommandMailboxesMemory += DAC960_V1_CommandMailboxCount - 1; - Controller->V1.LastCommandMailbox = CommandMailboxesMemory; - Controller->V1.NextCommandMailbox = Controller->V1.FirstCommandMailbox; - Controller->V1.PreviousCommandMailbox1 = Controller->V1.LastCommandMailbox; - Controller->V1.PreviousCommandMailbox2 = - Controller->V1.LastCommandMailbox - 1; - - /* These are the base addresses for the status memory mailbox array */ - StatusMailboxesMemory = slice_dma_loaf(DmaPages, - StatusMailboxesSize, &StatusMailboxesMemoryDMA); - - Controller->V1.FirstStatusMailbox = StatusMailboxesMemory; - Controller->V1.FirstStatusMailboxDMA = StatusMailboxesMemoryDMA; - StatusMailboxesMemory += DAC960_V1_StatusMailboxCount - 1; - Controller->V1.LastStatusMailbox = StatusMailboxesMemory; - Controller->V1.NextStatusMailbox = Controller->V1.FirstStatusMailbox; - -skip_mailboxes: - Controller->V1.MonitoringDCDB = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_DCDB_T), - &Controller->V1.MonitoringDCDB_DMA); - - Controller->V1.NewEnquiry = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_Enquiry_T), - &Controller->V1.NewEnquiryDMA); - - Controller->V1.NewErrorTable = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_ErrorTable_T), - &Controller->V1.NewErrorTableDMA); - - Controller->V1.EventLogEntry = slice_dma_loaf(DmaPages, - 
sizeof(DAC960_V1_EventLogEntry_T), - &Controller->V1.EventLogEntryDMA); - - Controller->V1.RebuildProgress = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_RebuildProgress_T), - &Controller->V1.RebuildProgressDMA); - - Controller->V1.NewLogicalDriveInformation = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_LogicalDriveInformationArray_T), - &Controller->V1.NewLogicalDriveInformationDMA); - - Controller->V1.BackgroundInitializationStatus = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_BackgroundInitializationStatus_T), - &Controller->V1.BackgroundInitializationStatusDMA); - - Controller->V1.NewDeviceState = slice_dma_loaf(DmaPages, - sizeof(DAC960_V1_DeviceState_T), - &Controller->V1.NewDeviceStateDMA); - - Controller->V1.NewInquiryStandardData = slice_dma_loaf(DmaPages, - sizeof(DAC960_SCSI_Inquiry_T), - &Controller->V1.NewInquiryStandardDataDMA); - - Controller->V1.NewInquiryUnitSerialNumber = slice_dma_loaf(DmaPages, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), - &Controller->V1.NewInquiryUnitSerialNumberDMA); - - if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) - return true; - - /* Enable the Memory Mailbox Interface. */ - Controller->V1.DualModeMemoryMailboxInterface = true; - CommandMailbox.TypeX.CommandOpcode = 0x2B; - CommandMailbox.TypeX.CommandIdentifier = 0; - CommandMailbox.TypeX.CommandOpcode2 = 0x14; - CommandMailbox.TypeX.CommandMailboxesBusAddress = - Controller->V1.FirstCommandMailboxDMA; - CommandMailbox.TypeX.StatusMailboxesBusAddress = - Controller->V1.FirstStatusMailboxDMA; -#define TIMEOUT_COUNT 1000000 - - for (i = 0; i < 2; i++) - switch (Controller->HardwareType) - { - case DAC960_LA_Controller: - TimeoutCounter = TIMEOUT_COUNT; - while (--TimeoutCounter >= 0) - { - if (!DAC960_LA_HardwareMailboxFullP(ControllerBaseAddress)) - break; - udelay(10); - } - if (TimeoutCounter < 0) return false; - DAC960_LA_WriteHardwareMailbox(ControllerBaseAddress, &CommandMailbox); - DAC960_LA_HardwareMailboxNewCommand(ControllerBaseAddress); - TimeoutCounter = TIMEOUT_COUNT; - while (--TimeoutCounter >= 0) - { - if (DAC960_LA_HardwareMailboxStatusAvailableP( - ControllerBaseAddress)) - break; - udelay(10); - } - if (TimeoutCounter < 0) return false; - CommandStatus = DAC960_LA_ReadStatusRegister(ControllerBaseAddress); - DAC960_LA_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress); - DAC960_LA_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress); - if (CommandStatus == DAC960_V1_NormalCompletion) return true; - Controller->V1.DualModeMemoryMailboxInterface = false; - CommandMailbox.TypeX.CommandOpcode2 = 0x10; - break; - case DAC960_PG_Controller: - TimeoutCounter = TIMEOUT_COUNT; - while (--TimeoutCounter >= 0) - { - if (!DAC960_PG_HardwareMailboxFullP(ControllerBaseAddress)) - break; - udelay(10); - } - if (TimeoutCounter < 0) return false; - DAC960_PG_WriteHardwareMailbox(ControllerBaseAddress, &CommandMailbox); - DAC960_PG_HardwareMailboxNewCommand(ControllerBaseAddress); - - TimeoutCounter = TIMEOUT_COUNT; - while (--TimeoutCounter >= 0) - { - if (DAC960_PG_HardwareMailboxStatusAvailableP( - ControllerBaseAddress)) - break; - udelay(10); - } - if (TimeoutCounter < 0) return false; - CommandStatus = DAC960_PG_ReadStatusRegister(ControllerBaseAddress); - DAC960_PG_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress); - DAC960_PG_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress); - if (CommandStatus == DAC960_V1_NormalCompletion) return true; - Controller->V1.DualModeMemoryMailboxInterface = false; - 
CommandMailbox.TypeX.CommandOpcode2 = 0x10; - break; - default: - DAC960_Failure(Controller, "Unknown Controller Type\n"); - break; - } - return false; -} - - -/* - DAC960_V2_EnableMemoryMailboxInterface enables the Memory Mailbox Interface - for DAC960 V2 Firmware Controllers. - - Aggregate the space needed for the controller's memory mailbox and - the other data structures that will be targets of dma transfers with - the controller. Allocate a dma-mapped region of memory to hold these - structures. Then, save CPU pointers and dma_addr_t values to reference - the structures that are contained in that region. -*/ - -static bool DAC960_V2_EnableMemoryMailboxInterface(DAC960_Controller_T - *Controller) -{ - void __iomem *ControllerBaseAddress = Controller->BaseAddress; - struct pci_dev *PCI_Device = Controller->PCIDevice; - struct dma_loaf *DmaPages = &Controller->DmaPages; - size_t DmaPagesSize; - size_t CommandMailboxesSize; - size_t StatusMailboxesSize; - - DAC960_V2_CommandMailbox_T *CommandMailboxesMemory; - dma_addr_t CommandMailboxesMemoryDMA; - - DAC960_V2_StatusMailbox_T *StatusMailboxesMemory; - dma_addr_t StatusMailboxesMemoryDMA; - - DAC960_V2_CommandMailbox_T *CommandMailbox; - dma_addr_t CommandMailboxDMA; - DAC960_V2_CommandStatus_T CommandStatus; - - if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(64)) && - pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32))) - return DAC960_Failure(Controller, "DMA mask out of range"); - - /* This is a temporary dma mapping, used only in the scope of this function */ - CommandMailbox = pci_alloc_consistent(PCI_Device, - sizeof(DAC960_V2_CommandMailbox_T), &CommandMailboxDMA); - if (CommandMailbox == NULL) - return false; - - CommandMailboxesSize = DAC960_V2_CommandMailboxCount * sizeof(DAC960_V2_CommandMailbox_T); - StatusMailboxesSize = DAC960_V2_StatusMailboxCount * sizeof(DAC960_V2_StatusMailbox_T); - DmaPagesSize = - CommandMailboxesSize + StatusMailboxesSize + - sizeof(DAC960_V2_HealthStatusBuffer_T) + - sizeof(DAC960_V2_ControllerInfo_T) + - sizeof(DAC960_V2_LogicalDeviceInfo_T) + - sizeof(DAC960_V2_PhysicalDeviceInfo_T) + - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T) + - sizeof(DAC960_V2_Event_T) + - sizeof(DAC960_V2_PhysicalToLogicalDevice_T); - - if (!init_dma_loaf(PCI_Device, DmaPages, DmaPagesSize)) { - pci_free_consistent(PCI_Device, sizeof(DAC960_V2_CommandMailbox_T), - CommandMailbox, CommandMailboxDMA); - return false; - } - - CommandMailboxesMemory = slice_dma_loaf(DmaPages, - CommandMailboxesSize, &CommandMailboxesMemoryDMA); - - /* These are the base addresses for the command memory mailbox array */ - Controller->V2.FirstCommandMailbox = CommandMailboxesMemory; - Controller->V2.FirstCommandMailboxDMA = CommandMailboxesMemoryDMA; - - CommandMailboxesMemory += DAC960_V2_CommandMailboxCount - 1; - Controller->V2.LastCommandMailbox = CommandMailboxesMemory; - Controller->V2.NextCommandMailbox = Controller->V2.FirstCommandMailbox; - Controller->V2.PreviousCommandMailbox1 = Controller->V2.LastCommandMailbox; - Controller->V2.PreviousCommandMailbox2 = - Controller->V2.LastCommandMailbox - 1; - - /* These are the base addresses for the status memory mailbox array */ - StatusMailboxesMemory = slice_dma_loaf(DmaPages, - StatusMailboxesSize, &StatusMailboxesMemoryDMA); - - Controller->V2.FirstStatusMailbox = StatusMailboxesMemory; - Controller->V2.FirstStatusMailboxDMA = StatusMailboxesMemoryDMA; - StatusMailboxesMemory += DAC960_V2_StatusMailboxCount - 1; - Controller->V2.LastStatusMailbox = StatusMailboxesMemory; - 
Controller->V2.NextStatusMailbox = Controller->V2.FirstStatusMailbox; - - Controller->V2.HealthStatusBuffer = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_HealthStatusBuffer_T), - &Controller->V2.HealthStatusBufferDMA); - - Controller->V2.NewControllerInformation = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_ControllerInfo_T), - &Controller->V2.NewControllerInformationDMA); - - Controller->V2.NewLogicalDeviceInformation = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_LogicalDeviceInfo_T), - &Controller->V2.NewLogicalDeviceInformationDMA); - - Controller->V2.NewPhysicalDeviceInformation = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_PhysicalDeviceInfo_T), - &Controller->V2.NewPhysicalDeviceInformationDMA); - - Controller->V2.NewInquiryUnitSerialNumber = slice_dma_loaf(DmaPages, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), - &Controller->V2.NewInquiryUnitSerialNumberDMA); - - Controller->V2.Event = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_Event_T), - &Controller->V2.EventDMA); - - Controller->V2.PhysicalToLogicalDevice = slice_dma_loaf(DmaPages, - sizeof(DAC960_V2_PhysicalToLogicalDevice_T), - &Controller->V2.PhysicalToLogicalDeviceDMA); - - /* - Enable the Memory Mailbox Interface. - - I don't know why we can't just use one of the memory mailboxes - we just allocated to do this, instead of using this temporary one. - Try this change later. - */ - memset(CommandMailbox, 0, sizeof(DAC960_V2_CommandMailbox_T)); - CommandMailbox->SetMemoryMailbox.CommandIdentifier = 1; - CommandMailbox->SetMemoryMailbox.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->SetMemoryMailbox.CommandControlBits.NoAutoRequestSense = true; - CommandMailbox->SetMemoryMailbox.FirstCommandMailboxSizeKB = - (DAC960_V2_CommandMailboxCount * sizeof(DAC960_V2_CommandMailbox_T)) >> 10; - CommandMailbox->SetMemoryMailbox.FirstStatusMailboxSizeKB = - (DAC960_V2_StatusMailboxCount * sizeof(DAC960_V2_StatusMailbox_T)) >> 10; - CommandMailbox->SetMemoryMailbox.SecondCommandMailboxSizeKB = 0; - CommandMailbox->SetMemoryMailbox.SecondStatusMailboxSizeKB = 0; - CommandMailbox->SetMemoryMailbox.RequestSenseSize = 0; - CommandMailbox->SetMemoryMailbox.IOCTL_Opcode = DAC960_V2_SetMemoryMailbox; - CommandMailbox->SetMemoryMailbox.HealthStatusBufferSizeKB = 1; - CommandMailbox->SetMemoryMailbox.HealthStatusBufferBusAddress = - Controller->V2.HealthStatusBufferDMA; - CommandMailbox->SetMemoryMailbox.FirstCommandMailboxBusAddress = - Controller->V2.FirstCommandMailboxDMA; - CommandMailbox->SetMemoryMailbox.FirstStatusMailboxBusAddress = - Controller->V2.FirstStatusMailboxDMA; - switch (Controller->HardwareType) - { - case DAC960_GEM_Controller: - while (DAC960_GEM_HardwareMailboxFullP(ControllerBaseAddress)) - udelay(1); - DAC960_GEM_WriteHardwareMailbox(ControllerBaseAddress, CommandMailboxDMA); - DAC960_GEM_HardwareMailboxNewCommand(ControllerBaseAddress); - while (!DAC960_GEM_HardwareMailboxStatusAvailableP(ControllerBaseAddress)) - udelay(1); - CommandStatus = DAC960_GEM_ReadCommandStatus(ControllerBaseAddress); - DAC960_GEM_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress); - DAC960_GEM_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress); - break; - case DAC960_BA_Controller: - while (DAC960_BA_HardwareMailboxFullP(ControllerBaseAddress)) - udelay(1); - DAC960_BA_WriteHardwareMailbox(ControllerBaseAddress, CommandMailboxDMA); - DAC960_BA_HardwareMailboxNewCommand(ControllerBaseAddress); - while (!DAC960_BA_HardwareMailboxStatusAvailableP(ControllerBaseAddress)) - udelay(1); - CommandStatus = 
DAC960_BA_ReadCommandStatus(ControllerBaseAddress); - DAC960_BA_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress); - DAC960_BA_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress); - break; - case DAC960_LP_Controller: - while (DAC960_LP_HardwareMailboxFullP(ControllerBaseAddress)) - udelay(1); - DAC960_LP_WriteHardwareMailbox(ControllerBaseAddress, CommandMailboxDMA); - DAC960_LP_HardwareMailboxNewCommand(ControllerBaseAddress); - while (!DAC960_LP_HardwareMailboxStatusAvailableP(ControllerBaseAddress)) - udelay(1); - CommandStatus = DAC960_LP_ReadCommandStatus(ControllerBaseAddress); - DAC960_LP_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress); - DAC960_LP_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress); - break; - default: - DAC960_Failure(Controller, "Unknown Controller Type\n"); - CommandStatus = DAC960_V2_AbormalCompletion; - break; - } - pci_free_consistent(PCI_Device, sizeof(DAC960_V2_CommandMailbox_T), - CommandMailbox, CommandMailboxDMA); - return (CommandStatus == DAC960_V2_NormalCompletion); -} - - -/* - DAC960_V1_ReadControllerConfiguration reads the Configuration Information - from DAC960 V1 Firmware Controllers and initializes the Controller structure. -*/ - -static bool DAC960_V1_ReadControllerConfiguration(DAC960_Controller_T - *Controller) -{ - DAC960_V1_Enquiry2_T *Enquiry2; - dma_addr_t Enquiry2DMA; - DAC960_V1_Config2_T *Config2; - dma_addr_t Config2DMA; - int LogicalDriveNumber, Channel, TargetID; - struct dma_loaf local_dma; - - if (!init_dma_loaf(Controller->PCIDevice, &local_dma, - sizeof(DAC960_V1_Enquiry2_T) + sizeof(DAC960_V1_Config2_T))) - return DAC960_Failure(Controller, "LOGICAL DEVICE ALLOCATION"); - - Enquiry2 = slice_dma_loaf(&local_dma, sizeof(DAC960_V1_Enquiry2_T), &Enquiry2DMA); - Config2 = slice_dma_loaf(&local_dma, sizeof(DAC960_V1_Config2_T), &Config2DMA); - - if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_Enquiry, - Controller->V1.NewEnquiryDMA)) { - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "ENQUIRY"); - } - memcpy(&Controller->V1.Enquiry, Controller->V1.NewEnquiry, - sizeof(DAC960_V1_Enquiry_T)); - - if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_Enquiry2, Enquiry2DMA)) { - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "ENQUIRY2"); - } - - if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_ReadConfig2, Config2DMA)) { - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "READ CONFIG2"); - } - - if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_GetLogicalDriveInformation, - Controller->V1.NewLogicalDriveInformationDMA)) { - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "GET LOGICAL DRIVE INFORMATION"); - } - memcpy(&Controller->V1.LogicalDriveInformation, - Controller->V1.NewLogicalDriveInformation, - sizeof(DAC960_V1_LogicalDriveInformationArray_T)); - - for (Channel = 0; Channel < Enquiry2->ActualChannels; Channel++) - for (TargetID = 0; TargetID < Enquiry2->MaxTargets; TargetID++) { - if (!DAC960_V1_ExecuteType3D(Controller, DAC960_V1_GetDeviceState, - Channel, TargetID, - Controller->V1.NewDeviceStateDMA)) { - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "GET DEVICE STATE"); - } - memcpy(&Controller->V1.DeviceState[Channel][TargetID], - Controller->V1.NewDeviceState, sizeof(DAC960_V1_DeviceState_T)); - } - /* - Initialize the Controller Model Name and Full Model Name fields. 
- */ - switch (Enquiry2->HardwareID.SubModel) - { - case DAC960_V1_P_PD_PU: - if (Enquiry2->SCSICapability.BusSpeed == DAC960_V1_Ultra) - strcpy(Controller->ModelName, "DAC960PU"); - else strcpy(Controller->ModelName, "DAC960PD"); - break; - case DAC960_V1_PL: - strcpy(Controller->ModelName, "DAC960PL"); - break; - case DAC960_V1_PG: - strcpy(Controller->ModelName, "DAC960PG"); - break; - case DAC960_V1_PJ: - strcpy(Controller->ModelName, "DAC960PJ"); - break; - case DAC960_V1_PR: - strcpy(Controller->ModelName, "DAC960PR"); - break; - case DAC960_V1_PT: - strcpy(Controller->ModelName, "DAC960PT"); - break; - case DAC960_V1_PTL0: - strcpy(Controller->ModelName, "DAC960PTL0"); - break; - case DAC960_V1_PRL: - strcpy(Controller->ModelName, "DAC960PRL"); - break; - case DAC960_V1_PTL1: - strcpy(Controller->ModelName, "DAC960PTL1"); - break; - case DAC960_V1_1164P: - strcpy(Controller->ModelName, "DAC1164P"); - break; - default: - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "MODEL VERIFICATION"); - } - strcpy(Controller->FullModelName, "Mylex "); - strcat(Controller->FullModelName, Controller->ModelName); - /* - Initialize the Controller Firmware Version field and verify that it - is a supported firmware version. The supported firmware versions are: - - DAC1164P 5.06 and above - DAC960PTL/PRL/PJ/PG 4.06 and above - DAC960PU/PD/PL 3.51 and above - DAC960PU/PD/PL/P 2.73 and above - */ -#if defined(CONFIG_ALPHA) - /* - DEC Alpha machines were often equipped with DAC960 cards that were - OEMed from Mylex, and had their own custom firmware. Version 2.70, - the last custom FW revision to be released by DEC for these older - controllers, appears to work quite well with this driver. - - Cards tested successfully were several versions each of the PD and - PU, called by DEC the KZPSC and KZPAC, respectively, and having - the Manufacturer Numbers (from Mylex), usually on a sticker on the - back of the board, of: - - KZPSC: D040347 (1-channel) or D040348 (2-channel) or D040349 (3-channel) - KZPAC: D040395 (1-channel) or D040396 (2-channel) or D040397 (3-channel) - */ -# define FIRMWARE_27X "2.70" -#else -# define FIRMWARE_27X "2.73" -#endif - - if (Enquiry2->FirmwareID.MajorVersion == 0) - { - Enquiry2->FirmwareID.MajorVersion = - Controller->V1.Enquiry.MajorFirmwareVersion; - Enquiry2->FirmwareID.MinorVersion = - Controller->V1.Enquiry.MinorFirmwareVersion; - Enquiry2->FirmwareID.FirmwareType = '0'; - Enquiry2->FirmwareID.TurnID = 0; - } - snprintf(Controller->FirmwareVersion, sizeof(Controller->FirmwareVersion), - "%d.%02d-%c-%02d", - Enquiry2->FirmwareID.MajorVersion, - Enquiry2->FirmwareID.MinorVersion, - Enquiry2->FirmwareID.FirmwareType, - Enquiry2->FirmwareID.TurnID); - if (!((Controller->FirmwareVersion[0] == '5' && - strcmp(Controller->FirmwareVersion, "5.06") >= 0) || - (Controller->FirmwareVersion[0] == '4' && - strcmp(Controller->FirmwareVersion, "4.06") >= 0) || - (Controller->FirmwareVersion[0] == '3' && - strcmp(Controller->FirmwareVersion, "3.51") >= 0) || - (Controller->FirmwareVersion[0] == '2' && - strcmp(Controller->FirmwareVersion, FIRMWARE_27X) >= 0))) - { - DAC960_Failure(Controller, "FIRMWARE VERSION VERIFICATION"); - DAC960_Error("Firmware Version = '%s'\n", Controller, - Controller->FirmwareVersion); - free_dma_loaf(Controller->PCIDevice, &local_dma); - return false; - } - /* - Initialize the Controller Channels, Targets, Memory Size, and SAF-TE - Enclosure Management Enabled fields. 
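  The firmware gate above leans on string ordering: because versions are
  rendered with "%d.%02d" (zero-padded minor), strcmp() agrees with numeric
  comparison within a single-digit major version, so checking
  FirmwareVersion[0] for the major and strcmp() against the minimum does the
  right thing. A quick self-contained check of that property:

	#include <assert.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char have[16], need[16];

		snprintf(have, sizeof(have), "%d.%02d-%c-%02d", 4, 6, '0', 8);
		snprintf(need, sizeof(need), "%d.%02d", 4, 6);
		assert(have[0] == '4' && strcmp(have, need) >= 0); /* 4.06-0-08 passes */

		snprintf(have, sizeof(have), "%d.%02d-%c-%02d", 3, 51, '0', 4);
		assert(strcmp(have, "3.51") >= 0 && strcmp(have, "4.06") < 0);
		return 0;
	}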
- */ - Controller->Channels = Enquiry2->ActualChannels; - Controller->Targets = Enquiry2->MaxTargets; - Controller->MemorySize = Enquiry2->MemorySize >> 20; - Controller->V1.SAFTE_EnclosureManagementEnabled = - (Enquiry2->FaultManagementType == DAC960_V1_SAFTE); - /* - Initialize the Controller Queue Depth, Driver Queue Depth, Logical Drive - Count, Maximum Blocks per Command, Controller Scatter/Gather Limit, and - Driver Scatter/Gather Limit. The Driver Queue Depth must be at most one - less than the Controller Queue Depth to allow for an automatic drive - rebuild operation. - */ - Controller->ControllerQueueDepth = Controller->V1.Enquiry.MaxCommands; - Controller->DriverQueueDepth = Controller->ControllerQueueDepth - 1; - if (Controller->DriverQueueDepth > DAC960_MaxDriverQueueDepth) - Controller->DriverQueueDepth = DAC960_MaxDriverQueueDepth; - Controller->LogicalDriveCount = - Controller->V1.Enquiry.NumberOfLogicalDrives; - Controller->MaxBlocksPerCommand = Enquiry2->MaxBlocksPerCommand; - Controller->ControllerScatterGatherLimit = Enquiry2->MaxScatterGatherEntries; - Controller->DriverScatterGatherLimit = - Controller->ControllerScatterGatherLimit; - if (Controller->DriverScatterGatherLimit > DAC960_V1_ScatterGatherLimit) - Controller->DriverScatterGatherLimit = DAC960_V1_ScatterGatherLimit; - /* - Initialize the Stripe Size, Segment Size, and Geometry Translation. - */ - Controller->V1.StripeSize = Config2->BlocksPerStripe * Config2->BlockFactor - >> (10 - DAC960_BlockSizeBits); - Controller->V1.SegmentSize = Config2->BlocksPerCacheLine * Config2->BlockFactor - >> (10 - DAC960_BlockSizeBits); - switch (Config2->DriveGeometry) - { - case DAC960_V1_Geometry_128_32: - Controller->V1.GeometryTranslationHeads = 128; - Controller->V1.GeometryTranslationSectors = 32; - break; - case DAC960_V1_Geometry_255_63: - Controller->V1.GeometryTranslationHeads = 255; - Controller->V1.GeometryTranslationSectors = 63; - break; - default: - free_dma_loaf(Controller->PCIDevice, &local_dma); - return DAC960_Failure(Controller, "CONFIG2 DRIVE GEOMETRY"); - } - /* - Initialize the Background Initialization Status. - */ - if ((Controller->FirmwareVersion[0] == '4' && - strcmp(Controller->FirmwareVersion, "4.08") >= 0) || - (Controller->FirmwareVersion[0] == '5' && - strcmp(Controller->FirmwareVersion, "5.08") >= 0)) - { - Controller->V1.BackgroundInitializationStatusSupported = true; - DAC960_V1_ExecuteType3B(Controller, - DAC960_V1_BackgroundInitializationControl, 0x20, - Controller-> - V1.BackgroundInitializationStatusDMA); - memcpy(&Controller->V1.LastBackgroundInitializationStatus, - Controller->V1.BackgroundInitializationStatus, - sizeof(DAC960_V1_BackgroundInitializationStatus_T)); - } - /* - Initialize the Logical Drive Initially Accessible flag. - */ - for (LogicalDriveNumber = 0; - LogicalDriveNumber < Controller->LogicalDriveCount; - LogicalDriveNumber++) - if (Controller->V1.LogicalDriveInformation - [LogicalDriveNumber].LogicalDriveState != - DAC960_V1_LogicalDrive_Offline) - Controller->LogicalDriveInitiallyAccessible[LogicalDriveNumber] = true; - Controller->V1.LastRebuildStatus = DAC960_V1_NoRebuildOrCheckInProgress; - free_dma_loaf(Controller->PCIDevice, &local_dma); - return true; -} - - -/* - DAC960_V2_ReadControllerConfiguration reads the Configuration Information - from DAC960 V2 Firmware Controllers and initializes the Controller structure. 
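
A units check on the stripe and segment computation above, assuming DAC960_BlockSizeBits is 9 (512-byte blocks) as defined elsewhere in this driver:

  /*
   * N blocks of 512 bytes = (N << 9) bytes = N >> (10 - 9) KB, so e.g.
   * BlocksPerStripe * BlockFactor == 128 gives StripeSize == 64 (KB).
   */
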
-*/ - -static bool DAC960_V2_ReadControllerConfiguration(DAC960_Controller_T - *Controller) -{ - DAC960_V2_ControllerInfo_T *ControllerInfo = - &Controller->V2.ControllerInformation; - unsigned short LogicalDeviceNumber = 0; - int ModelNameLength; - - /* Get data into dma-able area, then copy into permanent location */ - if (!DAC960_V2_NewControllerInfo(Controller)) - return DAC960_Failure(Controller, "GET CONTROLLER INFO"); - memcpy(ControllerInfo, Controller->V2.NewControllerInformation, - sizeof(DAC960_V2_ControllerInfo_T)); - - - if (!DAC960_V2_GeneralInfo(Controller)) - return DAC960_Failure(Controller, "GET HEALTH STATUS"); - - /* - Initialize the Controller Model Name and Full Model Name fields. - */ - ModelNameLength = sizeof(ControllerInfo->ControllerName); - if (ModelNameLength > sizeof(Controller->ModelName)-1) - ModelNameLength = sizeof(Controller->ModelName)-1; - memcpy(Controller->ModelName, ControllerInfo->ControllerName, - ModelNameLength); - ModelNameLength--; - while (Controller->ModelName[ModelNameLength] == ' ' || - Controller->ModelName[ModelNameLength] == '\0') - ModelNameLength--; - Controller->ModelName[++ModelNameLength] = '\0'; - strcpy(Controller->FullModelName, "Mylex "); - strcat(Controller->FullModelName, Controller->ModelName); - /* - Initialize the Controller Firmware Version field. - */ - sprintf(Controller->FirmwareVersion, "%d.%02d-%02d", - ControllerInfo->FirmwareMajorVersion, - ControllerInfo->FirmwareMinorVersion, - ControllerInfo->FirmwareTurnNumber); - if (ControllerInfo->FirmwareMajorVersion == 6 && - ControllerInfo->FirmwareMinorVersion == 0 && - ControllerInfo->FirmwareTurnNumber < 1) - { - DAC960_Info("FIRMWARE VERSION %s DOES NOT PROVIDE THE CONTROLLER\n", - Controller, Controller->FirmwareVersion); - DAC960_Info("STATUS MONITORING FUNCTIONALITY NEEDED BY THIS DRIVER.\n", - Controller); - DAC960_Info("PLEASE UPGRADE TO VERSION 6.00-01 OR ABOVE.\n", - Controller); - } - /* - Initialize the Controller Channels, Targets, and Memory Size. - */ - Controller->Channels = ControllerInfo->NumberOfPhysicalChannelsPresent; - Controller->Targets = - ControllerInfo->MaximumTargetsPerChannel - [ControllerInfo->NumberOfPhysicalChannelsPresent-1]; - Controller->MemorySize = ControllerInfo->MemorySizeMB; - /* - Initialize the Controller Queue Depth, Driver Queue Depth, Logical Drive - Count, Maximum Blocks per Command, Controller Scatter/Gather Limit, and - Driver Scatter/Gather Limit. The Driver Queue Depth must be at most one - less than the Controller Queue Depth to allow for an automatic drive - rebuild operation. - */ - Controller->ControllerQueueDepth = ControllerInfo->MaximumParallelCommands; - Controller->DriverQueueDepth = Controller->ControllerQueueDepth - 1; - if (Controller->DriverQueueDepth > DAC960_MaxDriverQueueDepth) - Controller->DriverQueueDepth = DAC960_MaxDriverQueueDepth; - Controller->LogicalDriveCount = ControllerInfo->LogicalDevicesPresent; - Controller->MaxBlocksPerCommand = - ControllerInfo->MaximumDataTransferSizeInBlocks; - Controller->ControllerScatterGatherLimit = - ControllerInfo->MaximumScatterGatherEntries; - Controller->DriverScatterGatherLimit = - Controller->ControllerScatterGatherLimit; - if (Controller->DriverScatterGatherLimit > DAC960_V2_ScatterGatherLimit) - Controller->DriverScatterGatherLimit = DAC960_V2_ScatterGatherLimit; - /* - Initialize the Logical Device Information. 
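
The model-name loop above is a right-trim of space and NUL padding; the same operation in isolation, with an explicit lower bound that the original leaves to the data to guarantee:

  while (len > 0 && (name[len - 1] == ' ' || name[len - 1] == '\0'))
    len--;
  name[len] = '\0';
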
- */ - while (true) - { - DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInfo = - Controller->V2.NewLogicalDeviceInformation; - DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo; - DAC960_V2_PhysicalDevice_T PhysicalDevice; - - if (!DAC960_V2_NewLogicalDeviceInfo(Controller, LogicalDeviceNumber)) - break; - LogicalDeviceNumber = NewLogicalDeviceInfo->LogicalDeviceNumber; - if (LogicalDeviceNumber >= DAC960_MaxLogicalDrives) { - DAC960_Error("DAC960: Logical Drive Number %d not supported\n", - Controller, LogicalDeviceNumber); - break; - } - if (NewLogicalDeviceInfo->DeviceBlockSizeInBytes != DAC960_BlockSize) { - DAC960_Error("DAC960: Logical Drive Block Size %d not supported\n", - Controller, NewLogicalDeviceInfo->DeviceBlockSizeInBytes); - LogicalDeviceNumber++; - continue; - } - PhysicalDevice.Controller = 0; - PhysicalDevice.Channel = NewLogicalDeviceInfo->Channel; - PhysicalDevice.TargetID = NewLogicalDeviceInfo->TargetID; - PhysicalDevice.LogicalUnit = NewLogicalDeviceInfo->LogicalUnit; - Controller->V2.LogicalDriveToVirtualDevice[LogicalDeviceNumber] = - PhysicalDevice; - if (NewLogicalDeviceInfo->LogicalDeviceState != - DAC960_V2_LogicalDevice_Offline) - Controller->LogicalDriveInitiallyAccessible[LogicalDeviceNumber] = true; - LogicalDeviceInfo = kmalloc(sizeof(DAC960_V2_LogicalDeviceInfo_T), - GFP_ATOMIC); - if (LogicalDeviceInfo == NULL) - return DAC960_Failure(Controller, "LOGICAL DEVICE ALLOCATION"); - Controller->V2.LogicalDeviceInformation[LogicalDeviceNumber] = - LogicalDeviceInfo; - memcpy(LogicalDeviceInfo, NewLogicalDeviceInfo, - sizeof(DAC960_V2_LogicalDeviceInfo_T)); - LogicalDeviceNumber++; - } - return true; -} - - -/* - DAC960_ReportControllerConfiguration reports the Configuration Information - for Controller. -*/ - -static bool DAC960_ReportControllerConfiguration(DAC960_Controller_T - *Controller) -{ - DAC960_Info("Configuring Mylex %s PCI RAID Controller\n", - Controller, Controller->ModelName); - DAC960_Info(" Firmware Version: %s, Channels: %d, Memory Size: %dMB\n", - Controller, Controller->FirmwareVersion, - Controller->Channels, Controller->MemorySize); - DAC960_Info(" PCI Bus: %d, Device: %d, Function: %d, I/O Address: ", - Controller, Controller->Bus, - Controller->Device, Controller->Function); - if (Controller->IO_Address == 0) - DAC960_Info("Unassigned\n", Controller); - else DAC960_Info("0x%X\n", Controller, Controller->IO_Address); - DAC960_Info(" PCI Address: 0x%X mapped at 0x%lX, IRQ Channel: %d\n", - Controller, Controller->PCI_Address, - (unsigned long) Controller->BaseAddress, - Controller->IRQ_Channel); - DAC960_Info(" Controller Queue Depth: %d, " - "Maximum Blocks per Command: %d\n", - Controller, Controller->ControllerQueueDepth, - Controller->MaxBlocksPerCommand); - DAC960_Info(" Driver Queue Depth: %d, " - "Scatter/Gather Limit: %d of %d Segments\n", - Controller, Controller->DriverQueueDepth, - Controller->DriverScatterGatherLimit, - Controller->ControllerScatterGatherLimit); - if (Controller->FirmwareType == DAC960_V1_Controller) - { - DAC960_Info(" Stripe Size: %dKB, Segment Size: %dKB, " - "BIOS Geometry: %d/%d\n", Controller, - Controller->V1.StripeSize, - Controller->V1.SegmentSize, - Controller->V1.GeometryTranslationHeads, - Controller->V1.GeometryTranslationSectors); - if (Controller->V1.SAFTE_EnclosureManagementEnabled) - DAC960_Info(" SAF-TE Enclosure Management Enabled\n", Controller); - } - return true; -} - - -/* - DAC960_V1_ReadDeviceConfiguration reads the Device Configuration Information - for DAC960 V1 Firmware Controllers 
by requesting the SCSI Inquiry and SCSI - Inquiry Unit Serial Number information for each device connected to - Controller. -*/ - -static bool DAC960_V1_ReadDeviceConfiguration(DAC960_Controller_T - *Controller) -{ - struct dma_loaf local_dma; - - dma_addr_t DCDBs_dma[DAC960_V1_MaxChannels]; - DAC960_V1_DCDB_T *DCDBs_cpu[DAC960_V1_MaxChannels]; - - dma_addr_t SCSI_Inquiry_dma[DAC960_V1_MaxChannels]; - DAC960_SCSI_Inquiry_T *SCSI_Inquiry_cpu[DAC960_V1_MaxChannels]; - - dma_addr_t SCSI_NewInquiryUnitSerialNumberDMA[DAC960_V1_MaxChannels]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *SCSI_NewInquiryUnitSerialNumberCPU[DAC960_V1_MaxChannels]; - - struct completion Completions[DAC960_V1_MaxChannels]; - unsigned long flags; - int Channel, TargetID; - - if (!init_dma_loaf(Controller->PCIDevice, &local_dma, - DAC960_V1_MaxChannels*(sizeof(DAC960_V1_DCDB_T) + - sizeof(DAC960_SCSI_Inquiry_T) + - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)))) - return DAC960_Failure(Controller, - "DMA ALLOCATION FAILED IN ReadDeviceConfiguration"); - - for (Channel = 0; Channel < Controller->Channels; Channel++) { - DCDBs_cpu[Channel] = slice_dma_loaf(&local_dma, - sizeof(DAC960_V1_DCDB_T), DCDBs_dma + Channel); - SCSI_Inquiry_cpu[Channel] = slice_dma_loaf(&local_dma, - sizeof(DAC960_SCSI_Inquiry_T), - SCSI_Inquiry_dma + Channel); - SCSI_NewInquiryUnitSerialNumberCPU[Channel] = slice_dma_loaf(&local_dma, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), - SCSI_NewInquiryUnitSerialNumberDMA + Channel); - } - - for (TargetID = 0; TargetID < Controller->Targets; TargetID++) - { - /* - * For each channel, submit a probe for a device on that channel. - * The timeout interval for a device that is present is 10 seconds. - * With this approach, the timeout periods can elapse in parallel - * on each channel. - */ - for (Channel = 0; Channel < Controller->Channels; Channel++) - { - dma_addr_t NewInquiryStandardDataDMA = SCSI_Inquiry_dma[Channel]; - DAC960_V1_DCDB_T *DCDB = DCDBs_cpu[Channel]; - dma_addr_t DCDB_dma = DCDBs_dma[Channel]; - DAC960_Command_T *Command = Controller->Commands[Channel]; - struct completion *Completion = &Completions[Channel]; - - init_completion(Completion); - DAC960_V1_ClearCommand(Command); - Command->CommandType = DAC960_ImmediateCommand; - Command->Completion = Completion; - Command->V1.CommandMailbox.Type3.CommandOpcode = DAC960_V1_DCDB; - Command->V1.CommandMailbox.Type3.BusAddress = DCDB_dma; - DCDB->Channel = Channel; - DCDB->TargetID = TargetID; - DCDB->Direction = DAC960_V1_DCDB_DataTransferDeviceToSystem; - DCDB->EarlyStatus = false; - DCDB->Timeout = DAC960_V1_DCDB_Timeout_10_seconds; - DCDB->NoAutomaticRequestSense = false; - DCDB->DisconnectPermitted = true; - DCDB->TransferLength = sizeof(DAC960_SCSI_Inquiry_T); - DCDB->BusAddress = NewInquiryStandardDataDMA; - DCDB->CDBLength = 6; - DCDB->TransferLengthHigh4 = 0; - DCDB->SenseLength = sizeof(DCDB->SenseData); - DCDB->CDB[0] = 0x12; /* INQUIRY */ - DCDB->CDB[1] = 0; /* EVPD = 0 */ - DCDB->CDB[2] = 0; /* Page Code */ - DCDB->CDB[3] = 0; /* Reserved */ - DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_T); - DCDB->CDB[5] = 0; /* Control */ - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_QueueCommand(Command); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - } - /* - * Wait for the probes submitted in the previous loop - * to complete. On the probes that are successful, - * get the serial number of the device that was found.
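
The probe loop above is deliberately two-phase: one DCDB is posted per channel before any completion is waited on, so the 10-second timeouts of absent targets elapse concurrently rather than back to back. The skeleton, with submit_inquiry() as a hypothetical stand-in for the DCDB setup and queueing:

  struct completion done[DAC960_V1_MaxChannels];
  int ch;

  for (ch = 0; ch < channels; ch++) {
    init_completion(&done[ch]);
    submit_inquiry(controller, ch, target, &done[ch]);  /* returns immediately */
  }
  for (ch = 0; ch < channels; ch++)
    wait_for_completion(&done[ch]);  /* worst case ~10s total, not 10s per channel */
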
- */ - for (Channel = 0; Channel < Controller->Channels; Channel++) - { - DAC960_SCSI_Inquiry_T *InquiryStandardData = - &Controller->V1.InquiryStandardData[Channel][TargetID]; - DAC960_SCSI_Inquiry_T *NewInquiryStandardData = SCSI_Inquiry_cpu[Channel]; - dma_addr_t NewInquiryUnitSerialNumberDMA = - SCSI_NewInquiryUnitSerialNumberDMA[Channel]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *NewInquiryUnitSerialNumber = - SCSI_NewInquiryUnitSerialNumberCPU[Channel]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - &Controller->V1.InquiryUnitSerialNumber[Channel][TargetID]; - DAC960_Command_T *Command = Controller->Commands[Channel]; - DAC960_V1_DCDB_T *DCDB = DCDBs_cpu[Channel]; - struct completion *Completion = &Completions[Channel]; - - wait_for_completion(Completion); - - if (Command->V1.CommandStatus != DAC960_V1_NormalCompletion) { - memset(InquiryStandardData, 0, sizeof(DAC960_SCSI_Inquiry_T)); - InquiryStandardData->PeripheralDeviceType = 0x1F; - continue; - } else - memcpy(InquiryStandardData, NewInquiryStandardData, sizeof(DAC960_SCSI_Inquiry_T)); - - /* Preserve Channel and TargetID values from the previous loop */ - Command->Completion = Completion; - DCDB->TransferLength = sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - DCDB->BusAddress = NewInquiryUnitSerialNumberDMA; - DCDB->SenseLength = sizeof(DCDB->SenseData); - DCDB->CDB[0] = 0x12; /* INQUIRY */ - DCDB->CDB[1] = 1; /* EVPD = 1 */ - DCDB->CDB[2] = 0x80; /* Page Code */ - DCDB->CDB[3] = 0; /* Reserved */ - DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - DCDB->CDB[5] = 0; /* Control */ - - spin_lock_irqsave(&Controller->queue_lock, flags); - DAC960_QueueCommand(Command); - spin_unlock_irqrestore(&Controller->queue_lock, flags); - wait_for_completion(Completion); - - if (Command->V1.CommandStatus != DAC960_V1_NormalCompletion) { - memset(InquiryUnitSerialNumber, 0, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - } else - memcpy(InquiryUnitSerialNumber, NewInquiryUnitSerialNumber, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - } - } - free_dma_loaf(Controller->PCIDevice, &local_dma); - return true; -} - - -/* - DAC960_V2_ReadDeviceConfiguration reads the Device Configuration Information - for DAC960 V2 Firmware Controllers by requesting the Physical Device - Information and SCSI Inquiry Unit Serial Number information for each - device connected to Controller. 
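
For reference, the two six-byte INQUIRY CDBs built above differ only in byte 1 (the EVPD bit) and byte 2 (the page code):

  /*
   * CDB[0] = 0x12         INQUIRY
   * CDB[1] = 0x00 / 0x01  EVPD clear: standard data; set: vital product data
   * CDB[2] = 0x00 / 0x80  page code (0x80 = Unit Serial Number page)
   * CDB[4] = allocation length, i.e. the size of the receiving buffer
   */
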
-*/ - -static bool DAC960_V2_ReadDeviceConfiguration(DAC960_Controller_T - *Controller) -{ - unsigned char Channel = 0, TargetID = 0, LogicalUnit = 0; - unsigned short PhysicalDeviceIndex = 0; - - while (true) - { - DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInfo = - Controller->V2.NewPhysicalDeviceInformation; - DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *NewInquiryUnitSerialNumber = - Controller->V2.NewInquiryUnitSerialNumber; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber; - - if (!DAC960_V2_NewPhysicalDeviceInfo(Controller, Channel, TargetID, LogicalUnit)) - break; - - PhysicalDeviceInfo = kmalloc(sizeof(DAC960_V2_PhysicalDeviceInfo_T), - GFP_ATOMIC); - if (PhysicalDeviceInfo == NULL) - return DAC960_Failure(Controller, "PHYSICAL DEVICE ALLOCATION"); - Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex] = - PhysicalDeviceInfo; - memcpy(PhysicalDeviceInfo, NewPhysicalDeviceInfo, - sizeof(DAC960_V2_PhysicalDeviceInfo_T)); - - InquiryUnitSerialNumber = kmalloc( - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), GFP_ATOMIC); - if (InquiryUnitSerialNumber == NULL) { - kfree(PhysicalDeviceInfo); - return DAC960_Failure(Controller, "SERIAL NUMBER ALLOCATION"); - } - Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex] = - InquiryUnitSerialNumber; - - Channel = NewPhysicalDeviceInfo->Channel; - TargetID = NewPhysicalDeviceInfo->TargetID; - LogicalUnit = NewPhysicalDeviceInfo->LogicalUnit; - - /* - Some devices do NOT have Unit Serial Numbers. - This command fails for them. But, we still want to - remember those devices are there. Construct a - UnitSerialNumber structure for the failure case. - */ - if (!DAC960_V2_NewInquiryUnitSerialNumber(Controller, Channel, TargetID, LogicalUnit)) { - memset(InquiryUnitSerialNumber, 0, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - } else - memcpy(InquiryUnitSerialNumber, NewInquiryUnitSerialNumber, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - - PhysicalDeviceIndex++; - LogicalUnit++; - } - return true; -} - - -/* - DAC960_SanitizeInquiryData sanitizes the Vendor, Model, Revision, and - Product Serial Number fields of the Inquiry Standard Data and Inquiry - Unit Serial Number structures. -*/ - -static void DAC960_SanitizeInquiryData(DAC960_SCSI_Inquiry_T - *InquiryStandardData, - DAC960_SCSI_Inquiry_UnitSerialNumber_T - *InquiryUnitSerialNumber, - unsigned char *Vendor, - unsigned char *Model, - unsigned char *Revision, - unsigned char *SerialNumber) -{ - int SerialNumberLength, i; - if (InquiryStandardData->PeripheralDeviceType == 0x1F) return; - for (i = 0; i < sizeof(InquiryStandardData->VendorIdentification); i++) - { - unsigned char VendorCharacter = - InquiryStandardData->VendorIdentification[i]; - Vendor[i] = (VendorCharacter >= ' ' && VendorCharacter <= '~' - ? VendorCharacter : ' '); - } - Vendor[sizeof(InquiryStandardData->VendorIdentification)] = '\0'; - for (i = 0; i < sizeof(InquiryStandardData->ProductIdentification); i++) - { - unsigned char ModelCharacter = - InquiryStandardData->ProductIdentification[i]; - Model[i] = (ModelCharacter >= ' ' && ModelCharacter <= '~' - ? 
ModelCharacter : ' '); - } - Model[sizeof(InquiryStandardData->ProductIdentification)] = '\0'; - for (i = 0; i < sizeof(InquiryStandardData->ProductRevisionLevel); i++) - { - unsigned char RevisionCharacter = - InquiryStandardData->ProductRevisionLevel[i]; - Revision[i] = (RevisionCharacter >= ' ' && RevisionCharacter <= '~' - ? RevisionCharacter : ' '); - } - Revision[sizeof(InquiryStandardData->ProductRevisionLevel)] = '\0'; - if (InquiryUnitSerialNumber->PeripheralDeviceType == 0x1F) return; - SerialNumberLength = InquiryUnitSerialNumber->PageLength; - if (SerialNumberLength > - sizeof(InquiryUnitSerialNumber->ProductSerialNumber)) - SerialNumberLength = sizeof(InquiryUnitSerialNumber->ProductSerialNumber); - for (i = 0; i < SerialNumberLength; i++) - { - unsigned char SerialNumberCharacter = - InquiryUnitSerialNumber->ProductSerialNumber[i]; - SerialNumber[i] = - (SerialNumberCharacter >= ' ' && SerialNumberCharacter <= '~' - ? SerialNumberCharacter : ' '); - } - SerialNumber[SerialNumberLength] = '\0'; -} - - -/* - DAC960_V1_ReportDeviceConfiguration reports the Device Configuration - Information for DAC960 V1 Firmware Controllers. -*/ - -static bool DAC960_V1_ReportDeviceConfiguration(DAC960_Controller_T - *Controller) -{ - int LogicalDriveNumber, Channel, TargetID; - DAC960_Info(" Physical Devices:\n", Controller); - for (Channel = 0; Channel < Controller->Channels; Channel++) - for (TargetID = 0; TargetID < Controller->Targets; TargetID++) - { - DAC960_SCSI_Inquiry_T *InquiryStandardData = - &Controller->V1.InquiryStandardData[Channel][TargetID]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - &Controller->V1.InquiryUnitSerialNumber[Channel][TargetID]; - DAC960_V1_DeviceState_T *DeviceState = - &Controller->V1.DeviceState[Channel][TargetID]; - DAC960_V1_ErrorTableEntry_T *ErrorEntry = - &Controller->V1.ErrorTable.ErrorTableEntries[Channel][TargetID]; - char Vendor[1+sizeof(InquiryStandardData->VendorIdentification)]; - char Model[1+sizeof(InquiryStandardData->ProductIdentification)]; - char Revision[1+sizeof(InquiryStandardData->ProductRevisionLevel)]; - char SerialNumber[1+sizeof(InquiryUnitSerialNumber - ->ProductSerialNumber)]; - if (InquiryStandardData->PeripheralDeviceType == 0x1F) continue; - DAC960_SanitizeInquiryData(InquiryStandardData, InquiryUnitSerialNumber, - Vendor, Model, Revision, SerialNumber); - DAC960_Info(" %d:%d%s Vendor: %s Model: %s Revision: %s\n", - Controller, Channel, TargetID, (TargetID < 10 ? " " : ""), - Vendor, Model, Revision); - if (InquiryUnitSerialNumber->PeripheralDeviceType != 0x1F) - DAC960_Info(" Serial Number: %s\n", Controller, SerialNumber); - if (DeviceState->Present && - DeviceState->DeviceType == DAC960_V1_DiskType) - { - if (Controller->V1.DeviceResetCount[Channel][TargetID] > 0) - DAC960_Info(" Disk Status: %s, %u blocks, %d resets\n", - Controller, - (DeviceState->DeviceState == DAC960_V1_Device_Dead - ? "Dead" - : DeviceState->DeviceState - == DAC960_V1_Device_WriteOnly - ? "Write-Only" - : DeviceState->DeviceState - == DAC960_V1_Device_Online - ? "Online" : "Standby"), - DeviceState->DiskSize, - Controller->V1.DeviceResetCount[Channel][TargetID]); - else - DAC960_Info(" Disk Status: %s, %u blocks\n", Controller, - (DeviceState->DeviceState == DAC960_V1_Device_Dead - ? "Dead" - : DeviceState->DeviceState - == DAC960_V1_Device_WriteOnly - ? "Write-Only" - : DeviceState->DeviceState - == DAC960_V1_Device_Online - ? 
"Online" : "Standby"), - DeviceState->DiskSize); - } - if (ErrorEntry->ParityErrorCount > 0 || - ErrorEntry->SoftErrorCount > 0 || - ErrorEntry->HardErrorCount > 0 || - ErrorEntry->MiscErrorCount > 0) - DAC960_Info(" Errors - Parity: %d, Soft: %d, " - "Hard: %d, Misc: %d\n", Controller, - ErrorEntry->ParityErrorCount, - ErrorEntry->SoftErrorCount, - ErrorEntry->HardErrorCount, - ErrorEntry->MiscErrorCount); - } - DAC960_Info(" Logical Drives:\n", Controller); - for (LogicalDriveNumber = 0; - LogicalDriveNumber < Controller->LogicalDriveCount; - LogicalDriveNumber++) - { - DAC960_V1_LogicalDriveInformation_T *LogicalDriveInformation = - &Controller->V1.LogicalDriveInformation[LogicalDriveNumber]; - DAC960_Info(" /dev/rd/c%dd%d: RAID-%d, %s, %u blocks, %s\n", - Controller, Controller->ControllerNumber, LogicalDriveNumber, - LogicalDriveInformation->RAIDLevel, - (LogicalDriveInformation->LogicalDriveState - == DAC960_V1_LogicalDrive_Online - ? "Online" - : LogicalDriveInformation->LogicalDriveState - == DAC960_V1_LogicalDrive_Critical - ? "Critical" : "Offline"), - LogicalDriveInformation->LogicalDriveSize, - (LogicalDriveInformation->WriteBack - ? "Write Back" : "Write Thru")); - } - return true; -} - - -/* - DAC960_V2_ReportDeviceConfiguration reports the Device Configuration - Information for DAC960 V2 Firmware Controllers. -*/ - -static bool DAC960_V2_ReportDeviceConfiguration(DAC960_Controller_T - *Controller) -{ - int PhysicalDeviceIndex, LogicalDriveNumber; - DAC960_Info(" Physical Devices:\n", Controller); - for (PhysicalDeviceIndex = 0; - PhysicalDeviceIndex < DAC960_V2_MaxPhysicalDevices; - PhysicalDeviceIndex++) - { - DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo = - Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex]; - DAC960_SCSI_Inquiry_T *InquiryStandardData = - (DAC960_SCSI_Inquiry_T *) &PhysicalDeviceInfo->SCSI_InquiryData; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex]; - char Vendor[1+sizeof(InquiryStandardData->VendorIdentification)]; - char Model[1+sizeof(InquiryStandardData->ProductIdentification)]; - char Revision[1+sizeof(InquiryStandardData->ProductRevisionLevel)]; - char SerialNumber[1+sizeof(InquiryUnitSerialNumber->ProductSerialNumber)]; - if (PhysicalDeviceInfo == NULL) break; - DAC960_SanitizeInquiryData(InquiryStandardData, InquiryUnitSerialNumber, - Vendor, Model, Revision, SerialNumber); - DAC960_Info(" %d:%d%s Vendor: %s Model: %s Revision: %s\n", - Controller, - PhysicalDeviceInfo->Channel, - PhysicalDeviceInfo->TargetID, - (PhysicalDeviceInfo->TargetID < 10 ? " " : ""), - Vendor, Model, Revision); - if (PhysicalDeviceInfo->NegotiatedSynchronousMegaTransfers == 0) - DAC960_Info(" %sAsynchronous\n", Controller, - (PhysicalDeviceInfo->NegotiatedDataWidthBits == 16 - ? "Wide " :"")); - else - DAC960_Info(" %sSynchronous at %d MB/sec\n", Controller, - (PhysicalDeviceInfo->NegotiatedDataWidthBits == 16 - ? "Wide " :""), - (PhysicalDeviceInfo->NegotiatedSynchronousMegaTransfers - * PhysicalDeviceInfo->NegotiatedDataWidthBits/8)); - if (InquiryUnitSerialNumber->PeripheralDeviceType != 0x1F) - DAC960_Info(" Serial Number: %s\n", Controller, SerialNumber); - if (PhysicalDeviceInfo->PhysicalDeviceState == - DAC960_V2_Device_Unconfigured) - continue; - DAC960_Info(" Disk Status: %s, %u blocks\n", Controller, - (PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Online - ? "Online" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Rebuild - ? 
"Rebuild" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Missing - ? "Missing" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Critical - ? "Critical" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Dead - ? "Dead" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_SuspectedDead - ? "Suspected-Dead" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_CommandedOffline - ? "Commanded-Offline" - : PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Standby - ? "Standby" : "Unknown"), - PhysicalDeviceInfo->ConfigurableDeviceSize); - if (PhysicalDeviceInfo->ParityErrors == 0 && - PhysicalDeviceInfo->SoftErrors == 0 && - PhysicalDeviceInfo->HardErrors == 0 && - PhysicalDeviceInfo->MiscellaneousErrors == 0 && - PhysicalDeviceInfo->CommandTimeouts == 0 && - PhysicalDeviceInfo->Retries == 0 && - PhysicalDeviceInfo->Aborts == 0 && - PhysicalDeviceInfo->PredictedFailuresDetected == 0) - continue; - DAC960_Info(" Errors - Parity: %d, Soft: %d, " - "Hard: %d, Misc: %d\n", Controller, - PhysicalDeviceInfo->ParityErrors, - PhysicalDeviceInfo->SoftErrors, - PhysicalDeviceInfo->HardErrors, - PhysicalDeviceInfo->MiscellaneousErrors); - DAC960_Info(" Timeouts: %d, Retries: %d, " - "Aborts: %d, Predicted: %d\n", Controller, - PhysicalDeviceInfo->CommandTimeouts, - PhysicalDeviceInfo->Retries, - PhysicalDeviceInfo->Aborts, - PhysicalDeviceInfo->PredictedFailuresDetected); - } - DAC960_Info(" Logical Drives:\n", Controller); - for (LogicalDriveNumber = 0; - LogicalDriveNumber < DAC960_MaxLogicalDrives; - LogicalDriveNumber++) - { - DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo = - Controller->V2.LogicalDeviceInformation[LogicalDriveNumber]; - static const unsigned char *ReadCacheStatus[] = { - "Read Cache Disabled", - "Read Cache Enabled", - "Read Ahead Enabled", - "Intelligent Read Ahead Enabled", - "-", "-", "-", "-" - }; - static const unsigned char *WriteCacheStatus[] = { - "Write Cache Disabled", - "Logical Device Read Only", - "Write Cache Enabled", - "Intelligent Write Cache Enabled", - "-", "-", "-", "-" - }; - unsigned char *GeometryTranslation; - if (LogicalDeviceInfo == NULL) continue; - switch (LogicalDeviceInfo->DriveGeometry) - { - case DAC960_V2_Geometry_128_32: - GeometryTranslation = "128/32"; - break; - case DAC960_V2_Geometry_255_63: - GeometryTranslation = "255/63"; - break; - default: - GeometryTranslation = "Invalid"; - DAC960_Error("Illegal Logical Device Geometry %d\n", - Controller, LogicalDeviceInfo->DriveGeometry); - break; - } - DAC960_Info(" /dev/rd/c%dd%d: RAID-%d, %s, %u blocks\n", - Controller, Controller->ControllerNumber, LogicalDriveNumber, - LogicalDeviceInfo->RAIDLevel, - (LogicalDeviceInfo->LogicalDeviceState - == DAC960_V2_LogicalDevice_Online - ? "Online" - : LogicalDeviceInfo->LogicalDeviceState - == DAC960_V2_LogicalDevice_Critical - ? "Critical" : "Offline"), - LogicalDeviceInfo->ConfigurableDeviceSize); - DAC960_Info(" Logical Device %s, BIOS Geometry: %s\n", - Controller, - (LogicalDeviceInfo->LogicalDeviceControl - .LogicalDeviceInitialized - ? 
"Initialized" : "Uninitialized"), - GeometryTranslation); - if (LogicalDeviceInfo->StripeSize == 0) - { - if (LogicalDeviceInfo->CacheLineSize == 0) - DAC960_Info(" Stripe Size: N/A, " - "Segment Size: N/A\n", Controller); - else - DAC960_Info(" Stripe Size: N/A, " - "Segment Size: %dKB\n", Controller, - 1 << (LogicalDeviceInfo->CacheLineSize - 2)); - } - else - { - if (LogicalDeviceInfo->CacheLineSize == 0) - DAC960_Info(" Stripe Size: %dKB, " - "Segment Size: N/A\n", Controller, - 1 << (LogicalDeviceInfo->StripeSize - 2)); - else - DAC960_Info(" Stripe Size: %dKB, " - "Segment Size: %dKB\n", Controller, - 1 << (LogicalDeviceInfo->StripeSize - 2), - 1 << (LogicalDeviceInfo->CacheLineSize - 2)); - } - DAC960_Info(" %s, %s\n", Controller, - ReadCacheStatus[ - LogicalDeviceInfo->LogicalDeviceControl.ReadCache], - WriteCacheStatus[ - LogicalDeviceInfo->LogicalDeviceControl.WriteCache]); - if (LogicalDeviceInfo->SoftErrors > 0 || - LogicalDeviceInfo->CommandsFailed > 0 || - LogicalDeviceInfo->DeferredWriteErrors) - DAC960_Info(" Errors - Soft: %d, Failed: %d, " - "Deferred Write: %d\n", Controller, - LogicalDeviceInfo->SoftErrors, - LogicalDeviceInfo->CommandsFailed, - LogicalDeviceInfo->DeferredWriteErrors); - - } - return true; -} - -/* - DAC960_RegisterBlockDevice registers the Block Device structures - associated with Controller. -*/ - -static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) -{ - int MajorNumber = DAC960_MAJOR + Controller->ControllerNumber; - int n; - - /* - Register the Block Device Major Number for this DAC960 Controller. - */ - if (register_blkdev(MajorNumber, "dac960") < 0) - return false; - - for (n = 0; n < DAC960_MaxLogicalDrives; n++) { - struct gendisk *disk = Controller->disks[n]; - struct request_queue *RequestQueue; - - /* for now, let all request queues share controller's lock */ - RequestQueue = blk_init_queue(DAC960_RequestFunction,&Controller->queue_lock); - if (!RequestQueue) { - printk("DAC960: failure to allocate request queue\n"); - continue; - } - Controller->RequestQueue[n] = RequestQueue; - RequestQueue->queuedata = Controller; - blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit); - blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand); - disk->queue = RequestQueue; - sprintf(disk->disk_name, "rd/c%dd%d", Controller->ControllerNumber, n); - disk->major = MajorNumber; - disk->first_minor = n << DAC960_MaxPartitionsBits; - disk->fops = &DAC960_BlockDeviceOperations; - } - /* - Indicate the Block Device Registration completed successfully, - */ - return true; -} - - -/* - DAC960_UnregisterBlockDevice unregisters the Block Device structures - associated with Controller. -*/ - -static void DAC960_UnregisterBlockDevice(DAC960_Controller_T *Controller) -{ - int MajorNumber = DAC960_MAJOR + Controller->ControllerNumber; - int disk; - - /* does order matter when deleting gendisk and cleanup in request queue? */ - for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) { - del_gendisk(Controller->disks[disk]); - blk_cleanup_queue(Controller->RequestQueue[disk]); - Controller->RequestQueue[disk] = NULL; - } - - /* - Unregister the Block Device Major Number for this DAC960 Controller. - */ - unregister_blkdev(MajorNumber, "dac960"); -} - -/* - DAC960_ComputeGenericDiskInfo computes the values for the Generic Disk - Information Partition Sector Counts and Block Sizes. 
-*/ - -static void DAC960_ComputeGenericDiskInfo(DAC960_Controller_T *Controller) -{ - int disk; - for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) - set_capacity(Controller->disks[disk], disk_size(Controller, disk)); -} - -/* - DAC960_ReportErrorStatus reports Controller BIOS Messages passed through - the Error Status Register when the driver performs the BIOS handshaking. - It returns true for fatal errors and false otherwise. -*/ - -static bool DAC960_ReportErrorStatus(DAC960_Controller_T *Controller, - unsigned char ErrorStatus, - unsigned char Parameter0, - unsigned char Parameter1) -{ - switch (ErrorStatus) - { - case 0x00: - DAC960_Notice("Physical Device %d:%d Not Responding\n", - Controller, Parameter1, Parameter0); - break; - case 0x08: - if (Controller->DriveSpinUpMessageDisplayed) break; - DAC960_Notice("Spinning Up Drives\n", Controller); - Controller->DriveSpinUpMessageDisplayed = true; - break; - case 0x30: - DAC960_Notice("Configuration Checksum Error\n", Controller); - break; - case 0x60: - DAC960_Notice("Mirror Race Recovery Failed\n", Controller); - break; - case 0x70: - DAC960_Notice("Mirror Race Recovery In Progress\n", Controller); - break; - case 0x90: - DAC960_Notice("Physical Device %d:%d COD Mismatch\n", - Controller, Parameter1, Parameter0); - break; - case 0xA0: - DAC960_Notice("Logical Drive Installation Aborted\n", Controller); - break; - case 0xB0: - DAC960_Notice("Mirror Race On A Critical Logical Drive\n", Controller); - break; - case 0xD0: - DAC960_Notice("New Controller Configuration Found\n", Controller); - break; - case 0xF0: - DAC960_Error("Fatal Memory Parity Error for Controller at\n", Controller); - return true; - default: - DAC960_Error("Unknown Initialization Error %02X for Controller at\n", - Controller, ErrorStatus); - return true; - } - return false; -} - - -/* - * DAC960_DetectCleanup releases the resources that were allocated - * during DAC960_DetectController(). DAC960_DetectController can - * have several internal failure points, so not ALL resources may - * have been allocated. It's important to free only - * resources that HAVE been allocated. The code below always - * tests that the resource has been allocated before attempting to - * free it.
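
The cleanup function that follows applies the guarded-teardown idiom described above: every release is gated on whether its resource was actually acquired, so the function is safe to call from any intermediate failure point. In miniature (ctx and its fields are hypothetical):

  if (ctx->mapped_base)         /* only if ioremap() succeeded */
    iounmap(ctx->mapped_base);
  if (ctx->irq)                 /* only if request_irq() succeeded */
    free_irq(ctx->irq, ctx);
  kfree(ctx);                   /* kfree(NULL) is likewise a no-op */
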
- */ -static void DAC960_DetectCleanup(DAC960_Controller_T *Controller) -{ - int i; - - /* Free the memory mailbox, status, and related structures */ - free_dma_loaf(Controller->PCIDevice, &Controller->DmaPages); - if (Controller->MemoryMappedAddress) { - switch(Controller->HardwareType) - { - case DAC960_GEM_Controller: - DAC960_GEM_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_BA_Controller: - DAC960_BA_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_LP_Controller: - DAC960_LP_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_LA_Controller: - DAC960_LA_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_PG_Controller: - DAC960_PG_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_PD_Controller: - DAC960_PD_DisableInterrupts(Controller->BaseAddress); - break; - case DAC960_P_Controller: - DAC960_PD_DisableInterrupts(Controller->BaseAddress); - break; - } - iounmap(Controller->MemoryMappedAddress); - } - if (Controller->IRQ_Channel) - free_irq(Controller->IRQ_Channel, Controller); - if (Controller->IO_Address) - release_region(Controller->IO_Address, 0x80); - pci_disable_device(Controller->PCIDevice); - for (i = 0; (i < DAC960_MaxLogicalDrives) && Controller->disks[i]; i++) - put_disk(Controller->disks[i]); - DAC960_Controllers[Controller->ControllerNumber] = NULL; - kfree(Controller); -} - - -/* - DAC960_DetectController detects Mylex DAC960/AcceleRAID/eXtremeRAID - PCI RAID Controllers by interrogating the PCI Configuration Space for - Controller Type. -*/ - -static DAC960_Controller_T * -DAC960_DetectController(struct pci_dev *PCI_Device, - const struct pci_device_id *entry) -{ - struct DAC960_privdata *privdata = - (struct DAC960_privdata *)entry->driver_data; - irq_handler_t InterruptHandler = privdata->InterruptHandler; - unsigned int MemoryWindowSize = privdata->MemoryWindowSize; - DAC960_Controller_T *Controller = NULL; - unsigned char DeviceFunction = PCI_Device->devfn; - unsigned char ErrorStatus, Parameter0, Parameter1; - unsigned int IRQ_Channel; - void __iomem *BaseAddress; - int i; - - Controller = kzalloc(sizeof(DAC960_Controller_T), GFP_ATOMIC); - if (Controller == NULL) { - DAC960_Error("Unable to allocate Controller structure for " - "Controller at\n", NULL); - return NULL; - } - Controller->ControllerNumber = DAC960_ControllerCount; - DAC960_Controllers[DAC960_ControllerCount++] = Controller; - Controller->Bus = PCI_Device->bus->number; - Controller->FirmwareType = privdata->FirmwareType; - Controller->HardwareType = privdata->HardwareType; - Controller->Device = DeviceFunction >> 3; - Controller->Function = DeviceFunction & 0x7; - Controller->PCIDevice = PCI_Device; - strcpy(Controller->FullModelName, "DAC960"); - - if (pci_enable_device(PCI_Device)) - goto Failure; - - switch (Controller->HardwareType) - { - case DAC960_GEM_Controller: - Controller->PCI_Address = pci_resource_start(PCI_Device, 0); - break; - case DAC960_BA_Controller: - Controller->PCI_Address = pci_resource_start(PCI_Device, 0); - break; - case DAC960_LP_Controller: - Controller->PCI_Address = pci_resource_start(PCI_Device, 0); - break; - case DAC960_LA_Controller: - Controller->PCI_Address = pci_resource_start(PCI_Device, 0); - break; - case DAC960_PG_Controller: - Controller->PCI_Address = pci_resource_start(PCI_Device, 0); - break; - case DAC960_PD_Controller: - Controller->IO_Address = pci_resource_start(PCI_Device, 0); - Controller->PCI_Address = pci_resource_start(PCI_Device, 1); - break; - case 
DAC960_P_Controller: - Controller->IO_Address = pci_resource_start(PCI_Device, 0); - Controller->PCI_Address = pci_resource_start(PCI_Device, 1); - break; - } - - pci_set_drvdata(PCI_Device, (void *)((long)Controller->ControllerNumber)); - for (i = 0; i < DAC960_MaxLogicalDrives; i++) { - Controller->disks[i] = alloc_disk(1<<DAC960_MaxPartitionsBits); - if (!Controller->disks[i]) - goto Failure; - Controller->disks[i]->private_data = (void *)((long)i); - } - init_waitqueue_head(&Controller->CommandWaitQueue); - init_waitqueue_head(&Controller->HealthStatusWaitQueue); - spin_lock_init(&Controller->queue_lock); - DAC960_AnnounceDriver(Controller); - /* - Map the Controller Register Window. - */ - if (MemoryWindowSize < PAGE_SIZE) - MemoryWindowSize = PAGE_SIZE; - Controller->MemoryMappedAddress = - ioremap_nocache(Controller->PCI_Address & PAGE_MASK, MemoryWindowSize); - Controller->BaseAddress = - Controller->MemoryMappedAddress + (Controller->PCI_Address & ~PAGE_MASK); - if (Controller->MemoryMappedAddress == NULL) - { - DAC960_Error("Unable to map Controller Register Window for " - "Controller at\n", Controller); - goto Failure; - } - BaseAddress = Controller->BaseAddress; - switch (Controller->HardwareType) - { - case DAC960_GEM_Controller: - DAC960_GEM_DisableInterrupts(BaseAddress); - DAC960_GEM_AcknowledgeHardwareMailboxStatus(BaseAddress); - udelay(1000); - while (DAC960_GEM_InitializationInProgressP(BaseAddress)) - { - if (DAC960_GEM_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V2_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to Enable Memory Mailbox Interface " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_GEM_EnableInterrupts(BaseAddress); - Controller->QueueCommand = DAC960_GEM_QueueCommand; - Controller->ReadControllerConfiguration = - DAC960_V2_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V2_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V2_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V2_QueueReadWriteCommand; - break; - case DAC960_BA_Controller: - DAC960_BA_DisableInterrupts(BaseAddress); - DAC960_BA_AcknowledgeHardwareMailboxStatus(BaseAddress); - udelay(1000); - while (DAC960_BA_InitializationInProgressP(BaseAddress)) - { - if (DAC960_BA_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V2_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to Enable Memory Mailbox Interface " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_BA_EnableInterrupts(BaseAddress); - Controller->QueueCommand = DAC960_BA_QueueCommand; - Controller->ReadControllerConfiguration = - DAC960_V2_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V2_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V2_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V2_QueueReadWriteCommand; - break; - case DAC960_LP_Controller: - DAC960_LP_DisableInterrupts(BaseAddress); - DAC960_LP_AcknowledgeHardwareMailboxStatus(BaseAddress); - udelay(1000); - while (DAC960_LP_InitializationInProgressP(BaseAddress)) - { - if (DAC960_LP_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && -
DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V2_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to Enable Memory Mailbox Interface " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_LP_EnableInterrupts(BaseAddress); - Controller->QueueCommand = DAC960_LP_QueueCommand; - Controller->ReadControllerConfiguration = - DAC960_V2_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V2_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V2_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V2_QueueReadWriteCommand; - break; - case DAC960_LA_Controller: - DAC960_LA_DisableInterrupts(BaseAddress); - DAC960_LA_AcknowledgeHardwareMailboxStatus(BaseAddress); - udelay(1000); - while (DAC960_LA_InitializationInProgressP(BaseAddress)) - { - if (DAC960_LA_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V1_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to Enable Memory Mailbox Interface " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_LA_EnableInterrupts(BaseAddress); - if (Controller->V1.DualModeMemoryMailboxInterface) - Controller->QueueCommand = DAC960_LA_QueueCommandDualMode; - else Controller->QueueCommand = DAC960_LA_QueueCommandSingleMode; - Controller->ReadControllerConfiguration = - DAC960_V1_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V1_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V1_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V1_QueueReadWriteCommand; - break; - case DAC960_PG_Controller: - DAC960_PG_DisableInterrupts(BaseAddress); - DAC960_PG_AcknowledgeHardwareMailboxStatus(BaseAddress); - udelay(1000); - while (DAC960_PG_InitializationInProgressP(BaseAddress)) - { - if (DAC960_PG_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V1_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to Enable Memory Mailbox Interface " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_PG_EnableInterrupts(BaseAddress); - if (Controller->V1.DualModeMemoryMailboxInterface) - Controller->QueueCommand = DAC960_PG_QueueCommandDualMode; - else Controller->QueueCommand = DAC960_PG_QueueCommandSingleMode; - Controller->ReadControllerConfiguration = - DAC960_V1_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V1_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V1_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V1_QueueReadWriteCommand; - break; - case DAC960_PD_Controller: - if (!request_region(Controller->IO_Address, 0x80, - Controller->FullModelName)) { - DAC960_Error("IO port 0x%lx busy for Controller at\n", - Controller, Controller->IO_Address); - goto Failure; - } - DAC960_PD_DisableInterrupts(BaseAddress); - DAC960_PD_AcknowledgeStatus(BaseAddress); - udelay(1000); - while (DAC960_PD_InitializationInProgressP(BaseAddress)) - { - if (DAC960_PD_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, 
Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V1_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to allocate DMA mapped memory " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_PD_EnableInterrupts(BaseAddress); - Controller->QueueCommand = DAC960_PD_QueueCommand; - Controller->ReadControllerConfiguration = - DAC960_V1_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V1_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V1_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V1_QueueReadWriteCommand; - break; - case DAC960_P_Controller: - if (!request_region(Controller->IO_Address, 0x80, - Controller->FullModelName)){ - DAC960_Error("IO port 0x%lx busy for Controller at\n", - Controller, Controller->IO_Address); - goto Failure; - } - DAC960_PD_DisableInterrupts(BaseAddress); - DAC960_PD_AcknowledgeStatus(BaseAddress); - udelay(1000); - while (DAC960_PD_InitializationInProgressP(BaseAddress)) - { - if (DAC960_PD_ReadErrorStatus(BaseAddress, &ErrorStatus, - &Parameter0, &Parameter1) && - DAC960_ReportErrorStatus(Controller, ErrorStatus, - Parameter0, Parameter1)) - goto Failure; - udelay(10); - } - if (!DAC960_V1_EnableMemoryMailboxInterface(Controller)) - { - DAC960_Error("Unable to allocate DMA mapped memory " - "for Controller at\n", Controller); - goto Failure; - } - DAC960_PD_EnableInterrupts(BaseAddress); - Controller->QueueCommand = DAC960_P_QueueCommand; - Controller->ReadControllerConfiguration = - DAC960_V1_ReadControllerConfiguration; - Controller->ReadDeviceConfiguration = - DAC960_V1_ReadDeviceConfiguration; - Controller->ReportDeviceConfiguration = - DAC960_V1_ReportDeviceConfiguration; - Controller->QueueReadWriteCommand = - DAC960_V1_QueueReadWriteCommand; - break; - } - /* - Acquire shared access to the IRQ Channel. - */ - IRQ_Channel = PCI_Device->irq; - if (request_irq(IRQ_Channel, InterruptHandler, IRQF_SHARED, - Controller->FullModelName, Controller) < 0) - { - DAC960_Error("Unable to acquire IRQ Channel %d for Controller at\n", - Controller, Controller->IRQ_Channel); - goto Failure; - } - Controller->IRQ_Channel = IRQ_Channel; - Controller->InitialCommand.CommandIdentifier = 1; - Controller->InitialCommand.Controller = Controller; - Controller->Commands[0] = &Controller->InitialCommand; - Controller->FreeCommands = &Controller->InitialCommand; - return Controller; - -Failure: - if (Controller->IO_Address == 0) - DAC960_Error("PCI Bus %d Device %d Function %d I/O Address N/A " - "PCI Address 0x%X\n", Controller, - Controller->Bus, Controller->Device, - Controller->Function, Controller->PCI_Address); - else - DAC960_Error("PCI Bus %d Device %d Function %d I/O Address " - "0x%X PCI Address 0x%X\n", Controller, - Controller->Bus, Controller->Device, - Controller->Function, Controller->IO_Address, - Controller->PCI_Address); - DAC960_DetectCleanup(Controller); - DAC960_ControllerCount--; - return NULL; -} - -/* - DAC960_InitializeController initializes Controller. -*/ - -static bool -DAC960_InitializeController(DAC960_Controller_T *Controller) -{ - if (DAC960_ReadControllerConfiguration(Controller) && - DAC960_ReportControllerConfiguration(Controller) && - DAC960_CreateAuxiliaryStructures(Controller) && - DAC960_ReadDeviceConfiguration(Controller) && - DAC960_ReportDeviceConfiguration(Controller) && - DAC960_RegisterBlockDevice(Controller)) - { - /* - Initialize the Monitoring Timer.
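
The timer armed below is the usual self-rearming periodic timer. One common shape for the callback, as a sketch rather than the driver's actual DAC960_MonitoringTimerFunction (which also queues the monitoring command):

  static void monitoring_timer_fn(struct timer_list *t)
  {
    DAC960_Controller_T *c = from_timer(c, t, MonitoringTimer);

    /* ... submit a monitoring/enquiry command here ... */

    if (!c->ShutdownMonitoringTimer)
      mod_timer(&c->MonitoringTimer,
                jiffies + DAC960_MonitoringTimerInterval);
  }
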
- */ - timer_setup(&Controller->MonitoringTimer, - DAC960_MonitoringTimerFunction, 0); - Controller->MonitoringTimer.expires = - jiffies + DAC960_MonitoringTimerInterval; - add_timer(&Controller->MonitoringTimer); - Controller->ControllerInitialized = true; - return true; - } - return false; -} - - -/* - DAC960_FinalizeController finalizes Controller. -*/ - -static void DAC960_FinalizeController(DAC960_Controller_T *Controller) -{ - if (Controller->ControllerInitialized) - { - unsigned long flags; - - /* - * Acquiring and releasing lock here eliminates - * a very low probability race. - * - * The code below allocates controller command structures - * from the free list without holding the controller lock. - * This is safe assuming there is no other activity on - * the controller at the time. - * - * But, there might be a monitoring command still - * in progress. Setting the Shutdown flag while holding - * the lock ensures that there is no monitoring command - * in the interrupt handler currently, and any monitoring - * commands that complete from this time on will NOT return - * their command structure to the free list. - */ - - spin_lock_irqsave(&Controller->queue_lock, flags); - Controller->ShutdownMonitoringTimer = 1; - spin_unlock_irqrestore(&Controller->queue_lock, flags); - - del_timer_sync(&Controller->MonitoringTimer); - if (Controller->FirmwareType == DAC960_V1_Controller) - { - DAC960_Notice("Flushing Cache...", Controller); - DAC960_V1_ExecuteType3(Controller, DAC960_V1_Flush, 0); - DAC960_Notice("done\n", Controller); - - if (Controller->HardwareType == DAC960_PD_Controller) - release_region(Controller->IO_Address, 0x80); - } - else - { - DAC960_Notice("Flushing Cache...", Controller); - DAC960_V2_DeviceOperation(Controller, DAC960_V2_PauseDevice, - DAC960_V2_RAID_Controller); - DAC960_Notice("done\n", Controller); - } - } - DAC960_UnregisterBlockDevice(Controller); - DAC960_DestroyAuxiliaryStructures(Controller); - DAC960_DestroyProcEntries(Controller); - DAC960_DetectCleanup(Controller); -} - - -/* - DAC960_Probe verifies the controller's existence and - initializes the DAC960 Driver for that controller. -*/ - -static int -DAC960_Probe(struct pci_dev *dev, const struct pci_device_id *entry) -{ - int disk; - DAC960_Controller_T *Controller; - - if (DAC960_ControllerCount == DAC960_MaxControllers) - { - DAC960_Error("More than %d DAC960 Controllers detected - " - "ignoring from Controller at\n", - NULL, DAC960_MaxControllers); - return -ENODEV; - } - - Controller = DAC960_DetectController(dev, entry); - if (!Controller) - return -ENODEV; - - if (!DAC960_InitializeController(Controller)) { - DAC960_FinalizeController(Controller); - return -ENODEV; - } - - for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) { - set_capacity(Controller->disks[disk], disk_size(Controller, disk)); - add_disk(Controller->disks[disk]); - } - DAC960_CreateProcEntries(Controller); - return 0; -} - - -/* - DAC960_Remove finalizes the Controller associated with PCI_Device. -*/ - -static void DAC960_Remove(struct pci_dev *PCI_Device) -{ - int Controller_Number = (long)pci_get_drvdata(PCI_Device); - DAC960_Controller_T *Controller = DAC960_Controllers[Controller_Number]; - if (Controller != NULL) - DAC960_FinalizeController(Controller); -} - - -/* - DAC960_V1_QueueReadWriteCommand prepares and queues a Read/Write Command for - DAC960 V1 Firmware Controllers.
-*/ - -static void DAC960_V1_QueueReadWriteCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; - DAC960_V1_ScatterGatherSegment_T *ScatterGatherList = - Command->V1.ScatterGatherList; - struct scatterlist *ScatterList = Command->V1.ScatterList; - - DAC960_V1_ClearCommand(Command); - - if (Command->SegmentCount == 1) - { - if (Command->DmaDirection == PCI_DMA_FROMDEVICE) - CommandMailbox->Type5.CommandOpcode = DAC960_V1_Read; - else - CommandMailbox->Type5.CommandOpcode = DAC960_V1_Write; - - CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; - CommandMailbox->Type5.LD.LogicalDriveNumber = Command->LogicalDriveNumber; - CommandMailbox->Type5.LogicalBlockAddress = Command->BlockNumber; - CommandMailbox->Type5.BusAddress = - (DAC960_BusAddress32_T)sg_dma_address(ScatterList); - } - else - { - int i; - - if (Command->DmaDirection == PCI_DMA_FROMDEVICE) - CommandMailbox->Type5.CommandOpcode = DAC960_V1_ReadWithScatterGather; - else - CommandMailbox->Type5.CommandOpcode = DAC960_V1_WriteWithScatterGather; - - CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; - CommandMailbox->Type5.LD.LogicalDriveNumber = Command->LogicalDriveNumber; - CommandMailbox->Type5.LogicalBlockAddress = Command->BlockNumber; - CommandMailbox->Type5.BusAddress = Command->V1.ScatterGatherListDMA; - - CommandMailbox->Type5.ScatterGatherCount = Command->SegmentCount; - - for (i = 0; i < Command->SegmentCount; i++, ScatterList++, ScatterGatherList++) { - ScatterGatherList->SegmentDataPointer = - (DAC960_BusAddress32_T)sg_dma_address(ScatterList); - ScatterGatherList->SegmentByteCount = - (DAC960_ByteCount32_T)sg_dma_len(ScatterList); - } - } - DAC960_QueueCommand(Command); -} - - -/* - DAC960_V2_QueueReadWriteCommand prepares and queues a Read/Write Command for - DAC960 V2 Firmware Controllers. -*/ - -static void DAC960_V2_QueueReadWriteCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - struct scatterlist *ScatterList = Command->V2.ScatterList; - - DAC960_V2_ClearCommand(Command); - - CommandMailbox->SCSI_10.CommandOpcode = DAC960_V2_SCSI_10; - CommandMailbox->SCSI_10.CommandControlBits.DataTransferControllerToHost = - (Command->DmaDirection == PCI_DMA_FROMDEVICE); - CommandMailbox->SCSI_10.DataTransferSize = - Command->BlockCount << DAC960_BlockSizeBits; - CommandMailbox->SCSI_10.RequestSenseBusAddress = Command->V2.RequestSenseDMA; - CommandMailbox->SCSI_10.PhysicalDevice = - Controller->V2.LogicalDriveToVirtualDevice[Command->LogicalDriveNumber]; - CommandMailbox->SCSI_10.RequestSenseSize = sizeof(DAC960_SCSI_RequestSense_T); - CommandMailbox->SCSI_10.CDBLength = 10; - CommandMailbox->SCSI_10.SCSI_CDB[0] = - (Command->DmaDirection == PCI_DMA_FROMDEVICE ? 
0x28 : 0x2A); - CommandMailbox->SCSI_10.SCSI_CDB[2] = Command->BlockNumber >> 24; - CommandMailbox->SCSI_10.SCSI_CDB[3] = Command->BlockNumber >> 16; - CommandMailbox->SCSI_10.SCSI_CDB[4] = Command->BlockNumber >> 8; - CommandMailbox->SCSI_10.SCSI_CDB[5] = Command->BlockNumber; - CommandMailbox->SCSI_10.SCSI_CDB[7] = Command->BlockCount >> 8; - CommandMailbox->SCSI_10.SCSI_CDB[8] = Command->BlockCount; - - if (Command->SegmentCount == 1) - { - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - (DAC960_BusAddress64_T)sg_dma_address(ScatterList); - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->SCSI_10.DataTransferSize; - } - else - { - DAC960_V2_ScatterGatherSegment_T *ScatterGatherList; - int i; - - if (Command->SegmentCount > 2) - { - ScatterGatherList = Command->V2.ScatterGatherList; - CommandMailbox->SCSI_10.CommandControlBits - .AdditionalScatterGatherListMemory = true; - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ExtendedScatterGather.ScatterGatherList0Length = Command->SegmentCount; - CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ExtendedScatterGather.ScatterGatherList0Address = - Command->V2.ScatterGatherListDMA; - } - else - ScatterGatherList = CommandMailbox->SCSI_10.DataTransferMemoryAddress - .ScatterGatherSegments; - - for (i = 0; i < Command->SegmentCount; i++, ScatterList++, ScatterGatherList++) { - ScatterGatherList->SegmentDataPointer = - (DAC960_BusAddress64_T)sg_dma_address(ScatterList); - ScatterGatherList->SegmentByteCount = - (DAC960_ByteCount64_T)sg_dma_len(ScatterList); - } - } - DAC960_QueueCommand(Command); -} - - -static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_queue *req_q) -{ - struct request *Request; - DAC960_Command_T *Command; - - while(1) { - Request = blk_peek_request(req_q); - if (!Request) - return 1; - - Command = DAC960_AllocateCommand(Controller); - if (Command == NULL) - return 0; - - if (rq_data_dir(Request) == READ) { - Command->DmaDirection = PCI_DMA_FROMDEVICE; - Command->CommandType = DAC960_ReadCommand; - } else { - Command->DmaDirection = PCI_DMA_TODEVICE; - Command->CommandType = DAC960_WriteCommand; - } - Command->Completion = Request->end_io_data; - Command->LogicalDriveNumber = (long)Request->rq_disk->private_data; - Command->BlockNumber = blk_rq_pos(Request); - Command->BlockCount = blk_rq_sectors(Request); - Command->Request = Request; - blk_start_request(Request); - Command->SegmentCount = blk_rq_map_sg(req_q, - Command->Request, Command->cmd_sglist); - /* pci_map_sg MAY change the value of SegCount */ - Command->SegmentCount = pci_map_sg(Controller->PCIDevice, Command->cmd_sglist, - Command->SegmentCount, Command->DmaDirection); - - DAC960_QueueReadWriteCommand(Command); - } -} - -/* - DAC960_ProcessRequest drains Controller's per-drive I/O Request Queues, - queueing Requests to the Controller until no free Command remains. The - index of the queue being serviced is saved so that processing resumes there - on the next invocation. -*/ -static void DAC960_ProcessRequest(DAC960_Controller_T *controller) -{ - int i; - - if (!controller->ControllerInitialized) - return; - - /* Do this better later!
- for (i = controller->req_q_index; i < DAC960_MaxLogicalDrives; i++) { - struct request_queue *req_q = controller->RequestQueue[i]; - - if (req_q == NULL) - continue; - - if (!DAC960_process_queue(controller, req_q)) { - controller->req_q_index = i; - return; - } - } - - if (controller->req_q_index == 0) - return; - - for (i = 0; i < controller->req_q_index; i++) { - struct request_queue *req_q = controller->RequestQueue[i]; - - if (req_q == NULL) - continue; - - if (!DAC960_process_queue(controller, req_q)) { - controller->req_q_index = i; - return; - } - } -} - - -/* - DAC960_queue_partial_rw extracts one bio from the request already - associated with argument command, and constructs a new command block to retry - I/O only on that bio, then queues that command to the controller. - - This function re-uses a previously-allocated Command, so - there is no failure mode from trying to allocate a command. -*/ - -static void DAC960_queue_partial_rw(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - struct request *Request = Command->Request; - struct request_queue *req_q = Controller->RequestQueue[Command->LogicalDriveNumber]; - - if (Command->DmaDirection == PCI_DMA_FROMDEVICE) - Command->CommandType = DAC960_ReadRetryCommand; - else - Command->CommandType = DAC960_WriteRetryCommand; - - /* - * We could be more efficient with these mapping requests - * and map only the portions that we need. But since this - * code should almost never be called, just go with a - * simple coding. - */ - (void)blk_rq_map_sg(req_q, Command->Request, Command->cmd_sglist); - - (void)pci_map_sg(Controller->PCIDevice, Command->cmd_sglist, 1, Command->DmaDirection); - /* - * Resubmitting the request sector at a time is really tedious. - * But, this should almost never happen. So, we're willing to pay - * this price so that in the end, as much of the transfer is completed - * successfully as possible. - */ - Command->SegmentCount = 1; - Command->BlockNumber = blk_rq_pos(Request); - Command->BlockCount = 1; - DAC960_QueueReadWriteCommand(Command); - return; -} - -/* - DAC960_RequestFunction is the I/O Request Function for DAC960 Controllers. -*/ - -static void DAC960_RequestFunction(struct request_queue *RequestQueue) -{ - DAC960_ProcessRequest(RequestQueue->queuedata); -} - -/* - DAC960_ProcessCompletedRequest performs completion processing for an - individual Request. -*/ - -static inline bool DAC960_ProcessCompletedRequest(DAC960_Command_T *Command, - bool SuccessfulIO) -{ - struct request *Request = Command->Request; - blk_status_t Error = SuccessfulIO ? BLK_STS_OK : BLK_STS_IOERR; - - pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist, - Command->SegmentCount, Command->DmaDirection); - - if (!__blk_end_request(Request, Error, Command->BlockCount << 9)) { - if (Command->Completion) { - complete(Command->Completion); - Command->Completion = NULL; - } - return true; - } - return false; -} - -/* - DAC960_V1_ReadWriteError prints an appropriate error message for Command - when an error occurs on a Read or Write operation.
-*/ - -static void DAC960_V1_ReadWriteError(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - unsigned char *CommandName = "UNKNOWN"; - switch (Command->CommandType) - { - case DAC960_ReadCommand: - case DAC960_ReadRetryCommand: - CommandName = "READ"; - break; - case DAC960_WriteCommand: - case DAC960_WriteRetryCommand: - CommandName = "WRITE"; - break; - case DAC960_MonitoringCommand: - case DAC960_ImmediateCommand: - case DAC960_QueuedCommand: - break; - } - switch (Command->V1.CommandStatus) - { - case DAC960_V1_IrrecoverableDataError: - DAC960_Error("Irrecoverable Data Error on %s:\n", - Controller, CommandName); - break; - case DAC960_V1_LogicalDriveNonexistentOrOffline: - DAC960_Error("Logical Drive Nonexistent or Offline on %s:\n", - Controller, CommandName); - break; - case DAC960_V1_AccessBeyondEndOfLogicalDrive: - DAC960_Error("Attempt to Access Beyond End of Logical Drive " - "on %s:\n", Controller, CommandName); - break; - case DAC960_V1_BadDataEncountered: - DAC960_Error("Bad Data Encountered on %s:\n", Controller, CommandName); - break; - default: - DAC960_Error("Unexpected Error Status %04X on %s:\n", - Controller, Command->V1.CommandStatus, CommandName); - break; - } - DAC960_Error(" /dev/rd/c%dd%d: absolute blocks %u..%u\n", - Controller, Controller->ControllerNumber, - Command->LogicalDriveNumber, Command->BlockNumber, - Command->BlockNumber + Command->BlockCount - 1); -} - - -/* - DAC960_V1_ProcessCompletedCommand performs completion processing for Command - for DAC960 V1 Firmware Controllers. -*/ - -static void DAC960_V1_ProcessCompletedCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_CommandType_T CommandType = Command->CommandType; - DAC960_V1_CommandOpcode_T CommandOpcode = - Command->V1.CommandMailbox.Common.CommandOpcode; - DAC960_V1_CommandStatus_T CommandStatus = Command->V1.CommandStatus; - - if (CommandType == DAC960_ReadCommand || - CommandType == DAC960_WriteCommand) - { - -#ifdef FORCE_RETRY_DEBUG - CommandStatus = DAC960_V1_IrrecoverableDataError; -#endif - - if (CommandStatus == DAC960_V1_NormalCompletion) { - - if (!DAC960_ProcessCompletedRequest(Command, true)) - BUG(); - - } else if (CommandStatus == DAC960_V1_IrrecoverableDataError || - CommandStatus == DAC960_V1_BadDataEncountered) - { - /* - * break the command down into pieces and resubmit each - * piece, hoping that some of them will succeed. - */ - DAC960_queue_partial_rw(Command); - return; - } - else - { - if (CommandStatus != DAC960_V1_LogicalDriveNonexistentOrOffline) - DAC960_V1_ReadWriteError(Command); - - if (!DAC960_ProcessCompletedRequest(Command, false)) - BUG(); - } - } - else if (CommandType == DAC960_ReadRetryCommand || - CommandType == DAC960_WriteRetryCommand) - { - bool normal_completion; -#ifdef FORCE_RETRY_FAILURE_DEBUG - static int retry_count = 1; -#endif - /* - Perform completion processing for the portion that was - retried, and submit the next portion, if any. 
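On a data error, the code above degrades a failed multi-sector transfer into single-sector commands (DAC960_queue_partial_rw) so that every readable block is still recovered; the driver resubmits one block per completion. A synchronous userspace sketch of the same salvage policy (names illustrative, not driver API):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Retry a failed [lba, lba + count) range one block at a time,
       salvaging good blocks and reporting only the truly bad ones. */
    static void retry_blockwise(uint32_t lba, uint32_t count,
                                bool (*submit_one)(uint32_t lba))
    {
        uint32_t i;

        for (i = 0; i < count; i++)
            if (!submit_one(lba + i))
                fprintf(stderr, "block %u unrecoverable\n",
                        (unsigned int)(lba + i));
    }

    static bool always_ok(uint32_t lba) { (void)lba; return true; }

    int main(void)
    {
        retry_blockwise(1000, 8, always_ok);
        return 0;
    }
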
- */ - normal_completion = true; - if (CommandStatus != DAC960_V1_NormalCompletion) { - normal_completion = false; - if (CommandStatus != DAC960_V1_LogicalDriveNonexistentOrOffline) - DAC960_V1_ReadWriteError(Command); - } - -#ifdef FORCE_RETRY_FAILURE_DEBUG - if (!(++retry_count % 10000)) { - printk("V1 error retry failure test\n"); - normal_completion = false; - DAC960_V1_ReadWriteError(Command); - } -#endif - - if (!DAC960_ProcessCompletedRequest(Command, normal_completion)) { - DAC960_queue_partial_rw(Command); - return; - } - } - - else if (CommandType == DAC960_MonitoringCommand) - { - if (Controller->ShutdownMonitoringTimer) - return; - if (CommandOpcode == DAC960_V1_Enquiry) - { - DAC960_V1_Enquiry_T *OldEnquiry = &Controller->V1.Enquiry; - DAC960_V1_Enquiry_T *NewEnquiry = Controller->V1.NewEnquiry; - unsigned int OldCriticalLogicalDriveCount = - OldEnquiry->CriticalLogicalDriveCount; - unsigned int NewCriticalLogicalDriveCount = - NewEnquiry->CriticalLogicalDriveCount; - if (NewEnquiry->NumberOfLogicalDrives > Controller->LogicalDriveCount) - { - int LogicalDriveNumber = Controller->LogicalDriveCount - 1; - while (++LogicalDriveNumber < NewEnquiry->NumberOfLogicalDrives) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "Now Exists\n", Controller, - LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - Controller->LogicalDriveCount = NewEnquiry->NumberOfLogicalDrives; - DAC960_ComputeGenericDiskInfo(Controller); - } - if (NewEnquiry->NumberOfLogicalDrives < Controller->LogicalDriveCount) - { - int LogicalDriveNumber = NewEnquiry->NumberOfLogicalDrives - 1; - while (++LogicalDriveNumber < Controller->LogicalDriveCount) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "No Longer Exists\n", Controller, - LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - Controller->LogicalDriveCount = NewEnquiry->NumberOfLogicalDrives; - DAC960_ComputeGenericDiskInfo(Controller); - } - if (NewEnquiry->StatusFlags.DeferredWriteError != - OldEnquiry->StatusFlags.DeferredWriteError) - DAC960_Critical("Deferred Write Error Flag is now %s\n", Controller, - (NewEnquiry->StatusFlags.DeferredWriteError - ? 
"TRUE" : "FALSE")); - if ((NewCriticalLogicalDriveCount > 0 || - NewCriticalLogicalDriveCount != OldCriticalLogicalDriveCount) || - (NewEnquiry->OfflineLogicalDriveCount > 0 || - NewEnquiry->OfflineLogicalDriveCount != - OldEnquiry->OfflineLogicalDriveCount) || - (NewEnquiry->DeadDriveCount > 0 || - NewEnquiry->DeadDriveCount != - OldEnquiry->DeadDriveCount) || - (NewEnquiry->EventLogSequenceNumber != - OldEnquiry->EventLogSequenceNumber) || - Controller->MonitoringTimerCount == 0 || - time_after_eq(jiffies, Controller->SecondaryMonitoringTime - + DAC960_SecondaryMonitoringInterval)) - { - Controller->V1.NeedLogicalDriveInformation = true; - Controller->V1.NewEventLogSequenceNumber = - NewEnquiry->EventLogSequenceNumber; - Controller->V1.NeedErrorTableInformation = true; - Controller->V1.NeedDeviceStateInformation = true; - Controller->V1.StartDeviceStateScan = true; - Controller->V1.NeedBackgroundInitializationStatus = - Controller->V1.BackgroundInitializationStatusSupported; - Controller->SecondaryMonitoringTime = jiffies; - } - if (NewEnquiry->RebuildFlag == DAC960_V1_StandbyRebuildInProgress || - NewEnquiry->RebuildFlag - == DAC960_V1_BackgroundRebuildInProgress || - OldEnquiry->RebuildFlag == DAC960_V1_StandbyRebuildInProgress || - OldEnquiry->RebuildFlag == DAC960_V1_BackgroundRebuildInProgress) - { - Controller->V1.NeedRebuildProgress = true; - Controller->V1.RebuildProgressFirst = - (NewEnquiry->CriticalLogicalDriveCount < - OldEnquiry->CriticalLogicalDriveCount); - } - if (OldEnquiry->RebuildFlag == DAC960_V1_BackgroundCheckInProgress) - switch (NewEnquiry->RebuildFlag) - { - case DAC960_V1_NoStandbyRebuildOrCheckInProgress: - DAC960_Progress("Consistency Check Completed Successfully\n", - Controller); - break; - case DAC960_V1_StandbyRebuildInProgress: - case DAC960_V1_BackgroundRebuildInProgress: - break; - case DAC960_V1_BackgroundCheckInProgress: - Controller->V1.NeedConsistencyCheckProgress = true; - break; - case DAC960_V1_StandbyRebuildCompletedWithError: - DAC960_Progress("Consistency Check Completed with Error\n", - Controller); - break; - case DAC960_V1_BackgroundRebuildOrCheckFailed_DriveFailed: - DAC960_Progress("Consistency Check Failed - " - "Physical Device Failed\n", Controller); - break; - case DAC960_V1_BackgroundRebuildOrCheckFailed_LogicalDriveFailed: - DAC960_Progress("Consistency Check Failed - " - "Logical Drive Failed\n", Controller); - break; - case DAC960_V1_BackgroundRebuildOrCheckFailed_OtherCauses: - DAC960_Progress("Consistency Check Failed - Other Causes\n", - Controller); - break; - case DAC960_V1_BackgroundRebuildOrCheckSuccessfullyTerminated: - DAC960_Progress("Consistency Check Successfully Terminated\n", - Controller); - break; - } - else if (NewEnquiry->RebuildFlag - == DAC960_V1_BackgroundCheckInProgress) - Controller->V1.NeedConsistencyCheckProgress = true; - Controller->MonitoringAlertMode = - (NewEnquiry->CriticalLogicalDriveCount > 0 || - NewEnquiry->OfflineLogicalDriveCount > 0 || - NewEnquiry->DeadDriveCount > 0); - if (NewEnquiry->RebuildFlag > DAC960_V1_BackgroundCheckInProgress) - { - Controller->V1.PendingRebuildFlag = NewEnquiry->RebuildFlag; - Controller->V1.RebuildFlagPending = true; - } - memcpy(&Controller->V1.Enquiry, &Controller->V1.NewEnquiry, - sizeof(DAC960_V1_Enquiry_T)); - } - else if (CommandOpcode == DAC960_V1_PerformEventLogOperation) - { - static char - *DAC960_EventMessages[] = - { "killed because write recovery failed", - "killed because of SCSI bus reset failure", - "killed because of double check condition", - 
"killed because it was removed", - "killed because of gross error on SCSI chip", - "killed because of bad tag returned from drive", - "killed because of timeout on SCSI command", - "killed because of reset SCSI command issued from system", - "killed because busy or parity error count exceeded limit", - "killed because of 'kill drive' command from system", - "killed because of selection timeout", - "killed due to SCSI phase sequence error", - "killed due to unknown status" }; - DAC960_V1_EventLogEntry_T *EventLogEntry = - Controller->V1.EventLogEntry; - if (EventLogEntry->SequenceNumber == - Controller->V1.OldEventLogSequenceNumber) - { - unsigned char SenseKey = EventLogEntry->SenseKey; - unsigned char AdditionalSenseCode = - EventLogEntry->AdditionalSenseCode; - unsigned char AdditionalSenseCodeQualifier = - EventLogEntry->AdditionalSenseCodeQualifier; - if (SenseKey == DAC960_SenseKey_VendorSpecific && - AdditionalSenseCode == 0x80 && - AdditionalSenseCodeQualifier < - ARRAY_SIZE(DAC960_EventMessages)) - DAC960_Critical("Physical Device %d:%d %s\n", Controller, - EventLogEntry->Channel, - EventLogEntry->TargetID, - DAC960_EventMessages[ - AdditionalSenseCodeQualifier]); - else if (SenseKey == DAC960_SenseKey_UnitAttention && - AdditionalSenseCode == 0x29) - { - if (Controller->MonitoringTimerCount > 0) - Controller->V1.DeviceResetCount[EventLogEntry->Channel] - [EventLogEntry->TargetID]++; - } - else if (!(SenseKey == DAC960_SenseKey_NoSense || - (SenseKey == DAC960_SenseKey_NotReady && - AdditionalSenseCode == 0x04 && - (AdditionalSenseCodeQualifier == 0x01 || - AdditionalSenseCodeQualifier == 0x02)))) - { - DAC960_Critical("Physical Device %d:%d Error Log: " - "Sense Key = %X, ASC = %02X, ASCQ = %02X\n", - Controller, - EventLogEntry->Channel, - EventLogEntry->TargetID, - SenseKey, - AdditionalSenseCode, - AdditionalSenseCodeQualifier); - DAC960_Critical("Physical Device %d:%d Error Log: " - "Information = %02X%02X%02X%02X " - "%02X%02X%02X%02X\n", - Controller, - EventLogEntry->Channel, - EventLogEntry->TargetID, - EventLogEntry->Information[0], - EventLogEntry->Information[1], - EventLogEntry->Information[2], - EventLogEntry->Information[3], - EventLogEntry->CommandSpecificInformation[0], - EventLogEntry->CommandSpecificInformation[1], - EventLogEntry->CommandSpecificInformation[2], - EventLogEntry->CommandSpecificInformation[3]); - } - } - Controller->V1.OldEventLogSequenceNumber++; - } - else if (CommandOpcode == DAC960_V1_GetErrorTable) - { - DAC960_V1_ErrorTable_T *OldErrorTable = &Controller->V1.ErrorTable; - DAC960_V1_ErrorTable_T *NewErrorTable = Controller->V1.NewErrorTable; - int Channel, TargetID; - for (Channel = 0; Channel < Controller->Channels; Channel++) - for (TargetID = 0; TargetID < Controller->Targets; TargetID++) - { - DAC960_V1_ErrorTableEntry_T *NewErrorEntry = - &NewErrorTable->ErrorTableEntries[Channel][TargetID]; - DAC960_V1_ErrorTableEntry_T *OldErrorEntry = - &OldErrorTable->ErrorTableEntries[Channel][TargetID]; - if ((NewErrorEntry->ParityErrorCount != - OldErrorEntry->ParityErrorCount) || - (NewErrorEntry->SoftErrorCount != - OldErrorEntry->SoftErrorCount) || - (NewErrorEntry->HardErrorCount != - OldErrorEntry->HardErrorCount) || - (NewErrorEntry->MiscErrorCount != - OldErrorEntry->MiscErrorCount)) - DAC960_Critical("Physical Device %d:%d Errors: " - "Parity = %d, Soft = %d, " - "Hard = %d, Misc = %d\n", - Controller, Channel, TargetID, - NewErrorEntry->ParityErrorCount, - NewErrorEntry->SoftErrorCount, - NewErrorEntry->HardErrorCount, - 
NewErrorEntry->MiscErrorCount); - } - memcpy(&Controller->V1.ErrorTable, Controller->V1.NewErrorTable, - sizeof(DAC960_V1_ErrorTable_T)); - } - else if (CommandOpcode == DAC960_V1_GetDeviceState) - { - DAC960_V1_DeviceState_T *OldDeviceState = - &Controller->V1.DeviceState[Controller->V1.DeviceStateChannel] - [Controller->V1.DeviceStateTargetID]; - DAC960_V1_DeviceState_T *NewDeviceState = - Controller->V1.NewDeviceState; - if (NewDeviceState->DeviceState != OldDeviceState->DeviceState) - DAC960_Critical("Physical Device %d:%d is now %s\n", Controller, - Controller->V1.DeviceStateChannel, - Controller->V1.DeviceStateTargetID, - (NewDeviceState->DeviceState - == DAC960_V1_Device_Dead - ? "DEAD" - : NewDeviceState->DeviceState - == DAC960_V1_Device_WriteOnly - ? "WRITE-ONLY" - : NewDeviceState->DeviceState - == DAC960_V1_Device_Online - ? "ONLINE" : "STANDBY")); - if (OldDeviceState->DeviceState == DAC960_V1_Device_Dead && - NewDeviceState->DeviceState != DAC960_V1_Device_Dead) - { - Controller->V1.NeedDeviceInquiryInformation = true; - Controller->V1.NeedDeviceSerialNumberInformation = true; - Controller->V1.DeviceResetCount - [Controller->V1.DeviceStateChannel] - [Controller->V1.DeviceStateTargetID] = 0; - } - memcpy(OldDeviceState, NewDeviceState, - sizeof(DAC960_V1_DeviceState_T)); - } - else if (CommandOpcode == DAC960_V1_GetLogicalDriveInformation) - { - int LogicalDriveNumber; - for (LogicalDriveNumber = 0; - LogicalDriveNumber < Controller->LogicalDriveCount; - LogicalDriveNumber++) - { - DAC960_V1_LogicalDriveInformation_T *OldLogicalDriveInformation = - &Controller->V1.LogicalDriveInformation[LogicalDriveNumber]; - DAC960_V1_LogicalDriveInformation_T *NewLogicalDriveInformation = - &(*Controller->V1.NewLogicalDriveInformation)[LogicalDriveNumber]; - if (NewLogicalDriveInformation->LogicalDriveState != - OldLogicalDriveInformation->LogicalDriveState) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "is now %s\n", Controller, - LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (NewLogicalDriveInformation->LogicalDriveState - == DAC960_V1_LogicalDrive_Online - ? "ONLINE" - : NewLogicalDriveInformation->LogicalDriveState - == DAC960_V1_LogicalDrive_Critical - ? "CRITICAL" : "OFFLINE")); - if (NewLogicalDriveInformation->WriteBack != - OldLogicalDriveInformation->WriteBack) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "is now %s\n", Controller, - LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (NewLogicalDriveInformation->WriteBack - ? 
"WRITE BACK" : "WRITE THRU")); - } - memcpy(&Controller->V1.LogicalDriveInformation, - Controller->V1.NewLogicalDriveInformation, - sizeof(DAC960_V1_LogicalDriveInformationArray_T)); - } - else if (CommandOpcode == DAC960_V1_GetRebuildProgress) - { - unsigned int LogicalDriveNumber = - Controller->V1.RebuildProgress->LogicalDriveNumber; - unsigned int LogicalDriveSize = - Controller->V1.RebuildProgress->LogicalDriveSize; - unsigned int BlocksCompleted = - LogicalDriveSize - Controller->V1.RebuildProgress->RemainingBlocks; - if (CommandStatus == DAC960_V1_NoRebuildOrCheckInProgress && - Controller->V1.LastRebuildStatus == DAC960_V1_NormalCompletion) - CommandStatus = DAC960_V1_RebuildSuccessful; - switch (CommandStatus) - { - case DAC960_V1_NormalCompletion: - Controller->EphemeralProgressMessage = true; - DAC960_Progress("Rebuild in Progress: " - "Logical Drive %d (/dev/rd/c%dd%d) " - "%d%% completed\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (100 * (BlocksCompleted >> 7)) - / (LogicalDriveSize >> 7)); - Controller->EphemeralProgressMessage = false; - break; - case DAC960_V1_RebuildFailed_LogicalDriveFailure: - DAC960_Progress("Rebuild Failed due to " - "Logical Drive Failure\n", Controller); - break; - case DAC960_V1_RebuildFailed_BadBlocksOnOther: - DAC960_Progress("Rebuild Failed due to " - "Bad Blocks on Other Drives\n", Controller); - break; - case DAC960_V1_RebuildFailed_NewDriveFailed: - DAC960_Progress("Rebuild Failed due to " - "Failure of Drive Being Rebuilt\n", Controller); - break; - case DAC960_V1_NoRebuildOrCheckInProgress: - break; - case DAC960_V1_RebuildSuccessful: - DAC960_Progress("Rebuild Completed Successfully\n", Controller); - break; - case DAC960_V1_RebuildSuccessfullyTerminated: - DAC960_Progress("Rebuild Successfully Terminated\n", Controller); - break; - } - Controller->V1.LastRebuildStatus = CommandStatus; - if (CommandType != DAC960_MonitoringCommand && - Controller->V1.RebuildStatusPending) - { - Command->V1.CommandStatus = Controller->V1.PendingRebuildStatus; - Controller->V1.RebuildStatusPending = false; - } - else if (CommandType == DAC960_MonitoringCommand && - CommandStatus != DAC960_V1_NormalCompletion && - CommandStatus != DAC960_V1_NoRebuildOrCheckInProgress) - { - Controller->V1.PendingRebuildStatus = CommandStatus; - Controller->V1.RebuildStatusPending = true; - } - } - else if (CommandOpcode == DAC960_V1_RebuildStat) - { - unsigned int LogicalDriveNumber = - Controller->V1.RebuildProgress->LogicalDriveNumber; - unsigned int LogicalDriveSize = - Controller->V1.RebuildProgress->LogicalDriveSize; - unsigned int BlocksCompleted = - LogicalDriveSize - Controller->V1.RebuildProgress->RemainingBlocks; - if (CommandStatus == DAC960_V1_NormalCompletion) - { - Controller->EphemeralProgressMessage = true; - DAC960_Progress("Consistency Check in Progress: " - "Logical Drive %d (/dev/rd/c%dd%d) " - "%d%% completed\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (100 * (BlocksCompleted >> 7)) - / (LogicalDriveSize >> 7)); - Controller->EphemeralProgressMessage = false; - } - } - else if (CommandOpcode == DAC960_V1_BackgroundInitializationControl) - { - unsigned int LogicalDriveNumber = - Controller->V1.BackgroundInitializationStatus->LogicalDriveNumber; - unsigned int LogicalDriveSize = - Controller->V1.BackgroundInitializationStatus->LogicalDriveSize; - unsigned int BlocksCompleted = - Controller->V1.BackgroundInitializationStatus->BlocksCompleted; - switch 
(CommandStatus) - { - case DAC960_V1_NormalCompletion: - switch (Controller->V1.BackgroundInitializationStatus->Status) - { - case DAC960_V1_BackgroundInitializationInvalid: - break; - case DAC960_V1_BackgroundInitializationStarted: - DAC960_Progress("Background Initialization Started\n", - Controller); - break; - case DAC960_V1_BackgroundInitializationInProgress: - if (BlocksCompleted == - Controller->V1.LastBackgroundInitializationStatus. - BlocksCompleted && - LogicalDriveNumber == - Controller->V1.LastBackgroundInitializationStatus. - LogicalDriveNumber) - break; - Controller->EphemeralProgressMessage = true; - DAC960_Progress("Background Initialization in Progress: " - "Logical Drive %d (/dev/rd/c%dd%d) " - "%d%% completed\n", - Controller, LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber, - (100 * (BlocksCompleted >> 7)) - / (LogicalDriveSize >> 7)); - Controller->EphemeralProgressMessage = false; - break; - case DAC960_V1_BackgroundInitializationSuspended: - DAC960_Progress("Background Initialization Suspended\n", - Controller); - break; - case DAC960_V1_BackgroundInitializationCancelled: - DAC960_Progress("Background Initialization Cancelled\n", - Controller); - break; - } - memcpy(&Controller->V1.LastBackgroundInitializationStatus, - Controller->V1.BackgroundInitializationStatus, - sizeof(DAC960_V1_BackgroundInitializationStatus_T)); - break; - case DAC960_V1_BackgroundInitSuccessful: - if (Controller->V1.BackgroundInitializationStatus->Status == - DAC960_V1_BackgroundInitializationInProgress) - DAC960_Progress("Background Initialization " - "Completed Successfully\n", Controller); - Controller->V1.BackgroundInitializationStatus->Status = - DAC960_V1_BackgroundInitializationInvalid; - break; - case DAC960_V1_BackgroundInitAborted: - if (Controller->V1.BackgroundInitializationStatus->Status == - DAC960_V1_BackgroundInitializationInProgress) - DAC960_Progress("Background Initialization Aborted\n", - Controller); - Controller->V1.BackgroundInitializationStatus->Status = - DAC960_V1_BackgroundInitializationInvalid; - break; - case DAC960_V1_NoBackgroundInitInProgress: - break; - } - } - else if (CommandOpcode == DAC960_V1_DCDB) - { - /* - This is a bit ugly. - - The InquiryStandardData and - the InquiryUnitSerialNumber information - retrieval operations BOTH use the DAC960_V1_DCDB - commands. The test above can't distinguish between - these two cases. - - Instead, we rely on the order of code later in this - function to ensure that DeviceInquiryInformation commands - are submitted before DeviceSerialNumber commands.
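Concretely, the two DCDB submissions that follow differ only in the 6-byte INQUIRY CDB: EVPD=0 for standard inquiry data, EVPD=1 with page 0x80 for the Unit Serial Number VPD page. A sketch of that CDB layout (pack_inquiry_cdb is an illustrative helper, not driver API):

    #include <stdint.h>
    #include <string.h>

    /* Pack a 6-byte SCSI INQUIRY CDB; with evpd set, byte 2 selects
       the vital product data page (0x80 = Unit Serial Number). */
    static void pack_inquiry_cdb(uint8_t cdb[6], int evpd,
                                 uint8_t page, uint8_t alloc_len)
    {
        memset(cdb, 0, 6);
        cdb[0] = 0x12;                  /* INQUIRY */
        cdb[1] = evpd ? 0x01 : 0x00;
        cdb[2] = evpd ? page : 0x00;
        cdb[4] = alloc_len;             /* bytes 3 and 5 stay zero */
    }

    int main(void)
    {
        uint8_t std_cdb[6], vpd_cdb[6];

        pack_inquiry_cdb(std_cdb, 0, 0, 36);     /* standard data */
        pack_inquiry_cdb(vpd_cdb, 1, 0x80, 255); /* serial number */
        return 0;
    }
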
- */ - if (Controller->V1.NeedDeviceInquiryInformation) - { - DAC960_SCSI_Inquiry_T *InquiryStandardData = - &Controller->V1.InquiryStandardData - [Controller->V1.DeviceStateChannel] - [Controller->V1.DeviceStateTargetID]; - if (CommandStatus != DAC960_V1_NormalCompletion) - { - memset(InquiryStandardData, 0, - sizeof(DAC960_SCSI_Inquiry_T)); - InquiryStandardData->PeripheralDeviceType = 0x1F; - } - else - memcpy(InquiryStandardData, - Controller->V1.NewInquiryStandardData, - sizeof(DAC960_SCSI_Inquiry_T)); - Controller->V1.NeedDeviceInquiryInformation = false; - } - else if (Controller->V1.NeedDeviceSerialNumberInformation) - { - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - &Controller->V1.InquiryUnitSerialNumber - [Controller->V1.DeviceStateChannel] - [Controller->V1.DeviceStateTargetID]; - if (CommandStatus != DAC960_V1_NormalCompletion) - { - memset(InquiryUnitSerialNumber, 0, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - } - else - memcpy(InquiryUnitSerialNumber, - Controller->V1.NewInquiryUnitSerialNumber, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - Controller->V1.NeedDeviceSerialNumberInformation = false; - } - } - /* - Begin submitting new monitoring commands. - */ - if (Controller->V1.NewEventLogSequenceNumber - - Controller->V1.OldEventLogSequenceNumber > 0) - { - Command->V1.CommandMailbox.Type3E.CommandOpcode = - DAC960_V1_PerformEventLogOperation; - Command->V1.CommandMailbox.Type3E.OperationType = - DAC960_V1_GetEventLogEntry; - Command->V1.CommandMailbox.Type3E.OperationQualifier = 1; - Command->V1.CommandMailbox.Type3E.SequenceNumber = - Controller->V1.OldEventLogSequenceNumber; - Command->V1.CommandMailbox.Type3E.BusAddress = - Controller->V1.EventLogEntryDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedErrorTableInformation) - { - Controller->V1.NeedErrorTableInformation = false; - Command->V1.CommandMailbox.Type3.CommandOpcode = - DAC960_V1_GetErrorTable; - Command->V1.CommandMailbox.Type3.BusAddress = - Controller->V1.NewErrorTableDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedRebuildProgress && - Controller->V1.RebuildProgressFirst) - { - Controller->V1.NeedRebuildProgress = false; - Command->V1.CommandMailbox.Type3.CommandOpcode = - DAC960_V1_GetRebuildProgress; - Command->V1.CommandMailbox.Type3.BusAddress = - Controller->V1.RebuildProgressDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedDeviceStateInformation) - { - if (Controller->V1.NeedDeviceInquiryInformation) - { - DAC960_V1_DCDB_T *DCDB = Controller->V1.MonitoringDCDB; - dma_addr_t DCDB_DMA = Controller->V1.MonitoringDCDB_DMA; - - dma_addr_t NewInquiryStandardDataDMA = - Controller->V1.NewInquiryStandardDataDMA; - - Command->V1.CommandMailbox.Type3.CommandOpcode = DAC960_V1_DCDB; - Command->V1.CommandMailbox.Type3.BusAddress = DCDB_DMA; - DCDB->Channel = Controller->V1.DeviceStateChannel; - DCDB->TargetID = Controller->V1.DeviceStateTargetID; - DCDB->Direction = DAC960_V1_DCDB_DataTransferDeviceToSystem; - DCDB->EarlyStatus = false; - DCDB->Timeout = DAC960_V1_DCDB_Timeout_10_seconds; - DCDB->NoAutomaticRequestSense = false; - DCDB->DisconnectPermitted = true; - DCDB->TransferLength = sizeof(DAC960_SCSI_Inquiry_T); - DCDB->BusAddress = NewInquiryStandardDataDMA; - DCDB->CDBLength = 6; - DCDB->TransferLengthHigh4 = 0; - DCDB->SenseLength = sizeof(DCDB->SenseData); - DCDB->CDB[0] = 0x12; /* INQUIRY */ - DCDB->CDB[1] = 0; /* EVPD = 0 */ - 
DCDB->CDB[2] = 0; /* Page Code */ - DCDB->CDB[3] = 0; /* Reserved */ - DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_T); - DCDB->CDB[5] = 0; /* Control */ - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedDeviceSerialNumberInformation) - { - DAC960_V1_DCDB_T *DCDB = Controller->V1.MonitoringDCDB; - dma_addr_t DCDB_DMA = Controller->V1.MonitoringDCDB_DMA; - dma_addr_t NewInquiryUnitSerialNumberDMA = - Controller->V1.NewInquiryUnitSerialNumberDMA; - - Command->V1.CommandMailbox.Type3.CommandOpcode = DAC960_V1_DCDB; - Command->V1.CommandMailbox.Type3.BusAddress = DCDB_DMA; - DCDB->Channel = Controller->V1.DeviceStateChannel; - DCDB->TargetID = Controller->V1.DeviceStateTargetID; - DCDB->Direction = DAC960_V1_DCDB_DataTransferDeviceToSystem; - DCDB->EarlyStatus = false; - DCDB->Timeout = DAC960_V1_DCDB_Timeout_10_seconds; - DCDB->NoAutomaticRequestSense = false; - DCDB->DisconnectPermitted = true; - DCDB->TransferLength = - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - DCDB->BusAddress = NewInquiryUnitSerialNumberDMA; - DCDB->CDBLength = 6; - DCDB->TransferLengthHigh4 = 0; - DCDB->SenseLength = sizeof(DCDB->SenseData); - DCDB->CDB[0] = 0x12; /* INQUIRY */ - DCDB->CDB[1] = 1; /* EVPD = 1 */ - DCDB->CDB[2] = 0x80; /* Page Code */ - DCDB->CDB[3] = 0; /* Reserved */ - DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T); - DCDB->CDB[5] = 0; /* Control */ - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.StartDeviceStateScan) - { - Controller->V1.DeviceStateChannel = 0; - Controller->V1.DeviceStateTargetID = 0; - Controller->V1.StartDeviceStateScan = false; - } - else if (++Controller->V1.DeviceStateTargetID == Controller->Targets) - { - Controller->V1.DeviceStateChannel++; - Controller->V1.DeviceStateTargetID = 0; - } - if (Controller->V1.DeviceStateChannel < Controller->Channels) - { - Controller->V1.NewDeviceState->DeviceState = - DAC960_V1_Device_Dead; - Command->V1.CommandMailbox.Type3D.CommandOpcode = - DAC960_V1_GetDeviceState; - Command->V1.CommandMailbox.Type3D.Channel = - Controller->V1.DeviceStateChannel; - Command->V1.CommandMailbox.Type3D.TargetID = - Controller->V1.DeviceStateTargetID; - Command->V1.CommandMailbox.Type3D.BusAddress = - Controller->V1.NewDeviceStateDMA; - DAC960_QueueCommand(Command); - return; - } - Controller->V1.NeedDeviceStateInformation = false; - } - if (Controller->V1.NeedLogicalDriveInformation) - { - Controller->V1.NeedLogicalDriveInformation = false; - Command->V1.CommandMailbox.Type3.CommandOpcode = - DAC960_V1_GetLogicalDriveInformation; - Command->V1.CommandMailbox.Type3.BusAddress = - Controller->V1.NewLogicalDriveInformationDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedRebuildProgress) - { - Controller->V1.NeedRebuildProgress = false; - Command->V1.CommandMailbox.Type3.CommandOpcode = - DAC960_V1_GetRebuildProgress; - Command->V1.CommandMailbox.Type3.BusAddress = - Controller->V1.RebuildProgressDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedConsistencyCheckProgress) - { - Controller->V1.NeedConsistencyCheckProgress = false; - Command->V1.CommandMailbox.Type3.CommandOpcode = - DAC960_V1_RebuildStat; - Command->V1.CommandMailbox.Type3.BusAddress = - Controller->V1.RebuildProgressDMA; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V1.NeedBackgroundInitializationStatus) - { - Controller->V1.NeedBackgroundInitializationStatus = false; - Command->V1.CommandMailbox.Type3B.CommandOpcode = - DAC960_V1_BackgroundInitializationControl; - 
Command->V1.CommandMailbox.Type3B.CommandOpcode2 = 0x20; - Command->V1.CommandMailbox.Type3B.BusAddress = - Controller->V1.BackgroundInitializationStatusDMA; - DAC960_QueueCommand(Command); - return; - } - Controller->MonitoringTimerCount++; - Controller->MonitoringTimer.expires = - jiffies + DAC960_MonitoringTimerInterval; - add_timer(&Controller->MonitoringTimer); - } - if (CommandType == DAC960_ImmediateCommand) - { - complete(Command->Completion); - Command->Completion = NULL; - return; - } - if (CommandType == DAC960_QueuedCommand) - { - DAC960_V1_KernelCommand_T *KernelCommand = Command->V1.KernelCommand; - KernelCommand->CommandStatus = Command->V1.CommandStatus; - Command->V1.KernelCommand = NULL; - if (CommandOpcode == DAC960_V1_DCDB) - Controller->V1.DirectCommandActive[KernelCommand->DCDB->Channel] - [KernelCommand->DCDB->TargetID] = - false; - DAC960_DeallocateCommand(Command); - KernelCommand->CompletionFunction(KernelCommand); - return; - } - /* - Queue a Status Monitoring Command to the Controller using the just - completed Command if one was deferred previously due to lack of a - free Command when the Monitoring Timer Function was called. - */ - if (Controller->MonitoringCommandDeferred) - { - Controller->MonitoringCommandDeferred = false; - DAC960_V1_QueueMonitoringCommand(Command); - return; - } - /* - Deallocate the Command. - */ - DAC960_DeallocateCommand(Command); - /* - Wake up any processes waiting on a free Command. - */ - wake_up(&Controller->CommandWaitQueue); -} - - -/* - DAC960_V2_ReadWriteError prints an appropriate error message for Command - when an error occurs on a Read or Write operation. -*/ - -static void DAC960_V2_ReadWriteError(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - static const unsigned char *SenseErrors[] = { - "NO SENSE", "RECOVERED ERROR", - "NOT READY", "MEDIUM ERROR", - "HARDWARE ERROR", "ILLEGAL REQUEST", - "UNIT ATTENTION", "DATA PROTECT", - "BLANK CHECK", "VENDOR-SPECIFIC", - "COPY ABORTED", "ABORTED COMMAND", - "EQUAL", "VOLUME OVERFLOW", - "MISCOMPARE", "RESERVED" - }; - unsigned char *CommandName = "UNKNOWN"; - switch (Command->CommandType) - { - case DAC960_ReadCommand: - case DAC960_ReadRetryCommand: - CommandName = "READ"; - break; - case DAC960_WriteCommand: - case DAC960_WriteRetryCommand: - CommandName = "WRITE"; - break; - case DAC960_MonitoringCommand: - case DAC960_ImmediateCommand: - case DAC960_QueuedCommand: - break; - } - DAC960_Error("Error Condition %s on %s:\n", Controller, - SenseErrors[Command->V2.RequestSense->SenseKey], CommandName); - DAC960_Error(" /dev/rd/c%dd%d: absolute blocks %u..%u\n", - Controller, Controller->ControllerNumber, - Command->LogicalDriveNumber, Command->BlockNumber, - Command->BlockNumber + Command->BlockCount - 1); -} - - -/* - DAC960_V2_ReportEvent prints an appropriate message when a Controller Event - occurs. 
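DAC960_V2_ReportEvent below resolves event codes against a table terminated by a { 0, "" } sentinel and dispatches on the first character of each message ('P' physical, 'L'/'M' logical, 'S' sense data, 'E' enclosure, 'C' controller). A standalone sketch of that sentinel-lookup pattern, using a two-entry subset of the table:

    #include <stdio.h>

    struct event_entry { int code; const char *msg; };

    /* The table ends with a { 0, "" } sentinel, so an unknown code
       terminates the scan at the sentinel entry. */
    static const struct event_entry events[] = {
        { 0x0001, "P Online" },
        { 0x0086, "L Offline" },
        { 0x0000, "" }
    };

    static const struct event_entry *lookup_event(int code)
    {
        int i = 0;

        while (events[i].code != code && events[i].code != 0)
            i++;
        return &events[i];
    }

    int main(void)
    {
        const struct event_entry *e = lookup_event(0x0086);

        if (e->code == 0)
            printf("unknown event\n");
        else
            printf("type '%c': %s\n", e->msg[0], e->msg + 2);
        return 0;
    }
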
-*/ - -static void DAC960_V2_ReportEvent(DAC960_Controller_T *Controller, - DAC960_V2_Event_T *Event) -{ - DAC960_SCSI_RequestSense_T *RequestSense = - (DAC960_SCSI_RequestSense_T *) &Event->RequestSenseData; - unsigned char MessageBuffer[DAC960_LineBufferSize]; - static struct { int EventCode; unsigned char *EventMessage; } EventList[] = - { /* Physical Device Events (0x0000 - 0x007F) */ - { 0x0001, "P Online" }, - { 0x0002, "P Standby" }, - { 0x0005, "P Automatic Rebuild Started" }, - { 0x0006, "P Manual Rebuild Started" }, - { 0x0007, "P Rebuild Completed" }, - { 0x0008, "P Rebuild Cancelled" }, - { 0x0009, "P Rebuild Failed for Unknown Reasons" }, - { 0x000A, "P Rebuild Failed due to New Physical Device" }, - { 0x000B, "P Rebuild Failed due to Logical Drive Failure" }, - { 0x000C, "S Offline" }, - { 0x000D, "P Found" }, - { 0x000E, "P Removed" }, - { 0x000F, "P Unconfigured" }, - { 0x0010, "P Expand Capacity Started" }, - { 0x0011, "P Expand Capacity Completed" }, - { 0x0012, "P Expand Capacity Failed" }, - { 0x0013, "P Command Timed Out" }, - { 0x0014, "P Command Aborted" }, - { 0x0015, "P Command Retried" }, - { 0x0016, "P Parity Error" }, - { 0x0017, "P Soft Error" }, - { 0x0018, "P Miscellaneous Error" }, - { 0x0019, "P Reset" }, - { 0x001A, "P Active Spare Found" }, - { 0x001B, "P Warm Spare Found" }, - { 0x001C, "S Sense Data Received" }, - { 0x001D, "P Initialization Started" }, - { 0x001E, "P Initialization Completed" }, - { 0x001F, "P Initialization Failed" }, - { 0x0020, "P Initialization Cancelled" }, - { 0x0021, "P Failed because Write Recovery Failed" }, - { 0x0022, "P Failed because SCSI Bus Reset Failed" }, - { 0x0023, "P Failed because of Double Check Condition" }, - { 0x0024, "P Failed because Device Cannot Be Accessed" }, - { 0x0025, "P Failed because of Gross Error on SCSI Processor" }, - { 0x0026, "P Failed because of Bad Tag from Device" }, - { 0x0027, "P Failed because of Command Timeout" }, - { 0x0028, "P Failed because of System Reset" }, - { 0x0029, "P Failed because of Busy Status or Parity Error" }, - { 0x002A, "P Failed because Host Set Device to Failed State" }, - { 0x002B, "P Failed because of Selection Timeout" }, - { 0x002C, "P Failed because of SCSI Bus Phase Error" }, - { 0x002D, "P Failed because Device Returned Unknown Status" }, - { 0x002E, "P Failed because Device Not Ready" }, - { 0x002F, "P Failed because Device Not Found at Startup" }, - { 0x0030, "P Failed because COD Write Operation Failed" }, - { 0x0031, "P Failed because BDT Write Operation Failed" }, - { 0x0039, "P Missing at Startup" }, - { 0x003A, "P Start Rebuild Failed due to Physical Drive Too Small" }, - { 0x003C, "P Temporarily Offline Device Automatically Made Online" }, - { 0x003D, "P Standby Rebuild Started" }, - /* Logical Device Events (0x0080 - 0x00FF) */ - { 0x0080, "M Consistency Check Started" }, - { 0x0081, "M Consistency Check Completed" }, - { 0x0082, "M Consistency Check Cancelled" }, - { 0x0083, "M Consistency Check Completed With Errors" }, - { 0x0084, "M Consistency Check Failed due to Logical Drive Failure" }, - { 0x0085, "M Consistency Check Failed due to Physical Device Failure" }, - { 0x0086, "L Offline" }, - { 0x0087, "L Critical" }, - { 0x0088, "L Online" }, - { 0x0089, "M Automatic Rebuild Started" }, - { 0x008A, "M Manual Rebuild Started" }, - { 0x008B, "M Rebuild Completed" }, - { 0x008C, "M Rebuild Cancelled" }, - { 0x008D, "M Rebuild Failed for Unknown Reasons" }, - { 0x008E, "M Rebuild Failed due to New Physical Device" }, - { 0x008F, "M Rebuild Failed due 
to Logical Drive Failure" }, - { 0x0090, "M Initialization Started" }, - { 0x0091, "M Initialization Completed" }, - { 0x0092, "M Initialization Cancelled" }, - { 0x0093, "M Initialization Failed" }, - { 0x0094, "L Found" }, - { 0x0095, "L Deleted" }, - { 0x0096, "M Expand Capacity Started" }, - { 0x0097, "M Expand Capacity Completed" }, - { 0x0098, "M Expand Capacity Failed" }, - { 0x0099, "L Bad Block Found" }, - { 0x009A, "L Size Changed" }, - { 0x009B, "L Type Changed" }, - { 0x009C, "L Bad Data Block Found" }, - { 0x009E, "L Read of Data Block in BDT" }, - { 0x009F, "L Write Back Data for Disk Block Lost" }, - { 0x00A0, "L Temporarily Offline RAID-5/3 Drive Made Online" }, - { 0x00A1, "L Temporarily Offline RAID-6/1/0/7 Drive Made Online" }, - { 0x00A2, "L Standby Rebuild Started" }, - /* Fault Management Events (0x0100 - 0x017F) */ - { 0x0140, "E Fan %d Failed" }, - { 0x0141, "E Fan %d OK" }, - { 0x0142, "E Fan %d Not Present" }, - { 0x0143, "E Power Supply %d Failed" }, - { 0x0144, "E Power Supply %d OK" }, - { 0x0145, "E Power Supply %d Not Present" }, - { 0x0146, "E Temperature Sensor %d Temperature Exceeds Safe Limit" }, - { 0x0147, "E Temperature Sensor %d Temperature Exceeds Working Limit" }, - { 0x0148, "E Temperature Sensor %d Temperature Normal" }, - { 0x0149, "E Temperature Sensor %d Not Present" }, - { 0x014A, "E Enclosure Management Unit %d Access Critical" }, - { 0x014B, "E Enclosure Management Unit %d Access OK" }, - { 0x014C, "E Enclosure Management Unit %d Access Offline" }, - /* Controller Events (0x0180 - 0x01FF) */ - { 0x0181, "C Cache Write Back Error" }, - { 0x0188, "C Battery Backup Unit Found" }, - { 0x0189, "C Battery Backup Unit Charge Level Low" }, - { 0x018A, "C Battery Backup Unit Charge Level OK" }, - { 0x0193, "C Installation Aborted" }, - { 0x0195, "C Battery Backup Unit Physically Removed" }, - { 0x0196, "C Memory Error During Warm Boot" }, - { 0x019E, "C Memory Soft ECC Error Corrected" }, - { 0x019F, "C Memory Hard ECC Error Corrected" }, - { 0x01A2, "C Battery Backup Unit Failed" }, - { 0x01AB, "C Mirror Race Recovery Failed" }, - { 0x01AC, "C Mirror Race on Critical Drive" }, - /* Controller Internal Processor Events */ - { 0x0380, "C Internal Controller Hung" }, - { 0x0381, "C Internal Controller Firmware Breakpoint" }, - { 0x0390, "C Internal Controller i960 Processor Specific Error" }, - { 0x03A0, "C Internal Controller StrongARM Processor Specific Error" }, - { 0, "" } }; - int EventListIndex = 0, EventCode; - unsigned char EventType, *EventMessage; - if (Event->EventCode == 0x1C && - RequestSense->SenseKey == DAC960_SenseKey_VendorSpecific && - (RequestSense->AdditionalSenseCode == 0x80 || - RequestSense->AdditionalSenseCode == 0x81)) - Event->EventCode = ((RequestSense->AdditionalSenseCode - 0x80) << 8) | - RequestSense->AdditionalSenseCodeQualifier; - while (true) - { - EventCode = EventList[EventListIndex].EventCode; - if (EventCode == Event->EventCode || EventCode == 0) break; - EventListIndex++; - } - EventType = EventList[EventListIndex].EventMessage[0]; - EventMessage = &EventList[EventListIndex].EventMessage[2]; - if (EventCode == 0) - { - DAC960_Critical("Unknown Controller Event Code %04X\n", - Controller, Event->EventCode); - return; - } - switch (EventType) - { - case 'P': - DAC960_Critical("Physical Device %d:%d %s\n", Controller, - Event->Channel, Event->TargetID, EventMessage); - break; - case 'L': - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) %s\n", Controller, - Event->LogicalUnit, Controller->ControllerNumber, - 
Event->LogicalUnit, EventMessage); - break; - case 'M': - DAC960_Progress("Logical Drive %d (/dev/rd/c%dd%d) %s\n", Controller, - Event->LogicalUnit, Controller->ControllerNumber, - Event->LogicalUnit, EventMessage); - break; - case 'S': - if (RequestSense->SenseKey == DAC960_SenseKey_NoSense || - (RequestSense->SenseKey == DAC960_SenseKey_NotReady && - RequestSense->AdditionalSenseCode == 0x04 && - (RequestSense->AdditionalSenseCodeQualifier == 0x01 || - RequestSense->AdditionalSenseCodeQualifier == 0x02))) - break; - DAC960_Critical("Physical Device %d:%d %s\n", Controller, - Event->Channel, Event->TargetID, EventMessage); - DAC960_Critical("Physical Device %d:%d Request Sense: " - "Sense Key = %X, ASC = %02X, ASCQ = %02X\n", - Controller, - Event->Channel, - Event->TargetID, - RequestSense->SenseKey, - RequestSense->AdditionalSenseCode, - RequestSense->AdditionalSenseCodeQualifier); - DAC960_Critical("Physical Device %d:%d Request Sense: " - "Information = %02X%02X%02X%02X " - "%02X%02X%02X%02X\n", - Controller, - Event->Channel, - Event->TargetID, - RequestSense->Information[0], - RequestSense->Information[1], - RequestSense->Information[2], - RequestSense->Information[3], - RequestSense->CommandSpecificInformation[0], - RequestSense->CommandSpecificInformation[1], - RequestSense->CommandSpecificInformation[2], - RequestSense->CommandSpecificInformation[3]); - break; - case 'E': - if (Controller->SuppressEnclosureMessages) break; - sprintf(MessageBuffer, EventMessage, Event->LogicalUnit); - DAC960_Critical("Enclosure %d %s\n", Controller, - Event->TargetID, MessageBuffer); - break; - case 'C': - DAC960_Critical("Controller %s\n", Controller, EventMessage); - break; - default: - DAC960_Critical("Unknown Controller Event Code %04X\n", - Controller, Event->EventCode); - break; - } -} - - -/* - DAC960_V2_ReportProgress prints an appropriate progress message for - Logical Device Long Operations. -*/ - -static void DAC960_V2_ReportProgress(DAC960_Controller_T *Controller, - unsigned char *MessageString, - unsigned int LogicalDeviceNumber, - unsigned long BlocksCompleted, - unsigned long LogicalDeviceSize) -{ - Controller->EphemeralProgressMessage = true; - DAC960_Progress("%s in Progress: Logical Drive %d (/dev/rd/c%dd%d) " - "%d%% completed\n", Controller, - MessageString, - LogicalDeviceNumber, - Controller->ControllerNumber, - LogicalDeviceNumber, - (100 * (BlocksCompleted >> 7)) / (LogicalDeviceSize >> 7)); - Controller->EphemeralProgressMessage = false; -} - - -/* - DAC960_V2_ProcessCompletedCommand performs completion processing for Command - for DAC960 V2 Firmware Controllers. 
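-*/

DAC960_V2_ReportProgress computes (100 * (BlocksCompleted >> 7)) / (LogicalDeviceSize >> 7): shifting both operands down by 7 before multiplying, presumably to keep 100 * blocks inside 32 bits on large logical drives, while leaving the percentage unchanged. A worked standalone sketch:

    #include <stdint.h>
    #include <stdio.h>

    /* Percent complete without 32-bit overflow, as in the driver's
       progress messages; the >> 7 cancels out in the division. */
    static unsigned int percent_done(uint32_t done, uint32_t total)
    {
        if ((total >> 7) == 0)
            return 0;           /* tiny device, avoid divide by zero */
        return (100 * (done >> 7)) / (total >> 7);
    }

    int main(void)
    {
        /* 100 * 0xC0000000 would overflow; this still prints 75%. */
        printf("%u%%\n", percent_done(0xC0000000u, 0xFFFFFFFFu));
        return 0;
    }
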
- -static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command) -{ - DAC960_Controller_T *Controller = Command->Controller; - DAC960_CommandType_T CommandType = Command->CommandType; - DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox; - DAC960_V2_IOCTL_Opcode_T IOCTLOpcode = CommandMailbox->Common.IOCTL_Opcode; - DAC960_V2_CommandOpcode_T CommandOpcode = CommandMailbox->SCSI_10.CommandOpcode; - DAC960_V2_CommandStatus_T CommandStatus = Command->V2.CommandStatus; - - if (CommandType == DAC960_ReadCommand || - CommandType == DAC960_WriteCommand) - { - -#ifdef FORCE_RETRY_DEBUG - CommandStatus = DAC960_V2_AbormalCompletion; - Command->V2.RequestSense->SenseKey = DAC960_SenseKey_MediumError; -#endif - - if (CommandStatus == DAC960_V2_NormalCompletion) { - - if (!DAC960_ProcessCompletedRequest(Command, true)) - BUG(); - - } else if (Command->V2.RequestSense->SenseKey == DAC960_SenseKey_MediumError) - { - /* - * break the command down into pieces and resubmit each - * piece, hoping that some of them will succeed. - */ - DAC960_queue_partial_rw(Command); - return; - } - else - { - if (Command->V2.RequestSense->SenseKey != DAC960_SenseKey_NotReady) - DAC960_V2_ReadWriteError(Command); - /* - Perform completion processing for all buffers in this I/O Request. - */ - (void)DAC960_ProcessCompletedRequest(Command, false); - } - } - else if (CommandType == DAC960_ReadRetryCommand || - CommandType == DAC960_WriteRetryCommand) - { - bool normal_completion; - -#ifdef FORCE_RETRY_FAILURE_DEBUG - static int retry_count = 1; -#endif - /* - Perform completion processing for the portion that was - retried, and submit the next portion, if any. - */ - normal_completion = true; - if (CommandStatus != DAC960_V2_NormalCompletion) { - normal_completion = false; - if (Command->V2.RequestSense->SenseKey != DAC960_SenseKey_NotReady) - DAC960_V2_ReadWriteError(Command); - } - -#ifdef FORCE_RETRY_FAILURE_DEBUG - if (!(++retry_count % 10000)) { - printk("V2 error retry failure test\n"); - normal_completion = false; - DAC960_V2_ReadWriteError(Command); - } -#endif - - if (!DAC960_ProcessCompletedRequest(Command, normal_completion)) { - DAC960_queue_partial_rw(Command); - return; - } - } - else if (CommandType == DAC960_MonitoringCommand) - { - if (Controller->ShutdownMonitoringTimer) - return; - if (IOCTLOpcode == DAC960_V2_GetControllerInfo) - { - DAC960_V2_ControllerInfo_T *NewControllerInfo = - Controller->V2.NewControllerInformation; - DAC960_V2_ControllerInfo_T *ControllerInfo = - &Controller->V2.ControllerInformation; - Controller->LogicalDriveCount = - NewControllerInfo->LogicalDevicesPresent; - Controller->V2.NeedLogicalDeviceInformation = true; - Controller->V2.NeedPhysicalDeviceInformation = true; - Controller->V2.StartLogicalDeviceInformationScan = true; - Controller->V2.StartPhysicalDeviceInformationScan = true; - Controller->MonitoringAlertMode = - (NewControllerInfo->LogicalDevicesCritical > 0 || - NewControllerInfo->LogicalDevicesOffline > 0 || - NewControllerInfo->PhysicalDisksCritical > 0 || - NewControllerInfo->PhysicalDisksOffline > 0); - memcpy(ControllerInfo, NewControllerInfo, - sizeof(DAC960_V2_ControllerInfo_T)); - } - else if (IOCTLOpcode == DAC960_V2_GetEvent) - { - if (CommandStatus == DAC960_V2_NormalCompletion) { - DAC960_V2_ReportEvent(Controller, Controller->V2.Event); - } - Controller->V2.NextEventSequenceNumber++; - } - else if (IOCTLOpcode == DAC960_V2_GetPhysicalDeviceInfoValid && - CommandStatus == DAC960_V2_NormalCompletion) - { - 
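The branch below keeps PhysicalDeviceInformation[] densely packed and sorted by (channel, target, LUN), shifting neighbouring entries whenever a device appears or disappears. A sketch of that invariant with memmove (hypothetical types, not the driver's structures):

    #include <string.h>

    #define MAX_DEVS 16

    struct dev_key { unsigned char channel, target, lun; };

    static struct dev_key devs[MAX_DEVS];
    static int ndevs;

    static int key_cmp(const struct dev_key *a, const struct dev_key *b)
    {
        if (a->channel != b->channel) return a->channel - b->channel;
        if (a->target != b->target)   return a->target - b->target;
        return a->lun - b->lun;
    }

    /* Insert at the first slot whose key is not smaller, shifting the
       tail up by one, as the driver does for its info/serial arrays. */
    static int dev_insert(struct dev_key k)
    {
        int i = 0;

        while (i < ndevs && key_cmp(&devs[i], &k) < 0)
            i++;
        if (ndevs == MAX_DEVS)
            return -1;
        memmove(&devs[i + 1], &devs[i],
                (size_t)(ndevs - i) * sizeof(devs[0]));
        devs[i] = k;
        ndevs++;
        return i;
    }

    int main(void)
    {
        struct dev_key a = { 0, 3, 0 }, b = { 0, 1, 0 };

        dev_insert(a);
        return dev_insert(b) == 0 ? 0 : 1; /* b sorts before a */
    }
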
DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInfo = - Controller->V2.NewPhysicalDeviceInformation; - unsigned int PhysicalDeviceIndex = Controller->V2.PhysicalDeviceIndex; - DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo = - Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex]; - unsigned int DeviceIndex; - while (PhysicalDeviceInfo != NULL && - (NewPhysicalDeviceInfo->Channel > - PhysicalDeviceInfo->Channel || - (NewPhysicalDeviceInfo->Channel == - PhysicalDeviceInfo->Channel && - (NewPhysicalDeviceInfo->TargetID > - PhysicalDeviceInfo->TargetID || - (NewPhysicalDeviceInfo->TargetID == - PhysicalDeviceInfo->TargetID && - NewPhysicalDeviceInfo->LogicalUnit > - PhysicalDeviceInfo->LogicalUnit))))) - { - DAC960_Critical("Physical Device %d:%d No Longer Exists\n", - Controller, - PhysicalDeviceInfo->Channel, - PhysicalDeviceInfo->TargetID); - Controller->V2.PhysicalDeviceInformation - [PhysicalDeviceIndex] = NULL; - Controller->V2.InquiryUnitSerialNumber - [PhysicalDeviceIndex] = NULL; - kfree(PhysicalDeviceInfo); - kfree(InquiryUnitSerialNumber); - for (DeviceIndex = PhysicalDeviceIndex; - DeviceIndex < DAC960_V2_MaxPhysicalDevices - 1; - DeviceIndex++) - { - Controller->V2.PhysicalDeviceInformation[DeviceIndex] = - Controller->V2.PhysicalDeviceInformation[DeviceIndex+1]; - Controller->V2.InquiryUnitSerialNumber[DeviceIndex] = - Controller->V2.InquiryUnitSerialNumber[DeviceIndex+1]; - } - Controller->V2.PhysicalDeviceInformation - [DAC960_V2_MaxPhysicalDevices-1] = NULL; - Controller->V2.InquiryUnitSerialNumber - [DAC960_V2_MaxPhysicalDevices-1] = NULL; - PhysicalDeviceInfo = - Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex]; - InquiryUnitSerialNumber = - Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex]; - } - if (PhysicalDeviceInfo == NULL || - (NewPhysicalDeviceInfo->Channel != - PhysicalDeviceInfo->Channel) || - (NewPhysicalDeviceInfo->TargetID != - PhysicalDeviceInfo->TargetID) || - (NewPhysicalDeviceInfo->LogicalUnit != - PhysicalDeviceInfo->LogicalUnit)) - { - PhysicalDeviceInfo = - kmalloc(sizeof(DAC960_V2_PhysicalDeviceInfo_T), GFP_ATOMIC); - InquiryUnitSerialNumber = - kmalloc(sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), - GFP_ATOMIC); - if (InquiryUnitSerialNumber == NULL || - PhysicalDeviceInfo == NULL) - { - kfree(InquiryUnitSerialNumber); - InquiryUnitSerialNumber = NULL; - kfree(PhysicalDeviceInfo); - PhysicalDeviceInfo = NULL; - } - DAC960_Critical("Physical Device %d:%d Now Exists%s\n", - Controller, - NewPhysicalDeviceInfo->Channel, - NewPhysicalDeviceInfo->TargetID, - (PhysicalDeviceInfo != NULL - ? 
"" : " - Allocation Failed")); - if (PhysicalDeviceInfo != NULL) - { - memset(PhysicalDeviceInfo, 0, - sizeof(DAC960_V2_PhysicalDeviceInfo_T)); - PhysicalDeviceInfo->PhysicalDeviceState = - DAC960_V2_Device_InvalidState; - memset(InquiryUnitSerialNumber, 0, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - for (DeviceIndex = DAC960_V2_MaxPhysicalDevices - 1; - DeviceIndex > PhysicalDeviceIndex; - DeviceIndex--) - { - Controller->V2.PhysicalDeviceInformation[DeviceIndex] = - Controller->V2.PhysicalDeviceInformation[DeviceIndex-1]; - Controller->V2.InquiryUnitSerialNumber[DeviceIndex] = - Controller->V2.InquiryUnitSerialNumber[DeviceIndex-1]; - } - Controller->V2.PhysicalDeviceInformation - [PhysicalDeviceIndex] = - PhysicalDeviceInfo; - Controller->V2.InquiryUnitSerialNumber - [PhysicalDeviceIndex] = - InquiryUnitSerialNumber; - Controller->V2.NeedDeviceSerialNumberInformation = true; - } - } - if (PhysicalDeviceInfo != NULL) - { - if (NewPhysicalDeviceInfo->PhysicalDeviceState != - PhysicalDeviceInfo->PhysicalDeviceState) - DAC960_Critical( - "Physical Device %d:%d is now %s\n", Controller, - NewPhysicalDeviceInfo->Channel, - NewPhysicalDeviceInfo->TargetID, - (NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Online - ? "ONLINE" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Rebuild - ? "REBUILD" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Missing - ? "MISSING" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Critical - ? "CRITICAL" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Dead - ? "DEAD" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_SuspectedDead - ? "SUSPECTED-DEAD" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_CommandedOffline - ? "COMMANDED-OFFLINE" - : NewPhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Standby - ? 
"STANDBY" : "UNKNOWN")); - if ((NewPhysicalDeviceInfo->ParityErrors != - PhysicalDeviceInfo->ParityErrors) || - (NewPhysicalDeviceInfo->SoftErrors != - PhysicalDeviceInfo->SoftErrors) || - (NewPhysicalDeviceInfo->HardErrors != - PhysicalDeviceInfo->HardErrors) || - (NewPhysicalDeviceInfo->MiscellaneousErrors != - PhysicalDeviceInfo->MiscellaneousErrors) || - (NewPhysicalDeviceInfo->CommandTimeouts != - PhysicalDeviceInfo->CommandTimeouts) || - (NewPhysicalDeviceInfo->Retries != - PhysicalDeviceInfo->Retries) || - (NewPhysicalDeviceInfo->Aborts != - PhysicalDeviceInfo->Aborts) || - (NewPhysicalDeviceInfo->PredictedFailuresDetected != - PhysicalDeviceInfo->PredictedFailuresDetected)) - { - DAC960_Critical("Physical Device %d:%d Errors: " - "Parity = %d, Soft = %d, " - "Hard = %d, Misc = %d\n", - Controller, - NewPhysicalDeviceInfo->Channel, - NewPhysicalDeviceInfo->TargetID, - NewPhysicalDeviceInfo->ParityErrors, - NewPhysicalDeviceInfo->SoftErrors, - NewPhysicalDeviceInfo->HardErrors, - NewPhysicalDeviceInfo->MiscellaneousErrors); - DAC960_Critical("Physical Device %d:%d Errors: " - "Timeouts = %d, Retries = %d, " - "Aborts = %d, Predicted = %d\n", - Controller, - NewPhysicalDeviceInfo->Channel, - NewPhysicalDeviceInfo->TargetID, - NewPhysicalDeviceInfo->CommandTimeouts, - NewPhysicalDeviceInfo->Retries, - NewPhysicalDeviceInfo->Aborts, - NewPhysicalDeviceInfo - ->PredictedFailuresDetected); - } - if ((PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_Dead || - PhysicalDeviceInfo->PhysicalDeviceState - == DAC960_V2_Device_InvalidState) && - NewPhysicalDeviceInfo->PhysicalDeviceState - != DAC960_V2_Device_Dead) - Controller->V2.NeedDeviceSerialNumberInformation = true; - memcpy(PhysicalDeviceInfo, NewPhysicalDeviceInfo, - sizeof(DAC960_V2_PhysicalDeviceInfo_T)); - } - NewPhysicalDeviceInfo->LogicalUnit++; - Controller->V2.PhysicalDeviceIndex++; - } - else if (IOCTLOpcode == DAC960_V2_GetPhysicalDeviceInfoValid) - { - unsigned int DeviceIndex; - for (DeviceIndex = Controller->V2.PhysicalDeviceIndex; - DeviceIndex < DAC960_V2_MaxPhysicalDevices; - DeviceIndex++) - { - DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo = - Controller->V2.PhysicalDeviceInformation[DeviceIndex]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - Controller->V2.InquiryUnitSerialNumber[DeviceIndex]; - if (PhysicalDeviceInfo == NULL) break; - DAC960_Critical("Physical Device %d:%d No Longer Exists\n", - Controller, - PhysicalDeviceInfo->Channel, - PhysicalDeviceInfo->TargetID); - Controller->V2.PhysicalDeviceInformation[DeviceIndex] = NULL; - Controller->V2.InquiryUnitSerialNumber[DeviceIndex] = NULL; - kfree(PhysicalDeviceInfo); - kfree(InquiryUnitSerialNumber); - } - Controller->V2.NeedPhysicalDeviceInformation = false; - } - else if (IOCTLOpcode == DAC960_V2_GetLogicalDeviceInfoValid && - CommandStatus == DAC960_V2_NormalCompletion) - { - DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInfo = - Controller->V2.NewLogicalDeviceInformation; - unsigned short LogicalDeviceNumber = - NewLogicalDeviceInfo->LogicalDeviceNumber; - DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo = - Controller->V2.LogicalDeviceInformation[LogicalDeviceNumber]; - if (LogicalDeviceInfo == NULL) - { - DAC960_V2_PhysicalDevice_T PhysicalDevice; - PhysicalDevice.Controller = 0; - PhysicalDevice.Channel = NewLogicalDeviceInfo->Channel; - PhysicalDevice.TargetID = NewLogicalDeviceInfo->TargetID; - PhysicalDevice.LogicalUnit = NewLogicalDeviceInfo->LogicalUnit; - 
Controller->V2.LogicalDriveToVirtualDevice[LogicalDeviceNumber] = - PhysicalDevice; - LogicalDeviceInfo = kmalloc(sizeof(DAC960_V2_LogicalDeviceInfo_T), - GFP_ATOMIC); - Controller->V2.LogicalDeviceInformation[LogicalDeviceNumber] = - LogicalDeviceInfo; - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "Now Exists%s\n", Controller, - LogicalDeviceNumber, - Controller->ControllerNumber, - LogicalDeviceNumber, - (LogicalDeviceInfo != NULL - ? "" : " - Allocation Failed")); - if (LogicalDeviceInfo != NULL) - { - memset(LogicalDeviceInfo, 0, - sizeof(DAC960_V2_LogicalDeviceInfo_T)); - DAC960_ComputeGenericDiskInfo(Controller); - } - } - if (LogicalDeviceInfo != NULL) - { - unsigned long LogicalDeviceSize = - NewLogicalDeviceInfo->ConfigurableDeviceSize; - if (NewLogicalDeviceInfo->LogicalDeviceState != - LogicalDeviceInfo->LogicalDeviceState) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "is now %s\n", Controller, - LogicalDeviceNumber, - Controller->ControllerNumber, - LogicalDeviceNumber, - (NewLogicalDeviceInfo->LogicalDeviceState - == DAC960_V2_LogicalDevice_Online - ? "ONLINE" - : NewLogicalDeviceInfo->LogicalDeviceState - == DAC960_V2_LogicalDevice_Critical - ? "CRITICAL" : "OFFLINE")); - if ((NewLogicalDeviceInfo->SoftErrors != - LogicalDeviceInfo->SoftErrors) || - (NewLogicalDeviceInfo->CommandsFailed != - LogicalDeviceInfo->CommandsFailed) || - (NewLogicalDeviceInfo->DeferredWriteErrors != - LogicalDeviceInfo->DeferredWriteErrors)) - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) Errors: " - "Soft = %d, Failed = %d, Deferred Write = %d\n", - Controller, LogicalDeviceNumber, - Controller->ControllerNumber, - LogicalDeviceNumber, - NewLogicalDeviceInfo->SoftErrors, - NewLogicalDeviceInfo->CommandsFailed, - NewLogicalDeviceInfo->DeferredWriteErrors); - if (NewLogicalDeviceInfo->ConsistencyCheckInProgress) - DAC960_V2_ReportProgress(Controller, - "Consistency Check", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->ConsistencyCheckBlockNumber, - LogicalDeviceSize); - else if (NewLogicalDeviceInfo->RebuildInProgress) - DAC960_V2_ReportProgress(Controller, - "Rebuild", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->RebuildBlockNumber, - LogicalDeviceSize); - else if (NewLogicalDeviceInfo->BackgroundInitializationInProgress) - DAC960_V2_ReportProgress(Controller, - "Background Initialization", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->BackgroundInitializationBlockNumber, - LogicalDeviceSize); - else if (NewLogicalDeviceInfo->ForegroundInitializationInProgress) - DAC960_V2_ReportProgress(Controller, - "Foreground Initialization", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->ForegroundInitializationBlockNumber, - LogicalDeviceSize); - else if (NewLogicalDeviceInfo->DataMigrationInProgress) - DAC960_V2_ReportProgress(Controller, - "Data Migration", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->DataMigrationBlockNumber, - LogicalDeviceSize); - else if (NewLogicalDeviceInfo->PatrolOperationInProgress) - DAC960_V2_ReportProgress(Controller, - "Patrol Operation", - LogicalDeviceNumber, - NewLogicalDeviceInfo - ->PatrolOperationBlockNumber, - LogicalDeviceSize); - if (LogicalDeviceInfo->BackgroundInitializationInProgress && - !NewLogicalDeviceInfo->BackgroundInitializationInProgress) - DAC960_Progress("Logical Drive %d (/dev/rd/c%dd%d) " - "Background Initialization %s\n", - Controller, - LogicalDeviceNumber, - Controller->ControllerNumber, - LogicalDeviceNumber, - (NewLogicalDeviceInfo->LogicalDeviceControl - .LogicalDeviceInitialized - ? 
"Completed" : "Failed")); - memcpy(LogicalDeviceInfo, NewLogicalDeviceInfo, - sizeof(DAC960_V2_LogicalDeviceInfo_T)); - } - Controller->V2.LogicalDriveFoundDuringScan - [LogicalDeviceNumber] = true; - NewLogicalDeviceInfo->LogicalDeviceNumber++; - } - else if (IOCTLOpcode == DAC960_V2_GetLogicalDeviceInfoValid) - { - int LogicalDriveNumber; - for (LogicalDriveNumber = 0; - LogicalDriveNumber < DAC960_MaxLogicalDrives; - LogicalDriveNumber++) - { - DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo = - Controller->V2.LogicalDeviceInformation[LogicalDriveNumber]; - if (LogicalDeviceInfo == NULL || - Controller->V2.LogicalDriveFoundDuringScan - [LogicalDriveNumber]) - continue; - DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) " - "No Longer Exists\n", Controller, - LogicalDriveNumber, - Controller->ControllerNumber, - LogicalDriveNumber); - Controller->V2.LogicalDeviceInformation - [LogicalDriveNumber] = NULL; - kfree(LogicalDeviceInfo); - Controller->LogicalDriveInitiallyAccessible - [LogicalDriveNumber] = false; - DAC960_ComputeGenericDiskInfo(Controller); - } - Controller->V2.NeedLogicalDeviceInformation = false; - } - else if (CommandOpcode == DAC960_V2_SCSI_10_Passthru) - { - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - Controller->V2.InquiryUnitSerialNumber[Controller->V2.PhysicalDeviceIndex - 1]; - - if (CommandStatus != DAC960_V2_NormalCompletion) { - memset(InquiryUnitSerialNumber, - 0, sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - } else - memcpy(InquiryUnitSerialNumber, - Controller->V2.NewInquiryUnitSerialNumber, - sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T)); - - Controller->V2.NeedDeviceSerialNumberInformation = false; - } - - if (Controller->V2.HealthStatusBuffer->NextEventSequenceNumber - - Controller->V2.NextEventSequenceNumber > 0) - { - CommandMailbox->GetEvent.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->GetEvent.DataTransferSize = sizeof(DAC960_V2_Event_T); - CommandMailbox->GetEvent.EventSequenceNumberHigh16 = - Controller->V2.NextEventSequenceNumber >> 16; - CommandMailbox->GetEvent.ControllerNumber = 0; - CommandMailbox->GetEvent.IOCTL_Opcode = - DAC960_V2_GetEvent; - CommandMailbox->GetEvent.EventSequenceNumberLow16 = - Controller->V2.NextEventSequenceNumber & 0xFFFF; - CommandMailbox->GetEvent.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.EventDMA; - CommandMailbox->GetEvent.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->GetEvent.DataTransferSize; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V2.NeedPhysicalDeviceInformation) - { - if (Controller->V2.NeedDeviceSerialNumberInformation) - { - DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber = - Controller->V2.NewInquiryUnitSerialNumber; - InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F; - - DAC960_V2_ConstructNewUnitSerialNumber(Controller, CommandMailbox, - Controller->V2.NewPhysicalDeviceInformation->Channel, - Controller->V2.NewPhysicalDeviceInformation->TargetID, - Controller->V2.NewPhysicalDeviceInformation->LogicalUnit - 1); - - - DAC960_QueueCommand(Command); - return; - } - if (Controller->V2.StartPhysicalDeviceInformationScan) - { - Controller->V2.PhysicalDeviceIndex = 0; - Controller->V2.NewPhysicalDeviceInformation->Channel = 0; - Controller->V2.NewPhysicalDeviceInformation->TargetID = 0; - Controller->V2.NewPhysicalDeviceInformation->LogicalUnit = 0; - 
Controller->V2.StartPhysicalDeviceInformationScan = false; - } - CommandMailbox->PhysicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->PhysicalDeviceInfo.DataTransferSize = - sizeof(DAC960_V2_PhysicalDeviceInfo_T); - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.LogicalUnit = - Controller->V2.NewPhysicalDeviceInformation->LogicalUnit; - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.TargetID = - Controller->V2.NewPhysicalDeviceInformation->TargetID; - CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.Channel = - Controller->V2.NewPhysicalDeviceInformation->Channel; - CommandMailbox->PhysicalDeviceInfo.IOCTL_Opcode = - DAC960_V2_GetPhysicalDeviceInfoValid; - CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewPhysicalDeviceInformationDMA; - CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->PhysicalDeviceInfo.DataTransferSize; - DAC960_QueueCommand(Command); - return; - } - if (Controller->V2.NeedLogicalDeviceInformation) - { - if (Controller->V2.StartLogicalDeviceInformationScan) - { - int LogicalDriveNumber; - for (LogicalDriveNumber = 0; - LogicalDriveNumber < DAC960_MaxLogicalDrives; - LogicalDriveNumber++) - Controller->V2.LogicalDriveFoundDuringScan - [LogicalDriveNumber] = false; - Controller->V2.NewLogicalDeviceInformation->LogicalDeviceNumber = 0; - Controller->V2.StartLogicalDeviceInformationScan = false; - } - CommandMailbox->LogicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL; - CommandMailbox->LogicalDeviceInfo.DataTransferSize = - sizeof(DAC960_V2_LogicalDeviceInfo_T); - CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber = - Controller->V2.NewLogicalDeviceInformation->LogicalDeviceNumber; - CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode = - DAC960_V2_GetLogicalDeviceInfoValid; - CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentDataPointer = - Controller->V2.NewLogicalDeviceInformationDMA; - CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress - .ScatterGatherSegments[0] - .SegmentByteCount = - CommandMailbox->LogicalDeviceInfo.DataTransferSize; - DAC960_QueueCommand(Command); - return; - } - Controller->MonitoringTimerCount++; - Controller->MonitoringTimer.expires = - jiffies + DAC960_HealthStatusMonitoringInterval; - add_timer(&Controller->MonitoringTimer); - } - if (CommandType == DAC960_ImmediateCommand) - { - complete(Command->Completion); - Command->Completion = NULL; - return; - } - if (CommandType == DAC960_QueuedCommand) - { - DAC960_V2_KernelCommand_T *KernelCommand = Command->V2.KernelCommand; - KernelCommand->CommandStatus = CommandStatus; - KernelCommand->RequestSenseLength = Command->V2.RequestSenseLength; - KernelCommand->DataTransferLength = Command->V2.DataTransferResidue; - Command->V2.KernelCommand = NULL; - DAC960_DeallocateCommand(Command); - KernelCommand->CompletionFunction(KernelCommand); - return; - } - /* - Queue a Status Monitoring Command to the Controller using the just - completed Command if one was deferred previously due to lack of a - free Command when the Monitoring Timer Function was called. - */ - if (Controller->MonitoringCommandDeferred) - { - Controller->MonitoringCommandDeferred = false; - DAC960_V2_QueueMonitoringCommand(Command); - return; - } - /* - Deallocate the Command. - */ - DAC960_DeallocateCommand(Command); - /* - Wake up any processes waiting on a free Command. 
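The tail of the completion path above is a slot-recycling idiom worth noting: when the monitoring timer fired while no command was free, the request was only flagged as deferred, and the next command to complete is reused in place rather than freed and reallocated. A minimal sketch of the pattern, with hypothetical names (struct ctrl, queue_monitoring(), free_cmd()); this is not driver code:

    /* Completion path: service a deferred poll with the slot we just freed. */
    static void complete_cmd(struct ctrl *c, struct cmd *cmd)
    {
            if (c->monitoring_deferred) {
                    c->monitoring_deferred = false;
                    queue_monitoring(cmd);     /* reuse the command slot directly */
                    return;
            }
            free_cmd(c, cmd);                  /* return the slot to the pool */
            wake_up(&c->free_waiters);         /* unblock allocators waiting for a slot */
    }

Reusing the just-completed slot avoids a free/allocate round trip and guarantees the deferred poll runs as soon as any command finishes.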
-
-/*
-  DAC960_GEM_InterruptHandler handles hardware interrupts from DAC960 GEM Series
-  Controllers.
-*/
-
-static irqreturn_t DAC960_GEM_InterruptHandler(int IRQ_Channel,
-                                               void *DeviceIdentifier)
-{
-  DAC960_Controller_T *Controller = DeviceIdentifier;
-  void __iomem *ControllerBaseAddress = Controller->BaseAddress;
-  DAC960_V2_StatusMailbox_T *NextStatusMailbox;
-  unsigned long flags;
-
-  spin_lock_irqsave(&Controller->queue_lock, flags);
-  DAC960_GEM_AcknowledgeInterrupt(ControllerBaseAddress);
-  NextStatusMailbox = Controller->V2.NextStatusMailbox;
-  while (NextStatusMailbox->Fields.CommandIdentifier > 0)
-    {
-      DAC960_V2_CommandIdentifier_T CommandIdentifier =
-        NextStatusMailbox->Fields.CommandIdentifier;
-      DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1];
-      Command->V2.CommandStatus = NextStatusMailbox->Fields.CommandStatus;
-      Command->V2.RequestSenseLength =
-        NextStatusMailbox->Fields.RequestSenseLength;
-      Command->V2.DataTransferResidue =
-        NextStatusMailbox->Fields.DataTransferResidue;
-      NextStatusMailbox->Words[0] = 0;
-      if (++NextStatusMailbox > Controller->V2.LastStatusMailbox)
-        NextStatusMailbox = Controller->V2.FirstStatusMailbox;
-      DAC960_V2_ProcessCompletedCommand(Command);
-    }
-  Controller->V2.NextStatusMailbox = NextStatusMailbox;
-  /*
-    Attempt to remove additional I/O Requests from the Controller's
-    I/O Request Queue and queue them to the Controller.
-  */
-  DAC960_ProcessRequest(Controller);
-  spin_unlock_irqrestore(&Controller->queue_lock, flags);
-  return IRQ_HANDLED;
-}
-
-/*
-  DAC960_BA_InterruptHandler handles hardware interrupts from DAC960 BA Series
-  Controllers.
-*/
-
-static irqreturn_t DAC960_BA_InterruptHandler(int IRQ_Channel,
-                                              void *DeviceIdentifier)
-{
-  DAC960_Controller_T *Controller = DeviceIdentifier;
-  void __iomem *ControllerBaseAddress = Controller->BaseAddress;
-  DAC960_V2_StatusMailbox_T *NextStatusMailbox;
-  unsigned long flags;
-
-  spin_lock_irqsave(&Controller->queue_lock, flags);
-  DAC960_BA_AcknowledgeInterrupt(ControllerBaseAddress);
-  NextStatusMailbox = Controller->V2.NextStatusMailbox;
-  while (NextStatusMailbox->Fields.CommandIdentifier > 0)
-    {
-      DAC960_V2_CommandIdentifier_T CommandIdentifier =
-        NextStatusMailbox->Fields.CommandIdentifier;
-      DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1];
-      Command->V2.CommandStatus = NextStatusMailbox->Fields.CommandStatus;
-      Command->V2.RequestSenseLength =
-        NextStatusMailbox->Fields.RequestSenseLength;
-      Command->V2.DataTransferResidue =
-        NextStatusMailbox->Fields.DataTransferResidue;
-      NextStatusMailbox->Words[0] = 0;
-      if (++NextStatusMailbox > Controller->V2.LastStatusMailbox)
-        NextStatusMailbox = Controller->V2.FirstStatusMailbox;
-      DAC960_V2_ProcessCompletedCommand(Command);
-    }
-  Controller->V2.NextStatusMailbox = NextStatusMailbox;
-  /*
-    Attempt to remove additional I/O Requests from the Controller's
-    I/O Request Queue and queue them to the Controller.
-  */
-  DAC960_ProcessRequest(Controller);
-  spin_unlock_irqrestore(&Controller->queue_lock, flags);
-  return IRQ_HANDLED;
-}
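The GEM and BA handlers are byte-for-byte identical except for the acknowledge helper; both drain a ring of V2 status mailboxes in which a zero CommandIdentifier marks an empty slot, and writing zero to the first word hands the slot back to the controller. The underlying single-consumer ring idiom, sketched with hypothetical types (handle_completion() is assumed, not the driver's):

    struct slot { unsigned int id; unsigned short status; unsigned int words[2]; };

    /* Drain completions; returns the new consumer position. */
    static struct slot *drain_ring(struct slot *next, struct slot *first,
                                   struct slot *last)
    {
            while (next->id > 0) {          /* id == 0 means "slot empty" */
                    handle_completion(next->id, next->status);
                    next->words[0] = 0;     /* clear id+status: slot reusable */
                    if (++next > last)      /* wrap at the end of the ring */
                            next = first;
            }
            return next;
    }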
-
-
-/*
-  DAC960_LP_InterruptHandler handles hardware interrupts from DAC960 LP Series
-  Controllers.
-*/
-
-static irqreturn_t DAC960_LP_InterruptHandler(int IRQ_Channel,
-                                              void *DeviceIdentifier)
-{
-  DAC960_Controller_T *Controller = DeviceIdentifier;
-  void __iomem *ControllerBaseAddress = Controller->BaseAddress;
-  DAC960_V2_StatusMailbox_T *NextStatusMailbox;
-  unsigned long flags;
-
-  spin_lock_irqsave(&Controller->queue_lock, flags);
-  DAC960_LP_AcknowledgeInterrupt(ControllerBaseAddress);
-  NextStatusMailbox = Controller->V2.NextStatusMailbox;
-  while (NextStatusMailbox->Fields.CommandIdentifier > 0)
-    {
-      DAC960_V2_CommandIdentifier_T CommandIdentifier =
-        NextStatusMailbox->Fields.CommandIdentifier;
-      DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1];
-      Command->V2.CommandStatus = NextStatusMailbox->Fields.CommandStatus;
-      Command->V2.RequestSenseLength =
-        NextStatusMailbox->Fields.RequestSenseLength;
-      Command->V2.DataTransferResidue =
-        NextStatusMailbox->Fields.DataTransferResidue;
-      NextStatusMailbox->Words[0] = 0;
-      if (++NextStatusMailbox > Controller->V2.LastStatusMailbox)
-        NextStatusMailbox = Controller->V2.FirstStatusMailbox;
-      DAC960_V2_ProcessCompletedCommand(Command);
-    }
-  Controller->V2.NextStatusMailbox = NextStatusMailbox;
-  /*
-    Attempt to remove additional I/O Requests from the Controller's
-    I/O Request Queue and queue them to the Controller.
-  */
-  DAC960_ProcessRequest(Controller);
-  spin_unlock_irqrestore(&Controller->queue_lock, flags);
-  return IRQ_HANDLED;
-}
-
-
-/*
-  DAC960_LA_InterruptHandler handles hardware interrupts from DAC960 LA Series
-  Controllers.
-*/
-
-static irqreturn_t DAC960_LA_InterruptHandler(int IRQ_Channel,
-                                              void *DeviceIdentifier)
-{
-  DAC960_Controller_T *Controller = DeviceIdentifier;
-  void __iomem *ControllerBaseAddress = Controller->BaseAddress;
-  DAC960_V1_StatusMailbox_T *NextStatusMailbox;
-  unsigned long flags;
-
-  spin_lock_irqsave(&Controller->queue_lock, flags);
-  DAC960_LA_AcknowledgeInterrupt(ControllerBaseAddress);
-  NextStatusMailbox = Controller->V1.NextStatusMailbox;
-  while (NextStatusMailbox->Fields.Valid)
-    {
-      DAC960_V1_CommandIdentifier_T CommandIdentifier =
-        NextStatusMailbox->Fields.CommandIdentifier;
-      DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1];
-      Command->V1.CommandStatus = NextStatusMailbox->Fields.CommandStatus;
-      NextStatusMailbox->Word = 0;
-      if (++NextStatusMailbox > Controller->V1.LastStatusMailbox)
-        NextStatusMailbox = Controller->V1.FirstStatusMailbox;
-      DAC960_V1_ProcessCompletedCommand(Command);
-    }
-  Controller->V1.NextStatusMailbox = NextStatusMailbox;
-  /*
-    Attempt to remove additional I/O Requests from the Controller's
-    I/O Request Queue and queue them to the Controller.
-  */
-  DAC960_ProcessRequest(Controller);
-  spin_unlock_irqrestore(&Controller->queue_lock, flags);
-  return IRQ_HANDLED;
-}
-
-
-/*
-  DAC960_PG_InterruptHandler handles hardware interrupts from DAC960 PG Series
-  Controllers.
-*/
-
-static irqreturn_t DAC960_PG_InterruptHandler(int IRQ_Channel,
-                                              void *DeviceIdentifier)
-{
-  DAC960_Controller_T *Controller = DeviceIdentifier;
-  void __iomem *ControllerBaseAddress = Controller->BaseAddress;
-  DAC960_V1_StatusMailbox_T *NextStatusMailbox;
-  unsigned long flags;
-
-  spin_lock_irqsave(&Controller->queue_lock, flags);
-  DAC960_PG_AcknowledgeInterrupt(ControllerBaseAddress);
-  NextStatusMailbox = Controller->V1.NextStatusMailbox;
-  while (NextStatusMailbox->Fields.Valid)
-    {
-      DAC960_V1_CommandIdentifier_T CommandIdentifier =
-        NextStatusMailbox->Fields.CommandIdentifier;
-      DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1];
-      Command->V1.CommandStatus = NextStatusMailbox->Fields.CommandStatus;
-      NextStatusMailbox->Word = 0;
-      if (++NextStatusMailbox > Controller->V1.LastStatusMailbox)
-        NextStatusMailbox = Controller->V1.FirstStatusMailbox;
-      DAC960_V1_ProcessCompletedCommand(Command);
-    }
-  Controller->V1.NextStatusMailbox = NextStatusMailbox;
-  /*
-    Attempt to remove additional I/O Requests from the Controller's
-    I/O Request Queue and queue them to the Controller.
-  */
-  DAC960_ProcessRequest(Controller);
-  spin_unlock_irqrestore(&Controller->queue_lock, flags);
-  return IRQ_HANDLED;
-}
-
-
-/*
-  DAC960_PD_InterruptHandler handles hardware interrupts from DAC960 PD Series
-  Controllers.
-*/
-
-static irqreturn_t DAC960_PD_InterruptHandler(int IRQ_Channel,
-                                              void *DeviceIdentifier)
-{
-  DAC960_Controller_T *Controller = DeviceIdentifier;
-  void __iomem *ControllerBaseAddress = Controller->BaseAddress;
-  unsigned long flags;
-
-  spin_lock_irqsave(&Controller->queue_lock, flags);
-  while (DAC960_PD_StatusAvailableP(ControllerBaseAddress))
-    {
-      DAC960_V1_CommandIdentifier_T CommandIdentifier =
-        DAC960_PD_ReadStatusCommandIdentifier(ControllerBaseAddress);
-      DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1];
-      Command->V1.CommandStatus =
-        DAC960_PD_ReadStatusRegister(ControllerBaseAddress);
-      DAC960_PD_AcknowledgeInterrupt(ControllerBaseAddress);
-      DAC960_PD_AcknowledgeStatus(ControllerBaseAddress);
-      DAC960_V1_ProcessCompletedCommand(Command);
-    }
-  /*
-    Attempt to remove additional I/O Requests from the Controller's
-    I/O Request Queue and queue them to the Controller.
-  */
-  DAC960_ProcessRequest(Controller);
-  spin_unlock_irqrestore(&Controller->queue_lock, flags);
-  return IRQ_HANDLED;
-}
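Unlike the mailbox-ring controllers above, the PD family exposes completions through registers: the handler polls a status-available flag, reads the command identifier and status, and must acknowledge both the interrupt and the status slot before the next pair becomes readable. In outline, with hypothetical register helpers standing in for the DAC960_PD_* accessors:

    while (status_available(base)) {
            unsigned int id = read_status_id(base);   /* which command finished */
            unsigned short st = read_status(base);    /* its completion status */

            ack_interrupt(base);                      /* clear the IRQ condition */
            ack_status(base);                         /* free the status slot */
            complete_by_id(id, st);
    }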
-
-
-/*
-  DAC960_P_InterruptHandler handles hardware interrupts from DAC960 P Series
-  Controllers.
-
-  Translations of DAC960_V1_Enquiry and DAC960_V1_GetDeviceState rely
-  on the data having been placed into DAC960_Controller_T, rather than
-  an arbitrary buffer.
-*/
-
-static irqreturn_t DAC960_P_InterruptHandler(int IRQ_Channel,
-                                             void *DeviceIdentifier)
-{
-  DAC960_Controller_T *Controller = DeviceIdentifier;
-  void __iomem *ControllerBaseAddress = Controller->BaseAddress;
-  unsigned long flags;
-
-  spin_lock_irqsave(&Controller->queue_lock, flags);
-  while (DAC960_PD_StatusAvailableP(ControllerBaseAddress))
-    {
-      DAC960_V1_CommandIdentifier_T CommandIdentifier =
-        DAC960_PD_ReadStatusCommandIdentifier(ControllerBaseAddress);
-      DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1];
-      DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
-      DAC960_V1_CommandOpcode_T CommandOpcode =
-        CommandMailbox->Common.CommandOpcode;
-      Command->V1.CommandStatus =
-        DAC960_PD_ReadStatusRegister(ControllerBaseAddress);
-      DAC960_PD_AcknowledgeInterrupt(ControllerBaseAddress);
-      DAC960_PD_AcknowledgeStatus(ControllerBaseAddress);
-      switch (CommandOpcode)
-        {
-        case DAC960_V1_Enquiry_Old:
-          Command->V1.CommandMailbox.Common.CommandOpcode = DAC960_V1_Enquiry;
-          DAC960_P_To_PD_TranslateEnquiry(Controller->V1.NewEnquiry);
-          break;
-        case DAC960_V1_GetDeviceState_Old:
-          Command->V1.CommandMailbox.Common.CommandOpcode =
-            DAC960_V1_GetDeviceState;
-          DAC960_P_To_PD_TranslateDeviceState(Controller->V1.NewDeviceState);
-          break;
-        case DAC960_V1_Read_Old:
-          Command->V1.CommandMailbox.Common.CommandOpcode = DAC960_V1_Read;
-          DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox);
-          break;
-        case DAC960_V1_Write_Old:
-          Command->V1.CommandMailbox.Common.CommandOpcode = DAC960_V1_Write;
-          DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox);
-          break;
-        case DAC960_V1_ReadWithScatterGather_Old:
-          Command->V1.CommandMailbox.Common.CommandOpcode =
-            DAC960_V1_ReadWithScatterGather;
-          DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox);
-          break;
-        case DAC960_V1_WriteWithScatterGather_Old:
-          Command->V1.CommandMailbox.Common.CommandOpcode =
-            DAC960_V1_WriteWithScatterGather;
-          DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox);
-          break;
-        default:
-          break;
-        }
-      DAC960_V1_ProcessCompletedCommand(Command);
-    }
-  /*
-    Attempt to remove additional I/O Requests from the Controller's
-    I/O Request Queue and queue them to the Controller.
-  */
-  DAC960_ProcessRequest(Controller);
-  spin_unlock_irqrestore(&Controller->queue_lock, flags);
-  return IRQ_HANDLED;
-}
-
-
-/*
-  DAC960_V1_QueueMonitoringCommand queues a Monitoring Command to DAC960 V1
-  Firmware Controllers.
-*/
-
-static void DAC960_V1_QueueMonitoringCommand(DAC960_Command_T *Command)
-{
-  DAC960_Controller_T *Controller = Command->Controller;
-  DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
-  DAC960_V1_ClearCommand(Command);
-  Command->CommandType = DAC960_MonitoringCommand;
-  CommandMailbox->Type3.CommandOpcode = DAC960_V1_Enquiry;
-  CommandMailbox->Type3.BusAddress = Controller->V1.NewEnquiryDMA;
-  DAC960_QueueCommand(Command);
-}
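Both monitoring queue routines are fire-and-forget: they only build the poll command (a V1 Enquiry above, a V2 GetControllerInfo IOCTL below) and queue it; re-arming happens in the timer function that follows. The general timer-driven poll shape, sketched with hypothetical names against the timer_list API the timer function below actually uses:

    static void monitor_fn(struct timer_list *t)
    {
            struct ctrl *c = from_timer(c, t, timer);  /* container_of helper */

            queue_poll(c);                             /* completion path handles results */
            mod_timer(&c->timer, jiffies + POLL_INTERVAL);
    }

(The driver re-arms by assigning .expires and calling add_timer(); mod_timer() is the equivalent one-call form.)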
-
-
-/*
-  DAC960_V2_QueueMonitoringCommand queues a Monitoring Command to DAC960 V2
-  Firmware Controllers.
-*/
-
-static void DAC960_V2_QueueMonitoringCommand(DAC960_Command_T *Command)
-{
-  DAC960_Controller_T *Controller = Command->Controller;
-  DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox;
-  DAC960_V2_ClearCommand(Command);
-  Command->CommandType = DAC960_MonitoringCommand;
-  CommandMailbox->ControllerInfo.CommandOpcode = DAC960_V2_IOCTL;
-  CommandMailbox->ControllerInfo.CommandControlBits
-                .DataTransferControllerToHost = true;
-  CommandMailbox->ControllerInfo.CommandControlBits
-                .NoAutoRequestSense = true;
-  CommandMailbox->ControllerInfo.DataTransferSize =
-    sizeof(DAC960_V2_ControllerInfo_T);
-  CommandMailbox->ControllerInfo.ControllerNumber = 0;
-  CommandMailbox->ControllerInfo.IOCTL_Opcode = DAC960_V2_GetControllerInfo;
-  CommandMailbox->ControllerInfo.DataTransferMemoryAddress
-                .ScatterGatherSegments[0]
-                .SegmentDataPointer =
-    Controller->V2.NewControllerInformationDMA;
-  CommandMailbox->ControllerInfo.DataTransferMemoryAddress
-                .ScatterGatherSegments[0]
-                .SegmentByteCount =
-    CommandMailbox->ControllerInfo.DataTransferSize;
-  DAC960_QueueCommand(Command);
-}
-
-
-/*
-  DAC960_MonitoringTimerFunction is the timer function for monitoring
-  the status of DAC960 Controllers.
-*/
-
-static void DAC960_MonitoringTimerFunction(struct timer_list *t)
-{
-  DAC960_Controller_T *Controller = from_timer(Controller, t, MonitoringTimer);
-  DAC960_Command_T *Command;
-  unsigned long flags;
-
-  if (Controller->FirmwareType == DAC960_V1_Controller)
-    {
-      spin_lock_irqsave(&Controller->queue_lock, flags);
-      /*
-        Queue a Status Monitoring Command to Controller.
-      */
-      Command = DAC960_AllocateCommand(Controller);
-      if (Command != NULL)
-        DAC960_V1_QueueMonitoringCommand(Command);
-      else Controller->MonitoringCommandDeferred = true;
-      spin_unlock_irqrestore(&Controller->queue_lock, flags);
-    }
-  else
-    {
-      DAC960_V2_ControllerInfo_T *ControllerInfo =
-        &Controller->V2.ControllerInformation;
-      unsigned int StatusChangeCounter =
-        Controller->V2.HealthStatusBuffer->StatusChangeCounter;
-      bool ForceMonitoringCommand = false;
-      if (time_after(jiffies, Controller->SecondaryMonitoringTime
-                     + DAC960_SecondaryMonitoringInterval))
-        {
-          int LogicalDriveNumber;
-          for (LogicalDriveNumber = 0;
-               LogicalDriveNumber < DAC960_MaxLogicalDrives;
-               LogicalDriveNumber++)
-            {
-              DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo =
-                Controller->V2.LogicalDeviceInformation[LogicalDriveNumber];
-              if (LogicalDeviceInfo == NULL) continue;
-              if (!LogicalDeviceInfo->LogicalDeviceControl
-                   .LogicalDeviceInitialized)
-                {
-                  ForceMonitoringCommand = true;
-                  break;
-                }
-            }
-          Controller->SecondaryMonitoringTime = jiffies;
-        }
-      if (StatusChangeCounter == Controller->V2.StatusChangeCounter &&
-          Controller->V2.HealthStatusBuffer->NextEventSequenceNumber
-          == Controller->V2.NextEventSequenceNumber &&
-          (ControllerInfo->BackgroundInitializationsActive +
-           ControllerInfo->LogicalDeviceInitializationsActive +
-           ControllerInfo->PhysicalDeviceInitializationsActive +
-           ControllerInfo->ConsistencyChecksActive +
-           ControllerInfo->RebuildsActive +
-           ControllerInfo->OnlineExpansionsActive == 0 ||
-           time_before(jiffies, Controller->PrimaryMonitoringTime
-                       + DAC960_MonitoringTimerInterval)) &&
-          !ForceMonitoringCommand)
-        {
-          Controller->MonitoringTimer.expires =
-            jiffies + DAC960_HealthStatusMonitoringInterval;
-          add_timer(&Controller->MonitoringTimer);
-          return;
-        }
-      Controller->V2.StatusChangeCounter = StatusChangeCounter;
-      Controller->PrimaryMonitoringTime = jiffies;
-
-      spin_lock_irqsave(&Controller->queue_lock, flags);
-      /*
-        Queue a Status Monitoring Command to Controller.
-      */
-      Command = DAC960_AllocateCommand(Controller);
-      if (Command != NULL)
-        DAC960_V2_QueueMonitoringCommand(Command);
-      else Controller->MonitoringCommandDeferred = true;
-      spin_unlock_irqrestore(&Controller->queue_lock, flags);
-      /*
-        Wake up any processes waiting on a Health Status Buffer change.
-      */
-      wake_up(&Controller->HealthStatusWaitQueue);
-    }
-}
-
-/*
-  DAC960_CheckStatusBuffer verifies that there is room to hold ByteCount
-  additional bytes in the Combined Status Buffer and grows the buffer if
-  necessary.  It returns true if there is enough room and false otherwise.
-*/
-
-static bool DAC960_CheckStatusBuffer(DAC960_Controller_T *Controller,
-                                     unsigned int ByteCount)
-{
-  unsigned char *NewStatusBuffer;
-  if (Controller->InitialStatusLength + 1 +
-      Controller->CurrentStatusLength + ByteCount + 1 <=
-      Controller->CombinedStatusBufferLength)
-    return true;
-  if (Controller->CombinedStatusBufferLength == 0)
-    {
-      unsigned int NewStatusBufferLength = DAC960_InitialStatusBufferSize;
-      while (NewStatusBufferLength < ByteCount)
-        NewStatusBufferLength *= 2;
-      Controller->CombinedStatusBuffer = kmalloc(NewStatusBufferLength,
-                                                 GFP_ATOMIC);
-      if (Controller->CombinedStatusBuffer == NULL) return false;
-      Controller->CombinedStatusBufferLength = NewStatusBufferLength;
-      return true;
-    }
-  NewStatusBuffer = kmalloc_array(2, Controller->CombinedStatusBufferLength,
-                                  GFP_ATOMIC);
-  if (NewStatusBuffer == NULL)
-    {
-      DAC960_Warning("Unable to expand Combined Status Buffer - Truncating\n",
-                     Controller);
-      return false;
-    }
-  memcpy(NewStatusBuffer, Controller->CombinedStatusBuffer,
-         Controller->CombinedStatusBufferLength);
-  kfree(Controller->CombinedStatusBuffer);
-  Controller->CombinedStatusBuffer = NewStatusBuffer;
-  Controller->CombinedStatusBufferLength *= 2;
-  Controller->CurrentStatusBuffer =
-    &NewStatusBuffer[Controller->InitialStatusLength + 1];
-  return true;
-}
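DAC960_CheckStatusBuffer() grows the combined buffer geometrically, doubling until the requested headroom fits, which keeps the amortized cost of appending status text constant. The same strategy in a self-contained userspace sketch (hypothetical helper; realloc stands in for the kmalloc/memcpy/kfree dance the driver must do under GFP_ATOMIC):

    #include <stdlib.h>

    /* Ensure *buf can hold used + extra + 1 bytes; returns 0 on failure. */
    static int ensure_room(char **buf, size_t *cap, size_t used, size_t extra)
    {
            size_t need = used + extra + 1;
            size_t newcap = *cap ? *cap : 64;
            char *p;

            if (need <= *cap)
                    return 1;
            while (newcap < need)
                    newcap *= 2;            /* geometric growth */
            p = realloc(*buf, newcap);
            if (p == NULL)
                    return 0;               /* caller truncates, as the driver does */
            *buf = p;
            *cap = newcap;
            return 1;
    }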
-
-
-/*
-  DAC960_Message prints Driver Messages.
-*/
-
-static void DAC960_Message(DAC960_MessageLevel_T MessageLevel,
-                           unsigned char *Format,
-                           DAC960_Controller_T *Controller,
-                           ...)
-{
-  static unsigned char Buffer[DAC960_LineBufferSize];
-  static bool BeginningOfLine = true;
-  va_list Arguments;
-  int Length = 0;
-  va_start(Arguments, Controller);
-  Length = vsprintf(Buffer, Format, Arguments);
-  va_end(Arguments);
-  if (Controller == NULL)
-    printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel],
-           DAC960_ControllerCount, Buffer);
-  else if (MessageLevel == DAC960_AnnounceLevel ||
-           MessageLevel == DAC960_InfoLevel)
-    {
-      if (!Controller->ControllerInitialized)
-        {
-          if (DAC960_CheckStatusBuffer(Controller, Length))
-            {
-              strcpy(&Controller->CombinedStatusBuffer
-                      [Controller->InitialStatusLength],
-                     Buffer);
-              Controller->InitialStatusLength += Length;
-              Controller->CurrentStatusBuffer =
-                &Controller->CombinedStatusBuffer
-                 [Controller->InitialStatusLength + 1];
-            }
-          if (MessageLevel == DAC960_AnnounceLevel)
-            {
-              static int AnnouncementLines = 0;
-              if (++AnnouncementLines <= 2)
-                printk("%sDAC960: %s", DAC960_MessageLevelMap[MessageLevel],
-                       Buffer);
-            }
-          else
-            {
-              if (BeginningOfLine)
-                {
-                  if (Buffer[0] != '\n' || Length > 1)
-                    printk("%sDAC960#%d: %s",
-                           DAC960_MessageLevelMap[MessageLevel],
-                           Controller->ControllerNumber, Buffer);
-                }
-              else printk("%s", Buffer);
-            }
-        }
-      else if (DAC960_CheckStatusBuffer(Controller, Length))
-        {
-          strcpy(&Controller->CurrentStatusBuffer[
-                   Controller->CurrentStatusLength], Buffer);
-          Controller->CurrentStatusLength += Length;
-        }
-    }
-  else if (MessageLevel == DAC960_ProgressLevel)
-    {
-      strcpy(Controller->ProgressBuffer, Buffer);
-      Controller->ProgressBufferLength = Length;
-      if (Controller->EphemeralProgressMessage)
-        {
-          if (time_after_eq(jiffies, Controller->LastProgressReportTime
-                            + DAC960_ProgressReportingInterval))
-            {
-              printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel],
-                     Controller->ControllerNumber, Buffer);
-              Controller->LastProgressReportTime = jiffies;
-            }
-        }
-      else printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel],
-                  Controller->ControllerNumber, Buffer);
-    }
-  else if (MessageLevel == DAC960_UserCriticalLevel)
-    {
-      strcpy(&Controller->UserStatusBuffer[Controller->UserStatusLength],
-             Buffer);
-      Controller->UserStatusLength += Length;
-      if (Buffer[0] != '\n' || Length > 1)
-        printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel],
-               Controller->ControllerNumber, Buffer);
-    }
-  else
-    {
-      if (BeginningOfLine)
-        printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel],
-               Controller->ControllerNumber, Buffer);
-      else printk("%s", Buffer);
-    }
-  BeginningOfLine = (Buffer[Length-1] == '\n');
-}
-
-
-/*
-  DAC960_ParsePhysicalDevice parses spaces followed by a Physical Device
-  Channel:TargetID specification from a User Command string.  It updates
-  Channel and TargetID and returns true on success and false on failure.
-*/
-
-static bool DAC960_ParsePhysicalDevice(DAC960_Controller_T *Controller,
-                                       char *UserCommandString,
-                                       unsigned char *Channel,
-                                       unsigned char *TargetID)
-{
-  char *NewUserCommandString = UserCommandString;
-  unsigned long XChannel, XTargetID;
-  while (*UserCommandString == ' ') UserCommandString++;
-  if (UserCommandString == NewUserCommandString)
-    return false;
-  XChannel = simple_strtoul(UserCommandString, &NewUserCommandString, 10);
-  if (NewUserCommandString == UserCommandString ||
-      *NewUserCommandString != ':' ||
-      XChannel >= Controller->Channels)
-    return false;
-  UserCommandString = ++NewUserCommandString;
-  XTargetID = simple_strtoul(UserCommandString, &NewUserCommandString, 10);
-  if (NewUserCommandString == UserCommandString ||
-      *NewUserCommandString != '\0' ||
-      XTargetID >= Controller->Targets)
-    return false;
-  *Channel = XChannel;
-  *TargetID = XTargetID;
-  return true;
-}
-
-
-/*
-  DAC960_ParseLogicalDrive parses spaces followed by a Logical Drive Number
-  specification from a User Command string.  It updates LogicalDriveNumber and
-  returns true on success and false on failure.
-*/
-
-static bool DAC960_ParseLogicalDrive(DAC960_Controller_T *Controller,
-                                     char *UserCommandString,
-                                     unsigned char *LogicalDriveNumber)
-{
-  char *NewUserCommandString = UserCommandString;
-  unsigned long XLogicalDriveNumber;
-  while (*UserCommandString == ' ') UserCommandString++;
-  if (UserCommandString == NewUserCommandString)
-    return false;
-  XLogicalDriveNumber =
-    simple_strtoul(UserCommandString, &NewUserCommandString, 10);
-  if (NewUserCommandString == UserCommandString ||
-      *NewUserCommandString != '\0' ||
-      XLogicalDriveNumber > DAC960_MaxLogicalDrives - 1)
-    return false;
-  *LogicalDriveNumber = XLogicalDriveNumber;
-  return true;
-}
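Both parsers follow the same shape: skip the mandatory leading spaces (at least one, or the command name would run straight into its argument), convert with simple_strtoul(), and insist that the number ends exactly at the expected delimiter. A userspace equivalent of the Channel:TargetID parse, with assumed limit parameters in place of the controller fields:

    #include <stdlib.h>

    static int parse_chan_target(const char *s, unsigned long max_chan,
                                 unsigned long max_tgt,
                                 unsigned char *chan, unsigned char *tgt)
    {
            const char *start = s;
            char *end;
            unsigned long c, t;

            while (*s == ' ')
                    s++;
            if (s == start)                 /* at least one space required */
                    return 0;
            c = strtoul(s, &end, 10);
            if (end == s || *end != ':' || c >= max_chan)
                    return 0;
            s = end + 1;
            t = strtoul(s, &end, 10);
            if (end == s || *end != '\0' || t >= max_tgt)
                    return 0;
            *chan = c;
            *tgt = t;
            return 1;
    }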
-
-
-/*
-  DAC960_V1_SetDeviceState sets the Device State for a Physical Device for
-  DAC960 V1 Firmware Controllers.
-*/
-
-static void DAC960_V1_SetDeviceState(DAC960_Controller_T *Controller,
-                                     DAC960_Command_T *Command,
-                                     unsigned char Channel,
-                                     unsigned char TargetID,
-                                     DAC960_V1_PhysicalDeviceState_T
-                                       DeviceState,
-                                     const unsigned char *DeviceStateString)
-{
-  DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
-  CommandMailbox->Type3D.CommandOpcode = DAC960_V1_StartDevice;
-  CommandMailbox->Type3D.Channel = Channel;
-  CommandMailbox->Type3D.TargetID = TargetID;
-  CommandMailbox->Type3D.DeviceState = DeviceState;
-  CommandMailbox->Type3D.Modifier = 0;
-  DAC960_ExecuteCommand(Command);
-  switch (Command->V1.CommandStatus)
-    {
-    case DAC960_V1_NormalCompletion:
-      DAC960_UserCritical("%s of Physical Device %d:%d Succeeded\n", Controller,
-                          DeviceStateString, Channel, TargetID);
-      break;
-    case DAC960_V1_UnableToStartDevice:
-      DAC960_UserCritical("%s of Physical Device %d:%d Failed - "
-                          "Unable to Start Device\n", Controller,
-                          DeviceStateString, Channel, TargetID);
-      break;
-    case DAC960_V1_NoDeviceAtAddress:
-      DAC960_UserCritical("%s of Physical Device %d:%d Failed - "
-                          "No Device at Address\n", Controller,
-                          DeviceStateString, Channel, TargetID);
-      break;
-    case DAC960_V1_InvalidChannelOrTargetOrModifier:
-      DAC960_UserCritical("%s of Physical Device %d:%d Failed - "
-                          "Invalid Channel or Target or Modifier\n",
-                          Controller, DeviceStateString, Channel, TargetID);
-      break;
-    case DAC960_V1_ChannelBusy:
-      DAC960_UserCritical("%s of Physical Device %d:%d Failed - "
-                          "Channel Busy\n", Controller,
-                          DeviceStateString, Channel, TargetID);
-      break;
-    default:
-      DAC960_UserCritical("%s of Physical Device %d:%d Failed - "
-                          "Unexpected Status %04X\n", Controller,
-                          DeviceStateString, Channel, TargetID,
-                          Command->V1.CommandStatus);
-      break;
-    }
-}
-
-
-/*
-  DAC960_V1_ExecuteUserCommand executes a User Command for DAC960 V1 Firmware
-  Controllers.
-*/
-
-static bool DAC960_V1_ExecuteUserCommand(DAC960_Controller_T *Controller,
-                                         unsigned char *UserCommand)
-{
-  DAC960_Command_T *Command;
-  DAC960_V1_CommandMailbox_T *CommandMailbox;
-  unsigned long flags;
-  unsigned char Channel, TargetID, LogicalDriveNumber;
-
-  spin_lock_irqsave(&Controller->queue_lock, flags);
-  while ((Command = DAC960_AllocateCommand(Controller)) == NULL)
-    DAC960_WaitForCommand(Controller);
-  spin_unlock_irqrestore(&Controller->queue_lock, flags);
-  Controller->UserStatusLength = 0;
-  DAC960_V1_ClearCommand(Command);
-  Command->CommandType = DAC960_ImmediateCommand;
-  CommandMailbox = &Command->V1.CommandMailbox;
-  if (strcmp(UserCommand, "flush-cache") == 0)
-    {
-      CommandMailbox->Type3.CommandOpcode = DAC960_V1_Flush;
-      DAC960_ExecuteCommand(Command);
-      DAC960_UserCritical("Cache Flush Completed\n", Controller);
-    }
-  else if (strncmp(UserCommand, "kill", 4) == 0 &&
-           DAC960_ParsePhysicalDevice(Controller, &UserCommand[4],
-                                      &Channel, &TargetID))
-    {
-      DAC960_V1_DeviceState_T *DeviceState =
-        &Controller->V1.DeviceState[Channel][TargetID];
-      if (DeviceState->Present &&
-          DeviceState->DeviceType == DAC960_V1_DiskType &&
-          DeviceState->DeviceState != DAC960_V1_Device_Dead)
-        DAC960_V1_SetDeviceState(Controller, Command, Channel, TargetID,
-                                 DAC960_V1_Device_Dead, "Kill");
-      else DAC960_UserCritical("Kill of Physical Device %d:%d Illegal\n",
-                               Controller, Channel, TargetID);
-    }
-  else if (strncmp(UserCommand, "make-online", 11) == 0 &&
-           DAC960_ParsePhysicalDevice(Controller, &UserCommand[11],
-                                      &Channel, &TargetID))
-    {
-      DAC960_V1_DeviceState_T *DeviceState =
-        &Controller->V1.DeviceState[Channel][TargetID];
-      if (DeviceState->Present &&
-          DeviceState->DeviceType == DAC960_V1_DiskType &&
-          DeviceState->DeviceState == DAC960_V1_Device_Dead)
-        DAC960_V1_SetDeviceState(Controller, Command, Channel, TargetID,
-                                 DAC960_V1_Device_Online, "Make Online");
-      else DAC960_UserCritical("Make Online of Physical Device %d:%d Illegal\n",
-                               Controller, Channel, TargetID);
-
-    }
-  else if (strncmp(UserCommand, "make-standby", 12) == 0 &&
-           DAC960_ParsePhysicalDevice(Controller, &UserCommand[12],
-                                      &Channel, &TargetID))
-    {
-      DAC960_V1_DeviceState_T *DeviceState =
-        &Controller->V1.DeviceState[Channel][TargetID];
-      if (DeviceState->Present &&
-          DeviceState->DeviceType == DAC960_V1_DiskType &&
-          DeviceState->DeviceState == DAC960_V1_Device_Dead)
-        DAC960_V1_SetDeviceState(Controller, Command, Channel, TargetID,
-                                 DAC960_V1_Device_Standby, "Make Standby");
-      else DAC960_UserCritical("Make Standby of Physical "
-                               "Device %d:%d Illegal\n",
-                               Controller, Channel, TargetID);
-    }
-  else if (strncmp(UserCommand, "rebuild", 7) == 0 &&
-           DAC960_ParsePhysicalDevice(Controller, &UserCommand[7],
-                                      &Channel, &TargetID))
-    {
-      CommandMailbox->Type3D.CommandOpcode = DAC960_V1_RebuildAsync;
-      CommandMailbox->Type3D.Channel = Channel;
-      CommandMailbox->Type3D.TargetID = TargetID;
-      DAC960_ExecuteCommand(Command);
-      switch (Command->V1.CommandStatus)
-        {
-        case DAC960_V1_NormalCompletion:
-          DAC960_UserCritical("Rebuild of Physical Device %d:%d Initiated\n",
-                              Controller, Channel, TargetID);
-          break;
-        case DAC960_V1_AttemptToRebuildOnlineDrive:
-          DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - "
-                              "Attempt to Rebuild Online or "
-                              "Unresponsive Drive\n",
-                              Controller, Channel, TargetID);
-          break;
-        case DAC960_V1_NewDiskFailedDuringRebuild:
-          DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - "
-                              "New Disk Failed During Rebuild\n",
-                              Controller, Channel, TargetID);
-          break;
-        case DAC960_V1_InvalidDeviceAddress:
-          DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - "
-                              "Invalid Device Address\n",
-                              Controller, Channel, TargetID);
-          break;
-        case DAC960_V1_RebuildOrCheckAlreadyInProgress:
-          DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - "
-                              "Rebuild or Consistency Check Already "
-                              "in Progress\n", Controller, Channel, TargetID);
-          break;
-        default:
-          DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - "
-                              "Unexpected Status %04X\n", Controller,
-                              Channel, TargetID, Command->V1.CommandStatus);
-          break;
-        }
-    }
-  else if (strncmp(UserCommand, "check-consistency", 17) == 0 &&
-           DAC960_ParseLogicalDrive(Controller, &UserCommand[17],
-                                    &LogicalDriveNumber))
-    {
-      CommandMailbox->Type3C.CommandOpcode = DAC960_V1_CheckConsistencyAsync;
-      CommandMailbox->Type3C.LogicalDriveNumber = LogicalDriveNumber;
-      CommandMailbox->Type3C.AutoRestore = true;
-      DAC960_ExecuteCommand(Command);
-      switch (Command->V1.CommandStatus)
-        {
-        case DAC960_V1_NormalCompletion:
-          DAC960_UserCritical("Consistency Check of Logical Drive %d "
-                              "(/dev/rd/c%dd%d) Initiated\n",
-                              Controller, LogicalDriveNumber,
-                              Controller->ControllerNumber,
-                              LogicalDriveNumber);
-          break;
-        case DAC960_V1_DependentDiskIsDead:
-          DAC960_UserCritical("Consistency Check of Logical Drive %d "
-                              "(/dev/rd/c%dd%d) Failed - "
-                              "Dependent Physical Device is DEAD\n",
-                              Controller, LogicalDriveNumber,
-                              Controller->ControllerNumber,
-                              LogicalDriveNumber);
-          break;
-        case DAC960_V1_InvalidOrNonredundantLogicalDrive:
-          DAC960_UserCritical("Consistency Check of Logical Drive %d "
-                              "(/dev/rd/c%dd%d) Failed - "
-                              "Invalid or Nonredundant Logical Drive\n",
-                              Controller, LogicalDriveNumber,
-                              Controller->ControllerNumber,
-                              LogicalDriveNumber);
-          break;
-        case DAC960_V1_RebuildOrCheckAlreadyInProgress:
-          DAC960_UserCritical("Consistency Check of Logical Drive %d "
-                              "(/dev/rd/c%dd%d) Failed - Rebuild or "
-                              "Consistency Check Already in Progress\n",
-                              Controller, LogicalDriveNumber,
-                              Controller->ControllerNumber,
-                              LogicalDriveNumber);
-          break;
-        default:
-          DAC960_UserCritical("Consistency Check of Logical Drive %d "
-                              "(/dev/rd/c%dd%d) Failed - "
-                              "Unexpected Status %04X\n",
-                              Controller, LogicalDriveNumber,
-                              Controller->ControllerNumber,
-                              LogicalDriveNumber, Command->V1.CommandStatus);
-          break;
-        }
-    }
-  else if (strcmp(UserCommand, "cancel-rebuild") == 0 ||
-           strcmp(UserCommand, "cancel-consistency-check") == 0)
-    {
-      /*
-        the OldRebuildRateConstant is never actually used
-        once its value is retrieved from the controller.
-       */
-      unsigned char *OldRebuildRateConstant;
-      dma_addr_t OldRebuildRateConstantDMA;
-
-      OldRebuildRateConstant = pci_alloc_consistent( Controller->PCIDevice,
-                sizeof(char), &OldRebuildRateConstantDMA);
-      if (OldRebuildRateConstant == NULL) {
-        DAC960_UserCritical("Cancellation of Rebuild or "
-                            "Consistency Check Failed - "
-                            "Out of Memory",
-                            Controller);
-        goto failure;
-      }
-      CommandMailbox->Type3R.CommandOpcode = DAC960_V1_RebuildControl;
-      CommandMailbox->Type3R.RebuildRateConstant = 0xFF;
-      CommandMailbox->Type3R.BusAddress = OldRebuildRateConstantDMA;
-      DAC960_ExecuteCommand(Command);
-      switch (Command->V1.CommandStatus)
-        {
-        case DAC960_V1_NormalCompletion:
-          DAC960_UserCritical("Rebuild or Consistency Check Cancelled\n",
-                              Controller);
-          break;
-        default:
-          DAC960_UserCritical("Cancellation of Rebuild or "
-                              "Consistency Check Failed - "
-                              "Unexpected Status %04X\n",
-                              Controller, Command->V1.CommandStatus);
-          break;
-        }
-failure:
-      pci_free_consistent(Controller->PCIDevice, sizeof(char),
-                          OldRebuildRateConstant, OldRebuildRateConstantDMA);
-    }
-  else DAC960_UserCritical("Illegal User Command: '%s'\n",
-                           Controller, UserCommand);
-
-  spin_lock_irqsave(&Controller->queue_lock, flags);
-  DAC960_DeallocateCommand(Command);
-  spin_unlock_irqrestore(&Controller->queue_lock, flags);
-  return true;
-}
-
-
-/*
-  DAC960_V2_TranslatePhysicalDevice translates a Physical Device Channel and
-  TargetID into a Logical Device.  It returns true on success and false
-  on failure.
-*/
-
-static bool DAC960_V2_TranslatePhysicalDevice(DAC960_Command_T *Command,
-                                              unsigned char Channel,
-                                              unsigned char TargetID,
-                                              unsigned short
-                                                *LogicalDeviceNumber)
-{
-  DAC960_V2_CommandMailbox_T SavedCommandMailbox, *CommandMailbox;
-  DAC960_Controller_T *Controller = Command->Controller;
-
-  CommandMailbox = &Command->V2.CommandMailbox;
-  memcpy(&SavedCommandMailbox, CommandMailbox,
-         sizeof(DAC960_V2_CommandMailbox_T));
-
-  CommandMailbox->PhysicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL;
-  CommandMailbox->PhysicalDeviceInfo.CommandControlBits
-                .DataTransferControllerToHost = true;
-  CommandMailbox->PhysicalDeviceInfo.CommandControlBits
-                .NoAutoRequestSense = true;
-  CommandMailbox->PhysicalDeviceInfo.DataTransferSize =
-    sizeof(DAC960_V2_PhysicalToLogicalDevice_T);
-  CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.TargetID = TargetID;
-  CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.Channel = Channel;
-  CommandMailbox->PhysicalDeviceInfo.IOCTL_Opcode =
-    DAC960_V2_TranslatePhysicalToLogicalDevice;
-  CommandMailbox->Common.DataTransferMemoryAddress
-                .ScatterGatherSegments[0]
-                .SegmentDataPointer =
-    Controller->V2.PhysicalToLogicalDeviceDMA;
-  CommandMailbox->Common.DataTransferMemoryAddress
-                .ScatterGatherSegments[0]
-                .SegmentByteCount =
-    CommandMailbox->Common.DataTransferSize;
-
-  DAC960_ExecuteCommand(Command);
-  *LogicalDeviceNumber = Controller->V2.PhysicalToLogicalDevice->LogicalDeviceNumber;
-
-  memcpy(CommandMailbox, &SavedCommandMailbox,
-         sizeof(DAC960_V2_CommandMailbox_T));
-  return (Command->V2.CommandStatus == DAC960_V2_NormalCompletion);
-}
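DAC960_V2_TranslatePhysicalDevice() is called with the caller's half-built command (see the "kill"/"make-online" paths below), so it must snapshot the mailbox, issue its own translate IOCTL, and restore the snapshot before returning. The save/issue/restore pattern in miniature, with hypothetical types; this is a sketch, not the driver's API:

    static bool translate(struct cmd *cmd)
    {
            struct mailbox saved = cmd->mbox;   /* snapshot caller's setup */
            bool ok;

            build_translate_ioctl(&cmd->mbox);  /* overwrite with our request */
            execute_sync(cmd);
            ok = (cmd->status == STATUS_OK);
            cmd->mbox = saved;                  /* caller's mailbox intact again */
            return ok;
    }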
-
-
-/*
-  DAC960_V2_ExecuteUserCommand executes a User Command for DAC960 V2 Firmware
-  Controllers.
-*/
-
-static bool DAC960_V2_ExecuteUserCommand(DAC960_Controller_T *Controller,
-                                         unsigned char *UserCommand)
-{
-  DAC960_Command_T *Command;
-  DAC960_V2_CommandMailbox_T *CommandMailbox;
-  unsigned long flags;
-  unsigned char Channel, TargetID, LogicalDriveNumber;
-  unsigned short LogicalDeviceNumber;
-
-  spin_lock_irqsave(&Controller->queue_lock, flags);
-  while ((Command = DAC960_AllocateCommand(Controller)) == NULL)
-    DAC960_WaitForCommand(Controller);
-  spin_unlock_irqrestore(&Controller->queue_lock, flags);
-  Controller->UserStatusLength = 0;
-  DAC960_V2_ClearCommand(Command);
-  Command->CommandType = DAC960_ImmediateCommand;
-  CommandMailbox = &Command->V2.CommandMailbox;
-  CommandMailbox->Common.CommandOpcode = DAC960_V2_IOCTL;
-  CommandMailbox->Common.CommandControlBits.DataTransferControllerToHost = true;
-  CommandMailbox->Common.CommandControlBits.NoAutoRequestSense = true;
-  if (strcmp(UserCommand, "flush-cache") == 0)
-    {
-      CommandMailbox->DeviceOperation.IOCTL_Opcode = DAC960_V2_PauseDevice;
-      CommandMailbox->DeviceOperation.OperationDevice =
-        DAC960_V2_RAID_Controller;
-      DAC960_ExecuteCommand(Command);
-      DAC960_UserCritical("Cache Flush Completed\n", Controller);
-    }
-  else if (strncmp(UserCommand, "kill", 4) == 0 &&
-           DAC960_ParsePhysicalDevice(Controller, &UserCommand[4],
-                                      &Channel, &TargetID) &&
-           DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID,
-                                             &LogicalDeviceNumber))
-    {
-      CommandMailbox->SetDeviceState.LogicalDevice.LogicalDeviceNumber =
-        LogicalDeviceNumber;
-      CommandMailbox->SetDeviceState.IOCTL_Opcode =
-        DAC960_V2_SetDeviceState;
-      CommandMailbox->SetDeviceState.DeviceState.PhysicalDeviceState =
-        DAC960_V2_Device_Dead;
-      DAC960_ExecuteCommand(Command);
-      DAC960_UserCritical("Kill of Physical Device %d:%d %s\n",
-                          Controller, Channel, TargetID,
-                          (Command->V2.CommandStatus
-                           == DAC960_V2_NormalCompletion
-                           ? "Succeeded" : "Failed"));
-    }
-  else if (strncmp(UserCommand, "make-online", 11) == 0 &&
-           DAC960_ParsePhysicalDevice(Controller, &UserCommand[11],
-                                      &Channel, &TargetID) &&
-           DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID,
-                                             &LogicalDeviceNumber))
-    {
-      CommandMailbox->SetDeviceState.LogicalDevice.LogicalDeviceNumber =
-        LogicalDeviceNumber;
-      CommandMailbox->SetDeviceState.IOCTL_Opcode =
-        DAC960_V2_SetDeviceState;
-      CommandMailbox->SetDeviceState.DeviceState.PhysicalDeviceState =
-        DAC960_V2_Device_Online;
-      DAC960_ExecuteCommand(Command);
-      DAC960_UserCritical("Make Online of Physical Device %d:%d %s\n",
-                          Controller, Channel, TargetID,
-                          (Command->V2.CommandStatus
-                           == DAC960_V2_NormalCompletion
-                           ? "Succeeded" : "Failed"));
-    }
-  else if (strncmp(UserCommand, "make-standby", 12) == 0 &&
-           DAC960_ParsePhysicalDevice(Controller, &UserCommand[12],
-                                      &Channel, &TargetID) &&
-           DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID,
-                                             &LogicalDeviceNumber))
-    {
-      CommandMailbox->SetDeviceState.LogicalDevice.LogicalDeviceNumber =
-        LogicalDeviceNumber;
-      CommandMailbox->SetDeviceState.IOCTL_Opcode =
-        DAC960_V2_SetDeviceState;
-      CommandMailbox->SetDeviceState.DeviceState.PhysicalDeviceState =
-        DAC960_V2_Device_Standby;
-      DAC960_ExecuteCommand(Command);
-      DAC960_UserCritical("Make Standby of Physical Device %d:%d %s\n",
-                          Controller, Channel, TargetID,
-                          (Command->V2.CommandStatus
-                           == DAC960_V2_NormalCompletion
-                           ? "Succeeded" : "Failed"));
-    }
-  else if (strncmp(UserCommand, "rebuild", 7) == 0 &&
-           DAC960_ParsePhysicalDevice(Controller, &UserCommand[7],
-                                      &Channel, &TargetID) &&
-           DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID,
-                                             &LogicalDeviceNumber))
-    {
-      CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber =
-        LogicalDeviceNumber;
-      CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode =
-        DAC960_V2_RebuildDeviceStart;
-      DAC960_ExecuteCommand(Command);
-      DAC960_UserCritical("Rebuild of Physical Device %d:%d %s\n",
-                          Controller, Channel, TargetID,
-                          (Command->V2.CommandStatus
-                           == DAC960_V2_NormalCompletion
-                           ? "Initiated" : "Not Initiated"));
-    }
-  else if (strncmp(UserCommand, "cancel-rebuild", 14) == 0 &&
-           DAC960_ParsePhysicalDevice(Controller, &UserCommand[14],
-                                      &Channel, &TargetID) &&
-           DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID,
-                                             &LogicalDeviceNumber))
-    {
-      CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber =
-        LogicalDeviceNumber;
-      CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode =
-        DAC960_V2_RebuildDeviceStop;
-      DAC960_ExecuteCommand(Command);
-      DAC960_UserCritical("Rebuild of Physical Device %d:%d %s\n",
-                          Controller, Channel, TargetID,
-                          (Command->V2.CommandStatus
-                           == DAC960_V2_NormalCompletion
-                           ? "Cancelled" : "Not Cancelled"));
-    }
-  else if (strncmp(UserCommand, "check-consistency", 17) == 0 &&
-           DAC960_ParseLogicalDrive(Controller, &UserCommand[17],
-                                    &LogicalDriveNumber))
-    {
-      CommandMailbox->ConsistencyCheck.LogicalDevice.LogicalDeviceNumber =
-        LogicalDriveNumber;
-      CommandMailbox->ConsistencyCheck.IOCTL_Opcode =
-        DAC960_V2_ConsistencyCheckStart;
-      CommandMailbox->ConsistencyCheck.RestoreConsistency = true;
-      CommandMailbox->ConsistencyCheck.InitializedAreaOnly = false;
-      DAC960_ExecuteCommand(Command);
-      DAC960_UserCritical("Consistency Check of Logical Drive %d "
-                          "(/dev/rd/c%dd%d) %s\n",
-                          Controller, LogicalDriveNumber,
-                          Controller->ControllerNumber,
-                          LogicalDriveNumber,
-                          (Command->V2.CommandStatus
-                           == DAC960_V2_NormalCompletion
-                           ? "Initiated" : "Not Initiated"));
-    }
-  else if (strncmp(UserCommand, "cancel-consistency-check", 24) == 0 &&
-           DAC960_ParseLogicalDrive(Controller, &UserCommand[24],
-                                    &LogicalDriveNumber))
-    {
-      CommandMailbox->ConsistencyCheck.LogicalDevice.LogicalDeviceNumber =
-        LogicalDriveNumber;
-      CommandMailbox->ConsistencyCheck.IOCTL_Opcode =
-        DAC960_V2_ConsistencyCheckStop;
-      DAC960_ExecuteCommand(Command);
-      DAC960_UserCritical("Consistency Check of Logical Drive %d "
-                          "(/dev/rd/c%dd%d) %s\n",
-                          Controller, LogicalDriveNumber,
-                          Controller->ControllerNumber,
-                          LogicalDriveNumber,
-                          (Command->V2.CommandStatus
-                           == DAC960_V2_NormalCompletion
-                           ? "Cancelled" : "Not Cancelled"));
-    }
-  else if (strcmp(UserCommand, "perform-discovery") == 0)
-    {
-      CommandMailbox->Common.IOCTL_Opcode = DAC960_V2_StartDiscovery;
-      DAC960_ExecuteCommand(Command);
-      DAC960_UserCritical("Discovery %s\n", Controller,
-                          (Command->V2.CommandStatus
-                           == DAC960_V2_NormalCompletion
-                           ? "Initiated" : "Not Initiated"));
-      if (Command->V2.CommandStatus == DAC960_V2_NormalCompletion)
-        {
-          CommandMailbox->ControllerInfo.CommandOpcode = DAC960_V2_IOCTL;
-          CommandMailbox->ControllerInfo.CommandControlBits
-                        .DataTransferControllerToHost = true;
-          CommandMailbox->ControllerInfo.CommandControlBits
-                        .NoAutoRequestSense = true;
-          CommandMailbox->ControllerInfo.DataTransferSize =
-            sizeof(DAC960_V2_ControllerInfo_T);
-          CommandMailbox->ControllerInfo.ControllerNumber = 0;
-          CommandMailbox->ControllerInfo.IOCTL_Opcode =
-            DAC960_V2_GetControllerInfo;
-          /*
-           * How does this NOT race with the queued Monitoring
-           * usage of this structure?
-           */
-          CommandMailbox->ControllerInfo.DataTransferMemoryAddress
-                        .ScatterGatherSegments[0]
-                        .SegmentDataPointer =
-            Controller->V2.NewControllerInformationDMA;
-          CommandMailbox->ControllerInfo.DataTransferMemoryAddress
-                        .ScatterGatherSegments[0]
-                        .SegmentByteCount =
-            CommandMailbox->ControllerInfo.DataTransferSize;
-          while (1) {
-            DAC960_ExecuteCommand(Command);
-            if (!Controller->V2.NewControllerInformation->PhysicalScanActive)
-              break;
-            msleep(1000);
-          }
-          DAC960_UserCritical("Discovery Completed\n", Controller);
-        }
-    }
-  else if (strcmp(UserCommand, "suppress-enclosure-messages") == 0)
-    Controller->SuppressEnclosureMessages = true;
-  else DAC960_UserCritical("Illegal User Command: '%s'\n",
-                           Controller, UserCommand);
-
-  spin_lock_irqsave(&Controller->queue_lock, flags);
-  DAC960_DeallocateCommand(Command);
-  spin_unlock_irqrestore(&Controller->queue_lock, flags);
-  return true;
-}
-
-static int __maybe_unused dac960_proc_show(struct seq_file *m, void *v)
-{
-  unsigned char *StatusMessage = "OK\n";
-  int ControllerNumber;
-  for (ControllerNumber = 0;
-       ControllerNumber < DAC960_ControllerCount;
-       ControllerNumber++)
-    {
-      DAC960_Controller_T *Controller = DAC960_Controllers[ControllerNumber];
-      if (Controller == NULL) continue;
-      if (Controller->MonitoringAlertMode)
-        {
-          StatusMessage = "ALERT\n";
-          break;
-        }
-    }
-  seq_puts(m, StatusMessage);
-  return 0;
-}
-
-static int __maybe_unused dac960_initial_status_proc_show(struct seq_file *m,
-                                                          void *v)
-{
-  DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private;
-  seq_printf(m, "%.*s", Controller->InitialStatusLength, Controller->CombinedStatusBuffer);
-  return 0;
-}
-
-static int __maybe_unused dac960_current_status_proc_show(struct seq_file *m,
-                                                          void *v)
-{
-  DAC960_Controller_T *Controller = (DAC960_Controller_T *) m->private;
-  unsigned char *StatusMessage =
-    "No Rebuild or Consistency Check in Progress\n";
-  int ProgressMessageLength = strlen(StatusMessage);
-  if (jiffies != Controller->LastCurrentStatusTime)
-    {
-      Controller->CurrentStatusLength = 0;
-      DAC960_AnnounceDriver(Controller);
-      DAC960_ReportControllerConfiguration(Controller);
-      DAC960_ReportDeviceConfiguration(Controller);
-      if (Controller->ProgressBufferLength > 0)
-        ProgressMessageLength = Controller->ProgressBufferLength;
-      if (DAC960_CheckStatusBuffer(Controller, 2 + ProgressMessageLength))
-        {
-          unsigned char *CurrentStatusBuffer = Controller->CurrentStatusBuffer;
-          CurrentStatusBuffer[Controller->CurrentStatusLength++] = ' ';
-          CurrentStatusBuffer[Controller->CurrentStatusLength++] = ' ';
-          if (Controller->ProgressBufferLength > 0)
-            strcpy(&CurrentStatusBuffer[Controller->CurrentStatusLength],
-                   Controller->ProgressBuffer);
-          else
-            strcpy(&CurrentStatusBuffer[Controller->CurrentStatusLength],
-                   StatusMessage);
-          Controller->CurrentStatusLength += ProgressMessageLength;
-        }
-      Controller->LastCurrentStatusTime = jiffies;
-    }
-  seq_printf(m, "%.*s", Controller->CurrentStatusLength, Controller->CurrentStatusBuffer);
-  return 0;
-}
-
-static int dac960_user_command_proc_show(struct seq_file *m, void *v)
-{
-  DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private;
-
-  seq_printf(m, "%.*s", Controller->UserStatusLength, Controller->UserStatusBuffer);
-  return 0;
-}
-
-static int dac960_user_command_proc_open(struct inode *inode, struct file *file)
-{
-  return single_open(file, dac960_user_command_proc_show, PDE_DATA(inode));
-}
-
-static ssize_t dac960_user_command_proc_write(struct file *file,
-                                              const char __user *Buffer,
-                                              size_t Count, loff_t *pos)
-{
-  DAC960_Controller_T *Controller = PDE_DATA(file_inode(file));
-  unsigned char CommandBuffer[80];
-  int Length;
-  if (Count > sizeof(CommandBuffer)-1) return -EINVAL;
-  if (copy_from_user(CommandBuffer, Buffer, Count)) return -EFAULT;
-  CommandBuffer[Count] = '\0';
-  Length = strlen(CommandBuffer);
-  if (Length > 0 && CommandBuffer[Length-1] == '\n')
-    CommandBuffer[--Length] = '\0';
-  if (Controller->FirmwareType == DAC960_V1_Controller)
-    return (DAC960_V1_ExecuteUserCommand(Controller, CommandBuffer)
-            ? Count : -EBUSY);
-  else
-    return (DAC960_V2_ExecuteUserCommand(Controller, CommandBuffer)
-            ? Count : -EBUSY);
-}
-
-static const struct file_operations dac960_user_command_proc_fops = {
-  .owner          = THIS_MODULE,
-  .open           = dac960_user_command_proc_open,
-  .read           = seq_read,
-  .llseek         = seq_lseek,
-  .release        = single_release,
-  .write          = dac960_user_command_proc_write,
-};
-
-/*
-  DAC960_CreateProcEntries creates the /proc/rd/... entries for the
-  DAC960 Driver.
-*/
-
-static void DAC960_CreateProcEntries(DAC960_Controller_T *Controller)
-{
-  struct proc_dir_entry *ControllerProcEntry;
-
-  if (DAC960_ProcDirectoryEntry == NULL) {
-    DAC960_ProcDirectoryEntry = proc_mkdir("rd", NULL);
-    proc_create_single("status", 0, DAC960_ProcDirectoryEntry,
-                       dac960_proc_show);
-  }
-
-  snprintf(Controller->ControllerName, sizeof(Controller->ControllerName),
-           "c%d", Controller->ControllerNumber);
-  ControllerProcEntry = proc_mkdir(Controller->ControllerName,
-                                   DAC960_ProcDirectoryEntry);
-  proc_create_single_data("initial_status", 0, ControllerProcEntry,
-                          dac960_initial_status_proc_show, Controller);
-  proc_create_single_data("current_status", 0, ControllerProcEntry,
-                          dac960_current_status_proc_show, Controller);
-  proc_create_data("user_command", 0600, ControllerProcEntry, &dac960_user_command_proc_fops, Controller);
-  Controller->ControllerProcEntry = ControllerProcEntry;
-}
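The /proc wiring above leans on the seq_file single_open family: proc_create_single_data() stores the controller pointer with the entry, and it surfaces in the show routine as m->private, so one show function can serve every controller. A minimal sketch of that pattern, with hypothetical names:

    static int my_status_show(struct seq_file *m, void *v)
    {
            struct ctrl *c = m->private;    /* set by proc_create_single_data() */

            seq_printf(m, "%.*s", c->status_len, c->status_buf);
            return 0;
    }

    /* per controller:  dir = proc_mkdir(name, parent);                  */
    /* proc_create_single_data("status", 0, dir, my_status_show, c);     */

Only user_command needs a full file_operations, because it must accept writes as well as reads.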
-
-
-/*
-  DAC960_DestroyProcEntries destroys the /proc/rd/... entries for the
-  DAC960 Driver.
-*/
-
-static void DAC960_DestroyProcEntries(DAC960_Controller_T *Controller)
-{
-  if (Controller->ControllerProcEntry == NULL)
-    return;
-  remove_proc_entry("initial_status", Controller->ControllerProcEntry);
-  remove_proc_entry("current_status", Controller->ControllerProcEntry);
-  remove_proc_entry("user_command", Controller->ControllerProcEntry);
-  remove_proc_entry(Controller->ControllerName, DAC960_ProcDirectoryEntry);
-  Controller->ControllerProcEntry = NULL;
-}
-
-#ifdef DAC960_GAM_MINOR
-
-static long DAC960_gam_get_controller_info(DAC960_ControllerInfo_T __user *UserSpaceControllerInfo)
-{
-  DAC960_ControllerInfo_T ControllerInfo;
-  DAC960_Controller_T *Controller;
-  int ControllerNumber;
-  long ErrorCode;
-
-  if (UserSpaceControllerInfo == NULL)
-    ErrorCode = -EINVAL;
-  else ErrorCode = get_user(ControllerNumber,
-                            &UserSpaceControllerInfo->ControllerNumber);
-  if (ErrorCode != 0)
-    goto out;
-  ErrorCode = -ENXIO;
-  if (ControllerNumber < 0 ||
-      ControllerNumber > DAC960_ControllerCount - 1) {
-    goto out;
-  }
-  Controller = DAC960_Controllers[ControllerNumber];
-  if (Controller == NULL)
-    goto out;
-  memset(&ControllerInfo, 0, sizeof(DAC960_ControllerInfo_T));
-  ControllerInfo.ControllerNumber = ControllerNumber;
-  ControllerInfo.FirmwareType = Controller->FirmwareType;
-  ControllerInfo.Channels = Controller->Channels;
-  ControllerInfo.Targets = Controller->Targets;
-  ControllerInfo.PCI_Bus = Controller->Bus;
-  ControllerInfo.PCI_Device = Controller->Device;
-  ControllerInfo.PCI_Function = Controller->Function;
-  ControllerInfo.IRQ_Channel = Controller->IRQ_Channel;
-  ControllerInfo.PCI_Address = Controller->PCI_Address;
-  strcpy(ControllerInfo.ModelName, Controller->ModelName);
-  strcpy(ControllerInfo.FirmwareVersion, Controller->FirmwareVersion);
-  ErrorCode = (copy_to_user(UserSpaceControllerInfo, &ControllerInfo,
-                            sizeof(DAC960_ControllerInfo_T)) ? -EFAULT : 0);
-out:
-  return ErrorCode;
-}
-
-static long DAC960_gam_v1_execute_command(DAC960_V1_UserCommand_T __user *UserSpaceUserCommand)
-{
-  DAC960_V1_UserCommand_T UserCommand;
-  DAC960_Controller_T *Controller;
-  DAC960_Command_T *Command = NULL;
-  DAC960_V1_CommandOpcode_T CommandOpcode;
-  DAC960_V1_CommandStatus_T CommandStatus;
-  DAC960_V1_DCDB_T DCDB;
-  DAC960_V1_DCDB_T *DCDB_IOBUF = NULL;
-  dma_addr_t DCDB_IOBUFDMA;
-  unsigned long flags;
-  int ControllerNumber, DataTransferLength;
-  unsigned char *DataTransferBuffer = NULL;
-  dma_addr_t DataTransferBufferDMA;
-  long ErrorCode;
-
-  if (UserSpaceUserCommand == NULL) {
-    ErrorCode = -EINVAL;
-    goto out;
-  }
-  if (copy_from_user(&UserCommand, UserSpaceUserCommand,
-                     sizeof(DAC960_V1_UserCommand_T))) {
-    ErrorCode = -EFAULT;
-    goto out;
-  }
-  ControllerNumber = UserCommand.ControllerNumber;
-  ErrorCode = -ENXIO;
-  if (ControllerNumber < 0 ||
-      ControllerNumber > DAC960_ControllerCount - 1)
-    goto out;
-  Controller = DAC960_Controllers[ControllerNumber];
-  if (Controller == NULL)
-    goto out;
-  ErrorCode = -EINVAL;
-  if (Controller->FirmwareType != DAC960_V1_Controller)
-    goto out;
-  CommandOpcode = UserCommand.CommandMailbox.Common.CommandOpcode;
-  DataTransferLength = UserCommand.DataTransferLength;
-  if (CommandOpcode & 0x80)
-    goto out;
-  if (CommandOpcode == DAC960_V1_DCDB)
-    {
-      if (copy_from_user(&DCDB, UserCommand.DCDB,
-                         sizeof(DAC960_V1_DCDB_T))) {
-        ErrorCode = -EFAULT;
-        goto out;
-      }
-      if (DCDB.Channel >= DAC960_V1_MaxChannels)
-        goto out;
-      if (!((DataTransferLength == 0 &&
-             DCDB.Direction
-             == DAC960_V1_DCDB_NoDataTransfer) ||
-            (DataTransferLength > 0 &&
-             DCDB.Direction
-             == DAC960_V1_DCDB_DataTransferDeviceToSystem) ||
-            (DataTransferLength < 0 &&
-             DCDB.Direction
-             == DAC960_V1_DCDB_DataTransferSystemToDevice)))
-        goto out;
-      if (((DCDB.TransferLengthHigh4 << 16) | DCDB.TransferLength)
-          != abs(DataTransferLength))
-        goto out;
-      DCDB_IOBUF = pci_alloc_consistent(Controller->PCIDevice,
-                                        sizeof(DAC960_V1_DCDB_T), &DCDB_IOBUFDMA);
-      if (DCDB_IOBUF == NULL) {
-        ErrorCode = -ENOMEM;
-        goto out;
-      }
-    }
-  ErrorCode = -ENOMEM;
-  if (DataTransferLength > 0)
-    {
-      DataTransferBuffer = pci_zalloc_consistent(Controller->PCIDevice,
-                                                 DataTransferLength,
-                                                 &DataTransferBufferDMA);
-      if (DataTransferBuffer == NULL)
-        goto out;
-    }
-  else if (DataTransferLength < 0)
-    {
-      DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
-                -DataTransferLength, &DataTransferBufferDMA);
-      if (DataTransferBuffer == NULL)
-        goto out;
-      if (copy_from_user(DataTransferBuffer,
-                         UserCommand.DataTransferBuffer,
-                         -DataTransferLength)) {
-        ErrorCode = -EFAULT;
-        goto out;
-      }
-    }
-  if (CommandOpcode == DAC960_V1_DCDB)
-    {
-      spin_lock_irqsave(&Controller->queue_lock, flags);
-      while ((Command = DAC960_AllocateCommand(Controller)) == NULL)
-        DAC960_WaitForCommand(Controller);
-      while (Controller->V1.DirectCommandActive[DCDB.Channel]
-                                               [DCDB.TargetID])
-        {
-          spin_unlock_irq(&Controller->queue_lock);
-          __wait_event(Controller->CommandWaitQueue,
-                       !Controller->V1.DirectCommandActive
-                        [DCDB.Channel][DCDB.TargetID]);
-          spin_lock_irq(&Controller->queue_lock);
-        }
-      Controller->V1.DirectCommandActive[DCDB.Channel]
-                                        [DCDB.TargetID] = true;
-      spin_unlock_irqrestore(&Controller->queue_lock, flags);
-      DAC960_V1_ClearCommand(Command);
-      Command->CommandType = DAC960_ImmediateCommand;
-      memcpy(&Command->V1.CommandMailbox, &UserCommand.CommandMailbox,
-             sizeof(DAC960_V1_CommandMailbox_T));
-      Command->V1.CommandMailbox.Type3.BusAddress = DCDB_IOBUFDMA;
-      DCDB.BusAddress = DataTransferBufferDMA;
-      memcpy(DCDB_IOBUF, &DCDB, sizeof(DAC960_V1_DCDB_T));
-    }
-  else
-    {
-      spin_lock_irqsave(&Controller->queue_lock, flags);
-      while ((Command = DAC960_AllocateCommand(Controller)) == NULL)
-        DAC960_WaitForCommand(Controller);
-      spin_unlock_irqrestore(&Controller->queue_lock, flags);
-      DAC960_V1_ClearCommand(Command);
-      Command->CommandType = DAC960_ImmediateCommand;
-      memcpy(&Command->V1.CommandMailbox, &UserCommand.CommandMailbox,
-             sizeof(DAC960_V1_CommandMailbox_T));
-      if (DataTransferBuffer != NULL)
-        Command->V1.CommandMailbox.Type3.BusAddress =
-          DataTransferBufferDMA;
-    }
-  DAC960_ExecuteCommand(Command);
-  CommandStatus = Command->V1.CommandStatus;
-  spin_lock_irqsave(&Controller->queue_lock, flags);
-  DAC960_DeallocateCommand(Command);
-  spin_unlock_irqrestore(&Controller->queue_lock, flags);
-  if (DataTransferLength > 0)
-    {
-      if (copy_to_user(UserCommand.DataTransferBuffer,
-                       DataTransferBuffer, DataTransferLength)) {
-        ErrorCode = -EFAULT;
-        goto Failure1;
-      }
-    }
-  if (CommandOpcode == DAC960_V1_DCDB)
-    {
-      /*
-        I don't believe Target or Channel in the DCDB_IOBUF
-        should be any different from the contents of DCDB.
-      */
-      Controller->V1.DirectCommandActive[DCDB.Channel]
-                                        [DCDB.TargetID] = false;
-      if (copy_to_user(UserCommand.DCDB, DCDB_IOBUF,
-                       sizeof(DAC960_V1_DCDB_T))) {
-        ErrorCode = -EFAULT;
-        goto Failure1;
-      }
-    }
-  ErrorCode = CommandStatus;
- Failure1:
-  if (DataTransferBuffer != NULL)
-    pci_free_consistent(Controller->PCIDevice, abs(DataTransferLength),
-                        DataTransferBuffer, DataTransferBufferDMA);
-  if (DCDB_IOBUF != NULL)
-    pci_free_consistent(Controller->PCIDevice, sizeof(DAC960_V1_DCDB_T),
-                        DCDB_IOBUF, DCDB_IOBUFDMA);
- out:
-  return ErrorCode;
-}
-
-static long DAC960_gam_v2_execute_command(DAC960_V2_UserCommand_T __user *UserSpaceUserCommand)
-{
-  DAC960_V2_UserCommand_T UserCommand;
-  DAC960_Controller_T *Controller;
-  DAC960_Command_T *Command = NULL;
-  DAC960_V2_CommandMailbox_T *CommandMailbox;
-  DAC960_V2_CommandStatus_T CommandStatus;
-  unsigned long flags;
-  int ControllerNumber, DataTransferLength;
-  int DataTransferResidue, RequestSenseLength;
-  unsigned char *DataTransferBuffer = NULL;
-  dma_addr_t DataTransferBufferDMA;
-  unsigned char *RequestSenseBuffer = NULL;
-  dma_addr_t RequestSenseBufferDMA;
-  long ErrorCode = -EINVAL;
-
-  if (UserSpaceUserCommand == NULL)
-    goto out;
-  if (copy_from_user(&UserCommand, UserSpaceUserCommand,
-                     sizeof(DAC960_V2_UserCommand_T))) {
-    ErrorCode = -EFAULT;
-    goto out;
-  }
-  ErrorCode = -ENXIO;
-  ControllerNumber = UserCommand.ControllerNumber;
-  if (ControllerNumber < 0 ||
-      ControllerNumber > DAC960_ControllerCount - 1)
-    goto out;
-  Controller = DAC960_Controllers[ControllerNumber];
-  if (Controller == NULL)
-    goto out;
-  if (Controller->FirmwareType != DAC960_V2_Controller){
-    ErrorCode = -EINVAL;
-    goto out;
-  }
-  DataTransferLength = UserCommand.DataTransferLength;
-  ErrorCode = -ENOMEM;
-  if (DataTransferLength > 0)
-    {
-      DataTransferBuffer = pci_zalloc_consistent(Controller->PCIDevice,
-                                                 DataTransferLength,
-                                                 &DataTransferBufferDMA);
-      if (DataTransferBuffer == NULL)
-        goto out;
-    }
-  else if (DataTransferLength < 0)
-    {
-      DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
-                -DataTransferLength, &DataTransferBufferDMA);
-      if (DataTransferBuffer == NULL)
-        goto out;
-      if (copy_from_user(DataTransferBuffer,
-                         UserCommand.DataTransferBuffer,
-                         -DataTransferLength)) {
-        ErrorCode = -EFAULT;
-        goto Failure2;
-      }
-    }
-  RequestSenseLength = UserCommand.RequestSenseLength;
-  if (RequestSenseLength > 0)
-    {
-      RequestSenseBuffer = pci_zalloc_consistent(Controller->PCIDevice,
-                                                 RequestSenseLength,
-                                                 &RequestSenseBufferDMA);
-      if (RequestSenseBuffer == NULL)
-        {
-          ErrorCode = -ENOMEM;
-          goto Failure2;
-        }
-    }
-  spin_lock_irqsave(&Controller->queue_lock, flags);
-  while ((Command = DAC960_AllocateCommand(Controller)) == NULL)
-    DAC960_WaitForCommand(Controller);
-  spin_unlock_irqrestore(&Controller->queue_lock, flags);
-  DAC960_V2_ClearCommand(Command);
-  Command->CommandType = DAC960_ImmediateCommand;
-  CommandMailbox = &Command->V2.CommandMailbox;
-  memcpy(CommandMailbox, &UserCommand.CommandMailbox,
-         sizeof(DAC960_V2_CommandMailbox_T));
-  CommandMailbox->Common.CommandControlBits
-                .AdditionalScatterGatherListMemory = false;
-  CommandMailbox->Common.CommandControlBits
-                .NoAutoRequestSense = true;
-  CommandMailbox->Common.DataTransferSize = 0;
-  CommandMailbox->Common.DataTransferPageNumber = 0;
-  memset(&CommandMailbox->Common.DataTransferMemoryAddress, 0,
-         sizeof(DAC960_V2_DataTransferMemoryAddress_T));
-  if (DataTransferLength != 0)
-    {
-      if (DataTransferLength > 0)
-        {
-          CommandMailbox->Common.CommandControlBits
-                        .DataTransferControllerToHost = true;
-          CommandMailbox->Common.DataTransferSize = DataTransferLength;
-        }
-      else
-        {
-          CommandMailbox->Common.CommandControlBits
-                        .DataTransferControllerToHost = false;
-          CommandMailbox->Common.DataTransferSize = -DataTransferLength;
-        }
-      CommandMailbox->Common.DataTransferMemoryAddress
-                    .ScatterGatherSegments[0]
-                    .SegmentDataPointer = DataTransferBufferDMA;
-      CommandMailbox->Common.DataTransferMemoryAddress
-                    .ScatterGatherSegments[0]
-                    .SegmentByteCount =
-        CommandMailbox->Common.DataTransferSize;
-    }
-  if (RequestSenseLength > 0)
-    {
-      CommandMailbox->Common.CommandControlBits
-                    .NoAutoRequestSense = false;
-      CommandMailbox->Common.RequestSenseSize = RequestSenseLength;
-      CommandMailbox->Common.RequestSenseBusAddress =
-        RequestSenseBufferDMA;
-    }
-  DAC960_ExecuteCommand(Command);
-  CommandStatus = Command->V2.CommandStatus;
-  RequestSenseLength = Command->V2.RequestSenseLength;
-  DataTransferResidue = Command->V2.DataTransferResidue;
-  spin_lock_irqsave(&Controller->queue_lock, flags);
-  DAC960_DeallocateCommand(Command);
-  spin_unlock_irqrestore(&Controller->queue_lock, flags);
-  if (RequestSenseLength > UserCommand.RequestSenseLength)
-    RequestSenseLength = UserCommand.RequestSenseLength;
-  if (copy_to_user(&UserSpaceUserCommand->DataTransferLength,
-                   &DataTransferResidue,
-                   sizeof(DataTransferResidue))) {
-    ErrorCode = -EFAULT;
-    goto Failure2;
-  }
-  if (copy_to_user(&UserSpaceUserCommand->RequestSenseLength,
-                   &RequestSenseLength, sizeof(RequestSenseLength))) {
-    ErrorCode = -EFAULT;
-    goto Failure2;
-  }
-  if (DataTransferLength > 0)
-    {
-      if (copy_to_user(UserCommand.DataTransferBuffer,
-                       DataTransferBuffer, DataTransferLength)) {
-        ErrorCode = -EFAULT;
-        goto Failure2;
-      }
-    }
-  if (RequestSenseLength > 0)
-    {
-      if (copy_to_user(UserCommand.RequestSenseBuffer,
-                       RequestSenseBuffer, RequestSenseLength)) {
-        ErrorCode = -EFAULT;
-        goto Failure2;
-      }
-    }
-  ErrorCode = CommandStatus;
- Failure2:
-  pci_free_consistent(Controller->PCIDevice, abs(DataTransferLength),
-                      DataTransferBuffer, DataTransferBufferDMA);
-  if (RequestSenseBuffer != NULL)
-    pci_free_consistent(Controller->PCIDevice, RequestSenseLength,
-                        RequestSenseBuffer, RequestSenseBufferDMA);
-out:
-  return ErrorCode;
-}
-
-static long DAC960_gam_v2_get_health_status(DAC960_V2_GetHealthStatus_T __user 
*UserSpaceGetHealthStatus) -{ - DAC960_V2_GetHealthStatus_T GetHealthStatus; - DAC960_V2_HealthStatusBuffer_T HealthStatusBuffer; - DAC960_Controller_T *Controller; - int ControllerNumber; - long ErrorCode; - - if (UserSpaceGetHealthStatus == NULL) { - ErrorCode = -EINVAL; - goto out; - } - if (copy_from_user(&GetHealthStatus, UserSpaceGetHealthStatus, - sizeof(DAC960_V2_GetHealthStatus_T))) { - ErrorCode = -EFAULT; - goto out; - } - ErrorCode = -ENXIO; - ControllerNumber = GetHealthStatus.ControllerNumber; - if (ControllerNumber < 0 || - ControllerNumber > DAC960_ControllerCount - 1) - goto out; - Controller = DAC960_Controllers[ControllerNumber]; - if (Controller == NULL) - goto out; - if (Controller->FirmwareType != DAC960_V2_Controller) { - ErrorCode = -EINVAL; - goto out; - } - if (copy_from_user(&HealthStatusBuffer, - GetHealthStatus.HealthStatusBuffer, - sizeof(DAC960_V2_HealthStatusBuffer_T))) { - ErrorCode = -EFAULT; - goto out; - } - ErrorCode = wait_event_interruptible_timeout(Controller->HealthStatusWaitQueue, - !(Controller->V2.HealthStatusBuffer->StatusChangeCounter - == HealthStatusBuffer.StatusChangeCounter && - Controller->V2.HealthStatusBuffer->NextEventSequenceNumber - == HealthStatusBuffer.NextEventSequenceNumber), - DAC960_MonitoringTimerInterval); - if (ErrorCode == -ERESTARTSYS) { - ErrorCode = -EINTR; - goto out; - } - if (copy_to_user(GetHealthStatus.HealthStatusBuffer, - Controller->V2.HealthStatusBuffer, - sizeof(DAC960_V2_HealthStatusBuffer_T))) - ErrorCode = -EFAULT; - else - ErrorCode = 0; - -out: - return ErrorCode; -} - -/* - * DAC960_gam_ioctl is the ioctl function for performing RAID operations. -*/ - -static long DAC960_gam_ioctl(struct file *file, unsigned int Request, - unsigned long Argument) -{ - long ErrorCode = 0; - void __user *argp = (void __user *)Argument; - if (!capable(CAP_SYS_ADMIN)) return -EACCES; - - mutex_lock(&DAC960_mutex); - switch (Request) - { - case DAC960_IOCTL_GET_CONTROLLER_COUNT: - ErrorCode = DAC960_ControllerCount; - break; - case DAC960_IOCTL_GET_CONTROLLER_INFO: - ErrorCode = DAC960_gam_get_controller_info(argp); - break; - case DAC960_IOCTL_V1_EXECUTE_COMMAND: - ErrorCode = DAC960_gam_v1_execute_command(argp); - break; - case DAC960_IOCTL_V2_EXECUTE_COMMAND: - ErrorCode = DAC960_gam_v2_execute_command(argp); - break; - case DAC960_IOCTL_V2_GET_HEALTH_STATUS: - ErrorCode = DAC960_gam_v2_get_health_status(argp); - break; - default: - ErrorCode = -ENOTTY; - } - mutex_unlock(&DAC960_mutex); - return ErrorCode; -} - -static const struct file_operations DAC960_gam_fops = { - .owner = THIS_MODULE, - .unlocked_ioctl = DAC960_gam_ioctl, - .llseek = noop_llseek, -}; - -static struct miscdevice DAC960_gam_dev = { - DAC960_GAM_MINOR, - "dac960_gam", - &DAC960_gam_fops -}; - -static int DAC960_gam_init(void) -{ - int ret; - - ret = misc_register(&DAC960_gam_dev); - if (ret) - printk(KERN_ERR "DAC960_gam: can't misc_register on minor %d\n", DAC960_GAM_MINOR); - return ret; -} - -static void DAC960_gam_cleanup(void) -{ - misc_deregister(&DAC960_gam_dev); -} - -#endif /* DAC960_GAM_MINOR */ - -static struct DAC960_privdata DAC960_GEM_privdata = { - .HardwareType = DAC960_GEM_Controller, - .FirmwareType = DAC960_V2_Controller, - .InterruptHandler = DAC960_GEM_InterruptHandler, - .MemoryWindowSize = DAC960_GEM_RegisterWindowSize, -}; - - -static struct DAC960_privdata DAC960_BA_privdata = { - .HardwareType = DAC960_BA_Controller, - .FirmwareType = DAC960_V2_Controller, - .InterruptHandler = DAC960_BA_InterruptHandler, - .MemoryWindowSize = 
DAC960_BA_RegisterWindowSize, -}; - -static struct DAC960_privdata DAC960_LP_privdata = { - .HardwareType = DAC960_LP_Controller, - .FirmwareType = DAC960_V2_Controller, - .InterruptHandler = DAC960_LP_InterruptHandler, - .MemoryWindowSize = DAC960_LP_RegisterWindowSize, -}; - -static struct DAC960_privdata DAC960_LA_privdata = { - .HardwareType = DAC960_LA_Controller, - .FirmwareType = DAC960_V1_Controller, - .InterruptHandler = DAC960_LA_InterruptHandler, - .MemoryWindowSize = DAC960_LA_RegisterWindowSize, -}; - -static struct DAC960_privdata DAC960_PG_privdata = { - .HardwareType = DAC960_PG_Controller, - .FirmwareType = DAC960_V1_Controller, - .InterruptHandler = DAC960_PG_InterruptHandler, - .MemoryWindowSize = DAC960_PG_RegisterWindowSize, -}; - -static struct DAC960_privdata DAC960_PD_privdata = { - .HardwareType = DAC960_PD_Controller, - .FirmwareType = DAC960_V1_Controller, - .InterruptHandler = DAC960_PD_InterruptHandler, - .MemoryWindowSize = DAC960_PD_RegisterWindowSize, -}; - -static struct DAC960_privdata DAC960_P_privdata = { - .HardwareType = DAC960_P_Controller, - .FirmwareType = DAC960_V1_Controller, - .InterruptHandler = DAC960_P_InterruptHandler, - .MemoryWindowSize = DAC960_PD_RegisterWindowSize, -}; - -static const struct pci_device_id DAC960_id_table[] = { - { - .vendor = PCI_VENDOR_ID_MYLEX, - .device = PCI_DEVICE_ID_MYLEX_DAC960_GEM, - .subvendor = PCI_VENDOR_ID_MYLEX, - .subdevice = PCI_ANY_ID, - .driver_data = (unsigned long) &DAC960_GEM_privdata, - }, - { - .vendor = PCI_VENDOR_ID_MYLEX, - .device = PCI_DEVICE_ID_MYLEX_DAC960_BA, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (unsigned long) &DAC960_BA_privdata, - }, - { - .vendor = PCI_VENDOR_ID_MYLEX, - .device = PCI_DEVICE_ID_MYLEX_DAC960_LP, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (unsigned long) &DAC960_LP_privdata, - }, - { - .vendor = PCI_VENDOR_ID_DEC, - .device = PCI_DEVICE_ID_DEC_21285, - .subvendor = PCI_VENDOR_ID_MYLEX, - .subdevice = PCI_DEVICE_ID_MYLEX_DAC960_LA, - .driver_data = (unsigned long) &DAC960_LA_privdata, - }, - { - .vendor = PCI_VENDOR_ID_MYLEX, - .device = PCI_DEVICE_ID_MYLEX_DAC960_PG, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (unsigned long) &DAC960_PG_privdata, - }, - { - .vendor = PCI_VENDOR_ID_MYLEX, - .device = PCI_DEVICE_ID_MYLEX_DAC960_PD, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (unsigned long) &DAC960_PD_privdata, - }, - { - .vendor = PCI_VENDOR_ID_MYLEX, - .device = PCI_DEVICE_ID_MYLEX_DAC960_P, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = (unsigned long) &DAC960_P_privdata, - }, - {0, }, -}; - -MODULE_DEVICE_TABLE(pci, DAC960_id_table); - -static struct pci_driver DAC960_pci_driver = { - .name = "DAC960", - .id_table = DAC960_id_table, - .probe = DAC960_Probe, - .remove = DAC960_Remove, -}; - -static int __init DAC960_init_module(void) -{ - int ret; - - ret = pci_register_driver(&DAC960_pci_driver); -#ifdef DAC960_GAM_MINOR - if (!ret) - DAC960_gam_init(); -#endif - return ret; -} - -static void __exit DAC960_cleanup_module(void) -{ - int i; - -#ifdef DAC960_GAM_MINOR - DAC960_gam_cleanup(); -#endif - - for (i = 0; i < DAC960_ControllerCount; i++) { - DAC960_Controller_T *Controller = DAC960_Controllers[i]; - if (Controller == NULL) - continue; - DAC960_FinalizeController(Controller); - } - if (DAC960_ProcDirectoryEntry != NULL) { - remove_proc_entry("rd/status", NULL); - remove_proc_entry("rd", NULL); - } - DAC960_ControllerCount = 0; 
- pci_unregister_driver(&DAC960_pci_driver); -} - -module_init(DAC960_init_module); -module_exit(DAC960_cleanup_module); - -MODULE_LICENSE("GPL"); diff --git a/drivers/block/DAC960.h b/drivers/block/DAC960.h deleted file mode 100644 index 1439e651928b..000000000000 --- a/drivers/block/DAC960.h +++ /dev/null @@ -1,4414 +0,0 @@ -/* - - Linux Driver for Mylex DAC960/AcceleRAID/eXtremeRAID PCI RAID Controllers - - Copyright 1998-2001 by Leonard N. Zubkoff - - This program is free software; you may redistribute and/or modify it under - the terms of the GNU General Public License Version 2 as published by the - Free Software Foundation. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY, without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for complete details. - - The author respectfully requests that any modifications to this software be - sent directly to him for evaluation and testing. - -*/ - - -/* - Define the maximum number of DAC960 Controllers supported by this driver. -*/ - -#define DAC960_MaxControllers 8 - - -/* - Define the maximum number of Controller Channels supported by DAC960 - V1 and V2 Firmware Controllers. -*/ - -#define DAC960_V1_MaxChannels 3 -#define DAC960_V2_MaxChannels 4 - - -/* - Define the maximum number of Targets per Channel supported by DAC960 - V1 and V2 Firmware Controllers. -*/ - -#define DAC960_V1_MaxTargets 16 -#define DAC960_V2_MaxTargets 128 - - -/* - Define the maximum number of Logical Drives supported by DAC960 - V1 and V2 Firmware Controllers. -*/ - -#define DAC960_MaxLogicalDrives 32 - - -/* - Define the maximum number of Physical Devices supported by DAC960 - V1 and V2 Firmware Controllers. -*/ - -#define DAC960_V1_MaxPhysicalDevices 45 -#define DAC960_V2_MaxPhysicalDevices 272 - -/* - Define a 32/64 bit I/O Address data type. -*/ - -typedef unsigned long DAC960_IO_Address_T; - - -/* - Define a 32/64 bit PCI Bus Address data type. -*/ - -typedef unsigned long DAC960_PCI_Address_T; - - -/* - Define a 32 bit Bus Address data type. -*/ - -typedef unsigned int DAC960_BusAddress32_T; - - -/* - Define a 64 bit Bus Address data type. -*/ - -typedef unsigned long long DAC960_BusAddress64_T; - - -/* - Define a 32 bit Byte Count data type. -*/ - -typedef unsigned int DAC960_ByteCount32_T; - - -/* - Define a 64 bit Byte Count data type. -*/ - -typedef unsigned long long DAC960_ByteCount64_T; - - -/* - dma_loaf is used by helper routines to divide a region of - dma mapped memory into smaller pieces, where those pieces - are not of uniform size. - */ - -struct dma_loaf { - void *cpu_base; - dma_addr_t dma_base; - size_t length; - void *cpu_free; - dma_addr_t dma_free; -}; - -/* - Define the SCSI INQUIRY Standard Data structure. 
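The dma_loaf bookkeeping above is deliberately simple: one large pci_alloc_consistent() region is handed out piecewise by advancing the cpu_free and dma_free cursors in lock step. A minimal kernel-style sketch of such a slicing helper, assuming the struct dma_loaf definition above (illustrative; not the verbatim helper the removed DAC960.c carried):

static void *slice_dma_loaf(struct dma_loaf *loaf, size_t len,
			    dma_addr_t *dma_handle)
{
	void *cpu_addr = loaf->cpu_free;

	/* Carve the next piece off the front of the free region and
	   advance both the CPU and bus-address cursors together. */
	BUG_ON(loaf->cpu_free + len > loaf->cpu_base + loaf->length);
	*dma_handle = loaf->dma_free;
	loaf->cpu_free += len;
	loaf->dma_free += len;
	return cpu_addr;
}

Because both cursors move by the same amount, a piece's bus address always corresponds to its CPU address, which is what lets callers pack differently sized mailboxes and status buffers into one allocation.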
-*/ - -typedef struct DAC960_SCSI_Inquiry -{ - unsigned char PeripheralDeviceType:5; /* Byte 0 Bits 0-4 */ - unsigned char PeripheralQualifier:3; /* Byte 0 Bits 5-7 */ - unsigned char DeviceTypeModifier:7; /* Byte 1 Bits 0-6 */ - bool RMB:1; /* Byte 1 Bit 7 */ - unsigned char ANSI_ApprovedVersion:3; /* Byte 2 Bits 0-2 */ - unsigned char ECMA_Version:3; /* Byte 2 Bits 3-5 */ - unsigned char ISO_Version:2; /* Byte 2 Bits 6-7 */ - unsigned char ResponseDataFormat:4; /* Byte 3 Bits 0-3 */ - unsigned char :2; /* Byte 3 Bits 4-5 */ - bool TrmIOP:1; /* Byte 3 Bit 6 */ - bool AENC:1; /* Byte 3 Bit 7 */ - unsigned char AdditionalLength; /* Byte 4 */ - unsigned char :8; /* Byte 5 */ - unsigned char :8; /* Byte 6 */ - bool SftRe:1; /* Byte 7 Bit 0 */ - bool CmdQue:1; /* Byte 7 Bit 1 */ - bool :1; /* Byte 7 Bit 2 */ - bool Linked:1; /* Byte 7 Bit 3 */ - bool Sync:1; /* Byte 7 Bit 4 */ - bool WBus16:1; /* Byte 7 Bit 5 */ - bool WBus32:1; /* Byte 7 Bit 6 */ - bool RelAdr:1; /* Byte 7 Bit 7 */ - unsigned char VendorIdentification[8]; /* Bytes 8-15 */ - unsigned char ProductIdentification[16]; /* Bytes 16-31 */ - unsigned char ProductRevisionLevel[4]; /* Bytes 32-35 */ -} -DAC960_SCSI_Inquiry_T; - - -/* - Define the SCSI INQUIRY Unit Serial Number structure. -*/ - -typedef struct DAC960_SCSI_Inquiry_UnitSerialNumber -{ - unsigned char PeripheralDeviceType:5; /* Byte 0 Bits 0-4 */ - unsigned char PeripheralQualifier:3; /* Byte 0 Bits 5-7 */ - unsigned char PageCode; /* Byte 1 */ - unsigned char :8; /* Byte 2 */ - unsigned char PageLength; /* Byte 3 */ - unsigned char ProductSerialNumber[28]; /* Bytes 4-31 */ -} -DAC960_SCSI_Inquiry_UnitSerialNumber_T; - - -/* - Define the SCSI REQUEST SENSE Sense Key type. -*/ - -typedef enum -{ - DAC960_SenseKey_NoSense = 0x0, - DAC960_SenseKey_RecoveredError = 0x1, - DAC960_SenseKey_NotReady = 0x2, - DAC960_SenseKey_MediumError = 0x3, - DAC960_SenseKey_HardwareError = 0x4, - DAC960_SenseKey_IllegalRequest = 0x5, - DAC960_SenseKey_UnitAttention = 0x6, - DAC960_SenseKey_DataProtect = 0x7, - DAC960_SenseKey_BlankCheck = 0x8, - DAC960_SenseKey_VendorSpecific = 0x9, - DAC960_SenseKey_CopyAborted = 0xA, - DAC960_SenseKey_AbortedCommand = 0xB, - DAC960_SenseKey_Equal = 0xC, - DAC960_SenseKey_VolumeOverflow = 0xD, - DAC960_SenseKey_Miscompare = 0xE, - DAC960_SenseKey_Reserved = 0xF -} -__attribute__ ((packed)) -DAC960_SCSI_RequestSenseKey_T; - - -/* - Define the SCSI REQUEST SENSE structure. -*/ - -typedef struct DAC960_SCSI_RequestSense -{ - unsigned char ErrorCode:7; /* Byte 0 Bits 0-6 */ - bool Valid:1; /* Byte 0 Bit 7 */ - unsigned char SegmentNumber; /* Byte 1 */ - DAC960_SCSI_RequestSenseKey_T SenseKey:4; /* Byte 2 Bits 0-3 */ - unsigned char :1; /* Byte 2 Bit 4 */ - bool ILI:1; /* Byte 2 Bit 5 */ - bool EOM:1; /* Byte 2 Bit 6 */ - bool Filemark:1; /* Byte 2 Bit 7 */ - unsigned char Information[4]; /* Bytes 3-6 */ - unsigned char AdditionalSenseLength; /* Byte 7 */ - unsigned char CommandSpecificInformation[4]; /* Bytes 8-11 */ - unsigned char AdditionalSenseCode; /* Byte 12 */ - unsigned char AdditionalSenseCodeQualifier; /* Byte 13 */ -} -DAC960_SCSI_RequestSense_T; - - -/* - Define the DAC960 V1 Firmware Command Opcodes. 
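The identification fields in the INQUIRY structure above are plain byte arrays at fixed offsets (bytes 8-35), so they can be copied out without touching the bitfields. A small user-space sketch, assuming the DAC960_SCSI_Inquiry_T definition is in scope and the little-endian bitfield layout the driver relied on (print_inquiry_identification is a hypothetical name):

#include <stdio.h>
#include <string.h>

static void print_inquiry_identification(const DAC960_SCSI_Inquiry_T *inq)
{
	char vendor[9], product[17], revision[5];

	/* The strings are not NUL-terminated in the INQUIRY data. */
	memcpy(vendor, inq->VendorIdentification, 8);
	vendor[8] = '\0';
	memcpy(product, inq->ProductIdentification, 16);
	product[16] = '\0';
	memcpy(revision, inq->ProductRevisionLevel, 4);
	revision[4] = '\0';
	printf("%s %s (rev %s), peripheral device type %d\n",
	       vendor, product, revision, inq->PeripheralDeviceType);
}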
-*/ - -typedef enum -{ - /* I/O Commands */ - DAC960_V1_ReadExtended = 0x33, - DAC960_V1_WriteExtended = 0x34, - DAC960_V1_ReadAheadExtended = 0x35, - DAC960_V1_ReadExtendedWithScatterGather = 0xB3, - DAC960_V1_WriteExtendedWithScatterGather = 0xB4, - DAC960_V1_Read = 0x36, - DAC960_V1_ReadWithScatterGather = 0xB6, - DAC960_V1_Write = 0x37, - DAC960_V1_WriteWithScatterGather = 0xB7, - DAC960_V1_DCDB = 0x04, - DAC960_V1_DCDBWithScatterGather = 0x84, - DAC960_V1_Flush = 0x0A, - /* Controller Status Related Commands */ - DAC960_V1_Enquiry = 0x53, - DAC960_V1_Enquiry2 = 0x1C, - DAC960_V1_GetLogicalDriveElement = 0x55, - DAC960_V1_GetLogicalDriveInformation = 0x19, - DAC960_V1_IOPortRead = 0x39, - DAC960_V1_IOPortWrite = 0x3A, - DAC960_V1_GetSDStats = 0x3E, - DAC960_V1_GetPDStats = 0x3F, - DAC960_V1_PerformEventLogOperation = 0x72, - /* Device Related Commands */ - DAC960_V1_StartDevice = 0x10, - DAC960_V1_GetDeviceState = 0x50, - DAC960_V1_StopChannel = 0x13, - DAC960_V1_StartChannel = 0x12, - DAC960_V1_ResetChannel = 0x1A, - /* Commands Associated with Data Consistency and Errors */ - DAC960_V1_Rebuild = 0x09, - DAC960_V1_RebuildAsync = 0x16, - DAC960_V1_CheckConsistency = 0x0F, - DAC960_V1_CheckConsistencyAsync = 0x1E, - DAC960_V1_RebuildStat = 0x0C, - DAC960_V1_GetRebuildProgress = 0x27, - DAC960_V1_RebuildControl = 0x1F, - DAC960_V1_ReadBadBlockTable = 0x0B, - DAC960_V1_ReadBadDataTable = 0x25, - DAC960_V1_ClearBadDataTable = 0x26, - DAC960_V1_GetErrorTable = 0x17, - DAC960_V1_AddCapacityAsync = 0x2A, - DAC960_V1_BackgroundInitializationControl = 0x2B, - /* Configuration Related Commands */ - DAC960_V1_ReadConfig2 = 0x3D, - DAC960_V1_WriteConfig2 = 0x3C, - DAC960_V1_ReadConfigurationOnDisk = 0x4A, - DAC960_V1_WriteConfigurationOnDisk = 0x4B, - DAC960_V1_ReadConfiguration = 0x4E, - DAC960_V1_ReadBackupConfiguration = 0x4D, - DAC960_V1_WriteConfiguration = 0x4F, - DAC960_V1_AddConfiguration = 0x4C, - DAC960_V1_ReadConfigurationLabel = 0x48, - DAC960_V1_WriteConfigurationLabel = 0x49, - /* Firmware Upgrade Related Commands */ - DAC960_V1_LoadImage = 0x20, - DAC960_V1_StoreImage = 0x21, - DAC960_V1_ProgramImage = 0x22, - /* Diagnostic Commands */ - DAC960_V1_SetDiagnosticMode = 0x31, - DAC960_V1_RunDiagnostic = 0x32, - /* Subsystem Service Commands */ - DAC960_V1_GetSubsystemData = 0x70, - DAC960_V1_SetSubsystemParameters = 0x71, - /* Version 2.xx Firmware Commands */ - DAC960_V1_Enquiry_Old = 0x05, - DAC960_V1_GetDeviceState_Old = 0x14, - DAC960_V1_Read_Old = 0x02, - DAC960_V1_Write_Old = 0x03, - DAC960_V1_ReadWithScatterGather_Old = 0x82, - DAC960_V1_WriteWithScatterGather_Old = 0x83 -} -__attribute__ ((packed)) -DAC960_V1_CommandOpcode_T; - - -/* - Define the DAC960 V1 Firmware Command Identifier type. -*/ - -typedef unsigned char DAC960_V1_CommandIdentifier_T; - - -/* - Define the DAC960 V1 Firmware Command Status Codes. 
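One convention worth making explicit in the opcode table above: every scatter/gather variant is its base opcode with bit 7 set (0xB6 is DAC960_V1_Read | 0x80, 0x84 is DAC960_V1_DCDB | 0x80, and so on), which is exactly what the CommandOpcode & 0x80 test in the removed DAC960_gam_v1_execute_command() rejects. Hypothetical helpers, not driver API:

static inline int
DAC960_V1_OpcodeUsesScatterGather(DAC960_V1_CommandOpcode_T Opcode)
{
	/* e.g. DAC960_V1_ReadWithScatterGather (0xB6) is
	   DAC960_V1_Read (0x36) | 0x80. */
	return (Opcode & 0x80) != 0;
}

static inline DAC960_V1_CommandOpcode_T
DAC960_V1_BaseOpcode(DAC960_V1_CommandOpcode_T Opcode)
{
	return (DAC960_V1_CommandOpcode_T)(Opcode & 0x7F);
}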
-*/ - -#define DAC960_V1_NormalCompletion 0x0000 /* Common */ -#define DAC960_V1_CheckConditionReceived 0x0002 /* Common */ -#define DAC960_V1_NoDeviceAtAddress 0x0102 /* Common */ -#define DAC960_V1_InvalidDeviceAddress 0x0105 /* Common */ -#define DAC960_V1_InvalidParameter 0x0105 /* Common */ -#define DAC960_V1_IrrecoverableDataError 0x0001 /* I/O */ -#define DAC960_V1_LogicalDriveNonexistentOrOffline 0x0002 /* I/O */ -#define DAC960_V1_AccessBeyondEndOfLogicalDrive 0x0105 /* I/O */ -#define DAC960_V1_BadDataEncountered 0x010C /* I/O */ -#define DAC960_V1_DeviceBusy 0x0008 /* DCDB */ -#define DAC960_V1_DeviceNonresponsive 0x000E /* DCDB */ -#define DAC960_V1_CommandTerminatedAbnormally 0x000F /* DCDB */ -#define DAC960_V1_UnableToStartDevice 0x0002 /* Device */ -#define DAC960_V1_InvalidChannelOrTargetOrModifier 0x0105 /* Device */ -#define DAC960_V1_ChannelBusy 0x0106 /* Device */ -#define DAC960_V1_ChannelNotStopped 0x0002 /* Device */ -#define DAC960_V1_AttemptToRebuildOnlineDrive 0x0002 /* Consistency */ -#define DAC960_V1_RebuildBadBlocksEncountered 0x0003 /* Consistency */ -#define DAC960_V1_NewDiskFailedDuringRebuild 0x0004 /* Consistency */ -#define DAC960_V1_RebuildOrCheckAlreadyInProgress 0x0106 /* Consistency */ -#define DAC960_V1_DependentDiskIsDead 0x0002 /* Consistency */ -#define DAC960_V1_InconsistentBlocksFound 0x0003 /* Consistency */ -#define DAC960_V1_InvalidOrNonredundantLogicalDrive 0x0105 /* Consistency */ -#define DAC960_V1_NoRebuildOrCheckInProgress 0x0105 /* Consistency */ -#define DAC960_V1_RebuildInProgress_DataValid 0x0000 /* Consistency */ -#define DAC960_V1_RebuildFailed_LogicalDriveFailure 0x0002 /* Consistency */ -#define DAC960_V1_RebuildFailed_BadBlocksOnOther 0x0003 /* Consistency */ -#define DAC960_V1_RebuildFailed_NewDriveFailed 0x0004 /* Consistency */ -#define DAC960_V1_RebuildSuccessful 0x0100 /* Consistency */ -#define DAC960_V1_RebuildSuccessfullyTerminated 0x0107 /* Consistency */ -#define DAC960_V1_BackgroundInitSuccessful 0x0100 /* Consistency */ -#define DAC960_V1_BackgroundInitAborted 0x0005 /* Consistency */ -#define DAC960_V1_NoBackgroundInitInProgress 0x0105 /* Consistency */ -#define DAC960_V1_AddCapacityInProgress 0x0004 /* Consistency */ -#define DAC960_V1_AddCapacityFailedOrSuspended 0x00F4 /* Consistency */ -#define DAC960_V1_Config2ChecksumError 0x0002 /* Configuration */ -#define DAC960_V1_ConfigurationSuspended 0x0106 /* Configuration */ -#define DAC960_V1_FailedToConfigureNVRAM 0x0105 /* Configuration */ -#define DAC960_V1_ConfigurationNotSavedStateChange 0x0106 /* Configuration */ -#define DAC960_V1_SubsystemNotInstalled 0x0001 /* Subsystem */ -#define DAC960_V1_SubsystemFailed 0x0002 /* Subsystem */ -#define DAC960_V1_SubsystemBusy 0x0106 /* Subsystem */ - -typedef unsigned short DAC960_V1_CommandStatus_T; - - -/* - Define the DAC960 V1 Firmware Enquiry Command reply structure. 
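Several of these codes share numeric values, so their meaning depends on the command class: 0x0002 is DAC960_V1_CheckConditionReceived for common commands but DAC960_V1_LogicalDriveNonexistentOrOffline for I/O commands. Any translation to an errno therefore has to be done per class; the removed GAM ioctl path sidesteps the problem by handing the raw CommandStatus back to user space. A hedged kernel-style sketch for the DCDB class only (hypothetical helper, not driver API):

#include <linux/errno.h>

static int DAC960_V1_DCDBStatusToErrno(DAC960_V1_CommandStatus_T Status)
{
	switch (Status) {
	case DAC960_V1_NormalCompletion:
		return 0;
	case DAC960_V1_DeviceBusy:
		return -EBUSY;
	case DAC960_V1_DeviceNonresponsive:
	case DAC960_V1_CommandTerminatedAbnormally:
	default:
		return -EIO;
	}
}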
-*/ - -typedef struct DAC960_V1_Enquiry -{ - unsigned char NumberOfLogicalDrives; /* Byte 0 */ - unsigned int :24; /* Bytes 1-3 */ - unsigned int LogicalDriveSizes[32]; /* Bytes 4-131 */ - unsigned short FlashAge; /* Bytes 132-133 */ - struct { - bool DeferredWriteError:1; /* Byte 134 Bit 0 */ - bool BatteryLow:1; /* Byte 134 Bit 1 */ - unsigned char :6; /* Byte 134 Bits 2-7 */ - } StatusFlags; - unsigned char :8; /* Byte 135 */ - unsigned char MinorFirmwareVersion; /* Byte 136 */ - unsigned char MajorFirmwareVersion; /* Byte 137 */ - enum { - DAC960_V1_NoStandbyRebuildOrCheckInProgress = 0x00, - DAC960_V1_StandbyRebuildInProgress = 0x01, - DAC960_V1_BackgroundRebuildInProgress = 0x02, - DAC960_V1_BackgroundCheckInProgress = 0x03, - DAC960_V1_StandbyRebuildCompletedWithError = 0xFF, - DAC960_V1_BackgroundRebuildOrCheckFailed_DriveFailed = 0xF0, - DAC960_V1_BackgroundRebuildOrCheckFailed_LogicalDriveFailed = 0xF1, - DAC960_V1_BackgroundRebuildOrCheckFailed_OtherCauses = 0xF2, - DAC960_V1_BackgroundRebuildOrCheckSuccessfullyTerminated = 0xF3 - } __attribute__ ((packed)) RebuildFlag; /* Byte 138 */ - unsigned char MaxCommands; /* Byte 139 */ - unsigned char OfflineLogicalDriveCount; /* Byte 140 */ - unsigned char :8; /* Byte 141 */ - unsigned short EventLogSequenceNumber; /* Bytes 142-143 */ - unsigned char CriticalLogicalDriveCount; /* Byte 144 */ - unsigned int :24; /* Bytes 145-147 */ - unsigned char DeadDriveCount; /* Byte 148 */ - unsigned char :8; /* Byte 149 */ - unsigned char RebuildCount; /* Byte 150 */ - struct { - unsigned char :3; /* Byte 151 Bits 0-2 */ - bool BatteryBackupUnitPresent:1; /* Byte 151 Bit 3 */ - unsigned char :3; /* Byte 151 Bits 4-6 */ - unsigned char :1; /* Byte 151 Bit 7 */ - } MiscFlags; - struct { - unsigned char TargetID; - unsigned char Channel; - } DeadDrives[21]; /* Bytes 152-194 */ - unsigned char Reserved[62]; /* Bytes 195-255 */ -} -__attribute__ ((packed)) -DAC960_V1_Enquiry_T; - - -/* - Define the DAC960 V1 Firmware Enquiry2 Command reply structure. 
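A kernel-style sketch of consuming the reply above (the reporting helper is hypothetical; LogicalDriveSizes[] holds at most DAC960_MaxLogicalDrives entries, so the count is clamped):

static void DAC960_V1_ReportEnquiry(const DAC960_V1_Enquiry_T *Enquiry)
{
	unsigned int i, n = Enquiry->NumberOfLogicalDrives;

	if (n > DAC960_MaxLogicalDrives)
		n = DAC960_MaxLogicalDrives;
	printk(KERN_INFO "firmware %d.%02d: %u logical drive(s), "
	       "%d critical, %d offline, %d dead drive(s)\n",
	       Enquiry->MajorFirmwareVersion, Enquiry->MinorFirmwareVersion,
	       n, Enquiry->CriticalLogicalDriveCount,
	       Enquiry->OfflineLogicalDriveCount, Enquiry->DeadDriveCount);
	for (i = 0; i < n; i++)
		printk(KERN_INFO "  logical drive %u: %u blocks\n",
		       i, Enquiry->LogicalDriveSizes[i]);
}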
-*/ - -typedef struct DAC960_V1_Enquiry2 -{ - struct { - enum { - DAC960_V1_P_PD_PU = 0x01, - DAC960_V1_PL = 0x02, - DAC960_V1_PG = 0x10, - DAC960_V1_PJ = 0x11, - DAC960_V1_PR = 0x12, - DAC960_V1_PT = 0x13, - DAC960_V1_PTL0 = 0x14, - DAC960_V1_PRL = 0x15, - DAC960_V1_PTL1 = 0x16, - DAC960_V1_1164P = 0x20 - } __attribute__ ((packed)) SubModel; /* Byte 0 */ - unsigned char ActualChannels; /* Byte 1 */ - enum { - DAC960_V1_FiveChannelBoard = 0x01, - DAC960_V1_ThreeChannelBoard = 0x02, - DAC960_V1_TwoChannelBoard = 0x03, - DAC960_V1_ThreeChannelASIC_DAC = 0x04 - } __attribute__ ((packed)) Model; /* Byte 2 */ - enum { - DAC960_V1_EISA_Controller = 0x01, - DAC960_V1_MicroChannel_Controller = 0x02, - DAC960_V1_PCI_Controller = 0x03, - DAC960_V1_SCSItoSCSI_Controller = 0x08 - } __attribute__ ((packed)) ProductFamily; /* Byte 3 */ - } HardwareID; /* Bytes 0-3 */ - /* MajorVersion.MinorVersion-FirmwareType-TurnID */ - struct { - unsigned char MajorVersion; /* Byte 4 */ - unsigned char MinorVersion; /* Byte 5 */ - unsigned char TurnID; /* Byte 6 */ - char FirmwareType; /* Byte 7 */ - } FirmwareID; /* Bytes 4-7 */ - unsigned char :8; /* Byte 8 */ - unsigned int :24; /* Bytes 9-11 */ - unsigned char ConfiguredChannels; /* Byte 12 */ - unsigned char ActualChannels; /* Byte 13 */ - unsigned char MaxTargets; /* Byte 14 */ - unsigned char MaxTags; /* Byte 15 */ - unsigned char MaxLogicalDrives; /* Byte 16 */ - unsigned char MaxArms; /* Byte 17 */ - unsigned char MaxSpans; /* Byte 18 */ - unsigned char :8; /* Byte 19 */ - unsigned int :32; /* Bytes 20-23 */ - unsigned int MemorySize; /* Bytes 24-27 */ - unsigned int CacheSize; /* Bytes 28-31 */ - unsigned int FlashMemorySize; /* Bytes 32-35 */ - unsigned int NonVolatileMemorySize; /* Bytes 36-39 */ - struct { - enum { - DAC960_V1_RamType_DRAM = 0x0, - DAC960_V1_RamType_EDO = 0x1, - DAC960_V1_RamType_SDRAM = 0x2, - DAC960_V1_RamType_Last = 0x7 - } __attribute__ ((packed)) RamType:3; /* Byte 40 Bits 0-2 */ - enum { - DAC960_V1_ErrorCorrection_None = 0x0, - DAC960_V1_ErrorCorrection_Parity = 0x1, - DAC960_V1_ErrorCorrection_ECC = 0x2, - DAC960_V1_ErrorCorrection_Last = 0x7 - } __attribute__ ((packed)) ErrorCorrection:3; /* Byte 40 Bits 3-5 */ - bool FastPageMode:1; /* Byte 40 Bit 6 */ - bool LowPowerMemory:1; /* Byte 40 Bit 7 */ - unsigned char :8; /* Bytes 41 */ - } MemoryType; - unsigned short ClockSpeed; /* Bytes 42-43 */ - unsigned short MemorySpeed; /* Bytes 44-45 */ - unsigned short HardwareSpeed; /* Bytes 46-47 */ - unsigned int :32; /* Bytes 48-51 */ - unsigned int :32; /* Bytes 52-55 */ - unsigned char :8; /* Byte 56 */ - unsigned char :8; /* Byte 57 */ - unsigned short :16; /* Bytes 58-59 */ - unsigned short MaxCommands; /* Bytes 60-61 */ - unsigned short MaxScatterGatherEntries; /* Bytes 62-63 */ - unsigned short MaxDriveCommands; /* Bytes 64-65 */ - unsigned short MaxIODescriptors; /* Bytes 66-67 */ - unsigned short MaxCombinedSectors; /* Bytes 68-69 */ - unsigned char Latency; /* Byte 70 */ - unsigned char :8; /* Byte 71 */ - unsigned char SCSITimeout; /* Byte 72 */ - unsigned char :8; /* Byte 73 */ - unsigned short MinFreeLines; /* Bytes 74-75 */ - unsigned int :32; /* Bytes 76-79 */ - unsigned int :32; /* Bytes 80-83 */ - unsigned char RebuildRateConstant; /* Byte 84 */ - unsigned char :8; /* Byte 85 */ - unsigned char :8; /* Byte 86 */ - unsigned char :8; /* Byte 87 */ - unsigned int :32; /* Bytes 88-91 */ - unsigned int :32; /* Bytes 92-95 */ - unsigned short PhysicalDriveBlockSize; /* Bytes 96-97 */ - unsigned short LogicalDriveBlockSize; /* 
Bytes 98-99 */ - unsigned short MaxBlocksPerCommand; /* Bytes 100-101 */ - unsigned short BlockFactor; /* Bytes 102-103 */ - unsigned short CacheLineSize; /* Bytes 104-105 */ - struct { - enum { - DAC960_V1_Narrow_8bit = 0x0, - DAC960_V1_Wide_16bit = 0x1, - DAC960_V1_Wide_32bit = 0x2 - } __attribute__ ((packed)) BusWidth:2; /* Byte 106 Bits 0-1 */ - enum { - DAC960_V1_Fast = 0x0, - DAC960_V1_Ultra = 0x1, - DAC960_V1_Ultra2 = 0x2 - } __attribute__ ((packed)) BusSpeed:2; /* Byte 106 Bits 2-3 */ - bool Differential:1; /* Byte 106 Bit 4 */ - unsigned char :3; /* Byte 106 Bits 5-7 */ - } SCSICapability; - unsigned char :8; /* Byte 107 */ - unsigned int :32; /* Bytes 108-111 */ - unsigned short FirmwareBuildNumber; /* Bytes 112-113 */ - enum { - DAC960_V1_AEMI = 0x01, - DAC960_V1_OEM1 = 0x02, - DAC960_V1_OEM2 = 0x04, - DAC960_V1_OEM3 = 0x08, - DAC960_V1_Conner = 0x10, - DAC960_V1_SAFTE = 0x20 - } __attribute__ ((packed)) FaultManagementType; /* Byte 114 */ - unsigned char :8; /* Byte 115 */ - struct { - bool Clustering:1; /* Byte 116 Bit 0 */ - bool MylexOnlineRAIDExpansion:1; /* Byte 116 Bit 1 */ - bool ReadAhead:1; /* Byte 116 Bit 2 */ - bool BackgroundInitialization:1; /* Byte 116 Bit 3 */ - unsigned int :28; /* Bytes 116-119 */ - } FirmwareFeatures; - unsigned int :32; /* Bytes 120-123 */ - unsigned int :32; /* Bytes 124-127 */ -} -DAC960_V1_Enquiry2_T; - - -/* - Define the DAC960 V1 Firmware Logical Drive State type. -*/ - -typedef enum -{ - DAC960_V1_LogicalDrive_Online = 0x03, - DAC960_V1_LogicalDrive_Critical = 0x04, - DAC960_V1_LogicalDrive_Offline = 0xFF -} -__attribute__ ((packed)) -DAC960_V1_LogicalDriveState_T; - - -/* - Define the DAC960 V1 Firmware Logical Drive Information structure. -*/ - -typedef struct DAC960_V1_LogicalDriveInformation -{ - unsigned int LogicalDriveSize; /* Bytes 0-3 */ - DAC960_V1_LogicalDriveState_T LogicalDriveState; /* Byte 4 */ - unsigned char RAIDLevel:7; /* Byte 5 Bits 0-6 */ - bool WriteBack:1; /* Byte 5 Bit 7 */ - unsigned short :16; /* Bytes 6-7 */ -} -DAC960_V1_LogicalDriveInformation_T; - - -/* - Define the DAC960 V1 Firmware Get Logical Drive Information Command - reply structure. -*/ - -typedef DAC960_V1_LogicalDriveInformation_T - DAC960_V1_LogicalDriveInformationArray_T[DAC960_MaxLogicalDrives]; - - -/* - Define the DAC960 V1 Firmware Perform Event Log Operation Types. -*/ - -typedef enum -{ - DAC960_V1_GetEventLogEntry = 0x00 -} -__attribute__ ((packed)) -DAC960_V1_PerformEventLogOpType_T; - - -/* - Define the DAC960 V1 Firmware Get Event Log Entry Command reply structure. 
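A hypothetical helper for walking the reply array defined above; note that byte 5 packs the RAID level (7 bits) together with the WriteBack cache flag (1 bit):

static const char *
DAC960_V1_LogicalDriveStateName(DAC960_V1_LogicalDriveState_T State)
{
	switch (State) {
	case DAC960_V1_LogicalDrive_Online:
		return "online";
	case DAC960_V1_LogicalDrive_Critical:
		return "critical";
	case DAC960_V1_LogicalDrive_Offline:
		return "offline";
	default:
		return "unknown";
	}
}

static void DAC960_V1_ReportLogicalDrives(
	const DAC960_V1_LogicalDriveInformationArray_T Info,
	unsigned int Count)
{
	unsigned int i;

	if (Count > DAC960_MaxLogicalDrives)
		Count = DAC960_MaxLogicalDrives;
	for (i = 0; i < Count; i++)
		printk(KERN_INFO "logical drive %u: %u blocks, RAID%d (%s), %s\n",
		       i, Info[i].LogicalDriveSize, Info[i].RAIDLevel,
		       Info[i].WriteBack ? "write-back" : "write-through",
		       DAC960_V1_LogicalDriveStateName(Info[i].LogicalDriveState));
}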
-*/ - -typedef struct DAC960_V1_EventLogEntry -{ - unsigned char MessageType; /* Byte 0 */ - unsigned char MessageLength; /* Byte 1 */ - unsigned char TargetID:5; /* Byte 2 Bits 0-4 */ - unsigned char Channel:3; /* Byte 2 Bits 5-7 */ - unsigned char LogicalUnit:6; /* Byte 3 Bits 0-5 */ - unsigned char :2; /* Byte 3 Bits 6-7 */ - unsigned short SequenceNumber; /* Bytes 4-5 */ - unsigned char ErrorCode:7; /* Byte 6 Bits 0-6 */ - bool Valid:1; /* Byte 6 Bit 7 */ - unsigned char SegmentNumber; /* Byte 7 */ - DAC960_SCSI_RequestSenseKey_T SenseKey:4; /* Byte 8 Bits 0-3 */ - unsigned char :1; /* Byte 8 Bit 4 */ - bool ILI:1; /* Byte 8 Bit 5 */ - bool EOM:1; /* Byte 8 Bit 6 */ - bool Filemark:1; /* Byte 8 Bit 7 */ - unsigned char Information[4]; /* Bytes 9-12 */ - unsigned char AdditionalSenseLength; /* Byte 13 */ - unsigned char CommandSpecificInformation[4]; /* Bytes 14-17 */ - unsigned char AdditionalSenseCode; /* Byte 18 */ - unsigned char AdditionalSenseCodeQualifier; /* Byte 19 */ - unsigned char Dummy[12]; /* Bytes 20-31 */ -} -DAC960_V1_EventLogEntry_T; - - -/* - Define the DAC960 V1 Firmware Physical Device State type. -*/ - -typedef enum -{ - DAC960_V1_Device_Dead = 0x00, - DAC960_V1_Device_WriteOnly = 0x02, - DAC960_V1_Device_Online = 0x03, - DAC960_V1_Device_Standby = 0x10 -} -__attribute__ ((packed)) -DAC960_V1_PhysicalDeviceState_T; - - -/* - Define the DAC960 V1 Firmware Get Device State Command reply structure. - The structure is padded by 2 bytes for compatibility with Version 2.xx - Firmware. -*/ - -typedef struct DAC960_V1_DeviceState -{ - bool Present:1; /* Byte 0 Bit 0 */ - unsigned char :7; /* Byte 0 Bits 1-7 */ - enum { - DAC960_V1_OtherType = 0x0, - DAC960_V1_DiskType = 0x1, - DAC960_V1_SequentialType = 0x2, - DAC960_V1_CDROM_or_WORM_Type = 0x3 - } __attribute__ ((packed)) DeviceType:2; /* Byte 1 Bits 0-1 */ - bool :1; /* Byte 1 Bit 2 */ - bool Fast20:1; /* Byte 1 Bit 3 */ - bool Sync:1; /* Byte 1 Bit 4 */ - bool Fast:1; /* Byte 1 Bit 5 */ - bool Wide:1; /* Byte 1 Bit 6 */ - bool TaggedQueuingSupported:1; /* Byte 1 Bit 7 */ - DAC960_V1_PhysicalDeviceState_T DeviceState; /* Byte 2 */ - unsigned char :8; /* Byte 3 */ - unsigned char SynchronousMultiplier; /* Byte 4 */ - unsigned char SynchronousOffset:5; /* Byte 5 Bits 0-4 */ - unsigned char :3; /* Byte 5 Bits 5-7 */ - unsigned int DiskSize __attribute__ ((packed)); /* Bytes 6-9 */ - unsigned short :16; /* Bytes 10-11 */ -} -DAC960_V1_DeviceState_T; - - -/* - Define the DAC960 V1 Firmware Get Rebuild Progress Command reply structure. -*/ - -typedef struct DAC960_V1_RebuildProgress -{ - unsigned int LogicalDriveNumber; /* Bytes 0-3 */ - unsigned int LogicalDriveSize; /* Bytes 4-7 */ - unsigned int RemainingBlocks; /* Bytes 8-11 */ -} -DAC960_V1_RebuildProgress_T; - - -/* - Define the DAC960 V1 Firmware Background Initialization Status Command - reply structure. 
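Percent complete falls out of the DAC960_V1_RebuildProgress_T reply above directly; a minimal sketch (hypothetical helper; real 32-bit kernel code would use do_div() for the 64-bit division):

static unsigned int
DAC960_V1_RebuildPercentComplete(const DAC960_V1_RebuildProgress_T *Progress)
{
	unsigned long long Done;

	if (Progress->LogicalDriveSize == 0)
		return 0;
	/* Widen before multiplying so Done * 100 cannot overflow
	   32 bits on large logical drives. */
	Done = (unsigned long long)Progress->LogicalDriveSize -
	       Progress->RemainingBlocks;
	return (unsigned int)(Done * 100 / Progress->LogicalDriveSize);
}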
-*/ - -typedef struct DAC960_V1_BackgroundInitializationStatus -{ - unsigned int LogicalDriveSize; /* Bytes 0-3 */ - unsigned int BlocksCompleted; /* Bytes 4-7 */ - unsigned char Reserved1[12]; /* Bytes 8-19 */ - unsigned int LogicalDriveNumber; /* Bytes 20-23 */ - unsigned char RAIDLevel; /* Byte 24 */ - enum { - DAC960_V1_BackgroundInitializationInvalid = 0x00, - DAC960_V1_BackgroundInitializationStarted = 0x02, - DAC960_V1_BackgroundInitializationInProgress = 0x04, - DAC960_V1_BackgroundInitializationSuspended = 0x05, - DAC960_V1_BackgroundInitializationCancelled = 0x06 - } __attribute__ ((packed)) Status; /* Byte 25 */ - unsigned char Reserved2[6]; /* Bytes 26-31 */ -} -DAC960_V1_BackgroundInitializationStatus_T; - - -/* - Define the DAC960 V1 Firmware Error Table Entry structure. -*/ - -typedef struct DAC960_V1_ErrorTableEntry -{ - unsigned char ParityErrorCount; /* Byte 0 */ - unsigned char SoftErrorCount; /* Byte 1 */ - unsigned char HardErrorCount; /* Byte 2 */ - unsigned char MiscErrorCount; /* Byte 3 */ -} -DAC960_V1_ErrorTableEntry_T; - - -/* - Define the DAC960 V1 Firmware Get Error Table Command reply structure. -*/ - -typedef struct DAC960_V1_ErrorTable -{ - DAC960_V1_ErrorTableEntry_T - ErrorTableEntries[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets]; -} -DAC960_V1_ErrorTable_T; - - -/* - Define the DAC960 V1 Firmware Read Config2 Command reply structure. -*/ - -typedef struct DAC960_V1_Config2 -{ - unsigned char :1; /* Byte 0 Bit 0 */ - bool ActiveNegationEnabled:1; /* Byte 0 Bit 1 */ - unsigned char :5; /* Byte 0 Bits 2-6 */ - bool NoRescanIfResetReceivedDuringScan:1; /* Byte 0 Bit 7 */ - bool StorageWorksSupportEnabled:1; /* Byte 1 Bit 0 */ - bool HewlettPackardSupportEnabled:1; /* Byte 1 Bit 1 */ - bool NoDisconnectOnFirstCommand:1; /* Byte 1 Bit 2 */ - unsigned char :2; /* Byte 1 Bits 3-4 */ - bool AEMI_ARM:1; /* Byte 1 Bit 5 */ - bool AEMI_OFM:1; /* Byte 1 Bit 6 */ - unsigned char :1; /* Byte 1 Bit 7 */ - enum { - DAC960_V1_OEMID_Mylex = 0x00, - DAC960_V1_OEMID_IBM = 0x08, - DAC960_V1_OEMID_HP = 0x0A, - DAC960_V1_OEMID_DEC = 0x0C, - DAC960_V1_OEMID_Siemens = 0x10, - DAC960_V1_OEMID_Intel = 0x12 - } __attribute__ ((packed)) OEMID; /* Byte 2 */ - unsigned char OEMModelNumber; /* Byte 3 */ - unsigned char PhysicalSector; /* Byte 4 */ - unsigned char LogicalSector; /* Byte 5 */ - unsigned char BlockFactor; /* Byte 6 */ - bool ReadAheadEnabled:1; /* Byte 7 Bit 0 */ - bool LowBIOSDelay:1; /* Byte 7 Bit 1 */ - unsigned char :2; /* Byte 7 Bits 2-3 */ - bool ReassignRestrictedToOneSector:1; /* Byte 7 Bit 4 */ - unsigned char :1; /* Byte 7 Bit 5 */ - bool ForceUnitAccessDuringWriteRecovery:1; /* Byte 7 Bit 6 */ - bool EnableLeftSymmetricRAID5Algorithm:1; /* Byte 7 Bit 7 */ - unsigned char DefaultRebuildRate; /* Byte 8 */ - unsigned char :8; /* Byte 9 */ - unsigned char BlocksPerCacheLine; /* Byte 10 */ - unsigned char BlocksPerStripe; /* Byte 11 */ - struct { - enum { - DAC960_V1_Async = 0x0, - DAC960_V1_Sync_8MHz = 0x1, - DAC960_V1_Sync_5MHz = 0x2, - DAC960_V1_Sync_10or20MHz = 0x3 /* Byte 11 Bits 0-1 */ - } __attribute__ ((packed)) Speed:2; - bool Force8Bit:1; /* Byte 11 Bit 2 */ - bool DisableFast20:1; /* Byte 11 Bit 3 */ - unsigned char :3; /* Byte 11 Bits 4-6 */ - bool EnableTaggedQueuing:1; /* Byte 11 Bit 7 */ - } __attribute__ ((packed)) ChannelParameters[6]; /* Bytes 12-17 */ - unsigned char SCSIInitiatorID; /* Byte 18 */ - unsigned char :8; /* Byte 19 */ - enum { - DAC960_V1_StartupMode_ControllerSpinUp = 0x00, - DAC960_V1_StartupMode_PowerOnSpinUp = 0x01 - } __attribute__ 
((packed)) StartupMode; /* Byte 20 */ - unsigned char SimultaneousDeviceSpinUpCount; /* Byte 21 */ - unsigned char SecondsDelayBetweenSpinUps; /* Byte 22 */ - unsigned char Reserved1[29]; /* Bytes 23-51 */ - bool BIOSDisabled:1; /* Byte 52 Bit 0 */ - bool CDROMBootEnabled:1; /* Byte 52 Bit 1 */ - unsigned char :3; /* Byte 52 Bits 2-4 */ - enum { - DAC960_V1_Geometry_128_32 = 0x0, - DAC960_V1_Geometry_255_63 = 0x1, - DAC960_V1_Geometry_Reserved1 = 0x2, - DAC960_V1_Geometry_Reserved2 = 0x3 - } __attribute__ ((packed)) DriveGeometry:2; /* Byte 52 Bits 5-6 */ - unsigned char :1; /* Byte 52 Bit 7 */ - unsigned char Reserved2[9]; /* Bytes 53-61 */ - unsigned short Checksum; /* Bytes 62-63 */ -} -DAC960_V1_Config2_T; - - -/* - Define the DAC960 V1 Firmware DCDB request structure. -*/ - -typedef struct DAC960_V1_DCDB -{ - unsigned char TargetID:4; /* Byte 0 Bits 0-3 */ - unsigned char Channel:4; /* Byte 0 Bits 4-7 */ - enum { - DAC960_V1_DCDB_NoDataTransfer = 0, - DAC960_V1_DCDB_DataTransferDeviceToSystem = 1, - DAC960_V1_DCDB_DataTransferSystemToDevice = 2, - DAC960_V1_DCDB_IllegalDataTransfer = 3 - } __attribute__ ((packed)) Direction:2; /* Byte 1 Bits 0-1 */ - bool EarlyStatus:1; /* Byte 1 Bit 2 */ - unsigned char :1; /* Byte 1 Bit 3 */ - enum { - DAC960_V1_DCDB_Timeout_24_hours = 0, - DAC960_V1_DCDB_Timeout_10_seconds = 1, - DAC960_V1_DCDB_Timeout_60_seconds = 2, - DAC960_V1_DCDB_Timeout_10_minutes = 3 - } __attribute__ ((packed)) Timeout:2; /* Byte 1 Bits 4-5 */ - bool NoAutomaticRequestSense:1; /* Byte 1 Bit 6 */ - bool DisconnectPermitted:1; /* Byte 1 Bit 7 */ - unsigned short TransferLength; /* Bytes 2-3 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 4-7 */ - unsigned char CDBLength:4; /* Byte 8 Bits 0-3 */ - unsigned char TransferLengthHigh4:4; /* Byte 8 Bits 4-7 */ - unsigned char SenseLength; /* Byte 9 */ - unsigned char CDB[12]; /* Bytes 10-21 */ - unsigned char SenseData[64]; /* Bytes 22-85 */ - unsigned char Status; /* Byte 86 */ - unsigned char :8; /* Byte 87 */ -} -DAC960_V1_DCDB_T; - - -/* - Define the DAC960 V1 Firmware Scatter/Gather List Type 1 32 Bit Address - 32 Bit Byte Count structure. -*/ - -typedef struct DAC960_V1_ScatterGatherSegment -{ - DAC960_BusAddress32_T SegmentDataPointer; /* Bytes 0-3 */ - DAC960_ByteCount32_T SegmentByteCount; /* Bytes 4-7 */ -} -DAC960_V1_ScatterGatherSegment_T; - - -/* - Define the 13 Byte DAC960 V1 Firmware Command Mailbox structure. Bytes 13-15 - are not used. The Command Mailbox structure is padded to 16 bytes for - efficient access. 
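One detail of the DCDB layout above that the removed GAM ioctl path depends on: the transfer length is 20 bits wide, split between TransferLength (low 16 bits, bytes 2-3) and TransferLengthHigh4 (high 4 bits, upper nibble of byte 8), and DAC960_gam_v1_execute_command() validates exactly the recombination ((TransferLengthHigh4 << 16) | TransferLength). A hypothetical setter for that encoding:

static void DAC960_V1_DCDB_SetTransferLength(DAC960_V1_DCDB_T *DCDB,
					     unsigned int Length)
{
	/* 20-bit length: low 16 bits in bytes 2-3, high 4 bits in
	   the upper nibble of byte 8. */
	DCDB->TransferLength = Length & 0xFFFF;
	DCDB->TransferLengthHigh4 = (Length >> 16) & 0xF;
}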
-*/ - -typedef union DAC960_V1_CommandMailbox -{ - unsigned int Words[4]; /* Words 0-3 */ - unsigned char Bytes[16]; /* Bytes 0-15 */ - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned char Dummy[14]; /* Bytes 2-15 */ - } __attribute__ ((packed)) Common; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned char Dummy1[6]; /* Bytes 2-7 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */ - unsigned char Dummy2[4]; /* Bytes 12-15 */ - } __attribute__ ((packed)) Type3; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned char CommandOpcode2; /* Byte 2 */ - unsigned char Dummy1[5]; /* Bytes 3-7 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */ - unsigned char Dummy2[4]; /* Bytes 12-15 */ - } __attribute__ ((packed)) Type3B; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned char Dummy1[5]; /* Bytes 2-6 */ - unsigned char LogicalDriveNumber:6; /* Byte 7 Bits 0-6 */ - bool AutoRestore:1; /* Byte 7 Bit 7 */ - unsigned char Dummy2[8]; /* Bytes 8-15 */ - } __attribute__ ((packed)) Type3C; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned char Channel; /* Byte 2 */ - unsigned char TargetID; /* Byte 3 */ - DAC960_V1_PhysicalDeviceState_T DeviceState:5; /* Byte 4 Bits 0-4 */ - unsigned char Modifier:3; /* Byte 4 Bits 5-7 */ - unsigned char Dummy1[3]; /* Bytes 5-7 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */ - unsigned char Dummy2[4]; /* Bytes 12-15 */ - } __attribute__ ((packed)) Type3D; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - DAC960_V1_PerformEventLogOpType_T OperationType; /* Byte 2 */ - unsigned char OperationQualifier; /* Byte 3 */ - unsigned short SequenceNumber; /* Bytes 4-5 */ - unsigned char Dummy1[2]; /* Bytes 6-7 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */ - unsigned char Dummy2[4]; /* Bytes 12-15 */ - } __attribute__ ((packed)) Type3E; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned char Dummy1[2]; /* Bytes 2-3 */ - unsigned char RebuildRateConstant; /* Byte 4 */ - unsigned char Dummy2[3]; /* Bytes 5-7 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */ - unsigned char Dummy3[4]; /* Bytes 12-15 */ - } __attribute__ ((packed)) Type3R; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned short TransferLength; /* Bytes 2-3 */ - unsigned int LogicalBlockAddress; /* Bytes 4-7 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */ - unsigned char LogicalDriveNumber; /* Byte 12 */ - unsigned char Dummy[3]; /* Bytes 13-15 */ - } __attribute__ ((packed)) Type4; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - struct { - unsigned short TransferLength:11; /* Bytes 2-3 */ - unsigned char LogicalDriveNumber:5; /* Byte 3 Bits 3-7 */ - } __attribute__ ((packed)) LD; - unsigned int LogicalBlockAddress; /* Bytes 4-7 */ - DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */ - 
unsigned char ScatterGatherCount:6; /* Byte 12 Bits 0-5 */ - enum { - DAC960_V1_ScatterGather_32BitAddress_32BitByteCount = 0x0, - DAC960_V1_ScatterGather_32BitAddress_16BitByteCount = 0x1, - DAC960_V1_ScatterGather_32BitByteCount_32BitAddress = 0x2, - DAC960_V1_ScatterGather_16BitByteCount_32BitAddress = 0x3 - } __attribute__ ((packed)) ScatterGatherType:2; /* Byte 12 Bits 6-7 */ - unsigned char Dummy[3]; /* Bytes 13-15 */ - } __attribute__ ((packed)) Type5; - struct { - DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */ - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */ - unsigned char CommandOpcode2; /* Byte 2 */ - unsigned char :8; /* Byte 3 */ - DAC960_BusAddress32_T CommandMailboxesBusAddress; /* Bytes 4-7 */ - DAC960_BusAddress32_T StatusMailboxesBusAddress; /* Bytes 8-11 */ - unsigned char Dummy[4]; /* Bytes 12-15 */ - } __attribute__ ((packed)) TypeX; -} -DAC960_V1_CommandMailbox_T; - - -/* - Define the DAC960 V2 Firmware Command Opcodes. -*/ - -typedef enum -{ - DAC960_V2_MemCopy = 0x01, - DAC960_V2_SCSI_10_Passthru = 0x02, - DAC960_V2_SCSI_255_Passthru = 0x03, - DAC960_V2_SCSI_10 = 0x04, - DAC960_V2_SCSI_256 = 0x05, - DAC960_V2_IOCTL = 0x20 -} -__attribute__ ((packed)) -DAC960_V2_CommandOpcode_T; - - -/* - Define the DAC960 V2 Firmware IOCTL Opcodes. -*/ - -typedef enum -{ - DAC960_V2_GetControllerInfo = 0x01, - DAC960_V2_GetLogicalDeviceInfoValid = 0x03, - DAC960_V2_GetPhysicalDeviceInfoValid = 0x05, - DAC960_V2_GetHealthStatus = 0x11, - DAC960_V2_GetEvent = 0x15, - DAC960_V2_StartDiscovery = 0x81, - DAC960_V2_SetDeviceState = 0x82, - DAC960_V2_RebuildDeviceStart = 0x88, - DAC960_V2_RebuildDeviceStop = 0x89, - DAC960_V2_ConsistencyCheckStart = 0x8C, - DAC960_V2_ConsistencyCheckStop = 0x8D, - DAC960_V2_SetMemoryMailbox = 0x8E, - DAC960_V2_PauseDevice = 0x92, - DAC960_V2_TranslatePhysicalToLogicalDevice = 0xC5 -} -__attribute__ ((packed)) -DAC960_V2_IOCTL_Opcode_T; - - -/* - Define the DAC960 V2 Firmware Command Identifier type. -*/ - -typedef unsigned short DAC960_V2_CommandIdentifier_T; - - -/* - Define the DAC960 V2 Firmware Command Status Codes. -*/ - -#define DAC960_V2_NormalCompletion 0x00 -#define DAC960_V2_AbormalCompletion 0x02 -#define DAC960_V2_DeviceBusy 0x08 -#define DAC960_V2_DeviceNonresponsive 0x0E -#define DAC960_V2_DeviceNonresponsive2 0x0F -#define DAC960_V2_DeviceRevervationConflict 0x18 - -typedef unsigned char DAC960_V2_CommandStatus_T; - - -/* - Define the DAC960 V2 Firmware Memory Type structure. -*/ - -typedef struct DAC960_V2_MemoryType -{ - enum { - DAC960_V2_MemoryType_Reserved = 0x00, - DAC960_V2_MemoryType_DRAM = 0x01, - DAC960_V2_MemoryType_EDRAM = 0x02, - DAC960_V2_MemoryType_EDO = 0x03, - DAC960_V2_MemoryType_SDRAM = 0x04, - DAC960_V2_MemoryType_Last = 0x1F - } __attribute__ ((packed)) MemoryType:5; /* Byte 0 Bits 0-4 */ - bool :1; /* Byte 0 Bit 5 */ - bool MemoryParity:1; /* Byte 0 Bit 6 */ - bool MemoryECC:1; /* Byte 0 Bit 7 */ -} -DAC960_V2_MemoryType_T; - - -/* - Define the DAC960 V2 Firmware Processor Type structure. -*/ - -typedef enum -{ - DAC960_V2_ProcessorType_i960CA = 0x01, - DAC960_V2_ProcessorType_i960RD = 0x02, - DAC960_V2_ProcessorType_i960RN = 0x03, - DAC960_V2_ProcessorType_i960RP = 0x04, - DAC960_V2_ProcessorType_NorthBay = 0x05, - DAC960_V2_ProcessorType_StrongArm = 0x06, - DAC960_V2_ProcessorType_i960RM = 0x07 -} -__attribute__ ((packed)) -DAC960_V2_ProcessorType_T; - - -/* - Define the DAC960 V2 Firmware Get Controller Info reply structure. 
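From user space, both the V1 mailbox commands and the V2 IOCTL opcodes above were driven through the dac960_gam misc device removed earlier in this patch; the caller needs CAP_SYS_ADMIN. A minimal user-space sketch, assuming the DAC960_IOCTL_* request values come from a copy of the removed definitions (the include name is hypothetical):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "dac960_gam.h"		/* hypothetical: DAC960_IOCTL_* values */

int main(void)
{
	long count;
	int fd = open("/dev/dac960_gam", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/dac960_gam");
		return 1;
	}
	/* DAC960_gam_ioctl() returns DAC960_ControllerCount directly
	   as the ioctl return value for this request. */
	count = ioctl(fd, DAC960_IOCTL_GET_CONTROLLER_COUNT, 0);
	if (count < 0)
		perror("DAC960_IOCTL_GET_CONTROLLER_COUNT");
	else
		printf("%ld DAC960 controller(s)\n", count);
	close(fd);
	return count < 0;
}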
-*/ - -typedef struct DAC960_V2_ControllerInfo -{ - unsigned char :8; /* Byte 0 */ - enum { - DAC960_V2_SCSI_Bus = 0x00, - DAC960_V2_Fibre_Bus = 0x01, - DAC960_V2_PCI_Bus = 0x03 - } __attribute__ ((packed)) BusInterfaceType; /* Byte 1 */ - enum { - DAC960_V2_DAC960E = 0x01, - DAC960_V2_DAC960M = 0x08, - DAC960_V2_DAC960PD = 0x10, - DAC960_V2_DAC960PL = 0x11, - DAC960_V2_DAC960PU = 0x12, - DAC960_V2_DAC960PE = 0x13, - DAC960_V2_DAC960PG = 0x14, - DAC960_V2_DAC960PJ = 0x15, - DAC960_V2_DAC960PTL0 = 0x16, - DAC960_V2_DAC960PR = 0x17, - DAC960_V2_DAC960PRL = 0x18, - DAC960_V2_DAC960PT = 0x19, - DAC960_V2_DAC1164P = 0x1A, - DAC960_V2_DAC960PTL1 = 0x1B, - DAC960_V2_EXR2000P = 0x1C, - DAC960_V2_EXR3000P = 0x1D, - DAC960_V2_AcceleRAID352 = 0x1E, - DAC960_V2_AcceleRAID170 = 0x1F, - DAC960_V2_AcceleRAID160 = 0x20, - DAC960_V2_DAC960S = 0x60, - DAC960_V2_DAC960SU = 0x61, - DAC960_V2_DAC960SX = 0x62, - DAC960_V2_DAC960SF = 0x63, - DAC960_V2_DAC960SS = 0x64, - DAC960_V2_DAC960FL = 0x65, - DAC960_V2_DAC960LL = 0x66, - DAC960_V2_DAC960FF = 0x67, - DAC960_V2_DAC960HP = 0x68, - DAC960_V2_RAIDBRICK = 0x69, - DAC960_V2_METEOR_FL = 0x6A, - DAC960_V2_METEOR_FF = 0x6B - } __attribute__ ((packed)) ControllerType; /* Byte 2 */ - unsigned char :8; /* Byte 3 */ - unsigned short BusInterfaceSpeedMHz; /* Bytes 4-5 */ - unsigned char BusWidthBits; /* Byte 6 */ - unsigned char FlashCodeTypeOrProductID; /* Byte 7 */ - unsigned char NumberOfHostPortsPresent; /* Byte 8 */ - unsigned char Reserved1[7]; /* Bytes 9-15 */ - unsigned char BusInterfaceName[16]; /* Bytes 16-31 */ - unsigned char ControllerName[16]; /* Bytes 32-47 */ - unsigned char Reserved2[16]; /* Bytes 48-63 */ - /* Firmware Release Information */ - unsigned char FirmwareMajorVersion; /* Byte 64 */ - unsigned char FirmwareMinorVersion; /* Byte 65 */ - unsigned char FirmwareTurnNumber; /* Byte 66 */ - unsigned char FirmwareBuildNumber; /* Byte 67 */ - unsigned char FirmwareReleaseDay; /* Byte 68 */ - unsigned char FirmwareReleaseMonth; /* Byte 69 */ - unsigned char FirmwareReleaseYearHigh2Digits; /* Byte 70 */ - unsigned char FirmwareReleaseYearLow2Digits; /* Byte 71 */ - /* Hardware Release Information */ - unsigned char HardwareRevision; /* Byte 72 */ - unsigned int :24; /* Bytes 73-75 */ - unsigned char HardwareReleaseDay; /* Byte 76 */ - unsigned char HardwareReleaseMonth; /* Byte 77 */ - unsigned char HardwareReleaseYearHigh2Digits; /* Byte 78 */ - unsigned char HardwareReleaseYearLow2Digits; /* Byte 79 */ - /* Hardware Manufacturing Information */ - unsigned char ManufacturingBatchNumber; /* Byte 80 */ - unsigned char :8; /* Byte 81 */ - unsigned char ManufacturingPlantNumber; /* Byte 82 */ - unsigned char :8; /* Byte 83 */ - unsigned char HardwareManufacturingDay; /* Byte 84 */ - unsigned char HardwareManufacturingMonth; /* Byte 85 */ - unsigned char HardwareManufacturingYearHigh2Digits; /* Byte 86 */ - unsigned char HardwareManufacturingYearLow2Digits; /* Byte 87 */ - unsigned char MaximumNumberOfPDDperXLD; /* Byte 88 */ - unsigned char MaximumNumberOfILDperXLD; /* Byte 89 */ - unsigned short NonvolatileMemorySizeKB; /* Bytes 90-91 */ - unsigned char MaximumNumberOfXLD; /* Byte 92 */ - unsigned int :24; /* Bytes 93-95 */ - /* Unique Information per Controller */ - unsigned char ControllerSerialNumber[16]; /* Bytes 96-111 */ - unsigned char Reserved3[16]; /* Bytes 112-127 */ - /* Vendor Information */ - unsigned int :24; /* Bytes 128-130 */ - unsigned char OEM_Code; /* Byte 131 */ - unsigned char VendorName[16]; /* Bytes 132-147 */ - /* Other 
Physical/Controller/Operation Information */ - bool BBU_Present:1; /* Byte 148 Bit 0 */ - bool ActiveActiveClusteringMode:1; /* Byte 148 Bit 1 */ - unsigned char :6; /* Byte 148 Bits 2-7 */ - unsigned char :8; /* Byte 149 */ - unsigned short :16; /* Bytes 150-151 */ - /* Physical Device Scan Information */ - bool PhysicalScanActive:1; /* Byte 152 Bit 0 */ - unsigned char :7; /* Byte 152 Bits 1-7 */ - unsigned char PhysicalDeviceChannelNumber; /* Byte 153 */ - unsigned char PhysicalDeviceTargetID; /* Byte 154 */ - unsigned char PhysicalDeviceLogicalUnit; /* Byte 155 */ - /* Maximum Command Data Transfer Sizes */ - unsigned short MaximumDataTransferSizeInBlocks; /* Bytes 156-157 */ - unsigned short MaximumScatterGatherEntries; /* Bytes 158-159 */ - /* Logical/Physical Device Counts */ - unsigned short LogicalDevicesPresent; /* Bytes 160-161 */ - unsigned short LogicalDevicesCritical; /* Bytes 162-163 */ - unsigned short LogicalDevicesOffline; /* Bytes 164-165 */ - unsigned short PhysicalDevicesPresent; /* Bytes 166-167 */ - unsigned short PhysicalDisksPresent; /* Bytes 168-169 */ - unsigned short PhysicalDisksCritical; /* Bytes 170-171 */ - unsigned short PhysicalDisksOffline; /* Bytes 172-173 */ - unsigned short MaximumParallelCommands; /* Bytes 174-175 */ - /* Channel and Target ID Information */ - unsigned char NumberOfPhysicalChannelsPresent; /* Byte 176 */ - unsigned char NumberOfVirtualChannelsPresent; /* Byte 177 */ - unsigned char NumberOfPhysicalChannelsPossible; /* Byte 178 */ - unsigned char NumberOfVirtualChannelsPossible; /* Byte 179 */ - unsigned char MaximumTargetsPerChannel[16]; /* Bytes 180-195 */ - unsigned char Reserved4[12]; /* Bytes 196-207 */ - /* Memory/Cache Information */ - unsigned short MemorySizeMB; /* Bytes 208-209 */ - unsigned short CacheSizeMB; /* Bytes 210-211 */ - unsigned int ValidCacheSizeInBytes; /* Bytes 212-215 */ - unsigned int DirtyCacheSizeInBytes; /* Bytes 216-219 */ - unsigned short MemorySpeedMHz; /* Bytes 220-221 */ - unsigned char MemoryDataWidthBits; /* Byte 222 */ - DAC960_V2_MemoryType_T MemoryType; /* Byte 223 */ - unsigned char CacheMemoryTypeName[16]; /* Bytes 224-239 */ - /* Execution Memory Information */ - unsigned short ExecutionMemorySizeMB; /* Bytes 240-241 */ - unsigned short ExecutionL2CacheSizeMB; /* Bytes 242-243 */ - unsigned char Reserved5[8]; /* Bytes 244-251 */ - unsigned short ExecutionMemorySpeedMHz; /* Bytes 252-253 */ - unsigned char ExecutionMemoryDataWidthBits; /* Byte 254 */ - DAC960_V2_MemoryType_T ExecutionMemoryType; /* Byte 255 */ - unsigned char ExecutionMemoryTypeName[16]; /* Bytes 256-271 */ - /* First CPU Type Information */ - unsigned short FirstProcessorSpeedMHz; /* Bytes 272-273 */ - DAC960_V2_ProcessorType_T FirstProcessorType; /* Byte 274 */ - unsigned char FirstProcessorCount; /* Byte 275 */ - unsigned char Reserved6[12]; /* Bytes 276-287 */ - unsigned char FirstProcessorName[16]; /* Bytes 288-303 */ - /* Second CPU Type Information */ - unsigned short SecondProcessorSpeedMHz; /* Bytes 304-305 */ - DAC960_V2_ProcessorType_T SecondProcessorType; /* Byte 306 */ - unsigned char SecondProcessorCount; /* Byte 307 */ - unsigned char Reserved7[12]; /* Bytes 308-319 */ - unsigned char SecondProcessorName[16]; /* Bytes 320-335 */ - /* Debugging/Profiling/Command Time Tracing Information */ - unsigned short CurrentProfilingDataPageNumber; /* Bytes 336-337 */ - unsigned short ProgramsAwaitingProfilingData; /* Bytes 338-339 */ - unsigned short CurrentCommandTimeTraceDataPageNumber; /* Bytes 340-341 */ - unsigned 
short ProgramsAwaitingCommandTimeTraceData; /* Bytes 342-343 */ - unsigned char Reserved8[8]; /* Bytes 344-351 */ - /* Error Counters on Physical Devices */ - unsigned short PhysicalDeviceBusResets; /* Bytes 352-353 */ - unsigned short PhysicalDeviceParityErrors; /* Bytes 355-355 */ - unsigned short PhysicalDeviceSoftErrors; /* Bytes 356-357 */ - unsigned short PhysicalDeviceCommandsFailed; /* Bytes 358-359 */ - unsigned short PhysicalDeviceMiscellaneousErrors; /* Bytes 360-361 */ - unsigned short PhysicalDeviceCommandTimeouts; /* Bytes 362-363 */ - unsigned short PhysicalDeviceSelectionTimeouts; /* Bytes 364-365 */ - unsigned short PhysicalDeviceRetriesDone; /* Bytes 366-367 */ - unsigned short PhysicalDeviceAbortsDone; /* Bytes 368-369 */ - unsigned short PhysicalDeviceHostCommandAbortsDone; /* Bytes 370-371 */ - unsigned short PhysicalDevicePredictedFailuresDetected; /* Bytes 372-373 */ - unsigned short PhysicalDeviceHostCommandsFailed; /* Bytes 374-375 */ - unsigned short PhysicalDeviceHardErrors; /* Bytes 376-377 */ - unsigned char Reserved9[6]; /* Bytes 378-383 */ - /* Error Counters on Logical Devices */ - unsigned short LogicalDeviceSoftErrors; /* Bytes 384-385 */ - unsigned short LogicalDeviceCommandsFailed; /* Bytes 386-387 */ - unsigned short LogicalDeviceHostCommandAbortsDone; /* Bytes 388-389 */ - unsigned short :16; /* Bytes 390-391 */ - /* Error Counters on Controller */ - unsigned short ControllerMemoryErrors; /* Bytes 392-393 */ - unsigned short ControllerHostCommandAbortsDone; /* Bytes 394-395 */ - unsigned int :32; /* Bytes 396-399 */ - /* Long Duration Activity Information */ - unsigned short BackgroundInitializationsActive; /* Bytes 400-401 */ - unsigned short LogicalDeviceInitializationsActive; /* Bytes 402-403 */ - unsigned short PhysicalDeviceInitializationsActive; /* Bytes 404-405 */ - unsigned short ConsistencyChecksActive; /* Bytes 406-407 */ - unsigned short RebuildsActive; /* Bytes 408-409 */ - unsigned short OnlineExpansionsActive; /* Bytes 410-411 */ - unsigned short PatrolActivitiesActive; /* Bytes 412-413 */ - unsigned short :16; /* Bytes 414-415 */ - /* Flash ROM Information */ - unsigned char FlashType; /* Byte 416 */ - unsigned char :8; /* Byte 417 */ - unsigned short FlashSizeMB; /* Bytes 418-419 */ - unsigned int FlashLimit; /* Bytes 420-423 */ - unsigned int FlashCount; /* Bytes 424-427 */ - unsigned int :32; /* Bytes 428-431 */ - unsigned char FlashTypeName[16]; /* Bytes 432-447 */ - /* Firmware Run Time Information */ - unsigned char RebuildRate; /* Byte 448 */ - unsigned char BackgroundInitializationRate; /* Byte 449 */ - unsigned char ForegroundInitializationRate; /* Byte 450 */ - unsigned char ConsistencyCheckRate; /* Byte 451 */ - unsigned int :32; /* Bytes 452-455 */ - unsigned int MaximumDP; /* Bytes 456-459 */ - unsigned int FreeDP; /* Bytes 460-463 */ - unsigned int MaximumIOP; /* Bytes 464-467 */ - unsigned int FreeIOP; /* Bytes 468-471 */ - unsigned short MaximumCombLengthInBlocks; /* Bytes 472-473 */ - unsigned short NumberOfConfigurationGroups; /* Bytes 474-475 */ - bool InstallationAbortStatus:1; /* Byte 476 Bit 0 */ - bool MaintenanceModeStatus:1; /* Byte 476 Bit 1 */ - unsigned int :24; /* Bytes 476-479 */ - unsigned char Reserved10[32]; /* Bytes 480-511 */ - unsigned char Reserved11[512]; /* Bytes 512-1023 */ -} -DAC960_V2_ControllerInfo_T; - - -/* - Define the DAC960 V2 Firmware Logical Device State type. 
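The controller information reply above is a fixed firmware layout: Reserved11 runs through byte 1023, so the packed struct must come out at exactly 1024 bytes or the wire format is broken. A sketch of the usual kernel compile-time guard for such layouts (illustrative only, not code from this patch):

	/* BUILD_BUG_ON() (linux/build_bug.h) fails the build if the
	 * packed reply structure drifts from the 1024 bytes that the
	 * firmware actually sends. */
	static inline void DAC960_V2_ControllerInfoSizeCheck(void)
	{
		BUILD_BUG_ON(sizeof(DAC960_V2_ControllerInfo_T) != 1024);
	}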
-*/ - -typedef enum -{ - DAC960_V2_LogicalDevice_Online = 0x01, - DAC960_V2_LogicalDevice_Offline = 0x08, - DAC960_V2_LogicalDevice_Critical = 0x09 -} -__attribute__ ((packed)) -DAC960_V2_LogicalDeviceState_T; - - -/* - Define the DAC960 V2 Firmware Get Logical Device Info reply structure. -*/ - -typedef struct DAC960_V2_LogicalDeviceInfo -{ - unsigned char :8; /* Byte 0 */ - unsigned char Channel; /* Byte 1 */ - unsigned char TargetID; /* Byte 2 */ - unsigned char LogicalUnit; /* Byte 3 */ - DAC960_V2_LogicalDeviceState_T LogicalDeviceState; /* Byte 4 */ - unsigned char RAIDLevel; /* Byte 5 */ - unsigned char StripeSize; /* Byte 6 */ - unsigned char CacheLineSize; /* Byte 7 */ - struct { - enum { - DAC960_V2_ReadCacheDisabled = 0x0, - DAC960_V2_ReadCacheEnabled = 0x1, - DAC960_V2_ReadAheadEnabled = 0x2, - DAC960_V2_IntelligentReadAheadEnabled = 0x3, - DAC960_V2_ReadCache_Last = 0x7 - } __attribute__ ((packed)) ReadCache:3; /* Byte 8 Bits 0-2 */ - enum { - DAC960_V2_WriteCacheDisabled = 0x0, - DAC960_V2_LogicalDeviceReadOnly = 0x1, - DAC960_V2_WriteCacheEnabled = 0x2, - DAC960_V2_IntelligentWriteCacheEnabled = 0x3, - DAC960_V2_WriteCache_Last = 0x7 - } __attribute__ ((packed)) WriteCache:3; /* Byte 8 Bits 3-5 */ - bool :1; /* Byte 8 Bit 6 */ - bool LogicalDeviceInitialized:1; /* Byte 8 Bit 7 */ - } LogicalDeviceControl; /* Byte 8 */ - /* Logical Device Operations Status */ - bool ConsistencyCheckInProgress:1; /* Byte 9 Bit 0 */ - bool RebuildInProgress:1; /* Byte 9 Bit 1 */ - bool BackgroundInitializationInProgress:1; /* Byte 9 Bit 2 */ - bool ForegroundInitializationInProgress:1; /* Byte 9 Bit 3 */ - bool DataMigrationInProgress:1; /* Byte 9 Bit 4 */ - bool PatrolOperationInProgress:1; /* Byte 9 Bit 5 */ - unsigned char :2; /* Byte 9 Bits 6-7 */ - unsigned char RAID5WriteUpdate; /* Byte 10 */ - unsigned char RAID5Algorithm; /* Byte 11 */ - unsigned short LogicalDeviceNumber; /* Bytes 12-13 */ - /* BIOS Info */ - bool BIOSDisabled:1; /* Byte 14 Bit 0 */ - bool CDROMBootEnabled:1; /* Byte 14 Bit 1 */ - bool DriveCoercionEnabled:1; /* Byte 14 Bit 2 */ - bool WriteSameDisabled:1; /* Byte 14 Bit 3 */ - bool HBA_ModeEnabled:1; /* Byte 14 Bit 4 */ - enum { - DAC960_V2_Geometry_128_32 = 0x0, - DAC960_V2_Geometry_255_63 = 0x1, - DAC960_V2_Geometry_Reserved1 = 0x2, - DAC960_V2_Geometry_Reserved2 = 0x3 - } __attribute__ ((packed)) DriveGeometry:2; /* Byte 14 Bits 5-6 */ - bool SuperReadAheadEnabled:1; /* Byte 14 Bit 7 */ - unsigned char :8; /* Byte 15 */ - /* Error Counters */ - unsigned short SoftErrors; /* Bytes 16-17 */ - unsigned short CommandsFailed; /* Bytes 18-19 */ - unsigned short HostCommandAbortsDone; /* Bytes 20-21 */ - unsigned short DeferredWriteErrors; /* Bytes 22-23 */ - unsigned int :32; /* Bytes 24-27 */ - unsigned int :32; /* Bytes 28-31 */ - /* Device Size Information */ - unsigned short :16; /* Bytes 32-33 */ - unsigned short DeviceBlockSizeInBytes; /* Bytes 34-35 */ - unsigned int OriginalDeviceSize; /* Bytes 36-39 */ - unsigned int ConfigurableDeviceSize; /* Bytes 40-43 */ - unsigned int :32; /* Bytes 44-47 */ - unsigned char LogicalDeviceName[32]; /* Bytes 48-79 */ - unsigned char SCSI_InquiryData[36]; /* Bytes 80-115 */ - unsigned char Reserved1[12]; /* Bytes 116-127 */ - DAC960_ByteCount64_T LastReadBlockNumber; /* Bytes 128-135 */ - DAC960_ByteCount64_T LastWrittenBlockNumber; /* Bytes 136-143 */ - DAC960_ByteCount64_T ConsistencyCheckBlockNumber; /* Bytes 144-151 */ - DAC960_ByteCount64_T RebuildBlockNumber; /* Bytes 152-159 */ - DAC960_ByteCount64_T 
BackgroundInitializationBlockNumber; /* Bytes 160-167 */ - DAC960_ByteCount64_T ForegroundInitializationBlockNumber; /* Bytes 168-175 */ - DAC960_ByteCount64_T DataMigrationBlockNumber; /* Bytes 176-183 */ - DAC960_ByteCount64_T PatrolOperationBlockNumber; /* Bytes 184-191 */ - unsigned char Reserved2[64]; /* Bytes 192-255 */ -} -DAC960_V2_LogicalDeviceInfo_T; - - -/* - Define the DAC960 V2 Firmware Physical Device State type. -*/ - -typedef enum -{ - DAC960_V2_Device_Unconfigured = 0x00, - DAC960_V2_Device_Online = 0x01, - DAC960_V2_Device_Rebuild = 0x03, - DAC960_V2_Device_Missing = 0x04, - DAC960_V2_Device_Critical = 0x05, - DAC960_V2_Device_Dead = 0x08, - DAC960_V2_Device_SuspectedDead = 0x0C, - DAC960_V2_Device_CommandedOffline = 0x10, - DAC960_V2_Device_Standby = 0x21, - DAC960_V2_Device_InvalidState = 0xFF -} -__attribute__ ((packed)) -DAC960_V2_PhysicalDeviceState_T; - - -/* - Define the DAC960 V2 Firmware Get Physical Device Info reply structure. -*/ - -typedef struct DAC960_V2_PhysicalDeviceInfo -{ - unsigned char :8; /* Byte 0 */ - unsigned char Channel; /* Byte 1 */ - unsigned char TargetID; /* Byte 2 */ - unsigned char LogicalUnit; /* Byte 3 */ - /* Configuration Status Bits */ - bool PhysicalDeviceFaultTolerant:1; /* Byte 4 Bit 0 */ - bool PhysicalDeviceConnected:1; /* Byte 4 Bit 1 */ - bool PhysicalDeviceLocalToController:1; /* Byte 4 Bit 2 */ - unsigned char :5; /* Byte 4 Bits 3-7 */ - /* Multiple Host/Controller Status Bits */ - bool RemoteHostSystemDead:1; /* Byte 5 Bit 0 */ - bool RemoteControllerDead:1; /* Byte 5 Bit 1 */ - unsigned char :6; /* Byte 5 Bits 2-7 */ - DAC960_V2_PhysicalDeviceState_T PhysicalDeviceState; /* Byte 6 */ - unsigned char NegotiatedDataWidthBits; /* Byte 7 */ - unsigned short NegotiatedSynchronousMegaTransfers; /* Bytes 8-9 */ - /* Multiported Physical Device Information */ - unsigned char NumberOfPortConnections; /* Byte 10 */ - unsigned char DriveAccessibilityBitmap; /* Byte 11 */ - unsigned int :32; /* Bytes 12-15 */ - unsigned char NetworkAddress[16]; /* Bytes 16-31 */ - unsigned short MaximumTags; /* Bytes 32-33 */ - /* Physical Device Operations Status */ - bool ConsistencyCheckInProgress:1; /* Byte 34 Bit 0 */ - bool RebuildInProgress:1; /* Byte 34 Bit 1 */ - bool MakingDataConsistentInProgress:1; /* Byte 34 Bit 2 */ - bool PhysicalDeviceInitializationInProgress:1; /* Byte 34 Bit 3 */ - bool DataMigrationInProgress:1; /* Byte 34 Bit 4 */ - bool PatrolOperationInProgress:1; /* Byte 34 Bit 5 */ - unsigned char :2; /* Byte 34 Bits 6-7 */ - unsigned char LongOperationStatus; /* Byte 35 */ - unsigned char ParityErrors; /* Byte 36 */ - unsigned char SoftErrors; /* Byte 37 */ - unsigned char HardErrors; /* Byte 38 */ - unsigned char MiscellaneousErrors; /* Byte 39 */ - unsigned char CommandTimeouts; /* Byte 40 */ - unsigned char Retries; /* Byte 41 */ - unsigned char Aborts; /* Byte 42 */ - unsigned char PredictedFailuresDetected; /* Byte 43 */ - unsigned int :32; /* Bytes 44-47 */ - unsigned short :16; /* Bytes 48-49 */ - unsigned short DeviceBlockSizeInBytes; /* Bytes 50-51 */ - unsigned int OriginalDeviceSize; /* Bytes 52-55 */ - unsigned int ConfigurableDeviceSize; /* Bytes 56-59 */ - unsigned int :32; /* Bytes 60-63 */ - unsigned char PhysicalDeviceName[16]; /* Bytes 64-79 */ - unsigned char Reserved1[16]; /* Bytes 80-95 */ - unsigned char Reserved2[32]; /* Bytes 96-127 */ - unsigned char SCSI_InquiryData[36]; /* Bytes 128-163 */ - unsigned char Reserved3[20]; /* Bytes 164-183 */ - unsigned char Reserved4[8]; /* Bytes 184-191 */ - 
DAC960_ByteCount64_T LastReadBlockNumber; /* Bytes 192-199 */ - DAC960_ByteCount64_T LastWrittenBlockNumber; /* Bytes 200-207 */ - DAC960_ByteCount64_T ConsistencyCheckBlockNumber; /* Bytes 208-215 */ - DAC960_ByteCount64_T RebuildBlockNumber; /* Bytes 216-223 */ - DAC960_ByteCount64_T MakingDataConsistentBlockNumber; /* Bytes 224-231 */ - DAC960_ByteCount64_T DeviceInitializationBlockNumber; /* Bytes 232-239 */ - DAC960_ByteCount64_T DataMigrationBlockNumber; /* Bytes 240-247 */ - DAC960_ByteCount64_T PatrolOperationBlockNumber; /* Bytes 248-255 */ - unsigned char Reserved5[256]; /* Bytes 256-511 */ -} -DAC960_V2_PhysicalDeviceInfo_T; - - -/* - Define the DAC960 V2 Firmware Health Status Buffer structure. -*/ - -typedef struct DAC960_V2_HealthStatusBuffer -{ - unsigned int MicrosecondsFromControllerStartTime; /* Bytes 0-3 */ - unsigned int MillisecondsFromControllerStartTime; /* Bytes 4-7 */ - unsigned int SecondsFrom1January1970; /* Bytes 8-11 */ - unsigned int :32; /* Bytes 12-15 */ - unsigned int StatusChangeCounter; /* Bytes 16-19 */ - unsigned int :32; /* Bytes 20-23 */ - unsigned int DebugOutputMessageBufferIndex; /* Bytes 24-27 */ - unsigned int CodedMessageBufferIndex; /* Bytes 28-31 */ - unsigned int CurrentTimeTracePageNumber; /* Bytes 32-35 */ - unsigned int CurrentProfilerPageNumber; /* Bytes 36-39 */ - unsigned int NextEventSequenceNumber; /* Bytes 40-43 */ - unsigned int :32; /* Bytes 44-47 */ - unsigned char Reserved1[16]; /* Bytes 48-63 */ - unsigned char Reserved2[64]; /* Bytes 64-127 */ -} -DAC960_V2_HealthStatusBuffer_T; - - -/* - Define the DAC960 V2 Firmware Get Event reply structure. -*/ - -typedef struct DAC960_V2_Event -{ - unsigned int EventSequenceNumber; /* Bytes 0-3 */ - unsigned int EventTime; /* Bytes 4-7 */ - unsigned int EventCode; /* Bytes 8-11 */ - unsigned char :8; /* Byte 12 */ - unsigned char Channel; /* Byte 13 */ - unsigned char TargetID; /* Byte 14 */ - unsigned char LogicalUnit; /* Byte 15 */ - unsigned int :32; /* Bytes 16-19 */ - unsigned int EventSpecificParameter; /* Bytes 20-23 */ - unsigned char RequestSenseData[40]; /* Bytes 24-63 */ -} -DAC960_V2_Event_T; - - -/* - Define the DAC960 V2 Firmware Command Control Bits structure. -*/ - -typedef struct DAC960_V2_CommandControlBits -{ - bool ForceUnitAccess:1; /* Byte 0 Bit 0 */ - bool DisablePageOut:1; /* Byte 0 Bit 1 */ - bool :1; /* Byte 0 Bit 2 */ - bool AdditionalScatterGatherListMemory:1; /* Byte 0 Bit 3 */ - bool DataTransferControllerToHost:1; /* Byte 0 Bit 4 */ - bool :1; /* Byte 0 Bit 5 */ - bool NoAutoRequestSense:1; /* Byte 0 Bit 6 */ - bool DisconnectProhibited:1; /* Byte 0 Bit 7 */ -} -DAC960_V2_CommandControlBits_T; - - -/* - Define the DAC960 V2 Firmware Command Timeout structure. -*/ - -typedef struct DAC960_V2_CommandTimeout -{ - unsigned char TimeoutValue:6; /* Byte 0 Bits 0-5 */ - enum { - DAC960_V2_TimeoutScale_Seconds = 0, - DAC960_V2_TimeoutScale_Minutes = 1, - DAC960_V2_TimeoutScale_Hours = 2, - DAC960_V2_TimeoutScale_Reserved = 3 - } __attribute__ ((packed)) TimeoutScale:2; /* Byte 0 Bits 6-7 */ -} -DAC960_V2_CommandTimeout_T; - - -/* - Define the DAC960 V2 Firmware Physical Device structure. -*/ - -typedef struct DAC960_V2_PhysicalDevice -{ - unsigned char LogicalUnit; /* Byte 0 */ - unsigned char TargetID; /* Byte 1 */ - unsigned char Channel:3; /* Byte 2 Bits 0-2 */ - unsigned char Controller:5; /* Byte 2 Bits 3-7 */ -} -__attribute__ ((packed)) -DAC960_V2_PhysicalDevice_T; - - -/* - Define the DAC960 V2 Firmware Logical Device structure. 
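The packed physical device descriptor above fits a full device address into three bytes, with the 3-bit channel and 5-bit controller number sharing byte 2. A hypothetical initializer, only to make the packing concrete (the values are invented):

	/* Address <controller 0, channel 2, target 5, LUN 0>. */
	DAC960_V2_PhysicalDevice_T PhysicalDevice = {
		.LogicalUnit = 0,	/* Byte 0 */
		.TargetID    = 5,	/* Byte 1 */
		.Channel     = 2,	/* Byte 2, bits 0-2 */
		.Controller  = 0,	/* Byte 2, bits 3-7 */
	};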
-*/ - -typedef struct DAC960_V2_LogicalDevice -{ - unsigned short LogicalDeviceNumber; /* Bytes 0-1 */ - unsigned char :3; /* Byte 2 Bits 0-2 */ - unsigned char Controller:5; /* Byte 2 Bits 3-7 */ -} -__attribute__ ((packed)) -DAC960_V2_LogicalDevice_T; - - -/* - Define the DAC960 V2 Firmware Operation Device type. -*/ - -typedef enum -{ - DAC960_V2_Physical_Device = 0x00, - DAC960_V2_RAID_Device = 0x01, - DAC960_V2_Physical_Channel = 0x02, - DAC960_V2_RAID_Channel = 0x03, - DAC960_V2_Physical_Controller = 0x04, - DAC960_V2_RAID_Controller = 0x05, - DAC960_V2_Configuration_Group = 0x10, - DAC960_V2_Enclosure = 0x11 -} -__attribute__ ((packed)) -DAC960_V2_OperationDevice_T; - - -/* - Define the DAC960 V2 Firmware Translate Physical To Logical Device structure. -*/ - -typedef struct DAC960_V2_PhysicalToLogicalDevice -{ - unsigned short LogicalDeviceNumber; /* Bytes 0-1 */ - unsigned short :16; /* Bytes 2-3 */ - unsigned char PreviousBootController; /* Byte 4 */ - unsigned char PreviousBootChannel; /* Byte 5 */ - unsigned char PreviousBootTargetID; /* Byte 6 */ - unsigned char PreviousBootLogicalUnit; /* Byte 7 */ -} -DAC960_V2_PhysicalToLogicalDevice_T; - - - -/* - Define the DAC960 V2 Firmware Scatter/Gather List Entry structure. -*/ - -typedef struct DAC960_V2_ScatterGatherSegment -{ - DAC960_BusAddress64_T SegmentDataPointer; /* Bytes 0-7 */ - DAC960_ByteCount64_T SegmentByteCount; /* Bytes 8-15 */ -} -DAC960_V2_ScatterGatherSegment_T; - - -/* - Define the DAC960 V2 Firmware Data Transfer Memory Address structure. -*/ - -typedef union DAC960_V2_DataTransferMemoryAddress -{ - DAC960_V2_ScatterGatherSegment_T ScatterGatherSegments[2]; /* Bytes 0-31 */ - struct { - unsigned short ScatterGatherList0Length; /* Bytes 0-1 */ - unsigned short ScatterGatherList1Length; /* Bytes 2-3 */ - unsigned short ScatterGatherList2Length; /* Bytes 4-5 */ - unsigned short :16; /* Bytes 6-7 */ - DAC960_BusAddress64_T ScatterGatherList0Address; /* Bytes 8-15 */ - DAC960_BusAddress64_T ScatterGatherList1Address; /* Bytes 16-23 */ - DAC960_BusAddress64_T ScatterGatherList2Address; /* Bytes 24-31 */ - } ExtendedScatterGather; -} -DAC960_V2_DataTransferMemoryAddress_T; - - -/* - Define the 64 Byte DAC960 V2 Firmware Command Mailbox structure. 
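The data transfer union above covers two cases: up to two scatter/gather segments stored inline in the mailbox, or three lengths plus bus addresses of out-of-line segment lists for larger transfers. Filling the common single-buffer case might look like this (a sketch; buffer_dma and buffer_len are assumed to be supplied by the caller):

	/* One inline scatter/gather segment describing a single DMA
	 * buffer; the second inline segment stays zeroed. */
	DAC960_V2_DataTransferMemoryAddress_T Address = {
		.ScatterGatherSegments[0] = {
			.SegmentDataPointer = buffer_dma,  /* dma_addr_t of the buffer */
			.SegmentByteCount   = buffer_len,  /* transfer size in bytes */
		},
	};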
-*/ - -typedef union DAC960_V2_CommandMailbox -{ - unsigned int Words[16]; /* Words 0-15 */ - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - unsigned int :24; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - unsigned char Reserved[10]; /* Bytes 22-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } Common; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize; /* Bytes 4-7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - DAC960_V2_PhysicalDevice_T PhysicalDevice; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char CDBLength; /* Byte 21 */ - unsigned char SCSI_CDB[10]; /* Bytes 22-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } SCSI_10; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize; /* Bytes 4-7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - DAC960_V2_PhysicalDevice_T PhysicalDevice; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char CDBLength; /* Byte 21 */ - unsigned short :16; /* Bytes 22-23 */ - DAC960_BusAddress64_T SCSI_CDB_BusAddress; /* Bytes 24-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } SCSI_255; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - unsigned short :16; /* Bytes 16-17 */ - unsigned char ControllerNumber; /* Byte 18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - unsigned char Reserved[10]; /* Bytes 22-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } ControllerInfo; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - DAC960_V2_LogicalDevice_T LogicalDevice; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned 
char IOCTL_Opcode; /* Byte 21 */ - unsigned char Reserved[10]; /* Bytes 22-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } LogicalDeviceInfo; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - DAC960_V2_PhysicalDevice_T PhysicalDevice; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - unsigned char Reserved[10]; /* Bytes 22-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } PhysicalDeviceInfo; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - unsigned short EventSequenceNumberHigh16; /* Bytes 16-17 */ - unsigned char ControllerNumber; /* Byte 18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - unsigned short EventSequenceNumberLow16; /* Bytes 22-23 */ - unsigned char Reserved[8]; /* Bytes 24-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } GetEvent; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - DAC960_V2_LogicalDevice_T LogicalDevice; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - union { - DAC960_V2_LogicalDeviceState_T LogicalDeviceState; - DAC960_V2_PhysicalDeviceState_T PhysicalDeviceState; - } DeviceState; /* Byte 22 */ - unsigned char Reserved[9]; /* Bytes 23-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } SetDeviceState; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - DAC960_V2_LogicalDevice_T LogicalDevice; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - bool RestoreConsistency:1; /* Byte 22 Bit 0 */ - bool InitializedAreaOnly:1; /* Byte 22 Bit 1 */ - unsigned char :6; /* Byte 22 Bits 2-7 */ - unsigned char Reserved[9]; /* Bytes 23-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } 
ConsistencyCheck; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - unsigned char FirstCommandMailboxSizeKB; /* Byte 4 */ - unsigned char FirstStatusMailboxSizeKB; /* Byte 5 */ - unsigned char SecondCommandMailboxSizeKB; /* Byte 6 */ - unsigned char SecondStatusMailboxSizeKB; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - unsigned int :24; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - unsigned char HealthStatusBufferSizeKB; /* Byte 22 */ - unsigned char :8; /* Byte 23 */ - DAC960_BusAddress64_T HealthStatusBufferBusAddress; /* Bytes 24-31 */ - DAC960_BusAddress64_T FirstCommandMailboxBusAddress; /* Bytes 32-39 */ - DAC960_BusAddress64_T FirstStatusMailboxBusAddress; /* Bytes 40-47 */ - DAC960_BusAddress64_T SecondCommandMailboxBusAddress; /* Bytes 48-55 */ - DAC960_BusAddress64_T SecondStatusMailboxBusAddress; /* Bytes 56-63 */ - } SetMemoryMailbox; - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */ - DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */ - DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */ - unsigned char DataTransferPageNumber; /* Byte 7 */ - DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */ - DAC960_V2_PhysicalDevice_T PhysicalDevice; /* Bytes 16-18 */ - DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */ - unsigned char RequestSenseSize; /* Byte 20 */ - unsigned char IOCTL_Opcode; /* Byte 21 */ - DAC960_V2_OperationDevice_T OperationDevice; /* Byte 22 */ - unsigned char Reserved[9]; /* Bytes 23-31 */ - DAC960_V2_DataTransferMemoryAddress_T - DataTransferMemoryAddress; /* Bytes 32-63 */ - } DeviceOperation; -} -DAC960_V2_CommandMailbox_T; - - -/* - Define the DAC960 Driver IOCTL requests. -*/ - -#define DAC960_IOCTL_GET_CONTROLLER_COUNT 0xDAC001 -#define DAC960_IOCTL_GET_CONTROLLER_INFO 0xDAC002 -#define DAC960_IOCTL_V1_EXECUTE_COMMAND 0xDAC003 -#define DAC960_IOCTL_V2_EXECUTE_COMMAND 0xDAC004 -#define DAC960_IOCTL_V2_GET_HEALTH_STATUS 0xDAC005 - - -/* - Define the DAC960_IOCTL_GET_CONTROLLER_INFO reply structure. -*/ - -typedef struct DAC960_ControllerInfo -{ - unsigned char ControllerNumber; - unsigned char FirmwareType; - unsigned char Channels; - unsigned char Targets; - unsigned char PCI_Bus; - unsigned char PCI_Device; - unsigned char PCI_Function; - unsigned char IRQ_Channel; - DAC960_PCI_Address_T PCI_Address; - unsigned char ModelName[20]; - unsigned char FirmwareVersion[12]; -} -DAC960_ControllerInfo_T; - - -/* - Define the User Mode DAC960_IOCTL_V1_EXECUTE_COMMAND request structure. -*/ - -typedef struct DAC960_V1_UserCommand -{ - unsigned char ControllerNumber; - DAC960_V1_CommandMailbox_T CommandMailbox; - int DataTransferLength; - void __user *DataTransferBuffer; - DAC960_V1_DCDB_T __user *DCDB; -} -DAC960_V1_UserCommand_T; - - -/* - Define the Kernel Mode DAC960_IOCTL_V1_EXECUTE_COMMAND request structure. 
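In the DAC960_IOCTL_GET_CONTROLLER_INFO reply structure above, ControllerNumber is the only field the caller fills in; the driver writes everything else back. A hypothetical user-space caller (the /dev/dac960_gam node name and the header arrangement are assumptions, not taken from this patch):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	/* ...plus this header, for the ioctl codes and reply types */

	int main(void)
	{
		DAC960_ControllerInfo_T Info = { .ControllerNumber = 0 };
		int fd = open("/dev/dac960_gam", O_RDONLY);	/* assumed node */

		if (fd >= 0 && ioctl(fd, DAC960_IOCTL_GET_CONTROLLER_INFO, &Info) == 0)
			printf("%.20s, firmware %.12s\n",
			       Info.ModelName, Info.FirmwareVersion);
		if (fd >= 0)
			close(fd);
		return 0;
	}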
-*/ - -typedef struct DAC960_V1_KernelCommand -{ - unsigned char ControllerNumber; - DAC960_V1_CommandMailbox_T CommandMailbox; - int DataTransferLength; - void *DataTransferBuffer; - DAC960_V1_DCDB_T *DCDB; - DAC960_V1_CommandStatus_T CommandStatus; - void (*CompletionFunction)(struct DAC960_V1_KernelCommand *); - void *CompletionData; -} -DAC960_V1_KernelCommand_T; - - -/* - Define the User Mode DAC960_IOCTL_V2_EXECUTE_COMMAND request structure. -*/ - -typedef struct DAC960_V2_UserCommand -{ - unsigned char ControllerNumber; - DAC960_V2_CommandMailbox_T CommandMailbox; - int DataTransferLength; - int RequestSenseLength; - void __user *DataTransferBuffer; - void __user *RequestSenseBuffer; -} -DAC960_V2_UserCommand_T; - - -/* - Define the Kernel Mode DAC960_IOCTL_V2_EXECUTE_COMMAND request structure. -*/ - -typedef struct DAC960_V2_KernelCommand -{ - unsigned char ControllerNumber; - DAC960_V2_CommandMailbox_T CommandMailbox; - int DataTransferLength; - int RequestSenseLength; - void *DataTransferBuffer; - void *RequestSenseBuffer; - DAC960_V2_CommandStatus_T CommandStatus; - void (*CompletionFunction)(struct DAC960_V2_KernelCommand *); - void *CompletionData; -} -DAC960_V2_KernelCommand_T; - - -/* - Define the User Mode DAC960_IOCTL_V2_GET_HEALTH_STATUS request structure. -*/ - -typedef struct DAC960_V2_GetHealthStatus -{ - unsigned char ControllerNumber; - DAC960_V2_HealthStatusBuffer_T __user *HealthStatusBuffer; -} -DAC960_V2_GetHealthStatus_T; - - -/* - Import the Kernel Mode IOCTL interface. -*/ - -extern int DAC960_KernelIOCTL(unsigned int Request, void *Argument); - - -/* - DAC960_DriverVersion protects the private portion of this file. -*/ - -#ifdef DAC960_DriverVersion - - -/* - Define the maximum Driver Queue Depth and Controller Queue Depth supported - by DAC960 V1 and V2 Firmware Controllers. -*/ - -#define DAC960_MaxDriverQueueDepth 511 -#define DAC960_MaxControllerQueueDepth 512 - - -/* - Define the maximum number of Scatter/Gather Segments supported for any - DAC960 V1 and V2 Firmware controller. -*/ - -#define DAC960_V1_ScatterGatherLimit 33 -#define DAC960_V2_ScatterGatherLimit 128 - - -/* - Define the number of Command Mailboxes and Status Mailboxes used by the - DAC960 V1 and V2 Firmware Memory Mailbox Interface. -*/ - -#define DAC960_V1_CommandMailboxCount 256 -#define DAC960_V1_StatusMailboxCount 1024 -#define DAC960_V2_CommandMailboxCount 512 -#define DAC960_V2_StatusMailboxCount 512 - - -/* - Define the DAC960 Controller Monitoring Timer Interval. -*/ - -#define DAC960_MonitoringTimerInterval (10 * HZ) - - -/* - Define the DAC960 Controller Secondary Monitoring Interval. -*/ - -#define DAC960_SecondaryMonitoringInterval (60 * HZ) - - -/* - Define the DAC960 Controller Health Status Monitoring Interval. -*/ - -#define DAC960_HealthStatusMonitoringInterval (1 * HZ) - - -/* - Define the DAC960 Controller Progress Reporting Interval. -*/ - -#define DAC960_ProgressReportingInterval (60 * HZ) - - -/* - Define the maximum number of Partitions allowed for each Logical Drive. -*/ - -#define DAC960_MaxPartitions 8 -#define DAC960_MaxPartitionsBits 3 - -/* - Define the DAC960 Controller fixed Block Size and Block Size Bits. -*/ - -#define DAC960_BlockSize 512 -#define DAC960_BlockSizeBits 9 - - -/* - Define the number of Command structures that should be allocated as a - group to optimize kernel memory allocation. 
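DAC960_KernelIOCTL() above exposes the same request codes to in-kernel callers, with the kernel-mode command structures carrying a CompletionFunction instead of blocking the caller; that asynchronous-completion reading is an assumption here, as is every name in the sketch below.

	/* Hypothetical in-kernel caller of the exported ioctl entry point. */
	static void my_v2_command_done(DAC960_V2_KernelCommand_T *KernelCommand)
	{
		/* assumed to run once KernelCommand->CommandStatus is valid */
	}

	static DAC960_V2_KernelCommand_T MyKernelCommand = {
		.ControllerNumber   = 0,
		.CompletionFunction = my_v2_command_done,
	};

	static int my_v2_submit(void)
	{
		/* the CommandMailbox and data buffers would be set up first */
		return DAC960_KernelIOCTL(DAC960_IOCTL_V2_EXECUTE_COMMAND,
					  &MyKernelCommand);
	}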
-*/ - -#define DAC960_V1_CommandAllocationGroupSize 11 -#define DAC960_V2_CommandAllocationGroupSize 29 - - -/* - Define the Controller Line Buffer, Progress Buffer, User Message, and - Initial Status Buffer sizes. -*/ - -#define DAC960_LineBufferSize 100 -#define DAC960_ProgressBufferSize 200 -#define DAC960_UserMessageSize 200 -#define DAC960_InitialStatusBufferSize (8192-32) - - -/* - Define the DAC960 Controller Firmware Types. -*/ - -typedef enum -{ - DAC960_V1_Controller = 1, - DAC960_V2_Controller = 2 -} -DAC960_FirmwareType_T; - - -/* - Define the DAC960 Controller Hardware Types. -*/ - -typedef enum -{ - DAC960_BA_Controller = 1, /* eXtremeRAID 2000 */ - DAC960_LP_Controller = 2, /* AcceleRAID 352 */ - DAC960_LA_Controller = 3, /* DAC1164P */ - DAC960_PG_Controller = 4, /* DAC960PTL/PJ/PG */ - DAC960_PD_Controller = 5, /* DAC960PU/PD/PL/P */ - DAC960_P_Controller = 6, /* DAC960PU/PD/PL/P */ - DAC960_GEM_Controller = 7, /* AcceleRAID 4/5/600 */ -} -DAC960_HardwareType_T; - - -/* - Define the Driver Message Levels. -*/ - -typedef enum DAC960_MessageLevel -{ - DAC960_AnnounceLevel = 0, - DAC960_InfoLevel = 1, - DAC960_NoticeLevel = 2, - DAC960_WarningLevel = 3, - DAC960_ErrorLevel = 4, - DAC960_ProgressLevel = 5, - DAC960_CriticalLevel = 6, - DAC960_UserCriticalLevel = 7 -} -DAC960_MessageLevel_T; - -static char - *DAC960_MessageLevelMap[] = - { KERN_NOTICE, KERN_NOTICE, KERN_NOTICE, KERN_WARNING, - KERN_ERR, KERN_CRIT, KERN_CRIT, KERN_CRIT }; - - -/* - Define Driver Message macros. -*/ - -#define DAC960_Announce(Format, Arguments...) \ - DAC960_Message(DAC960_AnnounceLevel, Format, ##Arguments) - -#define DAC960_Info(Format, Arguments...) \ - DAC960_Message(DAC960_InfoLevel, Format, ##Arguments) - -#define DAC960_Notice(Format, Arguments...) \ - DAC960_Message(DAC960_NoticeLevel, Format, ##Arguments) - -#define DAC960_Warning(Format, Arguments...) \ - DAC960_Message(DAC960_WarningLevel, Format, ##Arguments) - -#define DAC960_Error(Format, Arguments...) \ - DAC960_Message(DAC960_ErrorLevel, Format, ##Arguments) - -#define DAC960_Progress(Format, Arguments...) \ - DAC960_Message(DAC960_ProgressLevel, Format, ##Arguments) - -#define DAC960_Critical(Format, Arguments...) \ - DAC960_Message(DAC960_CriticalLevel, Format, ##Arguments) - -#define DAC960_UserCritical(Format, Arguments...) \ - DAC960_Message(DAC960_UserCriticalLevel, Format, ##Arguments) - - -struct DAC960_privdata { - DAC960_HardwareType_T HardwareType; - DAC960_FirmwareType_T FirmwareType; - irq_handler_t InterruptHandler; - unsigned int MemoryWindowSize; -}; - - -/* - Define the DAC960 V1 Firmware Controller Status Mailbox structure. -*/ - -typedef union DAC960_V1_StatusMailbox -{ - unsigned int Word; /* Word 0 */ - struct { - DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 0 */ - unsigned char :7; /* Byte 1 Bits 0-6 */ - bool Valid:1; /* Byte 1 Bit 7 */ - DAC960_V1_CommandStatus_T CommandStatus; /* Bytes 2-3 */ - } Fields; -} -DAC960_V1_StatusMailbox_T; - - -/* - Define the DAC960 V2 Firmware Controller Status Mailbox structure. -*/ - -typedef union DAC960_V2_StatusMailbox -{ - unsigned int Words[2]; /* Words 0-1 */ - struct { - DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */ - DAC960_V2_CommandStatus_T CommandStatus; /* Byte 2 */ - unsigned char RequestSenseLength; /* Byte 3 */ - int DataTransferResidue; /* Bytes 4-7 */ - } Fields; -} -DAC960_V2_StatusMailbox_T; - - -/* - Define the DAC960 Driver Command Types. 
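The V1 status mailbox above is consumed by polling: the Valid bit in byte 1 announces a completed command, the handler reads the identifier and status out of the slot, and clearing the whole word hands the slot back to the controller. Condensed into a sketch (illustrative, not code from the patch):

	if (StatusMailbox->Fields.Valid) {
		DAC960_V1_CommandIdentifier_T Id =
			StatusMailbox->Fields.CommandIdentifier;
		DAC960_V1_CommandStatus_T Status =
			StatusMailbox->Fields.CommandStatus;

		StatusMailbox->Word = 0;	/* release the slot */
		/* ...complete the command identified by Id with Status... */
	}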
-*/ - -typedef enum -{ - DAC960_ReadCommand = 1, - DAC960_WriteCommand = 2, - DAC960_ReadRetryCommand = 3, - DAC960_WriteRetryCommand = 4, - DAC960_MonitoringCommand = 5, - DAC960_ImmediateCommand = 6, - DAC960_QueuedCommand = 7 -} -DAC960_CommandType_T; - - -/* - Define the DAC960 Driver Command structure. -*/ - -typedef struct DAC960_Command -{ - int CommandIdentifier; - DAC960_CommandType_T CommandType; - struct DAC960_Controller *Controller; - struct DAC960_Command *Next; - struct completion *Completion; - unsigned int LogicalDriveNumber; - unsigned int BlockNumber; - unsigned int BlockCount; - unsigned int SegmentCount; - int DmaDirection; - struct scatterlist *cmd_sglist; - struct request *Request; - union { - struct { - DAC960_V1_CommandMailbox_T CommandMailbox; - DAC960_V1_KernelCommand_T *KernelCommand; - DAC960_V1_CommandStatus_T CommandStatus; - DAC960_V1_ScatterGatherSegment_T *ScatterGatherList; - dma_addr_t ScatterGatherListDMA; - struct scatterlist ScatterList[DAC960_V1_ScatterGatherLimit]; - unsigned int EndMarker[0]; - } V1; - struct { - DAC960_V2_CommandMailbox_T CommandMailbox; - DAC960_V2_KernelCommand_T *KernelCommand; - DAC960_V2_CommandStatus_T CommandStatus; - unsigned char RequestSenseLength; - int DataTransferResidue; - DAC960_V2_ScatterGatherSegment_T *ScatterGatherList; - dma_addr_t ScatterGatherListDMA; - DAC960_SCSI_RequestSense_T *RequestSense; - dma_addr_t RequestSenseDMA; - struct scatterlist ScatterList[DAC960_V2_ScatterGatherLimit]; - unsigned int EndMarker[0]; - } V2; - } FW; -} -DAC960_Command_T; - - -/* - Define the DAC960 Driver Controller structure. -*/ - -typedef struct DAC960_Controller -{ - void __iomem *BaseAddress; - void __iomem *MemoryMappedAddress; - DAC960_FirmwareType_T FirmwareType; - DAC960_HardwareType_T HardwareType; - DAC960_IO_Address_T IO_Address; - DAC960_PCI_Address_T PCI_Address; - struct pci_dev *PCIDevice; - unsigned char ControllerNumber; - unsigned char ControllerName[4]; - unsigned char ModelName[20]; - unsigned char FullModelName[28]; - unsigned char FirmwareVersion[12]; - unsigned char Bus; - unsigned char Device; - unsigned char Function; - unsigned char IRQ_Channel; - unsigned char Channels; - unsigned char Targets; - unsigned char MemorySize; - unsigned char LogicalDriveCount; - unsigned short CommandAllocationGroupSize; - unsigned short ControllerQueueDepth; - unsigned short DriverQueueDepth; - unsigned short MaxBlocksPerCommand; - unsigned short ControllerScatterGatherLimit; - unsigned short DriverScatterGatherLimit; - unsigned int CombinedStatusBufferLength; - unsigned int InitialStatusLength; - unsigned int CurrentStatusLength; - unsigned int ProgressBufferLength; - unsigned int UserStatusLength; - struct dma_loaf DmaPages; - unsigned long MonitoringTimerCount; - unsigned long PrimaryMonitoringTime; - unsigned long SecondaryMonitoringTime; - unsigned long ShutdownMonitoringTimer; - unsigned long LastProgressReportTime; - unsigned long LastCurrentStatusTime; - bool ControllerInitialized; - bool MonitoringCommandDeferred; - bool EphemeralProgressMessage; - bool DriveSpinUpMessageDisplayed; - bool MonitoringAlertMode; - bool SuppressEnclosureMessages; - struct timer_list MonitoringTimer; - struct gendisk *disks[DAC960_MaxLogicalDrives]; - struct dma_pool *ScatterGatherPool; - DAC960_Command_T *FreeCommands; - unsigned char *CombinedStatusBuffer; - unsigned char *CurrentStatusBuffer; - struct request_queue *RequestQueue[DAC960_MaxLogicalDrives]; - int req_q_index; - spinlock_t queue_lock; - wait_queue_head_t 
CommandWaitQueue; - wait_queue_head_t HealthStatusWaitQueue; - DAC960_Command_T InitialCommand; - DAC960_Command_T *Commands[DAC960_MaxDriverQueueDepth]; - struct proc_dir_entry *ControllerProcEntry; - bool LogicalDriveInitiallyAccessible[DAC960_MaxLogicalDrives]; - void (*QueueCommand)(DAC960_Command_T *Command); - bool (*ReadControllerConfiguration)(struct DAC960_Controller *); - bool (*ReadDeviceConfiguration)(struct DAC960_Controller *); - bool (*ReportDeviceConfiguration)(struct DAC960_Controller *); - void (*QueueReadWriteCommand)(DAC960_Command_T *Command); - union { - struct { - unsigned char GeometryTranslationHeads; - unsigned char GeometryTranslationSectors; - unsigned char PendingRebuildFlag; - unsigned short StripeSize; - unsigned short SegmentSize; - unsigned short NewEventLogSequenceNumber; - unsigned short OldEventLogSequenceNumber; - unsigned short DeviceStateChannel; - unsigned short DeviceStateTargetID; - bool DualModeMemoryMailboxInterface; - bool BackgroundInitializationStatusSupported; - bool SAFTE_EnclosureManagementEnabled; - bool NeedLogicalDriveInformation; - bool NeedErrorTableInformation; - bool NeedDeviceStateInformation; - bool NeedDeviceInquiryInformation; - bool NeedDeviceSerialNumberInformation; - bool NeedRebuildProgress; - bool NeedConsistencyCheckProgress; - bool NeedBackgroundInitializationStatus; - bool StartDeviceStateScan; - bool RebuildProgressFirst; - bool RebuildFlagPending; - bool RebuildStatusPending; - - dma_addr_t FirstCommandMailboxDMA; - DAC960_V1_CommandMailbox_T *FirstCommandMailbox; - DAC960_V1_CommandMailbox_T *LastCommandMailbox; - DAC960_V1_CommandMailbox_T *NextCommandMailbox; - DAC960_V1_CommandMailbox_T *PreviousCommandMailbox1; - DAC960_V1_CommandMailbox_T *PreviousCommandMailbox2; - - dma_addr_t FirstStatusMailboxDMA; - DAC960_V1_StatusMailbox_T *FirstStatusMailbox; - DAC960_V1_StatusMailbox_T *LastStatusMailbox; - DAC960_V1_StatusMailbox_T *NextStatusMailbox; - - DAC960_V1_DCDB_T *MonitoringDCDB; - dma_addr_t MonitoringDCDB_DMA; - - DAC960_V1_Enquiry_T Enquiry; - DAC960_V1_Enquiry_T *NewEnquiry; - dma_addr_t NewEnquiryDMA; - - DAC960_V1_ErrorTable_T ErrorTable; - DAC960_V1_ErrorTable_T *NewErrorTable; - dma_addr_t NewErrorTableDMA; - - DAC960_V1_EventLogEntry_T *EventLogEntry; - dma_addr_t EventLogEntryDMA; - - DAC960_V1_RebuildProgress_T *RebuildProgress; - dma_addr_t RebuildProgressDMA; - DAC960_V1_CommandStatus_T LastRebuildStatus; - DAC960_V1_CommandStatus_T PendingRebuildStatus; - - DAC960_V1_LogicalDriveInformationArray_T LogicalDriveInformation; - DAC960_V1_LogicalDriveInformationArray_T *NewLogicalDriveInformation; - dma_addr_t NewLogicalDriveInformationDMA; - - DAC960_V1_BackgroundInitializationStatus_T - *BackgroundInitializationStatus; - dma_addr_t BackgroundInitializationStatusDMA; - DAC960_V1_BackgroundInitializationStatus_T - LastBackgroundInitializationStatus; - - DAC960_V1_DeviceState_T - DeviceState[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets]; - DAC960_V1_DeviceState_T *NewDeviceState; - dma_addr_t NewDeviceStateDMA; - - DAC960_SCSI_Inquiry_T - InquiryStandardData[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets]; - DAC960_SCSI_Inquiry_T *NewInquiryStandardData; - dma_addr_t NewInquiryStandardDataDMA; - - DAC960_SCSI_Inquiry_UnitSerialNumber_T - InquiryUnitSerialNumber[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets]; - DAC960_SCSI_Inquiry_UnitSerialNumber_T *NewInquiryUnitSerialNumber; - dma_addr_t NewInquiryUnitSerialNumberDMA; - - int DeviceResetCount[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets]; - bool 
DirectCommandActive[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets]; - } V1; - struct { - unsigned int StatusChangeCounter; - unsigned int NextEventSequenceNumber; - unsigned int PhysicalDeviceIndex; - bool NeedLogicalDeviceInformation; - bool NeedPhysicalDeviceInformation; - bool NeedDeviceSerialNumberInformation; - bool StartLogicalDeviceInformationScan; - bool StartPhysicalDeviceInformationScan; - struct dma_pool *RequestSensePool; - - dma_addr_t FirstCommandMailboxDMA; - DAC960_V2_CommandMailbox_T *FirstCommandMailbox; - DAC960_V2_CommandMailbox_T *LastCommandMailbox; - DAC960_V2_CommandMailbox_T *NextCommandMailbox; - DAC960_V2_CommandMailbox_T *PreviousCommandMailbox1; - DAC960_V2_CommandMailbox_T *PreviousCommandMailbox2; - - dma_addr_t FirstStatusMailboxDMA; - DAC960_V2_StatusMailbox_T *FirstStatusMailbox; - DAC960_V2_StatusMailbox_T *LastStatusMailbox; - DAC960_V2_StatusMailbox_T *NextStatusMailbox; - - dma_addr_t HealthStatusBufferDMA; - DAC960_V2_HealthStatusBuffer_T *HealthStatusBuffer; - - DAC960_V2_ControllerInfo_T ControllerInformation; - DAC960_V2_ControllerInfo_T *NewControllerInformation; - dma_addr_t NewControllerInformationDMA; - - DAC960_V2_LogicalDeviceInfo_T - *LogicalDeviceInformation[DAC960_MaxLogicalDrives]; - DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInformation; - dma_addr_t NewLogicalDeviceInformationDMA; - - DAC960_V2_PhysicalDeviceInfo_T - *PhysicalDeviceInformation[DAC960_V2_MaxPhysicalDevices]; - DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInformation; - dma_addr_t NewPhysicalDeviceInformationDMA; - - DAC960_SCSI_Inquiry_UnitSerialNumber_T *NewInquiryUnitSerialNumber; - dma_addr_t NewInquiryUnitSerialNumberDMA; - DAC960_SCSI_Inquiry_UnitSerialNumber_T - *InquiryUnitSerialNumber[DAC960_V2_MaxPhysicalDevices]; - - DAC960_V2_Event_T *Event; - dma_addr_t EventDMA; - - DAC960_V2_PhysicalToLogicalDevice_T *PhysicalToLogicalDevice; - dma_addr_t PhysicalToLogicalDeviceDMA; - - DAC960_V2_PhysicalDevice_T - LogicalDriveToVirtualDevice[DAC960_MaxLogicalDrives]; - bool LogicalDriveFoundDuringScan[DAC960_MaxLogicalDrives]; - } V2; - } FW; - unsigned char ProgressBuffer[DAC960_ProgressBufferSize]; - unsigned char UserStatusBuffer[DAC960_UserMessageSize]; -} -DAC960_Controller_T; - - -/* - Simplify access to Firmware Version Dependent Data Structure Components - and Functions. -*/ - -#define V1 FW.V1 -#define V2 FW.V2 -#define DAC960_QueueCommand(Command) \ - (Controller->QueueCommand)(Command) -#define DAC960_ReadControllerConfiguration(Controller) \ - (Controller->ReadControllerConfiguration)(Controller) -#define DAC960_ReadDeviceConfiguration(Controller) \ - (Controller->ReadDeviceConfiguration)(Controller) -#define DAC960_ReportDeviceConfiguration(Controller) \ - (Controller->ReportDeviceConfiguration)(Controller) -#define DAC960_QueueReadWriteCommand(Command) \ - (Controller->QueueReadWriteCommand)(Command) - -/* - * dma_addr_writeql is provided to write dma_addr_t types - * to a 64-bit pci address space register. The controller - * will accept having the register written as two 32-bit - * values. - * - * In HIGHMEM kernels, dma_addr_t is a 64-bit value. - * without HIGHMEM, dma_addr_t is a 32-bit value. - * - * The compiler should always fix up the assignment - * to u.wq appropriately, depending upon the size of - * dma_addr_t. 
- */ -static inline -void dma_addr_writeql(dma_addr_t addr, void __iomem *write_address) -{ - union { - u64 wq; - uint wl[2]; - } u; - - u.wq = addr; - - writel(u.wl[0], write_address); - writel(u.wl[1], write_address + 4); -} - -/* - Define the DAC960 GEM Series Controller Interface Register Offsets. - */ - -#define DAC960_GEM_RegisterWindowSize 0x600 - -typedef enum -{ - DAC960_GEM_InboundDoorBellRegisterReadSetOffset = 0x214, - DAC960_GEM_InboundDoorBellRegisterClearOffset = 0x218, - DAC960_GEM_OutboundDoorBellRegisterReadSetOffset = 0x224, - DAC960_GEM_OutboundDoorBellRegisterClearOffset = 0x228, - DAC960_GEM_InterruptStatusRegisterOffset = 0x208, - DAC960_GEM_InterruptMaskRegisterReadSetOffset = 0x22C, - DAC960_GEM_InterruptMaskRegisterClearOffset = 0x230, - DAC960_GEM_CommandMailboxBusAddressOffset = 0x510, - DAC960_GEM_CommandStatusOffset = 0x518, - DAC960_GEM_ErrorStatusRegisterReadSetOffset = 0x224, - DAC960_GEM_ErrorStatusRegisterClearOffset = 0x228, -} -DAC960_GEM_RegisterOffsets_T; - -/* - Define the structure of the DAC960 GEM Series Inbound Door Bell - */ - -typedef union DAC960_GEM_InboundDoorBellRegister -{ - unsigned int All; - struct { - unsigned int :24; - bool HardwareMailboxNewCommand:1; - bool AcknowledgeHardwareMailboxStatus:1; - bool GenerateInterrupt:1; - bool ControllerReset:1; - bool MemoryMailboxNewCommand:1; - unsigned int :3; - } Write; - struct { - unsigned int :24; - bool HardwareMailboxFull:1; - bool InitializationInProgress:1; - unsigned int :6; - } Read; -} -DAC960_GEM_InboundDoorBellRegister_T; - -/* - Define the structure of the DAC960 GEM Series Outbound Door Bell Register. - */ -typedef union DAC960_GEM_OutboundDoorBellRegister -{ - unsigned int All; - struct { - unsigned int :24; - bool AcknowledgeHardwareMailboxInterrupt:1; - bool AcknowledgeMemoryMailboxInterrupt:1; - unsigned int :6; - } Write; - struct { - unsigned int :24; - bool HardwareMailboxStatusAvailable:1; - bool MemoryMailboxStatusAvailable:1; - unsigned int :6; - } Read; -} -DAC960_GEM_OutboundDoorBellRegister_T; - -/* - Define the structure of the DAC960 GEM Series Interrupt Mask Register. - */ -typedef union DAC960_GEM_InterruptMaskRegister -{ - unsigned int All; - struct { - unsigned int :16; - unsigned int :8; - unsigned int HardwareMailboxInterrupt:1; - unsigned int MemoryMailboxInterrupt:1; - unsigned int :6; - } Bits; -} -DAC960_GEM_InterruptMaskRegister_T; - -/* - Define the structure of the DAC960 GEM Series Error Status Register. - */ - -typedef union DAC960_GEM_ErrorStatusRegister -{ - unsigned int All; - struct { - unsigned int :24; - unsigned int :5; - bool ErrorStatusPending:1; - unsigned int :2; - } Bits; -} -DAC960_GEM_ErrorStatusRegister_T; - -/* - Define inline functions to provide an abstraction for reading and writing the - DAC960 GEM Series Controller Interface Registers. 
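Note the register layout this abstraction hides: the GEM doorbell and interrupt mask registers come in read/set and clear pairs, so writing a 1 to the ReadSet offset sets a bit and writing a 1 to the Clear offset clears it, which is why the Enable/DisableInterrupts helpers below write the same mask bits to opposite offsets. The bare idiom, as a fragment (Base is an assumed void __iomem * mapping):

	DAC960_GEM_InterruptMaskRegister_T Mask;

	Mask.All = 0;
	Mask.Bits.MemoryMailboxInterrupt = true;
	/* 1s written to the ReadSet offset set (mask) those bits... */
	writel(Mask.All, Base + DAC960_GEM_InterruptMaskRegisterReadSetOffset);
	/* ...and the same 1s written to the Clear offset unmask them. */
	writel(Mask.All, Base + DAC960_GEM_InterruptMaskRegisterClearOffset);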
-*/ - -static inline -void DAC960_GEM_HardwareMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.HardwareMailboxNewCommand = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_InboundDoorBellRegisterReadSetOffset); -} - -static inline -void DAC960_GEM_AcknowledgeHardwareMailboxStatus(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.AcknowledgeHardwareMailboxStatus = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_InboundDoorBellRegisterClearOffset); -} - -static inline -void DAC960_GEM_GenerateInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.GenerateInterrupt = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_InboundDoorBellRegisterReadSetOffset); -} - -static inline -void DAC960_GEM_ControllerReset(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.ControllerReset = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_InboundDoorBellRegisterReadSetOffset); -} - -static inline -void DAC960_GEM_MemoryMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.MemoryMailboxNewCommand = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_InboundDoorBellRegisterReadSetOffset); -} - -static inline -bool DAC960_GEM_HardwareMailboxFullP(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readl(ControllerBaseAddress + - DAC960_GEM_InboundDoorBellRegisterReadSetOffset); - return InboundDoorBellRegister.Read.HardwareMailboxFull; -} - -static inline -bool DAC960_GEM_InitializationInProgressP(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readl(ControllerBaseAddress + - DAC960_GEM_InboundDoorBellRegisterReadSetOffset); - return InboundDoorBellRegister.Read.InitializationInProgress; -} - -static inline -void DAC960_GEM_AcknowledgeHardwareMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - writel(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_OutboundDoorBellRegisterClearOffset); -} - -static inline -void DAC960_GEM_AcknowledgeMemoryMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writel(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_OutboundDoorBellRegisterClearOffset); -} - -static inline -void DAC960_GEM_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_OutboundDoorBellRegister_T OutboundDoorBellRegister; - 
OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writel(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_GEM_OutboundDoorBellRegisterClearOffset); -} - -static inline -bool DAC960_GEM_HardwareMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readl(ControllerBaseAddress + - DAC960_GEM_OutboundDoorBellRegisterReadSetOffset); - return OutboundDoorBellRegister.Read.HardwareMailboxStatusAvailable; -} - -static inline -bool DAC960_GEM_MemoryMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readl(ControllerBaseAddress + - DAC960_GEM_OutboundDoorBellRegisterReadSetOffset); - return OutboundDoorBellRegister.Read.MemoryMailboxStatusAvailable; -} - -static inline -void DAC960_GEM_EnableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0; - InterruptMaskRegister.Bits.HardwareMailboxInterrupt = true; - InterruptMaskRegister.Bits.MemoryMailboxInterrupt = true; - writel(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_GEM_InterruptMaskRegisterClearOffset); -} - -static inline -void DAC960_GEM_DisableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0; - InterruptMaskRegister.Bits.HardwareMailboxInterrupt = true; - InterruptMaskRegister.Bits.MemoryMailboxInterrupt = true; - writel(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_GEM_InterruptMaskRegisterReadSetOffset); -} - -static inline -bool DAC960_GEM_InterruptsEnabledP(void __iomem *ControllerBaseAddress) -{ - DAC960_GEM_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = - readl(ControllerBaseAddress + - DAC960_GEM_InterruptMaskRegisterReadSetOffset); - return !(InterruptMaskRegister.Bits.HardwareMailboxInterrupt || - InterruptMaskRegister.Bits.MemoryMailboxInterrupt); -} - -static inline -void DAC960_GEM_WriteCommandMailbox(DAC960_V2_CommandMailbox_T - *MemoryCommandMailbox, - DAC960_V2_CommandMailbox_T - *CommandMailbox) -{ - memcpy(&MemoryCommandMailbox->Words[1], &CommandMailbox->Words[1], - sizeof(DAC960_V2_CommandMailbox_T) - sizeof(unsigned int)); - wmb(); - MemoryCommandMailbox->Words[0] = CommandMailbox->Words[0]; - mb(); -} - -static inline -void DAC960_GEM_WriteHardwareMailbox(void __iomem *ControllerBaseAddress, - dma_addr_t CommandMailboxDMA) -{ - dma_addr_writeql(CommandMailboxDMA, - ControllerBaseAddress + - DAC960_GEM_CommandMailboxBusAddressOffset); -} - -static inline DAC960_V2_CommandIdentifier_T -DAC960_GEM_ReadCommandIdentifier(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_GEM_CommandStatusOffset); -} - -static inline DAC960_V2_CommandStatus_T -DAC960_GEM_ReadCommandStatus(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_GEM_CommandStatusOffset + 2); -} - -static inline bool -DAC960_GEM_ReadErrorStatus(void __iomem *ControllerBaseAddress, - unsigned char *ErrorStatus, - unsigned char *Parameter0, - unsigned char *Parameter1) -{ - DAC960_GEM_ErrorStatusRegister_T ErrorStatusRegister; - ErrorStatusRegister.All = - readl(ControllerBaseAddress + 
DAC960_GEM_ErrorStatusRegisterReadSetOffset); - if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false; - ErrorStatusRegister.Bits.ErrorStatusPending = false; - *ErrorStatus = ErrorStatusRegister.All; - *Parameter0 = - readb(ControllerBaseAddress + DAC960_GEM_CommandMailboxBusAddressOffset + 0); - *Parameter1 = - readb(ControllerBaseAddress + DAC960_GEM_CommandMailboxBusAddressOffset + 1); - writel(0x03000000, ControllerBaseAddress + - DAC960_GEM_ErrorStatusRegisterClearOffset); - return true; -} - -/* - Define the DAC960 BA Series Controller Interface Register Offsets. -*/ - -#define DAC960_BA_RegisterWindowSize 0x80 - -typedef enum -{ - DAC960_BA_InboundDoorBellRegisterOffset = 0x60, - DAC960_BA_OutboundDoorBellRegisterOffset = 0x61, - DAC960_BA_InterruptStatusRegisterOffset = 0x30, - DAC960_BA_InterruptMaskRegisterOffset = 0x34, - DAC960_BA_CommandMailboxBusAddressOffset = 0x50, - DAC960_BA_CommandStatusOffset = 0x58, - DAC960_BA_ErrorStatusRegisterOffset = 0x63 -} -DAC960_BA_RegisterOffsets_T; - - -/* - Define the structure of the DAC960 BA Series Inbound Door Bell Register. -*/ - -typedef union DAC960_BA_InboundDoorBellRegister -{ - unsigned char All; - struct { - bool HardwareMailboxNewCommand:1; /* Bit 0 */ - bool AcknowledgeHardwareMailboxStatus:1; /* Bit 1 */ - bool GenerateInterrupt:1; /* Bit 2 */ - bool ControllerReset:1; /* Bit 3 */ - bool MemoryMailboxNewCommand:1; /* Bit 4 */ - unsigned char :3; /* Bits 5-7 */ - } Write; - struct { - bool HardwareMailboxEmpty:1; /* Bit 0 */ - bool InitializationNotInProgress:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Read; -} -DAC960_BA_InboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 BA Series Outbound Door Bell Register. -*/ - -typedef union DAC960_BA_OutboundDoorBellRegister -{ - unsigned char All; - struct { - bool AcknowledgeHardwareMailboxInterrupt:1; /* Bit 0 */ - bool AcknowledgeMemoryMailboxInterrupt:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Write; - struct { - bool HardwareMailboxStatusAvailable:1; /* Bit 0 */ - bool MemoryMailboxStatusAvailable:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Read; -} -DAC960_BA_OutboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 BA Series Interrupt Mask Register. -*/ - -typedef union DAC960_BA_InterruptMaskRegister -{ - unsigned char All; - struct { - unsigned int :2; /* Bits 0-1 */ - bool DisableInterrupts:1; /* Bit 2 */ - bool DisableInterruptsI2O:1; /* Bit 3 */ - unsigned int :4; /* Bits 4-7 */ - } Bits; -} -DAC960_BA_InterruptMaskRegister_T; - - -/* - Define the structure of the DAC960 BA Series Error Status Register. -*/ - -typedef union DAC960_BA_ErrorStatusRegister -{ - unsigned char All; - struct { - unsigned int :2; /* Bits 0-1 */ - bool ErrorStatusPending:1; /* Bit 2 */ - unsigned int :5; /* Bits 3-7 */ - } Bits; -} -DAC960_BA_ErrorStatusRegister_T; - - -/* - Define inline functions to provide an abstraction for reading and writing the - DAC960 BA Series Controller Interface Registers. 
-*/ - -static inline -void DAC960_BA_HardwareMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.HardwareMailboxNewCommand = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_BA_AcknowledgeHardwareMailboxStatus(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.AcknowledgeHardwareMailboxStatus = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_BA_GenerateInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.GenerateInterrupt = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_BA_ControllerReset(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.ControllerReset = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_BA_MemoryMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.MemoryMailboxNewCommand = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_BA_HardwareMailboxFullP(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset); - return !InboundDoorBellRegister.Read.HardwareMailboxEmpty; -} - -static inline -bool DAC960_BA_InitializationInProgressP(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset); - return !InboundDoorBellRegister.Read.InitializationNotInProgress; -} - -static inline -void DAC960_BA_AcknowledgeHardwareMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_BA_AcknowledgeMemoryMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_BA_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - 
OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_BA_OutboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_BA_HardwareMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_BA_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.HardwareMailboxStatusAvailable; -} - -static inline -bool DAC960_BA_MemoryMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_BA_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.MemoryMailboxStatusAvailable; -} - -static inline -void DAC960_BA_EnableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0xFF; - InterruptMaskRegister.Bits.DisableInterrupts = false; - InterruptMaskRegister.Bits.DisableInterruptsI2O = true; - writeb(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_BA_InterruptMaskRegisterOffset); -} - -static inline -void DAC960_BA_DisableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0xFF; - InterruptMaskRegister.Bits.DisableInterrupts = true; - InterruptMaskRegister.Bits.DisableInterruptsI2O = true; - writeb(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_BA_InterruptMaskRegisterOffset); -} - -static inline -bool DAC960_BA_InterruptsEnabledP(void __iomem *ControllerBaseAddress) -{ - DAC960_BA_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = - readb(ControllerBaseAddress + DAC960_BA_InterruptMaskRegisterOffset); - return !InterruptMaskRegister.Bits.DisableInterrupts; -} - -static inline -void DAC960_BA_WriteCommandMailbox(DAC960_V2_CommandMailbox_T - *MemoryCommandMailbox, - DAC960_V2_CommandMailbox_T - *CommandMailbox) -{ - memcpy(&MemoryCommandMailbox->Words[1], &CommandMailbox->Words[1], - sizeof(DAC960_V2_CommandMailbox_T) - sizeof(unsigned int)); - wmb(); - MemoryCommandMailbox->Words[0] = CommandMailbox->Words[0]; - mb(); -} - - -static inline -void DAC960_BA_WriteHardwareMailbox(void __iomem *ControllerBaseAddress, - dma_addr_t CommandMailboxDMA) -{ - dma_addr_writeql(CommandMailboxDMA, - ControllerBaseAddress + - DAC960_BA_CommandMailboxBusAddressOffset); -} - -static inline DAC960_V2_CommandIdentifier_T -DAC960_BA_ReadCommandIdentifier(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_BA_CommandStatusOffset); -} - -static inline DAC960_V2_CommandStatus_T -DAC960_BA_ReadCommandStatus(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_BA_CommandStatusOffset + 2); -} - -static inline bool -DAC960_BA_ReadErrorStatus(void __iomem *ControllerBaseAddress, - unsigned char *ErrorStatus, - unsigned char *Parameter0, - unsigned char *Parameter1) -{ - DAC960_BA_ErrorStatusRegister_T ErrorStatusRegister; - ErrorStatusRegister.All = - readb(ControllerBaseAddress + DAC960_BA_ErrorStatusRegisterOffset); - if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false; - ErrorStatusRegister.Bits.ErrorStatusPending = false; - *ErrorStatus = 
ErrorStatusRegister.All; - *Parameter0 = - readb(ControllerBaseAddress + DAC960_BA_CommandMailboxBusAddressOffset + 0); - *Parameter1 = - readb(ControllerBaseAddress + DAC960_BA_CommandMailboxBusAddressOffset + 1); - writeb(0xFF, ControllerBaseAddress + DAC960_BA_ErrorStatusRegisterOffset); - return true; -} - - -/* - Define the DAC960 LP Series Controller Interface Register Offsets. -*/ - -#define DAC960_LP_RegisterWindowSize 0x80 - -typedef enum -{ - DAC960_LP_InboundDoorBellRegisterOffset = 0x20, - DAC960_LP_OutboundDoorBellRegisterOffset = 0x2C, - DAC960_LP_InterruptStatusRegisterOffset = 0x30, - DAC960_LP_InterruptMaskRegisterOffset = 0x34, - DAC960_LP_CommandMailboxBusAddressOffset = 0x10, - DAC960_LP_CommandStatusOffset = 0x18, - DAC960_LP_ErrorStatusRegisterOffset = 0x2E -} -DAC960_LP_RegisterOffsets_T; - - -/* - Define the structure of the DAC960 LP Series Inbound Door Bell Register. -*/ - -typedef union DAC960_LP_InboundDoorBellRegister -{ - unsigned char All; - struct { - bool HardwareMailboxNewCommand:1; /* Bit 0 */ - bool AcknowledgeHardwareMailboxStatus:1; /* Bit 1 */ - bool GenerateInterrupt:1; /* Bit 2 */ - bool ControllerReset:1; /* Bit 3 */ - bool MemoryMailboxNewCommand:1; /* Bit 4 */ - unsigned char :3; /* Bits 5-7 */ - } Write; - struct { - bool HardwareMailboxFull:1; /* Bit 0 */ - bool InitializationInProgress:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Read; -} -DAC960_LP_InboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 LP Series Outbound Door Bell Register. -*/ - -typedef union DAC960_LP_OutboundDoorBellRegister -{ - unsigned char All; - struct { - bool AcknowledgeHardwareMailboxInterrupt:1; /* Bit 0 */ - bool AcknowledgeMemoryMailboxInterrupt:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Write; - struct { - bool HardwareMailboxStatusAvailable:1; /* Bit 0 */ - bool MemoryMailboxStatusAvailable:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Read; -} -DAC960_LP_OutboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 LP Series Interrupt Mask Register. -*/ - -typedef union DAC960_LP_InterruptMaskRegister -{ - unsigned char All; - struct { - unsigned int :2; /* Bits 0-1 */ - bool DisableInterrupts:1; /* Bit 2 */ - unsigned int :5; /* Bits 3-7 */ - } Bits; -} -DAC960_LP_InterruptMaskRegister_T; - - -/* - Define the structure of the DAC960 LP Series Error Status Register. -*/ - -typedef union DAC960_LP_ErrorStatusRegister -{ - unsigned char All; - struct { - unsigned int :2; /* Bits 0-1 */ - bool ErrorStatusPending:1; /* Bit 2 */ - unsigned int :5; /* Bits 3-7 */ - } Bits; -} -DAC960_LP_ErrorStatusRegister_T; - - -/* - Define inline functions to provide an abstraction for reading and writing the - DAC960 LP Series Controller Interface Registers. 
-*/ - -static inline -void DAC960_LP_HardwareMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.HardwareMailboxNewCommand = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LP_AcknowledgeHardwareMailboxStatus(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.AcknowledgeHardwareMailboxStatus = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LP_GenerateInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.GenerateInterrupt = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LP_ControllerReset(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.ControllerReset = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LP_MemoryMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.MemoryMailboxNewCommand = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_LP_HardwareMailboxFullP(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset); - return InboundDoorBellRegister.Read.HardwareMailboxFull; -} - -static inline -bool DAC960_LP_InitializationInProgressP(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset); - return InboundDoorBellRegister.Read.InitializationInProgress; -} - -static inline -void DAC960_LP_AcknowledgeHardwareMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LP_AcknowledgeMemoryMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LP_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt 
= true; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LP_OutboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_LP_HardwareMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LP_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.HardwareMailboxStatusAvailable; -} - -static inline -bool DAC960_LP_MemoryMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LP_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.MemoryMailboxStatusAvailable; -} - -static inline -void DAC960_LP_EnableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0xFF; - InterruptMaskRegister.Bits.DisableInterrupts = false; - writeb(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_LP_InterruptMaskRegisterOffset); -} - -static inline -void DAC960_LP_DisableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0xFF; - InterruptMaskRegister.Bits.DisableInterrupts = true; - writeb(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_LP_InterruptMaskRegisterOffset); -} - -static inline -bool DAC960_LP_InterruptsEnabledP(void __iomem *ControllerBaseAddress) -{ - DAC960_LP_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = - readb(ControllerBaseAddress + DAC960_LP_InterruptMaskRegisterOffset); - return !InterruptMaskRegister.Bits.DisableInterrupts; -} - -static inline -void DAC960_LP_WriteCommandMailbox(DAC960_V2_CommandMailbox_T - *MemoryCommandMailbox, - DAC960_V2_CommandMailbox_T - *CommandMailbox) -{ - memcpy(&MemoryCommandMailbox->Words[1], &CommandMailbox->Words[1], - sizeof(DAC960_V2_CommandMailbox_T) - sizeof(unsigned int)); - wmb(); - MemoryCommandMailbox->Words[0] = CommandMailbox->Words[0]; - mb(); -} - -static inline -void DAC960_LP_WriteHardwareMailbox(void __iomem *ControllerBaseAddress, - dma_addr_t CommandMailboxDMA) -{ - dma_addr_writeql(CommandMailboxDMA, - ControllerBaseAddress + - DAC960_LP_CommandMailboxBusAddressOffset); -} - -static inline DAC960_V2_CommandIdentifier_T -DAC960_LP_ReadCommandIdentifier(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_LP_CommandStatusOffset); -} - -static inline DAC960_V2_CommandStatus_T -DAC960_LP_ReadCommandStatus(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_LP_CommandStatusOffset + 2); -} - -static inline bool -DAC960_LP_ReadErrorStatus(void __iomem *ControllerBaseAddress, - unsigned char *ErrorStatus, - unsigned char *Parameter0, - unsigned char *Parameter1) -{ - DAC960_LP_ErrorStatusRegister_T ErrorStatusRegister; - ErrorStatusRegister.All = - readb(ControllerBaseAddress + DAC960_LP_ErrorStatusRegisterOffset); - if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false; - ErrorStatusRegister.Bits.ErrorStatusPending = false; - *ErrorStatus = ErrorStatusRegister.All; - *Parameter0 = - readb(ControllerBaseAddress + DAC960_LP_CommandMailboxBusAddressOffset + 0); - *Parameter1 = - readb(ControllerBaseAddress + 
DAC960_LP_CommandMailboxBusAddressOffset + 1); - writeb(0xFF, ControllerBaseAddress + DAC960_LP_ErrorStatusRegisterOffset); - return true; -} - - -/* - Define the DAC960 LA Series Controller Interface Register Offsets. -*/ - -#define DAC960_LA_RegisterWindowSize 0x80 - -typedef enum -{ - DAC960_LA_InboundDoorBellRegisterOffset = 0x60, - DAC960_LA_OutboundDoorBellRegisterOffset = 0x61, - DAC960_LA_InterruptMaskRegisterOffset = 0x34, - DAC960_LA_CommandOpcodeRegisterOffset = 0x50, - DAC960_LA_CommandIdentifierRegisterOffset = 0x51, - DAC960_LA_MailboxRegister2Offset = 0x52, - DAC960_LA_MailboxRegister3Offset = 0x53, - DAC960_LA_MailboxRegister4Offset = 0x54, - DAC960_LA_MailboxRegister5Offset = 0x55, - DAC960_LA_MailboxRegister6Offset = 0x56, - DAC960_LA_MailboxRegister7Offset = 0x57, - DAC960_LA_MailboxRegister8Offset = 0x58, - DAC960_LA_MailboxRegister9Offset = 0x59, - DAC960_LA_MailboxRegister10Offset = 0x5A, - DAC960_LA_MailboxRegister11Offset = 0x5B, - DAC960_LA_MailboxRegister12Offset = 0x5C, - DAC960_LA_StatusCommandIdentifierRegOffset = 0x5D, - DAC960_LA_StatusRegisterOffset = 0x5E, - DAC960_LA_ErrorStatusRegisterOffset = 0x63 -} -DAC960_LA_RegisterOffsets_T; - - -/* - Define the structure of the DAC960 LA Series Inbound Door Bell Register. -*/ - -typedef union DAC960_LA_InboundDoorBellRegister -{ - unsigned char All; - struct { - bool HardwareMailboxNewCommand:1; /* Bit 0 */ - bool AcknowledgeHardwareMailboxStatus:1; /* Bit 1 */ - bool GenerateInterrupt:1; /* Bit 2 */ - bool ControllerReset:1; /* Bit 3 */ - bool MemoryMailboxNewCommand:1; /* Bit 4 */ - unsigned char :3; /* Bits 5-7 */ - } Write; - struct { - bool HardwareMailboxEmpty:1; /* Bit 0 */ - bool InitializationNotInProgress:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Read; -} -DAC960_LA_InboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 LA Series Outbound Door Bell Register. -*/ - -typedef union DAC960_LA_OutboundDoorBellRegister -{ - unsigned char All; - struct { - bool AcknowledgeHardwareMailboxInterrupt:1; /* Bit 0 */ - bool AcknowledgeMemoryMailboxInterrupt:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Write; - struct { - bool HardwareMailboxStatusAvailable:1; /* Bit 0 */ - bool MemoryMailboxStatusAvailable:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Read; -} -DAC960_LA_OutboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 LA Series Interrupt Mask Register. -*/ - -typedef union DAC960_LA_InterruptMaskRegister -{ - unsigned char All; - struct { - unsigned char :2; /* Bits 0-1 */ - bool DisableInterrupts:1; /* Bit 2 */ - unsigned char :5; /* Bits 3-7 */ - } Bits; -} -DAC960_LA_InterruptMaskRegister_T; - - -/* - Define the structure of the DAC960 LA Series Error Status Register. -*/ - -typedef union DAC960_LA_ErrorStatusRegister -{ - unsigned char All; - struct { - unsigned int :2; /* Bits 0-1 */ - bool ErrorStatusPending:1; /* Bit 2 */ - unsigned int :5; /* Bits 3-7 */ - } Bits; -} -DAC960_LA_ErrorStatusRegister_T; - - -/* - Define inline functions to provide an abstraction for reading and writing the - DAC960 LA Series Controller Interface Registers. 
-*/ - -static inline -void DAC960_LA_HardwareMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.HardwareMailboxNewCommand = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LA_AcknowledgeHardwareMailboxStatus(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.AcknowledgeHardwareMailboxStatus = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LA_GenerateInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.GenerateInterrupt = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LA_ControllerReset(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.ControllerReset = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LA_MemoryMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.MemoryMailboxNewCommand = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_LA_HardwareMailboxFullP(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset); - return !InboundDoorBellRegister.Read.HardwareMailboxEmpty; -} - -static inline -bool DAC960_LA_InitializationInProgressP(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset); - return !InboundDoorBellRegister.Read.InitializationNotInProgress; -} - -static inline -void DAC960_LA_AcknowledgeHardwareMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LA_AcknowledgeMemoryMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_LA_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - 
OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_LA_OutboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_LA_HardwareMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LA_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.HardwareMailboxStatusAvailable; -} - -static inline -bool DAC960_LA_MemoryMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_LA_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.MemoryMailboxStatusAvailable; -} - -static inline -void DAC960_LA_EnableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0xFF; - InterruptMaskRegister.Bits.DisableInterrupts = false; - writeb(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_LA_InterruptMaskRegisterOffset); -} - -static inline -void DAC960_LA_DisableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0xFF; - InterruptMaskRegister.Bits.DisableInterrupts = true; - writeb(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_LA_InterruptMaskRegisterOffset); -} - -static inline -bool DAC960_LA_InterruptsEnabledP(void __iomem *ControllerBaseAddress) -{ - DAC960_LA_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = - readb(ControllerBaseAddress + DAC960_LA_InterruptMaskRegisterOffset); - return !InterruptMaskRegister.Bits.DisableInterrupts; -} - -static inline -void DAC960_LA_WriteCommandMailbox(DAC960_V1_CommandMailbox_T - *MemoryCommandMailbox, - DAC960_V1_CommandMailbox_T - *CommandMailbox) -{ - MemoryCommandMailbox->Words[1] = CommandMailbox->Words[1]; - MemoryCommandMailbox->Words[2] = CommandMailbox->Words[2]; - MemoryCommandMailbox->Words[3] = CommandMailbox->Words[3]; - wmb(); - MemoryCommandMailbox->Words[0] = CommandMailbox->Words[0]; - mb(); -} - -static inline -void DAC960_LA_WriteHardwareMailbox(void __iomem *ControllerBaseAddress, - DAC960_V1_CommandMailbox_T *CommandMailbox) -{ - writel(CommandMailbox->Words[0], - ControllerBaseAddress + DAC960_LA_CommandOpcodeRegisterOffset); - writel(CommandMailbox->Words[1], - ControllerBaseAddress + DAC960_LA_MailboxRegister4Offset); - writel(CommandMailbox->Words[2], - ControllerBaseAddress + DAC960_LA_MailboxRegister8Offset); - writeb(CommandMailbox->Bytes[12], - ControllerBaseAddress + DAC960_LA_MailboxRegister12Offset); -} - -static inline DAC960_V1_CommandIdentifier_T -DAC960_LA_ReadStatusCommandIdentifier(void __iomem *ControllerBaseAddress) -{ - return readb(ControllerBaseAddress - + DAC960_LA_StatusCommandIdentifierRegOffset); -} - -static inline DAC960_V1_CommandStatus_T -DAC960_LA_ReadStatusRegister(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_LA_StatusRegisterOffset); -} - -static inline bool -DAC960_LA_ReadErrorStatus(void __iomem *ControllerBaseAddress, - unsigned char *ErrorStatus, - unsigned char *Parameter0, - unsigned char *Parameter1) -{ - DAC960_LA_ErrorStatusRegister_T ErrorStatusRegister; 
- ErrorStatusRegister.All = - readb(ControllerBaseAddress + DAC960_LA_ErrorStatusRegisterOffset); - if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false; - ErrorStatusRegister.Bits.ErrorStatusPending = false; - *ErrorStatus = ErrorStatusRegister.All; - *Parameter0 = - readb(ControllerBaseAddress + DAC960_LA_CommandOpcodeRegisterOffset); - *Parameter1 = - readb(ControllerBaseAddress + DAC960_LA_CommandIdentifierRegisterOffset); - writeb(0xFF, ControllerBaseAddress + DAC960_LA_ErrorStatusRegisterOffset); - return true; -} - -/* - Define the DAC960 PG Series Controller Interface Register Offsets. -*/ - -#define DAC960_PG_RegisterWindowSize 0x2000 - -typedef enum -{ - DAC960_PG_InboundDoorBellRegisterOffset = 0x0020, - DAC960_PG_OutboundDoorBellRegisterOffset = 0x002C, - DAC960_PG_InterruptMaskRegisterOffset = 0x0034, - DAC960_PG_CommandOpcodeRegisterOffset = 0x1000, - DAC960_PG_CommandIdentifierRegisterOffset = 0x1001, - DAC960_PG_MailboxRegister2Offset = 0x1002, - DAC960_PG_MailboxRegister3Offset = 0x1003, - DAC960_PG_MailboxRegister4Offset = 0x1004, - DAC960_PG_MailboxRegister5Offset = 0x1005, - DAC960_PG_MailboxRegister6Offset = 0x1006, - DAC960_PG_MailboxRegister7Offset = 0x1007, - DAC960_PG_MailboxRegister8Offset = 0x1008, - DAC960_PG_MailboxRegister9Offset = 0x1009, - DAC960_PG_MailboxRegister10Offset = 0x100A, - DAC960_PG_MailboxRegister11Offset = 0x100B, - DAC960_PG_MailboxRegister12Offset = 0x100C, - DAC960_PG_StatusCommandIdentifierRegOffset = 0x1018, - DAC960_PG_StatusRegisterOffset = 0x101A, - DAC960_PG_ErrorStatusRegisterOffset = 0x103F -} -DAC960_PG_RegisterOffsets_T; - - -/* - Define the structure of the DAC960 PG Series Inbound Door Bell Register. -*/ - -typedef union DAC960_PG_InboundDoorBellRegister -{ - unsigned int All; - struct { - bool HardwareMailboxNewCommand:1; /* Bit 0 */ - bool AcknowledgeHardwareMailboxStatus:1; /* Bit 1 */ - bool GenerateInterrupt:1; /* Bit 2 */ - bool ControllerReset:1; /* Bit 3 */ - bool MemoryMailboxNewCommand:1; /* Bit 4 */ - unsigned int :27; /* Bits 5-31 */ - } Write; - struct { - bool HardwareMailboxFull:1; /* Bit 0 */ - bool InitializationInProgress:1; /* Bit 1 */ - unsigned int :30; /* Bits 2-31 */ - } Read; -} -DAC960_PG_InboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 PG Series Outbound Door Bell Register. -*/ - -typedef union DAC960_PG_OutboundDoorBellRegister -{ - unsigned int All; - struct { - bool AcknowledgeHardwareMailboxInterrupt:1; /* Bit 0 */ - bool AcknowledgeMemoryMailboxInterrupt:1; /* Bit 1 */ - unsigned int :30; /* Bits 2-31 */ - } Write; - struct { - bool HardwareMailboxStatusAvailable:1; /* Bit 0 */ - bool MemoryMailboxStatusAvailable:1; /* Bit 1 */ - unsigned int :30; /* Bits 2-31 */ - } Read; -} -DAC960_PG_OutboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 PG Series Interrupt Mask Register. -*/ - -typedef union DAC960_PG_InterruptMaskRegister -{ - unsigned int All; - struct { - unsigned int MessageUnitInterruptMask1:2; /* Bits 0-1 */ - bool DisableInterrupts:1; /* Bit 2 */ - unsigned int MessageUnitInterruptMask2:5; /* Bits 3-7 */ - unsigned int Reserved0:24; /* Bits 8-31 */ - } Bits; -} -DAC960_PG_InterruptMaskRegister_T; - - -/* - Define the structure of the DAC960 PG Series Error Status Register. 
-*/ - -typedef union DAC960_PG_ErrorStatusRegister -{ - unsigned char All; - struct { - unsigned int :2; /* Bits 0-1 */ - bool ErrorStatusPending:1; /* Bit 2 */ - unsigned int :5; /* Bits 3-7 */ - } Bits; -} -DAC960_PG_ErrorStatusRegister_T; - - -/* - Define inline functions to provide an abstraction for reading and writing the - DAC960 PG Series Controller Interface Registers. -*/ - -static inline -void DAC960_PG_HardwareMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.HardwareMailboxNewCommand = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PG_AcknowledgeHardwareMailboxStatus(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.AcknowledgeHardwareMailboxStatus = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PG_GenerateInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.GenerateInterrupt = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PG_ControllerReset(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.ControllerReset = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PG_MemoryMailboxNewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.MemoryMailboxNewCommand = true; - writel(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_PG_HardwareMailboxFullP(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readl(ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset); - return InboundDoorBellRegister.Read.HardwareMailboxFull; -} - -static inline -bool DAC960_PG_InitializationInProgressP(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readl(ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset); - return InboundDoorBellRegister.Read.InitializationInProgress; -} - -static inline -void DAC960_PG_AcknowledgeHardwareMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - writel(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PG_AcknowledgeMemoryMailboxInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = 
true; - writel(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_OutboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PG_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true; - OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true; - writel(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PG_OutboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_PG_HardwareMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readl(ControllerBaseAddress + DAC960_PG_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.HardwareMailboxStatusAvailable; -} - -static inline -bool DAC960_PG_MemoryMailboxStatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readl(ControllerBaseAddress + DAC960_PG_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.MemoryMailboxStatusAvailable; -} - -static inline -void DAC960_PG_EnableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0; - InterruptMaskRegister.Bits.MessageUnitInterruptMask1 = 0x3; - InterruptMaskRegister.Bits.DisableInterrupts = false; - InterruptMaskRegister.Bits.MessageUnitInterruptMask2 = 0x1F; - writel(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_PG_InterruptMaskRegisterOffset); -} - -static inline -void DAC960_PG_DisableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = 0; - InterruptMaskRegister.Bits.MessageUnitInterruptMask1 = 0x3; - InterruptMaskRegister.Bits.DisableInterrupts = true; - InterruptMaskRegister.Bits.MessageUnitInterruptMask2 = 0x1F; - writel(InterruptMaskRegister.All, - ControllerBaseAddress + DAC960_PG_InterruptMaskRegisterOffset); -} - -static inline -bool DAC960_PG_InterruptsEnabledP(void __iomem *ControllerBaseAddress) -{ - DAC960_PG_InterruptMaskRegister_T InterruptMaskRegister; - InterruptMaskRegister.All = - readl(ControllerBaseAddress + DAC960_PG_InterruptMaskRegisterOffset); - return !InterruptMaskRegister.Bits.DisableInterrupts; -} - -static inline -void DAC960_PG_WriteCommandMailbox(DAC960_V1_CommandMailbox_T - *MemoryCommandMailbox, - DAC960_V1_CommandMailbox_T - *CommandMailbox) -{ - MemoryCommandMailbox->Words[1] = CommandMailbox->Words[1]; - MemoryCommandMailbox->Words[2] = CommandMailbox->Words[2]; - MemoryCommandMailbox->Words[3] = CommandMailbox->Words[3]; - wmb(); - MemoryCommandMailbox->Words[0] = CommandMailbox->Words[0]; - mb(); -} - -static inline -void DAC960_PG_WriteHardwareMailbox(void __iomem *ControllerBaseAddress, - DAC960_V1_CommandMailbox_T *CommandMailbox) -{ - writel(CommandMailbox->Words[0], - ControllerBaseAddress + DAC960_PG_CommandOpcodeRegisterOffset); - writel(CommandMailbox->Words[1], - ControllerBaseAddress + DAC960_PG_MailboxRegister4Offset); - writel(CommandMailbox->Words[2], - ControllerBaseAddress + DAC960_PG_MailboxRegister8Offset); - writeb(CommandMailbox->Bytes[12], - ControllerBaseAddress + DAC960_PG_MailboxRegister12Offset); -} - -static inline DAC960_V1_CommandIdentifier_T 
-DAC960_PG_ReadStatusCommandIdentifier(void __iomem *ControllerBaseAddress) -{ - return readb(ControllerBaseAddress - + DAC960_PG_StatusCommandIdentifierRegOffset); -} - -static inline DAC960_V1_CommandStatus_T -DAC960_PG_ReadStatusRegister(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_PG_StatusRegisterOffset); -} - -static inline bool -DAC960_PG_ReadErrorStatus(void __iomem *ControllerBaseAddress, - unsigned char *ErrorStatus, - unsigned char *Parameter0, - unsigned char *Parameter1) -{ - DAC960_PG_ErrorStatusRegister_T ErrorStatusRegister; - ErrorStatusRegister.All = - readb(ControllerBaseAddress + DAC960_PG_ErrorStatusRegisterOffset); - if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false; - ErrorStatusRegister.Bits.ErrorStatusPending = false; - *ErrorStatus = ErrorStatusRegister.All; - *Parameter0 = - readb(ControllerBaseAddress + DAC960_PG_CommandOpcodeRegisterOffset); - *Parameter1 = - readb(ControllerBaseAddress + DAC960_PG_CommandIdentifierRegisterOffset); - writeb(0, ControllerBaseAddress + DAC960_PG_ErrorStatusRegisterOffset); - return true; -} - -/* - Define the DAC960 PD Series Controller Interface Register Offsets. -*/ - -#define DAC960_PD_RegisterWindowSize 0x80 - -typedef enum -{ - DAC960_PD_CommandOpcodeRegisterOffset = 0x00, - DAC960_PD_CommandIdentifierRegisterOffset = 0x01, - DAC960_PD_MailboxRegister2Offset = 0x02, - DAC960_PD_MailboxRegister3Offset = 0x03, - DAC960_PD_MailboxRegister4Offset = 0x04, - DAC960_PD_MailboxRegister5Offset = 0x05, - DAC960_PD_MailboxRegister6Offset = 0x06, - DAC960_PD_MailboxRegister7Offset = 0x07, - DAC960_PD_MailboxRegister8Offset = 0x08, - DAC960_PD_MailboxRegister9Offset = 0x09, - DAC960_PD_MailboxRegister10Offset = 0x0A, - DAC960_PD_MailboxRegister11Offset = 0x0B, - DAC960_PD_MailboxRegister12Offset = 0x0C, - DAC960_PD_StatusCommandIdentifierRegOffset = 0x0D, - DAC960_PD_StatusRegisterOffset = 0x0E, - DAC960_PD_ErrorStatusRegisterOffset = 0x3F, - DAC960_PD_InboundDoorBellRegisterOffset = 0x40, - DAC960_PD_OutboundDoorBellRegisterOffset = 0x41, - DAC960_PD_InterruptEnableRegisterOffset = 0x43 -} -DAC960_PD_RegisterOffsets_T; - - -/* - Define the structure of the DAC960 PD Series Inbound Door Bell Register. -*/ - -typedef union DAC960_PD_InboundDoorBellRegister -{ - unsigned char All; - struct { - bool NewCommand:1; /* Bit 0 */ - bool AcknowledgeStatus:1; /* Bit 1 */ - bool GenerateInterrupt:1; /* Bit 2 */ - bool ControllerReset:1; /* Bit 3 */ - unsigned char :4; /* Bits 4-7 */ - } Write; - struct { - bool MailboxFull:1; /* Bit 0 */ - bool InitializationInProgress:1; /* Bit 1 */ - unsigned char :6; /* Bits 2-7 */ - } Read; -} -DAC960_PD_InboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 PD Series Outbound Door Bell Register. -*/ - -typedef union DAC960_PD_OutboundDoorBellRegister -{ - unsigned char All; - struct { - bool AcknowledgeInterrupt:1; /* Bit 0 */ - unsigned char :7; /* Bits 1-7 */ - } Write; - struct { - bool StatusAvailable:1; /* Bit 0 */ - unsigned char :7; /* Bits 1-7 */ - } Read; -} -DAC960_PD_OutboundDoorBellRegister_T; - - -/* - Define the structure of the DAC960 PD Series Interrupt Enable Register. -*/ - -typedef union DAC960_PD_InterruptEnableRegister -{ - unsigned char All; - struct { - bool EnableInterrupts:1; /* Bit 0 */ - unsigned char :7; /* Bits 1-7 */ - } Bits; -} -DAC960_PD_InterruptEnableRegister_T; - - -/* - Define the structure of the DAC960 PD Series Error Status Register. 
-*/ - -typedef union DAC960_PD_ErrorStatusRegister -{ - unsigned char All; - struct { - unsigned int :2; /* Bits 0-1 */ - bool ErrorStatusPending:1; /* Bit 2 */ - unsigned int :5; /* Bits 3-7 */ - } Bits; -} -DAC960_PD_ErrorStatusRegister_T; - - -/* - Define inline functions to provide an abstraction for reading and writing the - DAC960 PD Series Controller Interface Registers. -*/ - -static inline -void DAC960_PD_NewCommand(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.NewCommand = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PD_AcknowledgeStatus(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.AcknowledgeStatus = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PD_GenerateInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.GenerateInterrupt = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset); -} - -static inline -void DAC960_PD_ControllerReset(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = 0; - InboundDoorBellRegister.Write.ControllerReset = true; - writeb(InboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_PD_MailboxFullP(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset); - return InboundDoorBellRegister.Read.MailboxFull; -} - -static inline -bool DAC960_PD_InitializationInProgressP(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister; - InboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset); - return InboundDoorBellRegister.Read.InitializationInProgress; -} - -static inline -void DAC960_PD_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = 0; - OutboundDoorBellRegister.Write.AcknowledgeInterrupt = true; - writeb(OutboundDoorBellRegister.All, - ControllerBaseAddress + DAC960_PD_OutboundDoorBellRegisterOffset); -} - -static inline -bool DAC960_PD_StatusAvailableP(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_OutboundDoorBellRegister_T OutboundDoorBellRegister; - OutboundDoorBellRegister.All = - readb(ControllerBaseAddress + DAC960_PD_OutboundDoorBellRegisterOffset); - return OutboundDoorBellRegister.Read.StatusAvailable; -} - -static inline -void DAC960_PD_EnableInterrupts(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InterruptEnableRegister_T InterruptEnableRegister; - InterruptEnableRegister.All = 0; - InterruptEnableRegister.Bits.EnableInterrupts = true; - writeb(InterruptEnableRegister.All, - ControllerBaseAddress + DAC960_PD_InterruptEnableRegisterOffset); -} - -static inline -void DAC960_PD_DisableInterrupts(void __iomem 
*ControllerBaseAddress) -{ - DAC960_PD_InterruptEnableRegister_T InterruptEnableRegister; - InterruptEnableRegister.All = 0; - InterruptEnableRegister.Bits.EnableInterrupts = false; - writeb(InterruptEnableRegister.All, - ControllerBaseAddress + DAC960_PD_InterruptEnableRegisterOffset); -} - -static inline -bool DAC960_PD_InterruptsEnabledP(void __iomem *ControllerBaseAddress) -{ - DAC960_PD_InterruptEnableRegister_T InterruptEnableRegister; - InterruptEnableRegister.All = - readb(ControllerBaseAddress + DAC960_PD_InterruptEnableRegisterOffset); - return InterruptEnableRegister.Bits.EnableInterrupts; -} - -static inline -void DAC960_PD_WriteCommandMailbox(void __iomem *ControllerBaseAddress, - DAC960_V1_CommandMailbox_T *CommandMailbox) -{ - writel(CommandMailbox->Words[0], - ControllerBaseAddress + DAC960_PD_CommandOpcodeRegisterOffset); - writel(CommandMailbox->Words[1], - ControllerBaseAddress + DAC960_PD_MailboxRegister4Offset); - writel(CommandMailbox->Words[2], - ControllerBaseAddress + DAC960_PD_MailboxRegister8Offset); - writeb(CommandMailbox->Bytes[12], - ControllerBaseAddress + DAC960_PD_MailboxRegister12Offset); -} - -static inline DAC960_V1_CommandIdentifier_T -DAC960_PD_ReadStatusCommandIdentifier(void __iomem *ControllerBaseAddress) -{ - return readb(ControllerBaseAddress - + DAC960_PD_StatusCommandIdentifierRegOffset); -} - -static inline DAC960_V1_CommandStatus_T -DAC960_PD_ReadStatusRegister(void __iomem *ControllerBaseAddress) -{ - return readw(ControllerBaseAddress + DAC960_PD_StatusRegisterOffset); -} - -static inline bool -DAC960_PD_ReadErrorStatus(void __iomem *ControllerBaseAddress, - unsigned char *ErrorStatus, - unsigned char *Parameter0, - unsigned char *Parameter1) -{ - DAC960_PD_ErrorStatusRegister_T ErrorStatusRegister; - ErrorStatusRegister.All = - readb(ControllerBaseAddress + DAC960_PD_ErrorStatusRegisterOffset); - if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false; - ErrorStatusRegister.Bits.ErrorStatusPending = false; - *ErrorStatus = ErrorStatusRegister.All; - *Parameter0 = - readb(ControllerBaseAddress + DAC960_PD_CommandOpcodeRegisterOffset); - *Parameter1 = - readb(ControllerBaseAddress + DAC960_PD_CommandIdentifierRegisterOffset); - writeb(0, ControllerBaseAddress + DAC960_PD_ErrorStatusRegisterOffset); - return true; -} - -static inline void DAC960_P_To_PD_TranslateEnquiry(void *Enquiry) -{ - memcpy(Enquiry + 132, Enquiry + 36, 64); - memset(Enquiry + 36, 0, 96); -} - -static inline void DAC960_P_To_PD_TranslateDeviceState(void *DeviceState) -{ - memcpy(DeviceState + 2, DeviceState + 3, 1); - memmove(DeviceState + 4, DeviceState + 5, 2); - memmove(DeviceState + 6, DeviceState + 8, 4); -} - -static inline -void DAC960_PD_To_P_TranslateReadWriteCommand(DAC960_V1_CommandMailbox_T - *CommandMailbox) -{ - int LogicalDriveNumber = CommandMailbox->Type5.LD.LogicalDriveNumber; - CommandMailbox->Bytes[3] &= 0x7; - CommandMailbox->Bytes[3] |= CommandMailbox->Bytes[7] << 6; - CommandMailbox->Bytes[7] = LogicalDriveNumber; -} - -static inline -void DAC960_P_To_PD_TranslateReadWriteCommand(DAC960_V1_CommandMailbox_T - *CommandMailbox) -{ - int LogicalDriveNumber = CommandMailbox->Bytes[7]; - CommandMailbox->Bytes[7] = CommandMailbox->Bytes[3] >> 6; - CommandMailbox->Bytes[3] &= 0x7; - CommandMailbox->Bytes[3] |= LogicalDriveNumber << 3; -} - - -/* - Define prototypes for the forward referenced DAC960 Driver Internal Functions. 
-*/
-
-static void DAC960_FinalizeController(DAC960_Controller_T *);
-static void DAC960_V1_QueueReadWriteCommand(DAC960_Command_T *);
-static void DAC960_V2_QueueReadWriteCommand(DAC960_Command_T *);
-static void DAC960_RequestFunction(struct request_queue *);
-static irqreturn_t DAC960_BA_InterruptHandler(int, void *);
-static irqreturn_t DAC960_LP_InterruptHandler(int, void *);
-static irqreturn_t DAC960_LA_InterruptHandler(int, void *);
-static irqreturn_t DAC960_PG_InterruptHandler(int, void *);
-static irqreturn_t DAC960_PD_InterruptHandler(int, void *);
-static irqreturn_t DAC960_P_InterruptHandler(int, void *);
-static void DAC960_V1_QueueMonitoringCommand(DAC960_Command_T *);
-static void DAC960_V2_QueueMonitoringCommand(DAC960_Command_T *);
-static void DAC960_MonitoringTimerFunction(struct timer_list *);
-static void DAC960_Message(DAC960_MessageLevel_T, unsigned char *,
-			   DAC960_Controller_T *, ...);
-static void DAC960_CreateProcEntries(DAC960_Controller_T *);
-static void DAC960_DestroyProcEntries(DAC960_Controller_T *);
-
-#endif /* DAC960_DriverVersion */
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 8aec33277fda..20bb4bfa4be6 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -121,18 +121,6 @@ source "drivers/block/mtip32xx/Kconfig"
 
 source "drivers/block/zram/Kconfig"
 
-config BLK_DEV_DAC960
-	tristate "Mylex DAC960/DAC1100 PCI RAID Controller support"
-	depends on PCI
-	help
-	  This driver adds support for the Mylex DAC960, AcceleRAID, and
-	  eXtremeRAID PCI RAID controllers.  See the file
-	  for further information
-	  about this driver.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called DAC960.
-
 config BLK_DEV_UMEM
 	tristate "Micro Memory MM5415 Battery Backed RAM support"
 	depends on PCI
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 8566b188368b..a53cc1e3a2d3 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -16,7 +16,6 @@ obj-$(CONFIG_ATARI_FLOPPY)	+= ataflop.o
 obj-$(CONFIG_AMIGA_Z2RAM)	+= z2ram.o
 obj-$(CONFIG_BLK_DEV_RAM)	+= brd.o
 obj-$(CONFIG_BLK_DEV_LOOP)	+= loop.o
-obj-$(CONFIG_BLK_DEV_DAC960)	+= DAC960.o
 obj-$(CONFIG_XILINX_SYSACE)	+= xsysace.o
 obj-$(CONFIG_CDROM_PKTCDVD)	+= pktcdvd.o
 obj-$(CONFIG_SUNVDC)		+= sunvdc.o
--
cgit

From 3045c0d05e728134aefb8adbbc56a4d876a0bdce Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni
Date: Wed, 17 Oct 2018 11:34:15 -0700
Subject: nvme-pci: remove duplicate check

This is a cleanup patch that doesn't change any functionality. It removes
the duplicate call to blk_integrity_rq() in nvme_map_data().

Signed-off-by: Chaitanya Kulkarni
Reviewed-by: Keith Busch
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 72737009b82d..4e023cd007e1 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -772,10 +772,10 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir))
 			goto out_unmap;
-	}
 
-	if (blk_integrity_rq(req))
 		cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
+	}
 
 	return BLK_STS_OK;
 
 out_unmap:
--
cgit

From ecb0a83e3198f2c1142901687afacbc73602a13b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 18 Oct 2018 22:55:03 +0200
Subject: ubd: remove use of blk_rq_map_sg

There is no good reason to create a scatterlist in the ubd driver; it can
just iterate the request directly.
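
For reference, the segment iteration this conversion relies on looks
roughly like the sketch below. It is illustrative only; queue_one_vec()
is a hypothetical stand-in for the driver-specific submit path, not a
function defined by this patch:

    #include <linux/blkdev.h>
    #include <linux/blk-mq.h>
    #include <linux/bio.h>

    /* Hypothetical per-segment handler; a real driver submits I/O here. */
    static int queue_one_vec(struct request *req, u64 off, struct bio_vec *bvec);

    /* Walk a request segment by segment; no scatterlist is required. */
    static int queue_request(struct request *req)
    {
            struct req_iterator iter;
            struct bio_vec bvec;
            u64 off = (u64)blk_rq_pos(req) << 9;    /* sectors to bytes */
            int ret;

            rq_for_each_segment(bvec, req, iter) {
                    /* bvec.bv_page/bv_offset/bv_len describe one segment */
                    ret = queue_one_vec(req, off, &bvec);
                    if (ret < 0)
                            return ret;
                    off += bvec.bv_len;
            }
            return 0;
    }
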
Signed-off-by: Christoph Hellwig [rw: Folded in improvements as discussed with hch and jens] Signed-off-by: Richard Weinberger Signed-off-by: Jens Axboe --- arch/um/drivers/ubd_kern.c | 158 ++++++++++++++++----------------------------- 1 file changed, 54 insertions(+), 104 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 9cb0cabb4e02..74c002ddc0ce 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -160,12 +160,6 @@ struct ubd { spinlock_t lock; }; -struct ubd_pdu { - struct scatterlist sg[MAX_SG]; - int start_sg, end_sg; - sector_t rq_pos; -}; - #define DEFAULT_COW { \ .file = NULL, \ .fd = -1, \ @@ -197,9 +191,6 @@ static struct proc_dir_entry *proc_ide = NULL; static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd); -static int ubd_init_request(struct blk_mq_tag_set *set, - struct request *req, unsigned int hctx_idx, - unsigned int numa_node); static void make_proc_ide(void) { @@ -895,7 +886,6 @@ static int ubd_disk_register(int major, u64 size, int unit, static const struct blk_mq_ops ubd_mq_ops = { .queue_rq = ubd_queue_rq, - .init_request = ubd_init_request, }; static int ubd_add(int n, char **error_out) @@ -918,7 +908,6 @@ static int ubd_add(int n, char **error_out) ubd_dev->tag_set.queue_depth = 64; ubd_dev->tag_set.numa_node = NUMA_NO_NODE; ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ubd_dev->tag_set.cmd_size = sizeof(struct ubd_pdu); ubd_dev->tag_set.driver_data = ubd_dev; ubd_dev->tag_set.nr_hw_queues = 1; @@ -1300,123 +1289,84 @@ static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, req->bitmap_words, bitmap_len); } -/* Called with dev->lock held */ -static void prepare_request(struct request *req, struct io_thread_req *io_req, - unsigned long long offset, int page_offset, - int len, struct page *page) +static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, + u64 off, struct bio_vec *bvec) { - struct gendisk *disk = req->rq_disk; - struct ubd *ubd_dev = disk->private_data; - - io_req->req = req; - io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : - ubd_dev->fd; - io_req->fds[1] = ubd_dev->fd; - io_req->cow_offset = -1; - io_req->offset = offset; - io_req->length = len; - io_req->error = 0; - io_req->sector_mask = 0; - - io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; - io_req->offsets[0] = 0; - io_req->offsets[1] = ubd_dev->cow.data_offset; - io_req->buffer = page_address(page) + page_offset; - io_req->sectorsize = 1 << 9; - - if(ubd_dev->cow.file != NULL) - cowify_req(io_req, ubd_dev->cow.bitmap, - ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len); - -} + struct ubd *dev = hctx->queue->queuedata; + struct io_thread_req *io_req; + int ret; -/* Called with dev->lock held */ -static void prepare_flush_request(struct request *req, - struct io_thread_req *io_req) -{ - struct gendisk *disk = req->rq_disk; - struct ubd *ubd_dev = disk->private_data; + io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); + if (!io_req) + return -ENOMEM; io_req->req = req; - io_req->fds[0] = (ubd_dev->cow.file != NULL) ? 
ubd_dev->cow.fd :
-						  ubd_dev->fd;
-	io_req->op = UBD_FLUSH;
-}
-
-static void submit_request(struct io_thread_req *io_req, struct ubd *dev)
-{
-	int n = os_write_file(thread_fd, &io_req,
-			      sizeof(io_req));
+	if (dev->cow.file)
+		io_req->fds[0] = dev->cow.fd;
+	else
+		io_req->fds[0] = dev->fd;
 
-	if (n != sizeof(io_req)) {
-		if (n != -EAGAIN)
-			pr_err("write to io thread failed: %d\n", -n);
+	if (req_op(req) == REQ_OP_FLUSH) {
+		io_req->op = UBD_FLUSH;
+	} else {
+		io_req->fds[1] = dev->fd;
+		io_req->cow_offset = -1;
+		io_req->offset = off;
+		io_req->length = bvec->bv_len;
+		io_req->error = 0;
+		io_req->sector_mask = 0;
+
+		io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE;
+		io_req->offsets[0] = 0;
+		io_req->offsets[1] = dev->cow.data_offset;
+		io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
+		io_req->sectorsize = 1 << 9;
+
+		if (dev->cow.file) {
+			cowify_req(io_req, dev->cow.bitmap,
+				   dev->cow.bitmap_offset, dev->cow.bitmap_len);
+		}
+	}
 
-		blk_mq_requeue_request(io_req->req, true);
+	ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
+	if (ret != sizeof(io_req)) {
+		if (ret != -EAGAIN)
+			pr_err("write to io thread failed: %d\n", -ret);
 		kfree(io_req);
 	}
+
+	return ret;
 }
 
 static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 				 const struct blk_mq_queue_data *bd)
 {
 	struct request *req = bd->rq;
-	struct ubd *dev = hctx->queue->queuedata;
-	struct ubd_pdu *pdu = blk_mq_rq_to_pdu(req);
-	struct io_thread_req *io_req;
+	int ret = 0;
 
 	blk_mq_start_request(req);
 
-	pdu->rq_pos = blk_rq_pos(req);
-	pdu->start_sg = 0;
-	pdu->end_sg = blk_rq_map_sg(req->q, req, pdu->sg);
-
 	if (req_op(req) == REQ_OP_FLUSH) {
-		io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC);
-		if (io_req == NULL) {
-			blk_mq_requeue_request(req, true);
-			goto done;
+		ret = ubd_queue_one_vec(hctx, req, 0, NULL);
+	} else {
+		struct req_iterator iter;
+		struct bio_vec bvec;
+		u64 off = (u64)blk_rq_pos(req) << 9;
+
+		rq_for_each_segment(bvec, req, iter) {
+			ret = ubd_queue_one_vec(hctx, req, off, &bvec);
+			if (ret < 0)
+				goto out;
+			off += bvec.bv_len;
 		}
-		prepare_flush_request(req, io_req);
-		submit_request(io_req, dev);
-
-		goto done;
 	}
-
-	while (pdu->start_sg < pdu->end_sg) {
-		struct scatterlist *sg = &pdu->sg[pdu->start_sg];
-
-		io_req = kmalloc(sizeof(struct io_thread_req),
-				 GFP_ATOMIC);
-		if (io_req == NULL) {
-			blk_mq_requeue_request(req, true);
-			goto done;
-		}
-		prepare_request(req, io_req,
-				(unsigned long long)pdu->rq_pos << 9,
-				sg->offset, sg->length, sg_page(sg));
-
-		submit_request(io_req, dev);
-
-		pdu->rq_pos += sg->length >> 9;
-		pdu->start_sg++;
+out:
+	if (ret < 0) {
+		blk_mq_requeue_request(req, true);
 	}
-
-done:
 	return BLK_STS_OK;
 }
 
-static int ubd_init_request(struct blk_mq_tag_set *set,
-		struct request *req, unsigned int hctx_idx,
-		unsigned int numa_node)
-{
-	struct ubd_pdu *pdu = blk_mq_rq_to_pdu(req);
-
-	sg_init_table(pdu->sg, MAX_SG);
-
-	return 0;
-}
-
 static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
 	struct ubd *ubd_dev = bdev->bd_disk->private_data;
--
cgit

From 138126214868a741691edec92d4c94d428213302 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 18 Oct 2018 15:15:11 +0200
Subject: skd: switch to the generic DMA API

The PCI DMA API is deprecated, switch to the generic DMA API instead.
Also make use of the dma_set_mask_and_coherent helper to easily set the
streaming and coherent DMA masks together.
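
As an illustration only, the mask-setup pattern these conversions use
looks roughly like this minimal sketch; note that the sketch re-assigns
rc on the 32-bit fallback path, which the hunks below do not do:

    #include <linux/dma-mapping.h>
    #include <linux/pci.h>

    static int example_setup_dma_masks(struct pci_dev *pdev)
    {
            /* One call sets both the streaming and the coherent mask. */
            int rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));

            if (rc) /* 64-bit refused: fall back to 32-bit addressing */
                    rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
            if (rc)
                    dev_err(&pdev->dev, "DMA mask error %d\n", rc);
            return rc;
    }
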
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/skd_main.c | 63 +++++++++++++++++++----------------------------- 1 file changed, 25 insertions(+), 38 deletions(-) diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 80a5806ede03..7c5fc6942f32 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -632,7 +632,7 @@ static bool skd_preop_sg_list(struct skd_device *skdev, * Map scatterlist to PCI bus addresses. * Note PCI might change the number of entries. */ - n_sg = pci_map_sg(skdev->pdev, sgl, n_sg, skreq->data_dir); + n_sg = dma_map_sg(&skdev->pdev->dev, sgl, n_sg, skreq->data_dir); if (n_sg <= 0) return false; @@ -682,7 +682,8 @@ static void skd_postop_sg_list(struct skd_device *skdev, skreq->sksg_list[skreq->n_sg - 1].next_desc_ptr = skreq->sksg_dma_address + ((skreq->n_sg) * sizeof(struct fit_sg_descriptor)); - pci_unmap_sg(skdev->pdev, &skreq->sg[0], skreq->n_sg, skreq->data_dir); + dma_unmap_sg(&skdev->pdev->dev, &skreq->sg[0], skreq->n_sg, + skreq->data_dir); } /* @@ -2632,8 +2633,8 @@ static int skd_cons_skcomp(struct skd_device *skdev) "comp pci_alloc, total bytes %zd entries %d\n", SKD_SKCOMP_SIZE, SKD_N_COMPLETION_ENTRY); - skcomp = pci_zalloc_consistent(skdev->pdev, SKD_SKCOMP_SIZE, - &skdev->cq_dma_address); + skcomp = dma_zalloc_coherent(&skdev->pdev->dev, SKD_SKCOMP_SIZE, + &skdev->cq_dma_address, GFP_KERNEL); if (skcomp == NULL) { rc = -ENOMEM; @@ -2674,10 +2675,10 @@ static int skd_cons_skmsg(struct skd_device *skdev) skmsg->id = i + SKD_ID_FIT_MSG; - skmsg->msg_buf = pci_alloc_consistent(skdev->pdev, - SKD_N_FITMSG_BYTES, - &skmsg->mb_dma_address); - + skmsg->msg_buf = dma_alloc_coherent(&skdev->pdev->dev, + SKD_N_FITMSG_BYTES, + &skmsg->mb_dma_address, + GFP_KERNEL); if (skmsg->msg_buf == NULL) { rc = -ENOMEM; goto err_out; @@ -2971,8 +2972,8 @@ err_out: static void skd_free_skcomp(struct skd_device *skdev) { if (skdev->skcomp_table) - pci_free_consistent(skdev->pdev, SKD_SKCOMP_SIZE, - skdev->skcomp_table, skdev->cq_dma_address); + dma_free_coherent(&skdev->pdev->dev, SKD_SKCOMP_SIZE, + skdev->skcomp_table, skdev->cq_dma_address); skdev->skcomp_table = NULL; skdev->cq_dma_address = 0; @@ -2991,8 +2992,8 @@ static void skd_free_skmsg(struct skd_device *skdev) skmsg = &skdev->skmsg_table[i]; if (skmsg->msg_buf != NULL) { - pci_free_consistent(skdev->pdev, SKD_N_FITMSG_BYTES, - skmsg->msg_buf, + dma_free_coherent(&skdev->pdev->dev, SKD_N_FITMSG_BYTES, + skmsg->msg_buf, skmsg->mb_dma_address); } skmsg->msg_buf = NULL; @@ -3172,18 +3173,12 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) rc = pci_request_regions(pdev, DRV_NAME); if (rc) goto err_out; - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (!rc) { - if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { - dev_err(&pdev->dev, "consistent DMA mask error %d\n", - rc); - } - } else { - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc) { - dev_err(&pdev->dev, "DMA mask error %d\n", rc); - goto err_out_regions; - } + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (rc) + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (rc) { + dev_err(&pdev->dev, "DMA mask error %d\n", rc); + goto err_out_regions; } if (!skd_major) { @@ -3367,20 +3362,12 @@ static int skd_pci_resume(struct pci_dev *pdev) rc = pci_request_regions(pdev, DRV_NAME); if (rc) goto err_out; - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (!rc) { - if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { - - 
dev_err(&pdev->dev, "consistent DMA mask error %d\n", - rc); - } - } else { - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc) { - - dev_err(&pdev->dev, "DMA mask error %d\n", rc); - goto err_out_regions; - } + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (rc) + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (rc) { + dev_err(&pdev->dev, "DMA mask error %d\n", rc); + goto err_out_regions; } pci_set_master(pdev); -- cgit From 64ab1fa5da058960fdd930b768c6c7b441d53d17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Oct 2018 15:15:12 +0200 Subject: sx8: remove dead IF_64BIT_DMA_IS_POSSIBLE code This code has effectively been commented out since the first commit, so remove it. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/sx8.c | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 16f37254bcf4..377c17e106c3 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -197,7 +197,6 @@ enum { FL_NON_RAID = FW_VER_NON_RAID, FL_4PORT = FW_VER_4PORT, FL_FW_VER_MASK = (FW_VER_NON_RAID | FW_VER_4PORT), - FL_DAC = (1 << 16), FL_DYN_MAJOR = (1 << 17), }; @@ -1585,7 +1584,6 @@ static int carm_init_shm(struct carm_host *host) static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) { struct carm_host *host; - unsigned int pci_dac; int rc; struct request_queue *q; unsigned int i; @@ -1600,28 +1598,12 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto err_out; -#ifdef IF_64BIT_DMA_IS_POSSIBLE /* grrrr... */ - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (!rc) { - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (rc) { - printk(KERN_ERR DRV_NAME "(%s): consistent DMA mask failure\n", - pci_name(pdev)); - goto err_out_regions; - } - pci_dac = 1; - } else { -#endif - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc) { - printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n", - pci_name(pdev)); - goto err_out_regions; - } - pci_dac = 0; -#ifdef IF_64BIT_DMA_IS_POSSIBLE /* grrrr... */ + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (rc) { + printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n", + pci_name(pdev)); + goto err_out_regions; } -#endif host = kzalloc(sizeof(*host), GFP_KERNEL); if (!host) { @@ -1632,7 +1614,6 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) } host->pdev = pdev; - host->flags = pci_dac ? FL_DAC : 0; spin_lock_init(&host->lock); INIT_WORK(&host->fsm_task, carm_fsm_task); init_completion(&host->probe_comp); -- cgit From 931da2f7a56b7e84a75f40472edf96a5689734a7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Oct 2018 15:15:13 +0200 Subject: sx8: switch to the generic DMA API The PCI DMA API is deprecated, switch to the generic DMA API instead. 
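The streaming-path conversion is mechanical: the pci_* wrappers already forwarded to the generic calls, so only the device argument and the direction constants change. A hedged sketch of the map/unmap pairing (names are illustrative, not sx8 code):

    #include <linux/dma-mapping.h>
    #include <linux/pci.h>
    #include <linux/scatterlist.h>

    static int example_map_sg(struct pci_dev *pdev, struct scatterlist *sg,
                              int n_elem, bool writing)
    {
            enum dma_data_direction dir = writing ? DMA_TO_DEVICE
                                                  : DMA_FROM_DEVICE;

            /* was: pci_map_sg(pdev, sg, n_elem, PCI_DMA_TODEVICE) */
            n_elem = dma_map_sg(&pdev->dev, sg, n_elem, dir);
            if (n_elem <= 0)
                    return -EIO;

            /* ... the device performs the transfer ... */

            /* was: pci_unmap_sg(pdev, sg, n_elem, pci_dir) */
            dma_unmap_sg(&pdev->dev, sg, n_elem, dir);
            return 0;
    }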
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/sx8.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 377c17e106c3..064b8c5c7a32 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -861,9 +861,9 @@ static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx, if (rq_data_dir(rq) == WRITE) { writing = 1; - pci_dir = PCI_DMA_TODEVICE; + pci_dir = DMA_TO_DEVICE; } else { - pci_dir = PCI_DMA_FROMDEVICE; + pci_dir = DMA_FROM_DEVICE; } /* get scatterlist from block layer */ @@ -877,7 +877,7 @@ static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx, } /* map scatterlist to PCI bus addresses */ - n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir); + n_elem = dma_map_sg(&host->pdev->dev, sg, n_elem, pci_dir); if (n_elem <= 0) { /* request with no s/g entries? */ carm_end_rq(host, crq, BLK_STS_IOERR); @@ -1058,11 +1058,11 @@ static inline void carm_handle_rw(struct carm_host *host, VPRINTK("ENTER\n"); if (rq_data_dir(crq->rq) == WRITE) - pci_dir = PCI_DMA_TODEVICE; + pci_dir = DMA_TO_DEVICE; else - pci_dir = PCI_DMA_FROMDEVICE; + pci_dir = DMA_FROM_DEVICE; - pci_unmap_sg(host->pdev, &crq->sg[0], crq->n_elem, pci_dir); + dma_unmap_sg(&host->pdev->dev, &crq->sg[0], crq->n_elem, pci_dir); carm_end_rq(host, crq, error); } @@ -1567,8 +1567,8 @@ static void carm_free_disks(struct carm_host *host) static int carm_init_shm(struct carm_host *host) { - host->shm = pci_alloc_consistent(host->pdev, CARM_SHM_SIZE, - &host->shm_dma); + host->shm = dma_alloc_coherent(&host->pdev->dev, CARM_SHM_SIZE, + &host->shm_dma, GFP_KERNEL); if (!host->shm) return -ENOMEM; @@ -1598,7 +1598,7 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto err_out; - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if (rc) { printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n", pci_name(pdev)); @@ -1643,7 +1643,7 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) printk(KERN_ERR DRV_NAME "(%s): OOB queue alloc failure\n", pci_name(pdev)); rc = PTR_ERR(q); - goto err_out_pci_free; + goto err_out_dma_free; } host->oob_q = q; q->queuedata = host; @@ -1708,8 +1708,8 @@ err_out_free_majors: clear_bit(1, &carm_major_alloc); blk_cleanup_queue(host->oob_q); blk_mq_free_tag_set(&host->tag_set); -err_out_pci_free: - pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma); +err_out_dma_free: + dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma); err_out_iounmap: iounmap(host->mmio); err_out_kfree: @@ -1740,7 +1740,7 @@ static void carm_remove_one (struct pci_dev *pdev) clear_bit(1, &carm_major_alloc); blk_cleanup_queue(host->oob_q); blk_mq_free_tag_set(&host->tag_set); - pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma); + dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma); iounmap(host->mmio); kfree(host); pci_release_regions(pdev); -- cgit From b46d40daba968f6bac901b50e8fbc016b9c18c00 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Oct 2018 15:15:14 +0200 Subject: umem: switch to the generic DMA API The PCI DMA API is deprecated, switch to the generic DMA API instead. 
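The coherent (consistent) allocations convert just as directly; the one substantive difference is that the old wrapper hard-coded GFP_ATOMIC, while the generic call takes an explicit gfp argument, letting probe-time callers use GFP_KERNEL. A small sketch with illustrative names:

    #include <linux/dma-mapping.h>
    #include <linux/pci.h>

    static void *example_alloc(struct pci_dev *pdev, size_t size,
                               dma_addr_t *handle)
    {
            /* was: pci_alloc_consistent(pdev, size, handle) */
            return dma_alloc_coherent(&pdev->dev, size, handle, GFP_KERNEL);
    }

    static void example_free(struct pci_dev *pdev, size_t size,
                             void *cpu_addr, dma_addr_t handle)
    {
            /* was: pci_free_consistent(pdev, size, cpu_addr, handle) */
            dma_free_coherent(&pdev->dev, size, cpu_addr, handle);
    }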
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/umem.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 9094ca60949c..be3e3ab79950 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -363,12 +363,12 @@ static int add_bio(struct cardinfo *card) vec = bio_iter_iovec(bio, card->current_iter); - dma_handle = pci_map_page(card->dev, + dma_handle = dma_map_page(&card->dev->dev, vec.bv_page, vec.bv_offset, vec.bv_len, bio_op(bio) == REQ_OP_READ ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + DMA_FROM_DEVICE : DMA_TO_DEVICE); p = &card->mm_pages[card->Ready]; desc = &p->desc[p->cnt]; @@ -448,10 +448,10 @@ static void process_page(unsigned long data) page->iter = page->bio->bi_iter; } - pci_unmap_page(card->dev, desc->data_dma_handle, + dma_unmap_page(&card->dev->dev, desc->data_dma_handle, vec.bv_len, (control & DMASCR_TRANSFER_READ) ? - PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); + DMA_TO_DEVICE : DMA_FROM_DEVICE); if (control & DMASCR_HARD_ERROR) { /* error */ bio->bi_status = BLK_STS_IOERR; @@ -817,8 +817,8 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_printk(KERN_INFO, &dev->dev, "Micro Memory(tm) controller found (PCI Mem Module (Battery Backup))\n"); - if (pci_set_dma_mask(dev, DMA_BIT_MASK(64)) && - pci_set_dma_mask(dev, DMA_BIT_MASK(32))) { + if (dma_set_mask(&dev->dev, DMA_BIT_MASK(64)) && + dma_set_mask(&dev->dev, DMA_BIT_MASK(32))) { dev_printk(KERN_WARNING, &dev->dev, "NO suitable DMA found\n"); return -ENOMEM; } @@ -871,12 +871,10 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) goto failed_magic; } - card->mm_pages[0].desc = pci_alloc_consistent(card->dev, - PAGE_SIZE * 2, - &card->mm_pages[0].page_dma); - card->mm_pages[1].desc = pci_alloc_consistent(card->dev, - PAGE_SIZE * 2, - &card->mm_pages[1].page_dma); + card->mm_pages[0].desc = dma_alloc_coherent(&card->dev->dev, + PAGE_SIZE * 2, &card->mm_pages[0].page_dma, GFP_KERNEL); + card->mm_pages[1].desc = dma_alloc_coherent(&card->dev->dev, + PAGE_SIZE * 2, &card->mm_pages[1].page_dma, GFP_KERNEL); if (card->mm_pages[0].desc == NULL || card->mm_pages[1].desc == NULL) { dev_printk(KERN_ERR, &card->dev->dev, "alloc failed\n"); @@ -1002,13 +1000,13 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) failed_req_irq: failed_alloc: if (card->mm_pages[0].desc) - pci_free_consistent(card->dev, PAGE_SIZE*2, - card->mm_pages[0].desc, - card->mm_pages[0].page_dma); + dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2, + card->mm_pages[0].desc, + card->mm_pages[0].page_dma); if (card->mm_pages[1].desc) - pci_free_consistent(card->dev, PAGE_SIZE*2, - card->mm_pages[1].desc, - card->mm_pages[1].page_dma); + dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2, + card->mm_pages[1].desc, + card->mm_pages[1].page_dma); failed_magic: iounmap(card->csr_remap); failed_remap_csr: @@ -1027,11 +1025,11 @@ static void mm_pci_remove(struct pci_dev *dev) iounmap(card->csr_remap); if (card->mm_pages[0].desc) - pci_free_consistent(card->dev, PAGE_SIZE*2, + dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2, card->mm_pages[0].desc, card->mm_pages[0].page_dma); if (card->mm_pages[1].desc) - pci_free_consistent(card->dev, PAGE_SIZE*2, + dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2, card->mm_pages[1].desc, card->mm_pages[1].page_dma); blk_cleanup_queue(card->queue); -- cgit From 77a12e51fcf8c6200818e29fe1e90d944bb1af4a Mon Sep 17 
00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Oct 2018 15:15:15 +0200 Subject: rsxx: switch to the generic DMA API The PCI DMA API is deprecated, switch to the generic DMA API instead. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 2 +- drivers/block/rsxx/dma.c | 52 +++++++++++++++++++++++------------------------ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index f2c631ce793c..639051502181 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -782,7 +782,7 @@ static int rsxx_pci_probe(struct pci_dev *dev, pci_set_master(dev); pci_set_dma_max_seg_size(dev, RSXX_HW_BLK_SIZE); - st = pci_set_dma_mask(dev, DMA_BIT_MASK(64)); + st = dma_set_mask(&dev->dev, DMA_BIT_MASK(64)); if (st) { dev_err(CARD_TO_DEV(card), "No usable DMA configuration,aborting\n"); diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 8fbc1bf6db3d..af9cf0215164 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -224,12 +224,12 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card) static void rsxx_free_dma(struct rsxx_dma_ctrl *ctrl, struct rsxx_dma *dma) { if (dma->cmd != HW_CMD_BLK_DISCARD) { - if (!pci_dma_mapping_error(ctrl->card->dev, dma->dma_addr)) { - pci_unmap_page(ctrl->card->dev, dma->dma_addr, + if (!dma_mapping_error(&ctrl->card->dev->dev, dma->dma_addr)) { + dma_unmap_page(&ctrl->card->dev->dev, dma->dma_addr, get_dma_size(dma), dma->cmd == HW_CMD_BLK_WRITE ? - PCI_DMA_TODEVICE : - PCI_DMA_FROMDEVICE); + DMA_TO_DEVICE : + DMA_FROM_DEVICE); } } @@ -438,23 +438,23 @@ static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl) if (dma->cmd != HW_CMD_BLK_DISCARD) { if (dma->cmd == HW_CMD_BLK_WRITE) - dir = PCI_DMA_TODEVICE; + dir = DMA_TO_DEVICE; else - dir = PCI_DMA_FROMDEVICE; + dir = DMA_FROM_DEVICE; /* - * The function pci_map_page is placed here because we + * The function dma_map_page is placed here because we * can only, by design, issue up to 255 commands to the * hardware at one time per DMA channel. So the maximum * amount of mapped memory would be 255 * 4 channels * * 4096 Bytes which is less than 2GB, the limit of a x8 - * Non-HWWD PCIe slot. This way the pci_map_page + * Non-HWWD PCIe slot. This way the dma_map_page * function should never fail because of a lack of * mappable memory. 
*/ - dma->dma_addr = pci_map_page(ctrl->card->dev, dma->page, + dma->dma_addr = dma_map_page(&ctrl->card->dev->dev, dma->page, dma->pg_off, dma->sub_page.cnt << 9, dir); - if (pci_dma_mapping_error(ctrl->card->dev, dma->dma_addr)) { + if (dma_mapping_error(&ctrl->card->dev->dev, dma->dma_addr)) { push_tracker(ctrl->trackers, tag); rsxx_complete_dma(ctrl, dma, DMA_CANCELLED); continue; @@ -776,10 +776,10 @@ bvec_err: /*----------------- DMA Engine Initialization & Setup -------------------*/ int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl) { - ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8, - &ctrl->status.dma_addr); - ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8, - &ctrl->cmd.dma_addr); + ctrl->status.buf = dma_alloc_coherent(&dev->dev, STATUS_BUFFER_SIZE8, + &ctrl->status.dma_addr, GFP_KERNEL); + ctrl->cmd.buf = dma_alloc_coherent(&dev->dev, COMMAND_BUFFER_SIZE8, + &ctrl->cmd.dma_addr, GFP_KERNEL); if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) return -ENOMEM; @@ -962,12 +962,12 @@ failed_dma_setup: vfree(ctrl->trackers); if (ctrl->status.buf) - pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, - ctrl->status.buf, - ctrl->status.dma_addr); + dma_free_coherent(&card->dev->dev, STATUS_BUFFER_SIZE8, + ctrl->status.buf, + ctrl->status.dma_addr); if (ctrl->cmd.buf) - pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, - ctrl->cmd.buf, ctrl->cmd.dma_addr); + dma_free_coherent(&card->dev->dev, COMMAND_BUFFER_SIZE8, + ctrl->cmd.buf, ctrl->cmd.dma_addr); } return st; @@ -1023,10 +1023,10 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card) vfree(ctrl->trackers); - pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, - ctrl->status.buf, ctrl->status.dma_addr); - pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, - ctrl->cmd.buf, ctrl->cmd.dma_addr); + dma_free_coherent(&card->dev->dev, STATUS_BUFFER_SIZE8, + ctrl->status.buf, ctrl->status.dma_addr); + dma_free_coherent(&card->dev->dev, COMMAND_BUFFER_SIZE8, + ctrl->cmd.buf, ctrl->cmd.dma_addr); } } @@ -1059,11 +1059,11 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) card->ctrl[i].stats.reads_issued--; if (dma->cmd != HW_CMD_BLK_DISCARD) { - pci_unmap_page(card->dev, dma->dma_addr, + dma_unmap_page(&card->dev->dev, dma->dma_addr, get_dma_size(dma), dma->cmd == HW_CMD_BLK_WRITE ? - PCI_DMA_TODEVICE : - PCI_DMA_FROMDEVICE); + DMA_TO_DEVICE : + DMA_FROM_DEVICE); } list_add_tail(&dma->list, &issued_dmas[i]); -- cgit From ee75fa2ae0e23f575630923bc598e7dedbb8b21c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 18 Oct 2018 15:15:16 +0200 Subject: mtip32xx: fully switch to the generic DMA API The mtip32xx used an odd mix of the old PCI and the generic DMA API, so switch it over to the generic API entirely. Note that this also removes a weird fallback to just a 32-bit coherent dma mask if the 64-bit dma mask doesn't work, as that can't even happen. 
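The removed fallback was unreachable because setting a 64-bit coherent mask cannot fail once the 64-bit streaming mask has been accepted. On the mapping side, the single-buffer calls translate the same way as the scatterlist ones, with dma_mapping_error() replacing pci_dma_mapping_error(); a sketch of the pattern the patch applies to the taskfile buffers (names illustrative):

    #include <linux/dma-mapping.h>
    #include <linux/pci.h>

    static int example_map_single(struct pci_dev *pdev, void *buf,
                                  size_t len, dma_addr_t *handle)
    {
            /* was: pci_map_single() followed by pci_dma_mapping_error() */
            *handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
            if (dma_mapping_error(&pdev->dev, *handle))
                    return -ENOMEM;
            return 0;
    }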
Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 47 +++++++++++++++------------------------ 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 1d7d48d8a205..dfc8de6ce525 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -1862,11 +1862,9 @@ static int exec_drive_taskfile(struct driver_data *dd, if (IS_ERR(outbuf)) return PTR_ERR(outbuf); - outbuf_dma = pci_map_single(dd->pdev, - outbuf, - taskout, - DMA_TO_DEVICE); - if (pci_dma_mapping_error(dd->pdev, outbuf_dma)) { + outbuf_dma = dma_map_single(&dd->pdev->dev, outbuf, + taskout, DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pdev->dev, outbuf_dma)) { err = -ENOMEM; goto abort; } @@ -1880,10 +1878,9 @@ static int exec_drive_taskfile(struct driver_data *dd, inbuf = NULL; goto abort; } - inbuf_dma = pci_map_single(dd->pdev, - inbuf, - taskin, DMA_FROM_DEVICE); - if (pci_dma_mapping_error(dd->pdev, inbuf_dma)) { + inbuf_dma = dma_map_single(&dd->pdev->dev, inbuf, + taskin, DMA_FROM_DEVICE); + if (dma_mapping_error(&dd->pdev->dev, inbuf_dma)) { err = -ENOMEM; goto abort; } @@ -2002,11 +1999,11 @@ static int exec_drive_taskfile(struct driver_data *dd, /* reclaim the DMA buffers.*/ if (inbuf_dma) - pci_unmap_single(dd->pdev, inbuf_dma, - taskin, DMA_FROM_DEVICE); + dma_unmap_single(&dd->pdev->dev, inbuf_dma, taskin, + DMA_FROM_DEVICE); if (outbuf_dma) - pci_unmap_single(dd->pdev, outbuf_dma, - taskout, DMA_TO_DEVICE); + dma_unmap_single(&dd->pdev->dev, outbuf_dma, taskout, + DMA_TO_DEVICE); inbuf_dma = 0; outbuf_dma = 0; @@ -2053,11 +2050,11 @@ static int exec_drive_taskfile(struct driver_data *dd, } abort: if (inbuf_dma) - pci_unmap_single(dd->pdev, inbuf_dma, - taskin, DMA_FROM_DEVICE); + dma_unmap_single(&dd->pdev->dev, inbuf_dma, taskin, + DMA_FROM_DEVICE); if (outbuf_dma) - pci_unmap_single(dd->pdev, outbuf_dma, - taskout, DMA_TO_DEVICE); + dma_unmap_single(&dd->pdev->dev, outbuf_dma, taskout, + DMA_TO_DEVICE); kfree(outbuf); kfree(inbuf); @@ -4216,18 +4213,10 @@ static int mtip_pci_probe(struct pci_dev *pdev, goto iomap_err; } - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - rv = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - - if (rv) { - rv = pci_set_consistent_dma_mask(pdev, - DMA_BIT_MASK(32)); - if (rv) { - dev_warn(&pdev->dev, - "64-bit DMA enable failed\n"); - goto setmask_err; - } - } + rv = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (rv) { + dev_warn(&pdev->dev, "64-bit DMA enable failed\n"); + goto setmask_err; } /* Copy the info we may need later into the private data structure. */ -- cgit From bb59b8e57493465fac8658bba103f7c4cc5d874a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 19 Oct 2018 00:50:29 -0700 Subject: nvme-rdma: always have a valid trsvcid If not passed, we set the default trsvcid. We can rely on having trsvcid and can simplify the controller matching logic. 
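With trsvcid now guaranteed to be populated, duplicate-connect detection no longer needs the three-way case analysis over NVMF_OPT_TRSVCID; conceptually it reduces to plain string compares. A sketch under that assumption (the helper name is illustrative; nvmf_ctrl_options comes from fabrics.h):

    #include <linux/string.h>

    static bool example_addr_match(const struct nvmf_ctrl_options *a,
                                   const struct nvmf_ctrl_options *b)
    {
            /* both traddr and trsvcid are now always non-NULL */
            return !strcmp(a->traddr, b->traddr) &&
                   !strcmp(a->trsvcid, b->trsvcid);
    }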
Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index e7be903041a8..03fff72b96f1 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1860,26 +1860,11 @@ static inline bool __nvme_rdma_options_match(struct nvme_rdma_ctrl *ctrl, struct nvmf_ctrl_options *opts) { - char *stdport = __stringify(NVME_RDMA_IP_PORT); - - if (!nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts) || - strcmp(opts->traddr, ctrl->ctrl.opts->traddr)) + strcmp(opts->traddr, ctrl->ctrl.opts->traddr) || + strcmp(opts->trsvcid, ctrl->ctrl.opts->trsvcid)) return false; - if (opts->mask & NVMF_OPT_TRSVCID && - ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) { - if (strcmp(opts->trsvcid, ctrl->ctrl.opts->trsvcid)) - return false; - } else if (opts->mask & NVMF_OPT_TRSVCID) { - if (strcmp(opts->trsvcid, stdport)) - return false; - } else if (ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) { - if (strcmp(stdport, ctrl->ctrl.opts->trsvcid)) - return false; - } - /* else, it's a match as both have stdport. Fall to next checks */ - /* * checking the local address is rough. In most cases, one * is not specified and the host port is selected by the stack. @@ -1939,7 +1924,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, struct nvme_rdma_ctrl *ctrl; int ret; bool changed; - char *port; ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) @@ -1947,15 +1931,21 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->list); - if (opts->mask & NVMF_OPT_TRSVCID) - port = opts->trsvcid; - else - port = __stringify(NVME_RDMA_IP_PORT); + if (!(opts->mask & NVMF_OPT_TRSVCID)) { + opts->trsvcid = + kstrdup(__stringify(NVME_RDMA_IP_PORT), GFP_KERNEL); + if (!opts->trsvcid) { + ret = -ENOMEM; + goto out_free_ctrl; + } + opts->mask |= NVMF_OPT_TRSVCID; + } ret = inet_pton_with_scope(&init_net, AF_UNSPEC, - opts->traddr, port, &ctrl->addr); + opts->traddr, opts->trsvcid, &ctrl->addr); if (ret) { - pr_err("malformed address passed: %s:%s\n", opts->traddr, port); + pr_err("malformed address passed: %s:%s\n", + opts->traddr, opts->trsvcid); goto out_free_ctrl; } -- cgit From b7c7be6f6bd28ffea7f608ac2d806b8a4bdc82fe Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 18 Oct 2018 17:40:40 -0700 Subject: nvme-fabrics: move controller options matching to fabrics IP transports will most likely use the same controller options matching when detecting a duplicate connect. Move it to fabrics. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 30 ++++++++++++++++++++++++++++++ drivers/nvme/host/fabrics.h | 2 ++ drivers/nvme/host/rdma.c | 35 +---------------------------------- 3 files changed, 33 insertions(+), 34 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index bcd09d3a44da..bd0969db6225 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -868,6 +868,36 @@ static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts, return 0; } +bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, + struct nvmf_ctrl_options *opts) +{ + if (!nvmf_ctlr_matches_baseopts(ctrl, opts) || + strcmp(opts->traddr, ctrl->opts->traddr) || + strcmp(opts->trsvcid, ctrl->opts->trsvcid)) + return false; + + /* + * Checking the local address is rough. 
In most cases, none is specified + * and the host port is selected by the stack. + * + * Assume no match if: + * - local address is specified and address is not the same + * - local address is not specified but remote is, or vice versa + * (admin using specific host_traddr when it matters). + */ + if ((opts->mask & NVMF_OPT_HOST_TRADDR) && + (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) { + if (strcmp(opts->host_traddr, ctrl->opts->host_traddr)) + return false; + } else if ((opts->mask & NVMF_OPT_HOST_TRADDR) || + (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) { + return false; + } + + return true; +} +EXPORT_SYMBOL_GPL(nvmf_ip_options_match); + static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts, unsigned int allowed_opts) { diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index aa2fdb2a2e8f..6ea6275f332a 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -166,6 +166,8 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl, struct request *rq); bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, bool queue_live); +bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, + struct nvmf_ctrl_options *opts); static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, bool queue_live) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 03fff72b96f1..d181cafedc58 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1856,39 +1856,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .stop_ctrl = nvme_rdma_stop_ctrl, }; -static inline bool -__nvme_rdma_options_match(struct nvme_rdma_ctrl *ctrl, - struct nvmf_ctrl_options *opts) -{ - if (!nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts) || - strcmp(opts->traddr, ctrl->ctrl.opts->traddr) || - strcmp(opts->trsvcid, ctrl->ctrl.opts->trsvcid)) - return false; - - /* - * checking the local address is rough. In most cases, one - * is not specified and the host port is selected by the stack. - * - * Assume no match if: - * local address is specified and address is not the same - * local address is not specified but remote is, or vice versa - * (admin using specific host_traddr when it matters). - */ - if (opts->mask & NVMF_OPT_HOST_TRADDR && - ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) { - if (strcmp(opts->host_traddr, ctrl->ctrl.opts->host_traddr)) - return false; - } else if (opts->mask & NVMF_OPT_HOST_TRADDR || - ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) - return false; - /* - * if neither controller had an host port specified, assume it's - * a match as everything else matched. - */ - - return true; -} - /* * Fails a connection request if it matches an existing controller * (association) with the same tuple: @@ -1909,7 +1876,7 @@ nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts) mutex_lock(&nvme_rdma_ctrl_mutex); list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { - found = __nvme_rdma_options_match(ctrl, opts); + found = nvmf_ip_options_match(&ctrl->ctrl, opts); if (found) break; } -- cgit From b2c3fa546705944e748666b474ffdaebaec0569f Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Sat, 20 Oct 2018 14:56:11 -0400 Subject: blkcg: fix edge case for blk_get_rl() under memory pressure It is possible for blkg creation to fail when in blk_get_rl(). In this situation, the fallback logic returns the nearest created blkg. There is however special handling for the request_list for the root blkcg. This fixes the missing edge case from the earlier series changing blk_get_rl(). 
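The shape of the fix: the root blkcg's request_list lives in the queue itself and holds no blkg reference, so the root case must be routed to it before blkg_tryget() is attempted. A simplified sketch of the resulting order, assuming the 4.19-era legacy request_list fields and eliding the RCU and lookup steps (not the verbatim helper):

    #include <linux/blk-cgroup.h>

    static struct request_list *example_get_rl(struct request_queue *q,
                                               struct blkcg_gq *blkg)
    {
            /* root blkcg, or a blkg we failed to pin: use the queue's
             * own request_list rather than the per-blkg one */
            if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg))
                    return &q->root_rl;
            return &blkg->rl;
    }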
Fixes: e2b0989954ae ("blkcg: cleanup and make blk_get_rl use blkg_lookup_create") Signed-off-by: Dennis Zhou Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index b7fd08013de2..1e76ceebeb5d 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -597,7 +597,7 @@ static inline struct request_list *blk_get_rl(struct request_queue *q, if (unlikely(!blkg)) blkg = __blkg_lookup_create(blkcg, q); - if (!blkg_tryget(blkg)) + if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg)) goto rl_use_root; rcu_read_unlock(); -- cgit From d459d853c2edc793135e4bfa4e345c758f1cc859 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Sat, 20 Oct 2018 14:56:12 -0400 Subject: blkcg: reassociate bios when make_request() is called recursively When submitting a bio, multiple recursive calls to make_request() may occur. This causes the initial associate done in blkcg_bio_issue_check() to be incorrect and reference the prior request_queue. This introduces a helper to do reassociation when make_request() is recursively called. Fixes: a7b39b4e961c ("blkcg: always associate a bio with a blkg") Reported-by: Valdis Kletnieks Signed-off-by: Dennis Zhou Tested-by: Valdis Kletnieks Signed-off-by: Jens Axboe --- block/bio.c | 20 ++++++++++++++++++++ block/blk-core.c | 1 + include/linux/bio.h | 3 +++ 3 files changed, 24 insertions(+) diff --git a/block/bio.c b/block/bio.c index 17a8b0aa7050..bbfeb4ee2892 100644 --- a/block/bio.c +++ b/block/bio.c @@ -2083,6 +2083,26 @@ int bio_associate_create_blkg(struct request_queue *q, struct bio *bio) return ret; } +/** + * bio_reassociate_blkg - reassociate a bio with a blkg from q + * @q: request_queue where bio is going + * @bio: target bio + * + * When submitting a bio, multiple recursive calls to make_request() may occur. + * This causes the initial associate done in blkcg_bio_issue_check() to be + * incorrect and reference the prior request_queue. This performs reassociation + * when this situation happens. 
+ */ +int bio_reassociate_blkg(struct request_queue *q, struct bio *bio) +{ + if (bio->bi_blkg) { + blkg_put(bio->bi_blkg); + bio->bi_blkg = NULL; + } + + return bio_associate_create_blkg(q, bio); +} + /** * bio_disassociate_task - undo bio_associate_current() * @bio: target bio diff --git a/block/blk-core.c b/block/blk-core.c index cdfabc5646da..3ed60723e242 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2433,6 +2433,7 @@ blk_qc_t generic_make_request(struct bio *bio) if (q) blk_queue_exit(q); q = bio->bi_disk->queue; + bio_reassociate_blkg(q, bio); flags = 0; if (bio->bi_opf & REQ_NOWAIT) flags = BLK_MQ_REQ_NOWAIT; diff --git a/include/linux/bio.h b/include/linux/bio.h index f447b0ebb288..b47c7f716731 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -514,6 +514,7 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); int bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css); int bio_associate_create_blkg(struct request_queue *q, struct bio *bio); +int bio_reassociate_blkg(struct request_queue *q, struct bio *bio); void bio_disassociate_task(struct bio *bio); void bio_clone_blkg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ @@ -522,6 +523,8 @@ static inline int bio_associate_blkg_from_css(struct bio *bio, { return 0; } static inline int bio_associate_create_blkg(struct request_queue *q, struct bio *bio) { return 0; } +static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio) +{ return 0; } static inline void bio_disassociate_task(struct bio *bio) { } static inline void bio_clone_blkg_association(struct bio *dst, struct bio *src) { } -- cgit From 52990a5fb0c991ecafebdab43138b5ed41376852 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 21 Oct 2018 12:02:36 -0600 Subject: block: setup bounce bio_sets properly We're only setting up the bounce bio sets if we happen to need bouncing for regular HIGHMEM, not if we only need it for ISA devices. Protect the ISA bounce setup with a mutex, since it's being invoked from driver init functions and can thus be called in parallel. 
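The serialization half of the fix is the standard check-under-lock pattern for lazy, possibly concurrent initialization; a self-contained sketch of the shape (names illustrative):

    #include <linux/mutex.h>

    static DEFINE_MUTEX(example_mutex);
    static bool example_done;

    static int example_lazy_init(void)
    {
            mutex_lock(&example_mutex);
            if (!example_done) {
                    /* one-time pool and bioset setup goes here */
                    example_done = true;
            }
            mutex_unlock(&example_mutex);
            return 0;
    }

Re-checking the flag under the mutex makes a second caller a no-op instead of a double init.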
Cc: stable@vger.kernel.org Reported-by: Ondrej Zary Tested-by: Ondrej Zary Signed-off-by: Jens Axboe --- block/bounce.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/block/bounce.c b/block/bounce.c index b30071ac4ec6..ec0d99995f5f 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -31,6 +31,24 @@ static struct bio_set bounce_bio_set, bounce_bio_split; static mempool_t page_pool, isa_page_pool; +static void init_bounce_bioset(void) +{ + static bool bounce_bs_setup; + int ret; + + if (bounce_bs_setup) + return; + + ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); + BUG_ON(ret); + if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE)) + BUG_ON(1); + + ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0); + BUG_ON(ret); + bounce_bs_setup = true; +} + #if defined(CONFIG_HIGHMEM) static __init int init_emergency_pool(void) { @@ -44,14 +62,7 @@ static __init int init_emergency_pool(void) BUG_ON(ret); pr_info("pool size: %d pages\n", POOL_SIZE); - ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); - BUG_ON(ret); - if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE)) - BUG_ON(1); - - ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0); - BUG_ON(ret); - + init_bounce_bioset(); return 0; } @@ -86,6 +97,8 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data) return mempool_alloc_pages(gfp_mask | GFP_DMA, data); } +static DEFINE_MUTEX(isa_mutex); + /* * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA * as the max address, so check if the pool has already been created. @@ -94,14 +107,20 @@ int init_emergency_isa_pool(void) { int ret; - if (mempool_initialized(&isa_page_pool)) + mutex_lock(&isa_mutex); + + if (mempool_initialized(&isa_page_pool)) { + mutex_unlock(&isa_mutex); return 0; + } ret = mempool_init(&isa_page_pool, ISA_POOL_SIZE, mempool_alloc_pages_isa, mempool_free_pages, (void *) 0); BUG_ON(ret); pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE); + init_bounce_bioset(); + mutex_unlock(&isa_mutex); return 0; } -- cgit