From 9afbee3d62a40532441d0de36ef3eaa38661a7bf Mon Sep 17 00:00:00 2001
From: James Smart
Date: Tue, 12 Mar 2019 16:30:28 -0700
Subject: scsi: lpfc: Reduce memory footprint for lpfc_queue

Currently, the driver maintains a sideband structure with a pointer for
each queue element. At 8 bytes per pointer, up to 4k elements per queue,
and hundreds of queues, this can consume a significant amount of memory.

Convert the driver to use an access routine that calculates an element's
address from its index rather than looking it up in the pointer table.

Signed-off-by: Dick Kennedy
Signed-off-by: James Smart
Signed-off-by: Martin K. Petersen
---
 drivers/scsi/lpfc/lpfc_debugfs.c |  4 +--
 drivers/scsi/lpfc/lpfc_debugfs.h |  2 +-
 drivers/scsi/lpfc/lpfc_sli.c     | 55 +++++++++++++++++++++-------------------
 drivers/scsi/lpfc/lpfc_sli4.h    | 19 +++-----------
 4 files changed, 35 insertions(+), 45 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 1215eaa530db..cdf26eb02225 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -4135,7 +4135,7 @@ lpfc_idiag_queacc_read_qe(char *pbuffer, int len, struct lpfc_queue *pque,
 			"QE-INDEX[%04d]:\n", index);
 
 	offset = 0;
-	pentry = pque->qe[index].address;
+	pentry = lpfc_sli4_qe(pque, index);
 	while (esize > 0) {
 		len += snprintf(pbuffer+len, LPFC_QUE_ACC_BUF_SIZE-len,
 				"%08x ", *pentry);
@@ -4485,7 +4485,7 @@ pass_check:
 		pque = (struct lpfc_queue *)idiag.ptr_private;
 		if (offset > pque->entry_size/sizeof(uint32_t) - 1)
 			goto error_out;
-		pentry = pque->qe[index].address;
+		pentry = lpfc_sli4_qe(pque, index);
 		pentry += offset;
 		if (idiag.cmd.opcode == LPFC_IDIAG_CMD_QUEACC_WR)
 			*pentry = value;
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h
index 93ab7dfb8ee0..e42c1fac72cf 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.h
+++ b/drivers/scsi/lpfc/lpfc_debugfs.h
@@ -345,7 +345,7 @@ lpfc_debug_dump_qe(struct lpfc_queue *q, uint32_t idx)
 
 	esize = q->entry_size;
 	qe_word_cnt = esize / sizeof(uint32_t);
-	pword = q->qe[idx].address;
+	pword = lpfc_sli4_qe(q, idx);
 
 	len = 0;
 	len += snprintf(line_buf+len, LPFC_LBUF_SZ-len, "QE[%04d]: ", idx);
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 627488434d9e..3dd7a17239a1 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -151,7 +151,7 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe128 *wqe)
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return -ENOMEM;
-	temp_wqe = q->qe[q->host_index].wqe;
+	temp_wqe = lpfc_sli4_qe(q, q->host_index);
 
 	/* If the host has not yet processed the next entry then we are done */
 	idx = ((q->host_index + 1) % q->entry_count);
@@ -271,7 +271,7 @@ lpfc_sli4_mq_put(struct lpfc_queue *q, struct lpfc_mqe *mqe)
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return -ENOMEM;
-	temp_mqe = q->qe[q->host_index].mqe;
+	temp_mqe = lpfc_sli4_qe(q, q->host_index);
 
 	/* If the host has not yet processed the next entry then we are done */
 	if (((q->host_index + 1) % q->entry_count) == q->hba_index)
@@ -331,7 +331,7 @@ lpfc_sli4_eq_get(struct lpfc_queue *q)
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return NULL;
-	eqe = q->qe[q->host_index].eqe;
+	eqe = lpfc_sli4_qe(q, q->host_index);
 
 	/* If the next EQE is not valid then we are done */
 	if (bf_get_le32(lpfc_eqe_valid, eqe) != q->qe_valid)
@@ -545,7 +545,7 @@ lpfc_sli4_cq_get(struct lpfc_queue *q)
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return NULL;
-	cqe = q->qe[q->host_index].cqe;
+	cqe = lpfc_sli4_qe(q, q->host_index);
 
 	/* If the next CQE is not valid then we are done */
 	if (bf_get_le32(lpfc_cqe_valid, cqe) != q->qe_valid)
@@ -667,8 +667,8 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
 		return -ENOMEM;
 	hq_put_index = hq->host_index;
 	dq_put_index = dq->host_index;
-	temp_hrqe = hq->qe[hq_put_index].rqe;
-	temp_drqe = dq->qe[dq_put_index].rqe;
+	temp_hrqe = lpfc_sli4_qe(hq, hq_put_index);
+	temp_drqe = lpfc_sli4_qe(dq, dq_put_index);
 
 	if (hq->type != LPFC_HRQ || dq->type != LPFC_DRQ)
 		return -EINVAL;
@@ -7878,8 +7878,9 @@ lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba)
 	mcq = phba->sli4_hba.mbx_cq;
 	idx = mcq->hba_index;
 	qe_valid = mcq->qe_valid;
-	while (bf_get_le32(lpfc_cqe_valid, mcq->qe[idx].cqe) == qe_valid) {
-		mcqe = (struct lpfc_mcqe *)mcq->qe[idx].cqe;
+	while (bf_get_le32(lpfc_cqe_valid,
+	       (struct lpfc_cqe *)lpfc_sli4_qe(mcq, idx)) == qe_valid) {
+		mcqe = (struct lpfc_mcqe *)(lpfc_sli4_qe(mcq, idx));
 		if (bf_get_le32(lpfc_trailer_completed, mcqe) &&
 		    (!bf_get_le32(lpfc_trailer_async, mcqe))) {
 			pending_completions = true;
@@ -14507,24 +14508,22 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 {
 	struct lpfc_queue *queue;
 	struct lpfc_dmabuf *dmabuf;
-	int x, total_qe_count;
-	void *dma_pointer;
 	uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz;
+	uint16_t x, pgcnt;
 
 	if (!phba->sli4_hba.pc_sli4_params.supported)
 		hw_page_size = page_size;
 
+	pgcnt = ALIGN(entry_size * entry_count, hw_page_size) / hw_page_size;
+
+	/* If needed, Adjust page count to match the max the adapter supports */
+	if (pgcnt > phba->sli4_hba.pc_sli4_params.wqpcnt)
+		pgcnt = phba->sli4_hba.pc_sli4_params.wqpcnt;
+
 	queue = kzalloc(sizeof(struct lpfc_queue) +
-			(sizeof(union sli4_qe) * entry_count), GFP_KERNEL);
+			(sizeof(void *) * pgcnt), GFP_KERNEL);
 	if (!queue)
 		return NULL;
-	queue->page_count = (ALIGN(entry_size * entry_count,
-			hw_page_size))/hw_page_size;
-
-	/* If needed, Adjust page count to match the max the adapter supports */
-	if (phba->sli4_hba.pc_sli4_params.wqpcnt &&
-	    (queue->page_count > phba->sli4_hba.pc_sli4_params.wqpcnt))
-		queue->page_count = phba->sli4_hba.pc_sli4_params.wqpcnt;
 
 	INIT_LIST_HEAD(&queue->list);
 	INIT_LIST_HEAD(&queue->wq_list);
@@ -14536,12 +14535,15 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 	/* Set queue parameters now.  If the system cannot provide memory
 	 * resources, the free routine needs to know what was allocated.
 	 */
+	queue->page_count = pgcnt;
+	queue->q_pgs = (void **)&queue[1];
+	queue->entry_cnt_per_pg = hw_page_size / entry_size;
 	queue->entry_size = entry_size;
 	queue->entry_count = entry_count;
 	queue->page_size = hw_page_size;
 	queue->phba = phba;
 
-	for (x = 0, total_qe_count = 0; x < queue->page_count; x++) {
+	for (x = 0; x < queue->page_count; x++) {
 		dmabuf = kzalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
 		if (!dmabuf)
 			goto out_fail;
@@ -14554,13 +14556,8 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 		}
 		dmabuf->buffer_tag = x;
 		list_add_tail(&dmabuf->list, &queue->page_list);
-		/* initialize queue's entry array */
-		dma_pointer = dmabuf->virt;
-		for (; total_qe_count < entry_count &&
-		     dma_pointer < (hw_page_size + dmabuf->virt);
-		     total_qe_count++, dma_pointer += entry_size) {
-			queue->qe[total_qe_count].address = dma_pointer;
-		}
+		/* use lpfc_sli4_qe to index a particular entry in this page */
+		queue->q_pgs[x] = dmabuf->virt;
 	}
 	INIT_WORK(&queue->irqwork, lpfc_sli4_hba_process_cq);
 	INIT_WORK(&queue->spwork, lpfc_sli4_sp_process_cq);
@@ -14575,6 +14572,12 @@ out_fail:
 	return NULL;
 }
 
+inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx)
+{
+	return q->q_pgs[idx / q->entry_cnt_per_pg] +
+		(q->entry_size * (idx % q->entry_cnt_per_pg));
+}
+
 /**
  * lpfc_dual_chute_pci_bar_map - Map pci base address register to host memory
  * @phba: HBA structure that indicates port to create a queue on.
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 325069abc087..b86ac85b65d0 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -117,21 +117,6 @@ enum lpfc_sli4_queue_subtype {
 	LPFC_USOL
 };
 
-union sli4_qe {
-	void *address;
-	struct lpfc_eqe *eqe;
-	struct lpfc_cqe *cqe;
-	struct lpfc_mcqe *mcqe;
-	struct lpfc_wcqe_complete *wcqe_complete;
-	struct lpfc_wcqe_release *wcqe_release;
-	struct sli4_wcqe_xri_aborted *wcqe_xri_aborted;
-	struct lpfc_rcqe_complete *rcqe_complete;
-	struct lpfc_mqe *mqe;
-	union lpfc_wqe *wqe;
-	union lpfc_wqe128 *wqe128;
-	struct lpfc_rqe *rqe;
-};
-
 /* RQ buffer list */
 struct lpfc_rqb {
 	uint16_t entry_count;	/* Current number of RQ slots */
@@ -157,6 +142,7 @@ struct lpfc_queue {
 	struct list_head cpu_list;
 	uint32_t entry_count;	/* Number of entries to support on the queue */
 	uint32_t entry_size;	/* Size of each queue entry. */
+	uint32_t entry_cnt_per_pg;
 	uint32_t notify_interval; /* Queue Notification Interval
 				   * For chip->host queues (EQ, CQ, RQ):
 				   *  specifies the interval (number of
@@ -254,7 +240,7 @@ struct lpfc_queue {
 	uint16_t last_cpu;	/* most recent cpu */
 	uint8_t qe_valid;
 	struct lpfc_queue *assoc_qp;
-	union sli4_qe qe[1];	/* array to index entries (must be last) */
+	void **q_pgs;		/* array to index entries per page */
 };
 
 struct lpfc_sli4_link {
@@ -1092,3 +1078,4 @@ int lpfc_sli4_post_status_check(struct lpfc_hba *);
 uint8_t lpfc_sli_config_mbox_subsys_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
 uint8_t lpfc_sli_config_mbox_opcode_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_sli4_ras_dma_free(struct lpfc_hba *phba);
+inline void *lpfc_sli4_qe(struct lpfc_queue *, uint16_t);
--
cgit
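
Editor's note: the heart of the patch is the arithmetic in lpfc_sli4_qe(): keep one
pointer per DMA page instead of one per entry, and locate entry idx as page
idx / entries_per_page at byte offset (idx % entries_per_page) * entry_size. The
standalone sketch below is not driver code; toy_queue, toy_qe, the 4 KiB page size,
and the 64-byte entry size are illustrative stand-ins for the lpfc_queue fields and
hw_page_size/entry_size values used by the driver. It shows the same page/offset
calculation and the sideband-memory saving the commit message describes.

/*
 * Standalone illustration of the per-page indexing scheme (assumed names,
 * not lpfc code). toy_queue models only the fields needed for the math.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define PAGE_SZ 4096u	/* stand-in for hw_page_size */

struct toy_queue {
	uint32_t entry_size;		/* bytes per queue entry */
	uint32_t entry_count;		/* total entries in the queue */
	uint32_t entry_cnt_per_pg;	/* entries that fit in one page */
	void **q_pgs;			/* one pointer per page, not per entry */
};

/* Same calculation as lpfc_sli4_qe(): pick the page, then offset into it. */
static void *toy_qe(struct toy_queue *q, uint16_t idx)
{
	return (char *)q->q_pgs[idx / q->entry_cnt_per_pg] +
		(size_t)q->entry_size * (idx % q->entry_cnt_per_pg);
}

int main(void)
{
	struct toy_queue q = {
		.entry_size = 64,		/* e.g. a 64-byte WQE */
		.entry_count = 4096,
		.entry_cnt_per_pg = PAGE_SZ / 64,
	};
	uint32_t pages = (q.entry_count + q.entry_cnt_per_pg - 1) /
			 q.entry_cnt_per_pg;

	q.q_pgs = calloc(pages, sizeof(void *));
	for (uint32_t i = 0; i < pages; i++)
		q.q_pgs[i] = calloc(1, PAGE_SZ);	/* stands in for dmabuf->virt */

	/* Entry 100 lives in page 100/64 = 1, at offset (100 % 64) * 64 = 2304. */
	printf("entry 100 -> page %u offset %u (%p)\n",
	       100 / q.entry_cnt_per_pg,
	       (100 % q.entry_cnt_per_pg) * q.entry_size,
	       toy_qe(&q, 100));

	/* Sideband cost: old per-entry pointer table vs. new per-page table. */
	printf("per-entry pointers: %zu bytes, per-page pointers: %zu bytes\n",
	       q.entry_count * sizeof(void *), pages * sizeof(void *));

	for (uint32_t i = 0; i < pages; i++)
		free(q.q_pgs[i]);
	free(q.q_pgs);
	return 0;
}

For a 4k-entry queue this example prints 32768 bytes for the old scheme versus
512 bytes for the new one, which, multiplied across hundreds of queues, is the
memory-footprint reduction the subject line refers to.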