Diffstat (limited to 'drivers/net/ethernet/microsoft/mana')
-rw-r--r-- | drivers/net/ethernet/microsoft/mana/gdma_main.c    | 177
-rw-r--r-- | drivers/net/ethernet/microsoft/mana/hw_channel.c   | 104
-rw-r--r-- | drivers/net/ethernet/microsoft/mana/mana_bpf.c     |   2
-rw-r--r-- | drivers/net/ethernet/microsoft/mana/mana_en.c      | 581
-rw-r--r-- | drivers/net/ethernet/microsoft/mana/mana_ethtool.c | 189
-rw-r--r-- | drivers/net/ethernet/microsoft/mana/shm_channel.c  |  13
6 files changed, 813 insertions, 253 deletions
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 1332db9a08eb..3504507477c6 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Copyright (c) 2021, Microsoft Corporation. */ +#include <linux/debugfs.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/utsname.h> @@ -8,6 +9,8 @@ #include <net/mana/mana.h> +struct dentry *mana_debugfs_root; + static u32 mana_gd_r32(struct gdma_context *g, u64 offset) { return readl(g->bar0_va + offset); @@ -131,9 +134,10 @@ static int mana_gd_detect_devices(struct pci_dev *pdev) struct gdma_list_devices_resp resp = {}; struct gdma_general_req req = {}; struct gdma_dev_id dev; - u32 i, max_num_devs; + int found_dev = 0; u16 dev_type; int err; + u32 i; mana_gd_init_req_hdr(&req.hdr, GDMA_LIST_DEVICES, sizeof(req), sizeof(resp)); @@ -145,12 +149,17 @@ static int mana_gd_detect_devices(struct pci_dev *pdev) return err ? err : -EPROTO; } - max_num_devs = min_t(u32, MAX_NUM_GDMA_DEVICES, resp.num_of_devs); - - for (i = 0; i < max_num_devs; i++) { + for (i = 0; i < GDMA_DEV_LIST_SIZE && + found_dev < resp.num_of_devs; i++) { dev = resp.devs[i]; dev_type = dev.type; + /* Skip empty devices */ + if (dev.as_uint32 == 0) + continue; + + found_dev++; + /* HWC is already detected in mana_hwc_create_channel(). */ if (dev_type == GDMA_DEVICE_HWC) continue; @@ -174,7 +183,7 @@ int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, return mana_hwc_send_request(hwc, req_len, req, resp_len, resp); } -EXPORT_SYMBOL_NS(mana_gd_send_request, NET_MANA); +EXPORT_SYMBOL_NS(mana_gd_send_request, "NET_MANA"); int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, struct gdma_mem_info *gmi) @@ -182,7 +191,7 @@ int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, dma_addr_t dma_handle; void *buf; - if (length < PAGE_SIZE || !is_power_of_2(length)) + if (length < MANA_PAGE_SIZE || !is_power_of_2(length)) return -EINVAL; gmi->dev = gc->dev; @@ -328,6 +337,7 @@ void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue) mana_gd_ring_doorbell(gc, queue->gdma_dev->doorbell, queue->type, queue->id, queue->head * GDMA_WQE_BU_SIZE, 0); } +EXPORT_SYMBOL_NS(mana_gd_wq_ring_doorbell, "NET_MANA"); void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit) { @@ -340,6 +350,7 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit) mana_gd_ring_doorbell(gc, cq->gdma_dev->doorbell, cq->type, cq->id, head, arm_bit); } +EXPORT_SYMBOL_NS(mana_gd_ring_cq, "NET_MANA"); static void mana_gd_process_eqe(struct gdma_queue *eq) { @@ -380,6 +391,8 @@ static void mana_gd_process_eqe(struct gdma_queue *eq) case GDMA_EQE_HWC_INIT_EQ_ID_DB: case GDMA_EQE_HWC_INIT_DATA: case GDMA_EQE_HWC_INIT_DONE: + case GDMA_EQE_HWC_SOC_SERVICE: + case GDMA_EQE_RNIC_QP_FATAL: if (!eq->eq.callback) break; @@ -662,8 +675,11 @@ int mana_gd_create_hwc_queue(struct gdma_dev *gd, gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); - if (err) + if (err) { + dev_err(gc->dev, "GDMA queue type: %d, size: %u, gdma memory allocation err: %d\n", + spec->type, spec->queue_size, err); goto free_q; + } queue->head = 0; queue->tail = 0; @@ -684,6 +700,8 @@ int mana_gd_create_hwc_queue(struct gdma_dev *gd, *queue_ptr = queue; return 0; out: + dev_err(gc->dev, "Failed to create queue type %d of size %u, err: %d\n", + spec->type, 
spec->queue_size, err); mana_gd_free_memory(gmi); free_q: kfree(queue); @@ -712,12 +730,12 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle) return 0; } -EXPORT_SYMBOL_NS(mana_gd_destroy_dma_region, NET_MANA); +EXPORT_SYMBOL_NS(mana_gd_destroy_dma_region, "NET_MANA"); static int mana_gd_create_dma_region(struct gdma_dev *gd, struct gdma_mem_info *gmi) { - unsigned int num_page = gmi->length / PAGE_SIZE; + unsigned int num_page = gmi->length / MANA_PAGE_SIZE; struct gdma_create_dma_region_req *req = NULL; struct gdma_create_dma_region_resp resp = {}; struct gdma_context *gc = gd->gdma_context; @@ -727,10 +745,10 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, int err; int i; - if (length < PAGE_SIZE || !is_power_of_2(length)) + if (length < MANA_PAGE_SIZE || !is_power_of_2(length)) return -EINVAL; - if (offset_in_page(gmi->virt_addr) != 0) + if (!MANA_PAGE_ALIGNED(gmi->virt_addr)) return -EINVAL; hwc = gc->hwc.driver_data; @@ -751,7 +769,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, req->page_addr_list_len = num_page; for (i = 0; i < num_page; i++) - req->page_addr_list[i] = gmi->dma_handle + i * PAGE_SIZE; + req->page_addr_list[i] = gmi->dma_handle + i * MANA_PAGE_SIZE; err = mana_gd_send_request(gc, req_msg_size, req, sizeof(resp), &resp); if (err) @@ -766,7 +784,13 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, } gmi->dma_region_handle = resp.dma_region_handle; + dev_dbg(gc->dev, "Created DMA region handle 0x%llx\n", + gmi->dma_region_handle); out: + if (err) + dev_dbg(gc->dev, + "Failed to create DMA region of length: %u, page_type: %d, status: 0x%x, err: %d\n", + length, req->gdma_page_type, resp.hdr.status, err); kfree(req); return err; } @@ -789,8 +813,11 @@ int mana_gd_create_mana_eq(struct gdma_dev *gd, gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); - if (err) + if (err) { + dev_err(gc->dev, "GDMA queue type: %d, size: %u, gdma memory allocation err: %d\n", + spec->type, spec->queue_size, err); goto free_q; + } err = mana_gd_create_dma_region(gd, gmi); if (err) @@ -811,12 +838,14 @@ int mana_gd_create_mana_eq(struct gdma_dev *gd, *queue_ptr = queue; return 0; out: + dev_err(gc->dev, "Failed to create queue type %d of size: %u, err: %d\n", + spec->type, spec->queue_size, err); mana_gd_free_memory(gmi); free_q: kfree(queue); return err; } -EXPORT_SYMBOL_NS(mana_gd_create_mana_eq, NET_MANA); +EXPORT_SYMBOL_NS(mana_gd_create_mana_eq, "NET_MANA"); int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, const struct gdma_queue_spec *spec, @@ -837,8 +866,11 @@ int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); - if (err) + if (err) { + dev_err(gc->dev, "GDMA queue type: %d, size: %u, memory allocation err: %d\n", + spec->type, spec->queue_size, err); goto free_q; + } err = mana_gd_create_dma_region(gd, gmi); if (err) @@ -858,11 +890,14 @@ int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, *queue_ptr = queue; return 0; out: + dev_err(gc->dev, "Failed to create queue type %d of size: %u, err: %d\n", + spec->type, spec->queue_size, err); mana_gd_free_memory(gmi); free_q: kfree(queue); return err; } +EXPORT_SYMBOL_NS(mana_gd_create_mana_wq_cq, "NET_MANA"); void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue) { @@ -893,7 +928,7 @@ void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue) mana_gd_free_memory(gmi); kfree(queue); } -EXPORT_SYMBOL_NS(mana_gd_destroy_queue, 
NET_MANA); +EXPORT_SYMBOL_NS(mana_gd_destroy_queue, "NET_MANA"); int mana_gd_verify_vf_version(struct pci_dev *pdev) { @@ -930,6 +965,7 @@ int mana_gd_verify_vf_version(struct pci_dev *pdev) err, resp.hdr.status); return err ? err : -EPROTO; } + gc->pf_cap_flags1 = resp.pf_cap_flags1; if (resp.pf_cap_flags1 & GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG) { err = mana_gd_query_hwc_timeout(pdev, &hwc->hwc_timeout); if (err) { @@ -970,7 +1006,6 @@ int mana_gd_register_device(struct gdma_dev *gd) return 0; } -EXPORT_SYMBOL_NS(mana_gd_register_device, NET_MANA); int mana_gd_deregister_device(struct gdma_dev *gd) { @@ -1001,7 +1036,6 @@ int mana_gd_deregister_device(struct gdma_dev *gd) return err; } -EXPORT_SYMBOL_NS(mana_gd_deregister_device, NET_MANA); u32 mana_gd_wq_avail_space(struct gdma_queue *wq) { @@ -1037,7 +1071,7 @@ static u32 mana_gd_write_client_oob(const struct gdma_wqe_request *wqe_req, header->inline_oob_size_div4 = client_oob_size / sizeof(u32); if (oob_in_sgl) { - WARN_ON_ONCE(!pad_data || wqe_req->num_sge < 2); + WARN_ON_ONCE(wqe_req->num_sge < 2); header->client_oob_in_sgl = 1; @@ -1144,6 +1178,7 @@ int mana_gd_post_work_request(struct gdma_queue *wq, return 0; } +EXPORT_SYMBOL_NS(mana_gd_post_work_request, "NET_MANA"); int mana_gd_post_and_ring(struct gdma_queue *queue, const struct gdma_wqe_request *wqe_req, @@ -1153,8 +1188,11 @@ int mana_gd_post_and_ring(struct gdma_queue *queue, int err; err = mana_gd_post_work_request(queue, wqe_req, wqe_info); - if (err) + if (err) { + dev_err(gc->dev, "Failed to post work req from queue type %d of size %u (err=%d)\n", + queue->type, queue->queue_size, err); return err; + } mana_gd_wq_ring_doorbell(gc, queue); @@ -1214,6 +1252,7 @@ int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe) return cqe_idx; } +EXPORT_SYMBOL_NS(mana_gd_poll_cq, "NET_MANA"); static irqreturn_t mana_gd_intr(int irq, void *arg) { @@ -1314,7 +1353,7 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) GFP_KERNEL); if (!gc->irq_contexts) { err = -ENOMEM; - goto free_irq_vector; + goto free_irq_array; } for (i = 0; i < nvec; i++) { @@ -1371,6 +1410,7 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) gc->max_num_msix = nvec; gc->num_msix_usable = nvec; cpus_read_unlock(); + kfree(irqs); return 0; free_irq: @@ -1383,8 +1423,9 @@ free_irq: } kfree(gc->irq_contexts); - kfree(irqs); gc->irq_contexts = NULL; +free_irq_array: + kfree(irqs); free_irq_vector: cpus_read_unlock(); pci_free_irq_vectors(pdev); @@ -1428,9 +1469,15 @@ static int mana_gd_setup(struct pci_dev *pdev) mana_gd_init_registers(pdev); mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base); + gc->service_wq = alloc_ordered_workqueue("gdma_service_wq", 0); + if (!gc->service_wq) + return -ENOMEM; + err = mana_gd_setup_irqs(pdev); - if (err) - return err; + if (err) { + dev_err(gc->dev, "Failed to setup IRQs: %d\n", err); + goto free_workqueue; + } err = mana_hwc_create_channel(gc); if (err) @@ -1448,12 +1495,16 @@ static int mana_gd_setup(struct pci_dev *pdev) if (err) goto destroy_hwc; + dev_dbg(&pdev->dev, "mana gdma setup successful\n"); return 0; destroy_hwc: mana_hwc_destroy_channel(gc); remove_irq: mana_gd_remove_irqs(pdev); +free_workqueue: + destroy_workqueue(gc->service_wq); + dev_err(&pdev->dev, "%s failed (error %d)\n", __func__, err); return err; } @@ -1464,6 +1515,9 @@ static void mana_gd_cleanup(struct pci_dev *pdev) mana_hwc_destroy_channel(gc); mana_gd_remove_irqs(pdev); + + destroy_workqueue(gc->service_wq); + dev_dbg(&pdev->dev, "mana gdma cleanup 
successful\n"); } static bool mana_is_pf(unsigned short dev_id) @@ -1482,8 +1536,10 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) BUILD_BUG_ON(2 * MAX_PORTS_IN_MANA_DEV * GDMA_EQE_SIZE > EQ_SIZE); err = pci_enable_device(pdev); - if (err) + if (err) { + dev_err(&pdev->dev, "Failed to enable pci device (err=%d)\n", err); return -ENXIO; + } pci_set_master(pdev); @@ -1492,14 +1548,11 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto disable_dev; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (err) - goto release_region; - - err = dma_set_max_seg_size(&pdev->dev, UINT_MAX); if (err) { - dev_err(&pdev->dev, "Failed to set dma device segment size\n"); + dev_err(&pdev->dev, "DMA set mask failed: %d\n", err); goto release_region; } + dma_set_max_seg_size(&pdev->dev, UINT_MAX); err = -ENOMEM; gc = vzalloc(sizeof(*gc)); @@ -1519,6 +1572,12 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) gc->bar0_va = bar0_va; gc->dev = &pdev->dev; + if (gc->is_pf) + gc->mana_pci_debugfs = debugfs_create_dir("0", mana_debugfs_root); + else + gc->mana_pci_debugfs = debugfs_create_dir(pci_slot_name(pdev->slot), + mana_debugfs_root); + err = mana_gd_setup(pdev); if (err) goto unmap_bar; @@ -1527,11 +1586,25 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto cleanup_gd; + err = mana_rdma_probe(&gc->mana_ib); + if (err) + goto cleanup_mana; + return 0; +cleanup_mana: + mana_remove(&gc->mana, false); cleanup_gd: mana_gd_cleanup(pdev); unmap_bar: + /* + * at this point we know that the other debugfs child dir/files + * are either not yet created or are already cleaned up. + * The pci debugfs folder clean-up now, will only be cleaning up + * adapter-MTU file and apc->mana_pci_debugfs folder. + */ + debugfs_remove_recursive(gc->mana_pci_debugfs); + gc->mana_pci_debugfs = NULL; pci_iounmap(pdev, bar0_va); free_gc: pci_set_drvdata(pdev, NULL); @@ -1548,16 +1621,23 @@ static void mana_gd_remove(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); + mana_rdma_remove(&gc->mana_ib); mana_remove(&gc->mana, false); mana_gd_cleanup(pdev); + debugfs_remove_recursive(gc->mana_pci_debugfs); + + gc->mana_pci_debugfs = NULL; + pci_iounmap(pdev, gc->bar0_va); vfree(gc); pci_release_regions(pdev); pci_disable_device(pdev); + + dev_dbg(&pdev->dev, "mana gdma remove successful\n"); } /* The 'state' parameter is not used. 
*/ @@ -1565,6 +1645,7 @@ static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state) { struct gdma_context *gc = pci_get_drvdata(pdev); + mana_rdma_remove(&gc->mana_ib); mana_remove(&gc->mana, true); mana_gd_cleanup(pdev); @@ -1589,6 +1670,10 @@ static int mana_gd_resume(struct pci_dev *pdev) if (err) return err; + err = mana_rdma_probe(&gc->mana_ib); + if (err) + return err; + return 0; } @@ -1599,10 +1684,15 @@ static void mana_gd_shutdown(struct pci_dev *pdev) dev_info(&pdev->dev, "Shutdown was called\n"); + mana_rdma_remove(&gc->mana_ib); mana_remove(&gc->mana, true); mana_gd_cleanup(pdev); + debugfs_remove_recursive(gc->mana_pci_debugfs); + + gc->mana_pci_debugfs = NULL; + pci_disable_device(pdev); } @@ -1622,7 +1712,32 @@ static struct pci_driver mana_driver = { .shutdown = mana_gd_shutdown, }; -module_pci_driver(mana_driver); +static int __init mana_driver_init(void) +{ + int err; + + mana_debugfs_root = debugfs_create_dir("mana", NULL); + + err = pci_register_driver(&mana_driver); + if (err) { + debugfs_remove(mana_debugfs_root); + mana_debugfs_root = NULL; + } + + return err; +} + +static void __exit mana_driver_exit(void) +{ + pci_unregister_driver(&mana_driver); + + debugfs_remove(mana_debugfs_root); + + mana_debugfs_root = NULL; +} + +module_init(mana_driver_init); +module_exit(mana_driver_exit); MODULE_DEVICE_TABLE(pci, mana_id_table); diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index 2729a2c5acf9..a8c4d8db75a5 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -3,6 +3,7 @@ #include <net/mana/gdma.h> #include <net/mana/hw_channel.h> +#include <linux/vmalloc.h> static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id) { @@ -51,9 +52,33 @@ static int mana_hwc_verify_resp_msg(const struct hwc_caller_ctx *caller_ctx, return 0; } +static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, + struct hwc_work_request *req) +{ + struct device *dev = hwc_rxq->hwc->dev; + struct gdma_sge *sge; + int err; + + sge = &req->sge; + sge->address = (u64)req->buf_sge_addr; + sge->mem_key = hwc_rxq->msg_buf->gpa_mkey; + sge->size = req->buf_len; + + memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); + req->wqe_req.sgl = sge; + req->wqe_req.num_sge = 1; + req->wqe_req.client_data_unit = 0; + + err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL); + if (err) + dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err); + return err; +} + static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, - const struct gdma_resp_hdr *resp_msg) + struct hwc_work_request *rx_req) { + const struct gdma_resp_hdr *resp_msg = rx_req->buf_va; struct hwc_caller_ctx *ctx; int err; @@ -61,6 +86,7 @@ static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, hwc->inflight_msg_res.map)) { dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", resp_msg->response.hwc_msg_id); + mana_hwc_post_rx_wqe(hwc->rxq, rx_req); return; } @@ -74,40 +100,25 @@ static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, memcpy(ctx->output_buf, resp_msg, resp_len); out: ctx->error = err; - complete(&ctx->comp_event); -} - -static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, - struct hwc_work_request *req) -{ - struct device *dev = hwc_rxq->hwc->dev; - struct gdma_sge *sge; - int err; - - sge = &req->sge; - sge->address = (u64)req->buf_sge_addr; - sge->mem_key = 
hwc_rxq->msg_buf->gpa_mkey; - sge->size = req->buf_len; - memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); - req->wqe_req.sgl = sge; - req->wqe_req.num_sge = 1; - req->wqe_req.client_data_unit = 0; + /* Must post rx wqe before complete(), otherwise the next rx may + * hit no_wqe error. + */ + mana_hwc_post_rx_wqe(hwc->rxq, rx_req); - err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL); - if (err) - dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err); - return err; + complete(&ctx->comp_event); } static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self, struct gdma_event *event) { + union hwc_init_soc_service_type service_data; struct hw_channel_context *hwc = ctx; struct gdma_dev *gd = hwc->gdma_dev; union hwc_init_type_data type_data; union hwc_init_eq_id_db eq_db; u32 type, val; + int ret; switch (event->type) { case GDMA_EQE_HWC_INIT_EQ_ID_DB: @@ -190,7 +201,24 @@ static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self, } break; + case GDMA_EQE_HWC_SOC_SERVICE: + service_data.as_uint32 = event->details[0]; + type = service_data.type; + + switch (type) { + case GDMA_SERVICE_TYPE_RDMA_SUSPEND: + case GDMA_SERVICE_TYPE_RDMA_RESUME: + ret = mana_rdma_service_event(gd->gdma_context, type); + if (ret) + dev_err(hwc->dev, "Failed to schedule adev service event: %d\n", + ret); + break; + default: + dev_warn(hwc->dev, "Received unknown SOC service type %u\n", type); + break; + } + break; default: dev_warn(hwc->dev, "Received unknown gdma event %u\n", event->type); /* Ignore unknown events, which should never happen. */ @@ -234,14 +262,12 @@ static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id, return; } - mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, resp); + mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, rx_req); - /* Do no longer use 'resp', because the buffer is posted to the HW - * in the below mana_hwc_post_rx_wqe(). + /* Can no longer use 'resp', because the buffer is posted to the HW + * in mana_hwc_handle_resp() above. 
*/ resp = NULL; - - mana_hwc_post_rx_wqe(hwc_rxq, rx_req); } static void mana_hwc_tx_event_handler(void *ctx, u32 gdma_txq_id, @@ -361,12 +387,12 @@ static int mana_hwc_create_cq(struct hw_channel_context *hwc, u16 q_depth, int err; eq_size = roundup_pow_of_two(GDMA_EQE_SIZE * q_depth); - if (eq_size < MINIMUM_SUPPORTED_PAGE_SIZE) - eq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + if (eq_size < MANA_MIN_QSIZE) + eq_size = MANA_MIN_QSIZE; cq_size = roundup_pow_of_two(GDMA_CQE_SIZE * q_depth); - if (cq_size < MINIMUM_SUPPORTED_PAGE_SIZE) - cq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + if (cq_size < MANA_MIN_QSIZE) + cq_size = MANA_MIN_QSIZE; hwc_cq = kzalloc(sizeof(*hwc_cq), GFP_KERNEL); if (!hwc_cq) @@ -428,12 +454,13 @@ static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, dma_buf->num_reqs = q_depth; - buf_size = PAGE_ALIGN(q_depth * max_msg_size); + buf_size = MANA_PAGE_ALIGN(q_depth * max_msg_size); gmi = &dma_buf->mem_info; err = mana_gd_alloc_memory(gc, buf_size, gmi); if (err) { - dev_err(hwc->dev, "Failed to allocate DMA buffer: %d\n", err); + dev_err(hwc->dev, "Failed to allocate DMA buffer size: %u, err %d\n", + buf_size, err); goto out; } @@ -496,8 +523,8 @@ static int mana_hwc_create_wq(struct hw_channel_context *hwc, else queue_size = roundup_pow_of_two(GDMA_MAX_SQE_SIZE * q_depth); - if (queue_size < MINIMUM_SUPPORTED_PAGE_SIZE) - queue_size = MINIMUM_SUPPORTED_PAGE_SIZE; + if (queue_size < MANA_MIN_QSIZE) + queue_size = MANA_MIN_QSIZE; hwc_wq = kzalloc(sizeof(*hwc_wq), GFP_KERNEL); if (!hwc_wq) @@ -522,6 +549,9 @@ static int mana_hwc_create_wq(struct hw_channel_context *hwc, out: if (err) mana_hwc_destroy_wq(hwc, hwc_wq); + + dev_err(hwc->dev, "Failed to create HWC queue size= %u type= %d err= %d\n", + queue_size, q_type, err); return err; } @@ -848,7 +878,7 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, } if (!wait_for_completion_timeout(&ctx->comp_event, - (msecs_to_jiffies(hwc->hwc_timeout) * HZ))) { + (msecs_to_jiffies(hwc->hwc_timeout)))) { dev_err(hwc->dev, "HWC: Request timed out!\n"); err = -ETIMEDOUT; goto out; diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c index 23b1521c0df9..d30721d4516f 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_bpf.c +++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c @@ -91,7 +91,7 @@ u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq, goto out; xdp_init_buff(xdp, PAGE_SIZE, &rxq->xdp_rxq); - xdp_prepare_buff(xdp, buf_va, XDP_PACKET_HEADROOM, pkt_len, false); + xdp_prepare_buff(xdp, buf_va, XDP_PACKET_HEADROOM, pkt_len, true); act = bpf_prog_run_xdp(prog, xdp); diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index d8af5e7e15b4..ccd2885c939e 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -3,6 +3,7 @@ #include <uapi/linux/bpf.h> +#include <linux/debugfs.h> #include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> @@ -12,6 +13,7 @@ #include <net/checksum.h> #include <net/ip6_checksum.h> +#include <net/netdev_lock.h> #include <net/page_pool/helpers.h> #include <net/xdp.h> @@ -30,16 +32,33 @@ static void mana_adev_idx_free(int idx) ida_free(&mana_adev_ida, idx); } +static ssize_t mana_dbg_q_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct gdma_queue *gdma_q = filp->private_data; + + return simple_read_from_buffer(buf, count, pos, 
gdma_q->queue_mem_ptr, + gdma_q->queue_size); +} + +static const struct file_operations mana_dbg_q_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = mana_dbg_q_read, +}; + /* Microsoft Azure Network Adapter (MANA) functions */ static int mana_open(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); int err; - err = mana_alloc_queues(ndev); - if (err) + + if (err) { + netdev_err(ndev, "%s failed to allocate queues: %d\n", __func__, err); return err; + } apc->port_is_up = true; @@ -48,7 +67,7 @@ static int mana_open(struct net_device *ndev) netif_carrier_on(ndev); netif_tx_wake_all_queues(ndev); - + netdev_dbg(ndev, "%s successful\n", __func__); return 0; } @@ -160,6 +179,9 @@ static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, return 0; frag_err: + if (net_ratelimit()) + netdev_err(apc->ndev, "Failed to map skb of size %u to DMA\n", + skb->len); for (i = sg_i - 1; i >= hsg; i--) dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], DMA_TO_DEVICE); @@ -240,6 +262,9 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (skb_cow_head(skb, MANA_HEADROOM)) goto tx_drop_count; + if (unlikely(ipv6_hopopt_jumbo_remove(skb))) + goto tx_drop_count; + txq = &apc->tx_qp[txq_idx].txq; gdma_sq = txq->gdma_sq; cq = &apc->tx_qp[txq_idx].tx_cq; @@ -481,7 +506,7 @@ static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb, struct sock *sk = skb->sk; int txq; - txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK]; + txq = apc->indir_table[hash & (apc->indir_table_sz - 1)]; if (txq != old_q && sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) @@ -511,7 +536,7 @@ static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb, } /* Release pre-allocated RX buffers */ -static void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc) +void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc) { struct device *dev; int i; @@ -599,12 +624,16 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, else *headroom = XDP_PACKET_HEADROOM; - *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; + *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); + + /* Using page pool in this case, so alloc_size is PAGE_SIZE */ + if (*alloc_size < PAGE_SIZE) + *alloc_size = PAGE_SIZE; *datasize = mtu + ETH_HLEN; } -static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) +int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_queues) { struct device *dev; struct page *page; @@ -618,7 +647,7 @@ static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) dev = mpc->ac->gdma_dev->gdma_context->dev; - num_rxb = mpc->num_queues * RX_BUFFERS_PER_QUEUE; + num_rxb = num_queues * mpc->rx_queue_size; WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n"); mpc->rxbufs_pre = kmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL); @@ -632,30 +661,16 @@ static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) mpc->rxbpre_total = 0; for (i = 0; i < num_rxb; i++) { - if (mpc->rxbpre_alloc_size > PAGE_SIZE) { - va = netdev_alloc_frag(mpc->rxbpre_alloc_size); - if (!va) - goto error; - - page = virt_to_head_page(va); - /* Check if the frag falls back to single page */ - if (compound_order(page) < - get_order(mpc->rxbpre_alloc_size)) { - put_page(page); - goto error; - } - } else { - page = dev_alloc_page(); - if (!page) - goto error; + page = dev_alloc_pages(get_order(mpc->rxbpre_alloc_size)); + if (!page) + goto error; - va 
= page_to_virt(page); - } + va = page_to_virt(page); da = dma_map_single(dev, va + mpc->rxbpre_headroom, mpc->rxbpre_datasize, DMA_FROM_DEVICE); if (dma_mapping_error(dev, da)) { - put_page(virt_to_head_page(va)); + put_page(page); goto error; } @@ -667,6 +682,7 @@ static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) return 0; error: + netdev_err(mpc->ndev, "Failed to pre-allocate RX buffers for %d queues\n", num_queues); mana_pre_dealloc_rxbufs(mpc); return -ENOMEM; } @@ -678,7 +694,7 @@ static int mana_change_mtu(struct net_device *ndev, int new_mtu) int err; /* Pre-allocate buffers to prevent failure in mana_attach later */ - err = mana_pre_alloc_rxbufs(mpc, new_mtu); + err = mana_pre_alloc_rxbufs(mpc, new_mtu, mpc->num_queues); if (err) { netdev_err(ndev, "Insufficient memory for new MTU\n"); return err; @@ -690,12 +706,12 @@ static int mana_change_mtu(struct net_device *ndev, int new_mtu) goto out; } - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); err = mana_attach(ndev); if (err) { netdev_err(ndev, "mana_attach failed: %d\n", err); - ndev->mtu = old_mtu; + WRITE_ONCE(ndev->mtu, old_mtu); } out: @@ -717,10 +733,23 @@ static const struct net_device_ops mana_devops = { static void mana_cleanup_port_context(struct mana_port_context *apc) { + /* + * make sure subsequent cleanup attempts don't end up removing already + * cleaned dentry pointer + */ + debugfs_remove(apc->mana_port_debugfs); + apc->mana_port_debugfs = NULL; kfree(apc->rxqs); apc->rxqs = NULL; } +static void mana_cleanup_indir_table(struct mana_port_context *apc) +{ + apc->indir_table_sz = 0; + kfree(apc->indir_table); + kfree(apc->rxobj_table); +} + static int mana_init_port_context(struct mana_port_context *apc) { apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *), @@ -892,7 +921,7 @@ static void mana_pf_deregister_filter(struct mana_port_context *apc) static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, u32 proto_minor_ver, u32 proto_micro_ver, - u16 *max_num_vports) + u16 *max_num_vports, u8 *bm_hostmode) { struct gdma_context *gc = ac->gdma_dev->gdma_context; struct mana_query_device_cfg_resp resp = {}; @@ -903,7 +932,7 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG, sizeof(req), sizeof(resp)); - req.hdr.resp.msg_version = GDMA_MESSAGE_V2; + req.hdr.resp.msg_version = GDMA_MESSAGE_V3; req.proto_major_ver = proto_major_ver; req.proto_minor_ver = proto_minor_ver; @@ -927,11 +956,18 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, *max_num_vports = resp.max_num_vports; - if (resp.hdr.response.msg_version == GDMA_MESSAGE_V2) + if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2) gc->adapter_mtu = resp.adapter_mtu; else gc->adapter_mtu = ETH_FRAME_LEN; + if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V3) + *bm_hostmode = resp.bm_hostmode; + else + *bm_hostmode = 0; + + debugfs_create_u16("adapter-MTU", 0400, gc->mana_pci_debugfs, &gc->adapter_mtu); + return 0; } @@ -962,7 +998,16 @@ static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index, *max_sq = resp.max_num_sq; *max_rq = resp.max_num_rq; - *num_indir_entry = resp.num_indirection_ent; + if (resp.num_indirection_ent > 0 && + resp.num_indirection_ent <= MANA_INDIRECT_TABLE_MAX_SIZE && + is_power_of_2(resp.num_indirection_ent)) { + *num_indir_entry = resp.num_indirection_ent; + } else { + netdev_warn(apc->ndev, + "Setting indirection table size to default %d 
for vPort %d\n", + MANA_INDIRECT_TABLE_DEF_SIZE, apc->port_idx); + *num_indir_entry = MANA_INDIRECT_TABLE_DEF_SIZE; + } apc->port_handle = resp.vport; ether_addr_copy(apc->mac_addr, resp.mac_addr); @@ -977,7 +1022,7 @@ void mana_uncfg_vport(struct mana_port_context *apc) WARN_ON(apc->vport_use_count < 0); mutex_unlock(&apc->vport_mutex); } -EXPORT_SYMBOL_NS(mana_uncfg_vport, NET_MANA); +EXPORT_SYMBOL_NS(mana_uncfg_vport, "NET_MANA"); int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, u32 doorbell_pg_id) @@ -1047,22 +1092,20 @@ out: return err; } -EXPORT_SYMBOL_NS(mana_cfg_vport, NET_MANA); +EXPORT_SYMBOL_NS(mana_cfg_vport, "NET_MANA"); static int mana_cfg_vport_steering(struct mana_port_context *apc, enum TRI_STATE rx, bool update_default_rxobj, bool update_key, bool update_tab) { - u16 num_entries = MANA_INDIRECT_TABLE_SIZE; struct mana_cfg_rx_steer_req_v2 *req; struct mana_cfg_rx_steer_resp resp = {}; struct net_device *ndev = apc->ndev; - mana_handle_t *req_indir_tab; u32 req_buf_size; int err; - req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries; + req_buf_size = struct_size(req, indir_tab, apc->indir_table_sz); req = kzalloc(req_buf_size, GFP_KERNEL); if (!req) return -ENOMEM; @@ -1073,8 +1116,9 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, req->hdr.req.msg_version = GDMA_MESSAGE_V2; req->vport = apc->port_handle; - req->num_indir_entries = num_entries; - req->indir_tab_offset = sizeof(*req); + req->num_indir_entries = apc->indir_table_sz; + req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2, + indir_tab); req->rx_enable = rx; req->rss_enable = apc->rss_state; req->update_default_rxobj = update_default_rxobj; @@ -1086,11 +1130,9 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, if (update_key) memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE); - if (update_tab) { - req_indir_tab = (mana_handle_t *)(req + 1); - memcpy(req_indir_tab, apc->rxobj_table, - req->num_indir_entries * sizeof(mana_handle_t)); - } + if (update_tab) + memcpy(req->indir_tab, apc->rxobj_table, + flex_array_size(req, indir_tab, req->num_indir_entries)); err = mana_send_request(apc->ac, req, req_buf_size, &resp, sizeof(resp)); @@ -1113,7 +1155,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, } netdev_info(ndev, "Configured steering vPort %llu entries %u\n", - apc->port_handle, num_entries); + apc->port_handle, apc->indir_table_sz); out: kfree(req); return err; @@ -1172,7 +1214,7 @@ int mana_create_wq_obj(struct mana_port_context *apc, out: return err; } -EXPORT_SYMBOL_NS(mana_create_wq_obj, NET_MANA); +EXPORT_SYMBOL_NS(mana_create_wq_obj, "NET_MANA"); void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, mana_handle_t wq_obj) @@ -1200,7 +1242,7 @@ void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err, resp.hdr.status); } -EXPORT_SYMBOL_NS(mana_destroy_wq_obj, NET_MANA); +EXPORT_SYMBOL_NS(mana_destroy_wq_obj, "NET_MANA"); static void mana_destroy_eq(struct mana_context *ac) { @@ -1211,6 +1253,9 @@ static void mana_destroy_eq(struct mana_context *ac) if (!ac->eqs) return; + debugfs_remove_recursive(ac->mana_eqs_debugfs); + ac->mana_eqs_debugfs = NULL; + for (i = 0; i < gc->max_num_queues; i++) { eq = ac->eqs[i].eq; if (!eq) @@ -1223,6 +1268,18 @@ static void mana_destroy_eq(struct mana_context *ac) ac->eqs = NULL; } +static void mana_create_eq_debugfs(struct mana_context *ac, int i) +{ + struct mana_eq eq = 
ac->eqs[i]; + char eqnum[32]; + + sprintf(eqnum, "eq%d", i); + eq.mana_eq_debugfs = debugfs_create_dir(eqnum, ac->mana_eqs_debugfs); + debugfs_create_u32("head", 0400, eq.mana_eq_debugfs, &eq.eq->head); + debugfs_create_u32("tail", 0400, eq.mana_eq_debugfs, &eq.eq->tail); + debugfs_create_file("eq_dump", 0400, eq.mana_eq_debugfs, eq.eq, &mana_dbg_q_fops); +} + static int mana_create_eq(struct mana_context *ac) { struct gdma_dev *gd = ac->gdma_dev; @@ -1243,11 +1300,16 @@ static int mana_create_eq(struct mana_context *ac) spec.eq.context = ac->eqs; spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE; + ac->mana_eqs_debugfs = debugfs_create_dir("EQs", gc->mana_pci_debugfs); + for (i = 0; i < gc->max_num_queues; i++) { spec.eq.msix_index = (i + 1) % gc->num_msix_usable; err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq); - if (err) + if (err) { + dev_err(gc->dev, "Failed to create EQ %d : %d\n", i, err); goto out; + } + mana_create_eq_debugfs(ac, i); } return 0; @@ -1488,8 +1550,12 @@ static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va, return NULL; if (xdp->data_hard_start) { + u32 metasize = xdp->data - xdp->data_meta; + skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); return skb; } @@ -1601,7 +1667,7 @@ drop: } static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, - dma_addr_t *da, bool *from_pool, bool is_napi) + dma_addr_t *da, bool *from_pool) { struct page *page; void *va; @@ -1612,21 +1678,6 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, if (rxq->xdp_save_va) { va = rxq->xdp_save_va; rxq->xdp_save_va = NULL; - } else if (rxq->alloc_size > PAGE_SIZE) { - if (is_napi) - va = napi_alloc_frag(rxq->alloc_size); - else - va = netdev_alloc_frag(rxq->alloc_size); - - if (!va) - return NULL; - - page = virt_to_head_page(va); - /* Check if the frag falls back to single page */ - if (compound_order(page) < get_order(rxq->alloc_size)) { - put_page(page); - return NULL; - } } else { page = page_pool_dev_alloc_pages(rxq->page_pool); if (!page) @@ -1659,7 +1710,7 @@ static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq, dma_addr_t da; void *va; - va = mana_get_rxfrag(rxq, dev, &da, &from_pool, true); + va = mana_get_rxfrag(rxq, dev, &da, &from_pool); if (!va) return; @@ -1775,7 +1826,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq) static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) { struct mana_cq *cq = context; - u8 arm_bit; int w; WARN_ON_ONCE(cq->gdma_cq != gdma_queue); @@ -1786,16 +1836,23 @@ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) mana_poll_tx_cq(cq); w = cq->work_done; - - if (w < cq->budget && - napi_complete_done(&cq->napi, w)) { - arm_bit = SET_ARM_BIT; - } else { - arm_bit = 0; + cq->work_done_since_doorbell += w; + + if (w < cq->budget) { + mana_gd_ring_cq(gdma_queue, SET_ARM_BIT); + cq->work_done_since_doorbell = 0; + napi_complete_done(&cq->napi, w); + } else if (cq->work_done_since_doorbell > + cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) { + /* MANA hardware requires at least one doorbell ring every 8 + * wraparounds of CQ even if there is no need to arm the CQ. + * This driver rings the doorbell as soon as we have exceeded + * 4 wraparounds. 
+ */ + mana_gd_ring_cq(gdma_queue, 0); + cq->work_done_since_doorbell = 0; } - mana_gd_ring_cq(gdma_queue, arm_bit); - return w; } @@ -1848,11 +1905,16 @@ static void mana_destroy_txq(struct mana_port_context *apc) return; for (i = 0; i < apc->num_queues; i++) { - napi = &apc->tx_qp[i].tx_cq.napi; - napi_synchronize(napi); - napi_disable(napi); - netif_napi_del(napi); + debugfs_remove_recursive(apc->tx_qp[i].mana_tx_debugfs); + apc->tx_qp[i].mana_tx_debugfs = NULL; + napi = &apc->tx_qp[i].tx_cq.napi; + if (apc->tx_qp[i].txq.napi_initialized) { + napi_synchronize(napi); + napi_disable(napi); + netif_napi_del(napi); + apc->tx_qp[i].txq.napi_initialized = false; + } mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq); @@ -1864,6 +1926,31 @@ static void mana_destroy_txq(struct mana_port_context *apc) apc->tx_qp = NULL; } +static void mana_create_txq_debugfs(struct mana_port_context *apc, int idx) +{ + struct mana_tx_qp *tx_qp = &apc->tx_qp[idx]; + char qnum[32]; + + sprintf(qnum, "TX-%d", idx); + tx_qp->mana_tx_debugfs = debugfs_create_dir(qnum, apc->mana_port_debugfs); + debugfs_create_u32("sq_head", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->txq.gdma_sq->head); + debugfs_create_u32("sq_tail", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->txq.gdma_sq->tail); + debugfs_create_u32("sq_pend_skb_qlen", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->txq.pending_skbs.qlen); + debugfs_create_u32("cq_head", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->tx_cq.gdma_cq->head); + debugfs_create_u32("cq_tail", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->tx_cq.gdma_cq->tail); + debugfs_create_u32("cq_budget", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->tx_cq.budget); + debugfs_create_file("txq_dump", 0400, tx_qp->mana_tx_debugfs, + tx_qp->txq.gdma_sq, &mana_dbg_q_fops); + debugfs_create_file("cq_dump", 0400, tx_qp->mana_tx_debugfs, + tx_qp->tx_cq.gdma_cq, &mana_dbg_q_fops); +} + static int mana_create_txq(struct mana_port_context *apc, struct net_device *net) { @@ -1886,15 +1973,17 @@ static int mana_create_txq(struct mana_port_context *apc, return -ENOMEM; /* The minimum size of the WQE is 32 bytes, hence - * MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs + * apc->tx_queue_size represents the maximum number of WQEs * the SQ can store. This value is then used to size other queues * to prevent overflow. 
+ * Also note that the txq_size is always going to be MANA_PAGE_ALIGNED, + * as min val of apc->tx_queue_size is 128 and that would make + * txq_size 128*32 = 4096 and the other higher values of apc->tx_queue_size + * are always power of two */ - txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32; - BUILD_BUG_ON(!PAGE_ALIGNED(txq_size)); + txq_size = apc->tx_queue_size * 32; - cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE; - cq_size = PAGE_ALIGN(cq_size); + cq_size = apc->tx_queue_size * COMP_ENTRY_SIZE; gc = gd->gdma_context; @@ -1908,6 +1997,7 @@ static int mana_create_txq(struct mana_port_context *apc, txq->ndev = net; txq->net_txq = netdev_get_tx_queue(net, i); txq->vp_offset = apc->tx_vp_offset; + txq->napi_initialized = false; skb_queue_head_init(&txq->pending_skbs); memset(&spec, 0, sizeof(spec)); @@ -1972,20 +2062,25 @@ static int mana_create_txq(struct mana_port_context *apc, gc->cq_table[cq->gdma_id] = cq->gdma_cq; + mana_create_txq_debugfs(apc, i); + netif_napi_add_tx(net, &cq->napi, mana_poll); napi_enable(&cq->napi); + txq->napi_initialized = true; mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); } return 0; out: + netdev_err(net, "Failed to create %d TX queues, %d\n", + apc->num_queues, err); mana_destroy_txq(apc); return err; } static void mana_destroy_rxq(struct mana_port_context *apc, - struct mana_rxq *rxq, bool validate_state) + struct mana_rxq *rxq, bool napi_initialized) { struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; @@ -1998,17 +2093,20 @@ static void mana_destroy_rxq(struct mana_port_context *apc, if (!rxq) return; + debugfs_remove_recursive(rxq->mana_rx_debugfs); + rxq->mana_rx_debugfs = NULL; + napi = &rxq->rx_cq.napi; - if (validate_state) + if (napi_initialized) { napi_synchronize(napi); - napi_disable(napi); + napi_disable(napi); + netif_napi_del(napi); + } xdp_rxq_info_unreg(&rxq->xdp_rxq); - netif_napi_del(napi); - mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); mana_deinit_cq(apc, &rxq->rx_cq); @@ -2054,7 +2152,7 @@ static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key, if (mpc->rxbufs_pre) va = mana_get_rxbuf_pre(rxq, &da); else - va = mana_get_rxfrag(rxq, dev, &da, &from_pool, false); + va = mana_get_rxfrag(rxq, dev, &da, &from_pool); if (!va) return -ENOMEM; @@ -2132,13 +2230,15 @@ static int mana_push_wqe(struct mana_rxq *rxq) static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc) { + struct mana_port_context *mpc = netdev_priv(rxq->ndev); struct page_pool_params pprm = {}; int ret; - pprm.pool_size = RX_BUFFERS_PER_QUEUE; + pprm.pool_size = mpc->rx_queue_size; pprm.nid = gc->numa_node; pprm.napi = &rxq->rx_cq.napi; pprm.netdev = rxq->ndev; + pprm.order = get_order(rxq->alloc_size); rxq->page_pool = page_pool_create(&pprm); @@ -2167,13 +2267,13 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, gc = gd->gdma_context; - rxq = kzalloc(struct_size(rxq, rx_oobs, RX_BUFFERS_PER_QUEUE), + rxq = kzalloc(struct_size(rxq, rx_oobs, apc->rx_queue_size), GFP_KERNEL); if (!rxq) return NULL; rxq->ndev = ndev; - rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE; + rxq->num_rx_buf = apc->rx_queue_size; rxq->rxq_idx = rxq_idx; rxq->rxobj = INVALID_MANA_HANDLE; @@ -2191,8 +2291,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, if (err) goto out; - rq_size = PAGE_ALIGN(rq_size); - cq_size = PAGE_ALIGN(cq_size); + rq_size = MANA_PAGE_ALIGN(rq_size); + cq_size = MANA_PAGE_ALIGN(cq_size); /* Create RQ */ memset(&spec, 0, sizeof(spec)); @@ -2278,6 +2378,28 @@ out: return NULL; } +static 
void mana_create_rxq_debugfs(struct mana_port_context *apc, int idx) +{ + struct mana_rxq *rxq; + char qnum[32]; + + rxq = apc->rxqs[idx]; + + sprintf(qnum, "RX-%d", idx); + rxq->mana_rx_debugfs = debugfs_create_dir(qnum, apc->mana_port_debugfs); + debugfs_create_u32("rq_head", 0400, rxq->mana_rx_debugfs, &rxq->gdma_rq->head); + debugfs_create_u32("rq_tail", 0400, rxq->mana_rx_debugfs, &rxq->gdma_rq->tail); + debugfs_create_u32("rq_nbuf", 0400, rxq->mana_rx_debugfs, &rxq->num_rx_buf); + debugfs_create_u32("cq_head", 0400, rxq->mana_rx_debugfs, + &rxq->rx_cq.gdma_cq->head); + debugfs_create_u32("cq_tail", 0400, rxq->mana_rx_debugfs, + &rxq->rx_cq.gdma_cq->tail); + debugfs_create_u32("cq_budget", 0400, rxq->mana_rx_debugfs, &rxq->rx_cq.budget); + debugfs_create_file("rxq_dump", 0400, rxq->mana_rx_debugfs, rxq->gdma_rq, &mana_dbg_q_fops); + debugfs_create_file("cq_dump", 0400, rxq->mana_rx_debugfs, rxq->rx_cq.gdma_cq, + &mana_dbg_q_fops); +} + static int mana_add_rx_queues(struct mana_port_context *apc, struct net_device *ndev) { @@ -2290,12 +2412,15 @@ static int mana_add_rx_queues(struct mana_port_context *apc, rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev); if (!rxq) { err = -ENOMEM; + netdev_err(ndev, "Failed to create rxq %d : %d\n", i, err); goto out; } u64_stats_init(&rxq->stats.syncp); apc->rxqs[i] = rxq; + + mana_create_rxq_debugfs(apc, i); } apc->default_rxobj = apc->rxqs[0]->rxobj; @@ -2321,7 +2446,7 @@ static void mana_destroy_vport(struct mana_port_context *apc) mana_destroy_txq(apc); mana_uncfg_vport(apc); - if (gd->gdma_context->is_pf) + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) mana_pf_deregister_hw_vport(apc); } @@ -2333,7 +2458,7 @@ static int mana_create_vport(struct mana_port_context *apc, apc->default_rxobj = INVALID_MANA_HANDLE; - if (gd->gdma_context->is_pf) { + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) { err = mana_pf_register_hw_vport(apc); if (err) return err; @@ -2346,11 +2471,33 @@ static int mana_create_vport(struct mana_port_context *apc, return mana_create_txq(apc, net); } +static int mana_rss_table_alloc(struct mana_port_context *apc) +{ + if (!apc->indir_table_sz) { + netdev_err(apc->ndev, + "Indirection table size not set for vPort %d\n", + apc->port_idx); + return -EINVAL; + } + + apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL); + if (!apc->indir_table) + return -ENOMEM; + + apc->rxobj_table = kcalloc(apc->indir_table_sz, sizeof(mana_handle_t), GFP_KERNEL); + if (!apc->rxobj_table) { + kfree(apc->indir_table); + return -ENOMEM; + } + + return 0; +} + static void mana_rss_table_init(struct mana_port_context *apc) { int i; - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + for (i = 0; i < apc->indir_table_sz; i++) apc->indir_table[i] = ethtool_rxfh_indir_default(i, apc->num_queues); } @@ -2363,7 +2510,7 @@ int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, int i; if (update_tab) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + for (i = 0; i < apc->indir_table_sz; i++) { queue_idx = apc->indir_table[i]; apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj; } @@ -2387,6 +2534,7 @@ void mana_query_gf_stats(struct mana_port_context *apc) mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_GF_STAT, sizeof(req), sizeof(resp)); + req.hdr.resp.msg_version = GDMA_MESSAGE_V2; req.req_stats = STATISTICS_FLAGS_RX_DISCARDS_NO_WQE | STATISTICS_FLAGS_RX_ERRORS_VPORT_DISABLED | STATISTICS_FLAGS_HC_RX_BYTES | @@ -2466,17 +2614,21 @@ void mana_query_gf_stats(struct mana_port_context *apc) static int 
mana_init_port(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); + struct gdma_dev *gd = apc->ac->gdma_dev; u32 max_txq, max_rxq, max_queues; int port_idx = apc->port_idx; - u32 num_indirect_entries; + struct gdma_context *gc; + char vport[32]; int err; err = mana_init_port_context(apc); if (err) return err; + gc = gd->gdma_context; + err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq, - &num_indirect_entries); + &apc->indir_table_sz); if (err) { netdev_err(ndev, "Failed to query info for vPort %d\n", port_idx); @@ -2491,12 +2643,12 @@ static int mana_init_port(struct net_device *ndev) apc->num_queues = apc->max_queues; eth_hw_addr_set(ndev, apc->mac_addr); - + sprintf(vport, "vport%d", port_idx); + apc->mana_port_debugfs = debugfs_create_dir(vport, gc->mana_pci_debugfs); return 0; reset_apc: - kfree(apc->rxqs); - apc->rxqs = NULL; + mana_cleanup_port_context(apc); return err; } @@ -2507,12 +2659,18 @@ int mana_alloc_queues(struct net_device *ndev) int err; err = mana_create_vport(apc, ndev); - if (err) + if (err) { + netdev_err(ndev, "Failed to create vPort %u : %d\n", apc->port_idx, err); return err; + } err = netif_set_real_num_tx_queues(ndev, apc->num_queues); - if (err) + if (err) { + netdev_err(ndev, + "netif_set_real_num_tx_queues () failed for ndev with num_queues %u : %d\n", + apc->num_queues, err); goto destroy_vport; + } err = mana_add_rx_queues(apc, ndev); if (err) @@ -2521,16 +2679,22 @@ int mana_alloc_queues(struct net_device *ndev) apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE; err = netif_set_real_num_rx_queues(ndev, apc->num_queues); - if (err) + if (err) { + netdev_err(ndev, + "netif_set_real_num_rx_queues () failed for ndev with num_queues %u : %d\n", + apc->num_queues, err); goto destroy_vport; + } mana_rss_table_init(apc); err = mana_config_rss(apc, TRI_STATE_TRUE, true, true); - if (err) + if (err) { + netdev_err(ndev, "Failed to configure RSS table: %d\n", err); goto destroy_vport; + } - if (gd->gdma_context->is_pf) { + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) { err = mana_pf_register_filter(apc); if (err) goto destroy_vport; @@ -2592,7 +2756,7 @@ static int mana_dealloc_queues(struct net_device *ndev) mana_chn_setxdp(apc, NULL); - if (gd->gdma_context->is_pf) + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) mana_pf_deregister_filter(apc); /* No packet can be transmitted now since apc->port_is_up is false. 
@@ -2669,8 +2833,10 @@ int mana_detach(struct net_device *ndev, bool from_close) if (apc->port_st_save) { err = mana_dealloc_queues(ndev); - if (err) + if (err) { + netdev_err(ndev, "%s failed to deallocate queues: %d\n", __func__, err); return err; + } } if (!from_close) { @@ -2701,6 +2867,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, apc->ndev = ndev; apc->max_queues = gc->max_num_queues; apc->num_queues = gc->max_num_queues; + apc->tx_queue_size = DEF_TX_BUFFERS_PER_QUEUE; + apc->rx_queue_size = DEF_RX_BUFFERS_PER_QUEUE; apc->port_handle = INVALID_MANA_HANDLE; apc->pf_filter_handle = INVALID_MANA_HANDLE; apc->port_idx = port_idx; @@ -2717,6 +2885,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, ndev->dev_port = port_idx; SET_NETDEV_DEV(ndev, gc->dev); + netif_set_tso_max_size(ndev, GSO_MAX_SIZE); + netif_carrier_off(ndev); netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE); @@ -2725,6 +2895,10 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, if (err) goto free_net; + err = mana_rss_table_alloc(apc); + if (err) + goto reset_apc; + netdev_lockdep_set_classes(ndev); ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; @@ -2741,14 +2915,15 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, err = register_netdev(ndev); if (err) { netdev_err(ndev, "Unable to register netdev.\n"); - goto reset_apc; + goto free_indir; } return 0; +free_indir: + mana_cleanup_indir_table(apc); reset_apc: - kfree(apc->rxqs); - apc->rxqs = NULL; + mana_cleanup_port_context(apc); free_net: *ndev_storage = NULL; netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err); @@ -2775,7 +2950,7 @@ static void remove_adev(struct gdma_dev *gd) gd->adev = NULL; } -static int add_adev(struct gdma_dev *gd) +static int add_adev(struct gdma_dev *gd, const char *name) { struct auxiliary_device *adev; struct mana_adev *madev; @@ -2791,7 +2966,7 @@ static int add_adev(struct gdma_dev *gd) goto idx_fail; adev->id = ret; - adev->name = "rdma"; + adev->name = name; adev->dev.parent = gd->gdma_context->dev; adev->dev.release = adev_release; madev->mdev = gd; @@ -2800,11 +2975,15 @@ static int add_adev(struct gdma_dev *gd) if (ret) goto init_fail; + /* madev is owned by the auxiliary device */ + madev = NULL; ret = auxiliary_device_add(adev); if (ret) goto add_fail; gd->adev = adev; + dev_dbg(gd->gdma_context->dev, + "Auxiliary device added successfully\n"); return 0; add_fail: @@ -2819,11 +2998,76 @@ idx_fail: return ret; } +static void mana_rdma_service_handle(struct work_struct *work) +{ + struct mana_service_work *serv_work = + container_of(work, struct mana_service_work, work); + struct gdma_dev *gd = serv_work->gdma_dev; + struct device *dev = gd->gdma_context->dev; + int ret; + + if (READ_ONCE(gd->rdma_teardown)) + goto out; + + switch (serv_work->event) { + case GDMA_SERVICE_TYPE_RDMA_SUSPEND: + if (!gd->adev || gd->is_suspended) + break; + + remove_adev(gd); + gd->is_suspended = true; + break; + + case GDMA_SERVICE_TYPE_RDMA_RESUME: + if (!gd->is_suspended) + break; + + ret = add_adev(gd, "rdma"); + if (ret) + dev_err(dev, "Failed to add adev on resume: %d\n", ret); + else + gd->is_suspended = false; + break; + + default: + dev_warn(dev, "unknown adev service event %u\n", + serv_work->event); + break; + } + +out: + kfree(serv_work); +} + +int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event) +{ + struct gdma_dev *gd = &gc->mana_ib; + struct mana_service_work *serv_work; + + if 
(gd->dev_id.type != GDMA_DEVICE_MANA_IB) { + /* RDMA device is not detected on pci */ + return 0; + } + + serv_work = kzalloc(sizeof(*serv_work), GFP_ATOMIC); + if (!serv_work) + return -ENOMEM; + + serv_work->event = event; + serv_work->gdma_dev = gd; + + INIT_WORK(&serv_work->work, mana_rdma_service_handle); + queue_work(gc->service_wq, &serv_work->work); + + return 0; +} + int mana_probe(struct gdma_dev *gd, bool resuming) { struct gdma_context *gc = gd->gdma_context; struct mana_context *ac = gd->driver_data; struct device *dev = gc->dev; + u8 bm_hostmode = 0; u16 num_ports = 0; int err; int i; @@ -2846,14 +3090,18 @@ int mana_probe(struct gdma_dev *gd, bool resuming) } err = mana_create_eq(ac); - if (err) + if (err) { + dev_err(dev, "Failed to create EQs: %d\n", err); goto out; + } err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, - MANA_MICRO_VERSION, &num_ports); + MANA_MICRO_VERSION, &num_ports, &bm_hostmode); if (err) goto out; + ac->bm_hostmode = bm_hostmode; + if (!resuming) { ac->num_ports = num_ports; } else { @@ -2874,23 +3122,43 @@ int mana_probe(struct gdma_dev *gd, bool resuming) if (!resuming) { for (i = 0; i < ac->num_ports; i++) { err = mana_probe_port(ac, i, &ac->ports[i]); - if (err) + /* we log the port for which the probe failed and stop + * probes for subsequent ports. + * Note that we keep running ports, for which the probes + * were successful, unless add_adev fails too + */ + if (err) { + dev_err(dev, "Probe Failed for port %d\n", i); break; + } } } else { for (i = 0; i < ac->num_ports; i++) { rtnl_lock(); err = mana_attach(ac->ports[i]); rtnl_unlock(); - if (err) + /* we log the port for which the attach failed and stop + * attach for subsequent ports + * Note that we keep running ports, for which the attach + * were successful, unless add_adev fails too + */ + if (err) { + dev_err(dev, "Attach Failed for port %d\n", i); break; + } } } - err = add_adev(gd); + err = add_adev(gd, "eth"); out: - if (err) + if (err) { mana_remove(gd, false); + } else { + dev_dbg(dev, "gd=%p, id=%u, num_ports=%d, type=%u, instance=%u\n", + gd, gd->dev_id.as_uint32, ac->num_ports, + gd->dev_id.type, gd->dev_id.instance); + dev_dbg(dev, "%s succeeded\n", __func__); + } return err; } @@ -2899,6 +3167,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) { struct gdma_context *gc = gd->gdma_context; struct mana_context *ac = gd->driver_data; + struct mana_port_context *apc; struct device *dev = gc->dev; struct net_device *ndev; int err; @@ -2910,6 +3179,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) for (i = 0; i < ac->num_ports; i++) { ndev = ac->ports[i]; + apc = netdev_priv(ndev); if (!ndev) { if (i == 0) dev_err(dev, "No net device to remove\n"); @@ -2933,6 +3203,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) } unregister_netdevice(ndev); + mana_cleanup_indir_table(apc); rtnl_unlock(); @@ -2949,4 +3220,68 @@ out: gd->driver_data = NULL; gd->gdma_context = NULL; kfree(ac); + dev_dbg(dev, "%s succeeded\n", __func__); +} + +int mana_rdma_probe(struct gdma_dev *gd) +{ + int err = 0; + + if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) { + /* RDMA device is not detected on pci */ + return err; + } + + err = mana_gd_register_device(gd); + if (err) + return err; + + err = add_adev(gd, "rdma"); + if (err) + mana_gd_deregister_device(gd); + + return err; +} + +void mana_rdma_remove(struct gdma_dev *gd) +{ + struct gdma_context *gc = gd->gdma_context; + + if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) { + /* RDMA device is not detected on pci */ + 
return; + } + + WRITE_ONCE(gd->rdma_teardown, true); + flush_workqueue(gc->service_wq); + + if (gd->adev) + remove_adev(gd); + + mana_gd_deregister_device(gd); +} + +struct net_device *mana_get_primary_netdev(struct mana_context *ac, + u32 port_index, + netdevice_tracker *tracker) +{ + struct net_device *ndev; + + if (port_index >= ac->num_ports) + return NULL; + + rcu_read_lock(); + + /* If mana is used in netvsc, the upper netdevice should be returned. */ + ndev = netdev_master_upper_dev_get_rcu(ac->ports[port_index]); + + /* If there is no upper device, use the parent Ethernet device */ + if (!ndev) + ndev = ac->ports[port_index]; + + netdev_hold(ndev, tracker, GFP_ATOMIC); + rcu_read_unlock(); + + return ndev; } +EXPORT_SYMBOL_NS(mana_get_primary_netdev, "NET_MANA"); diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index ab2413d71f6c..c419626073f5 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -91,53 +91,34 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { struct mana_port_context *apc = netdev_priv(ndev); unsigned int num_queues = apc->num_queues; - u8 *p = data; int i; if (stringset != ETH_SS_STATS) return; - for (i = 0; i < ARRAY_SIZE(mana_eth_stats); i++) { - memcpy(p, mana_eth_stats[i].name, ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < ARRAY_SIZE(mana_eth_stats); i++) + ethtool_puts(&data, mana_eth_stats[i].name); for (i = 0; i < num_queues; i++) { - sprintf(p, "rx_%d_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_%d_bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_%d_xdp_drop", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_%d_xdp_tx", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_%d_xdp_redirect", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&data, "rx_%d_packets", i); + ethtool_sprintf(&data, "rx_%d_bytes", i); + ethtool_sprintf(&data, "rx_%d_xdp_drop", i); + ethtool_sprintf(&data, "rx_%d_xdp_tx", i); + ethtool_sprintf(&data, "rx_%d_xdp_redirect", i); } for (i = 0; i < num_queues; i++) { - sprintf(p, "tx_%d_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_xdp_xmit", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_tso_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_tso_bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_tso_inner_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_tso_inner_bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_long_pkt_fmt", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_short_pkt_fmt", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_csum_partial", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_mana_map_err", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&data, "tx_%d_packets", i); + ethtool_sprintf(&data, "tx_%d_bytes", i); + ethtool_sprintf(&data, "tx_%d_xdp_xmit", i); + ethtool_sprintf(&data, "tx_%d_tso_packets", i); + ethtool_sprintf(&data, "tx_%d_tso_bytes", i); + ethtool_sprintf(&data, "tx_%d_tso_inner_packets", i); + ethtool_sprintf(&data, "tx_%d_tso_inner_bytes", i); + ethtool_sprintf(&data, "tx_%d_long_pkt_fmt", i); + ethtool_sprintf(&data, "tx_%d_short_pkt_fmt", i); + ethtool_sprintf(&data, "tx_%d_csum_partial", i); + ethtool_sprintf(&data, "tx_%d_mana_map_err", i); } } @@ -245,7 +226,9 @@ static u32 mana_get_rxfh_key_size(struct net_device *ndev) static u32 mana_rss_indir_size(struct net_device *ndev) { - return MANA_INDIRECT_TABLE_SIZE; + 
struct mana_port_context *apc = netdev_priv(ndev); + + return apc->indir_table_sz; } static int mana_get_rxfh(struct net_device *ndev, @@ -257,7 +240,7 @@ static int mana_get_rxfh(struct net_device *ndev, rxfh->hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */ if (rxfh->indir) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + for (i = 0; i < apc->indir_table_sz; i++) rxfh->indir[i] = apc->indir_table[i]; } @@ -273,8 +256,8 @@ static int mana_set_rxfh(struct net_device *ndev, { struct mana_port_context *apc = netdev_priv(ndev); bool update_hash = false, update_table = false; - u32 save_table[MANA_INDIRECT_TABLE_SIZE]; u8 save_key[MANA_HASH_KEY_SIZE]; + u32 *save_table; int i, err; if (!apc->port_is_up) @@ -284,13 +267,19 @@ static int mana_set_rxfh(struct net_device *ndev, rxfh->hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; + save_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL); + if (!save_table) + return -ENOMEM; + if (rxfh->indir) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) - if (rxfh->indir[i] >= apc->num_queues) - return -EINVAL; + for (i = 0; i < apc->indir_table_sz; i++) + if (rxfh->indir[i] >= apc->num_queues) { + err = -EINVAL; + goto cleanup; + } update_table = true; - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + for (i = 0; i < apc->indir_table_sz; i++) { save_table[i] = apc->indir_table[i]; apc->indir_table[i] = rxfh->indir[i]; } @@ -306,7 +295,7 @@ static int mana_set_rxfh(struct net_device *ndev, if (err) { /* recover to original values */ if (update_table) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + for (i = 0; i < apc->indir_table_sz; i++) apc->indir_table[i] = save_table[i]; } @@ -316,6 +305,9 @@ static int mana_set_rxfh(struct net_device *ndev, mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table); } +cleanup: + kfree(save_table); + return err; } @@ -334,30 +326,113 @@ static int mana_set_channels(struct net_device *ndev, struct mana_port_context *apc = netdev_priv(ndev); unsigned int new_count = channels->combined_count; unsigned int old_count = apc->num_queues; - int err, err2; + int err; + + err = mana_pre_alloc_rxbufs(apc, ndev->mtu, new_count); + if (err) { + netdev_err(ndev, "Insufficient memory for new allocations"); + return err; + } err = mana_detach(ndev, false); if (err) { netdev_err(ndev, "mana_detach failed: %d\n", err); - return err; + goto out; } apc->num_queues = new_count; err = mana_attach(ndev); - if (!err) - return 0; + if (err) { + apc->num_queues = old_count; + netdev_err(ndev, "mana_attach failed: %d\n", err); + } + +out: + mana_pre_dealloc_rxbufs(apc); + return err; +} + +static void mana_get_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct mana_port_context *apc = netdev_priv(ndev); + + ring->rx_pending = apc->rx_queue_size; + ring->tx_pending = apc->tx_queue_size; + ring->rx_max_pending = MAX_RX_BUFFERS_PER_QUEUE; + ring->tx_max_pending = MAX_TX_BUFFERS_PER_QUEUE; +} + +static int mana_set_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct mana_port_context *apc = netdev_priv(ndev); + u32 new_tx, new_rx; + u32 old_tx, old_rx; + int err; - netdev_err(ndev, "mana_attach failed: %d\n", err); + old_tx = apc->tx_queue_size; + old_rx = apc->rx_queue_size; - /* Try to roll it back to the old configuration. 
*/ - apc->num_queues = old_count; - err2 = mana_attach(ndev); - if (err2) - netdev_err(ndev, "mana re-attach failed: %d\n", err2); + if (ring->tx_pending < MIN_TX_BUFFERS_PER_QUEUE) { + NL_SET_ERR_MSG_FMT(extack, "tx:%d less than the min:%d", ring->tx_pending, + MIN_TX_BUFFERS_PER_QUEUE); + return -EINVAL; + } + + if (ring->rx_pending < MIN_RX_BUFFERS_PER_QUEUE) { + NL_SET_ERR_MSG_FMT(extack, "rx:%d less than the min:%d", ring->rx_pending, + MIN_RX_BUFFERS_PER_QUEUE); + return -EINVAL; + } + + new_rx = roundup_pow_of_two(ring->rx_pending); + new_tx = roundup_pow_of_two(ring->tx_pending); + netdev_info(ndev, "Using nearest power of 2 values for Txq:%d Rxq:%d\n", + new_tx, new_rx); + + /* pre-allocating new buffers to prevent failures in mana_attach() later */ + apc->rx_queue_size = new_rx; + err = mana_pre_alloc_rxbufs(apc, ndev->mtu, apc->num_queues); + apc->rx_queue_size = old_rx; + if (err) { + netdev_err(ndev, "Insufficient memory for new allocations\n"); + return err; + } + + err = mana_detach(ndev, false); + if (err) { + netdev_err(ndev, "mana_detach failed: %d\n", err); + goto out; + } + apc->tx_queue_size = new_tx; + apc->rx_queue_size = new_rx; + + err = mana_attach(ndev); + if (err) { + netdev_err(ndev, "mana_attach failed: %d\n", err); + apc->tx_queue_size = old_tx; + apc->rx_queue_size = old_rx; + } +out: + mana_pre_dealloc_rxbufs(apc); return err; } +static int mana_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) +{ + cmd->base.duplex = DUPLEX_FULL; + cmd->base.port = PORT_OTHER; + + return 0; +} + const struct ethtool_ops mana_ethtool_ops = { .get_ethtool_stats = mana_get_ethtool_stats, .get_sset_count = mana_get_sset_count, @@ -369,4 +444,8 @@ const struct ethtool_ops mana_ethtool_ops = { .set_rxfh = mana_set_rxfh, .get_channels = mana_get_channels, .set_channels = mana_set_channels, + .get_ringparam = mana_get_ringparam, + .set_ringparam = mana_set_ringparam, + .get_link_ksettings = mana_get_link_ksettings, + .get_link = ethtool_op_get_link, }; diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.c b/drivers/net/ethernet/microsoft/mana/shm_channel.c index 5553af9c8085..0f1679ebad96 100644 --- a/drivers/net/ethernet/microsoft/mana/shm_channel.c +++ b/drivers/net/ethernet/microsoft/mana/shm_channel.c @@ -6,6 +6,7 @@ #include <linux/io.h> #include <linux/mm.h> +#include <net/mana/gdma.h> #include <net/mana/shm_channel.h> #define PAGE_FRAME_L48_WIDTH_BYTES 6 @@ -155,8 +156,8 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, return err; } - if (!PAGE_ALIGNED(eq_addr) || !PAGE_ALIGNED(cq_addr) || - !PAGE_ALIGNED(rq_addr) || !PAGE_ALIGNED(sq_addr)) + if (!MANA_PAGE_ALIGNED(eq_addr) || !MANA_PAGE_ALIGNED(cq_addr) || + !MANA_PAGE_ALIGNED(rq_addr) || !MANA_PAGE_ALIGNED(sq_addr)) return -EINVAL; if ((eq_msix_index & VECTOR_MASK) != eq_msix_index) @@ -183,7 +184,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* EQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(eq_addr); + frame_addr = MANA_PFN(eq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); @@ -191,7 +192,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* CQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(cq_addr); + frame_addr = MANA_PFN(cq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr 
 >> PAGE_FRAME_L48_WIDTH_BITS) <<
 		(frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS);
@@ -199,7 +200,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr,
 	/* RQ addr: low 48 bits of frame address */
 	shmem = (u64 *)ptr;
-	frame_addr = PHYS_PFN(rq_addr);
+	frame_addr = MANA_PFN(rq_addr);
 	*shmem = frame_addr & PAGE_FRAME_L48_MASK;
 	all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) <<
 		(frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS);
@@ -207,7 +208,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr,
 	/* SQ addr: low 48 bits of frame address */
 	shmem = (u64 *)ptr;
-	frame_addr = PHYS_PFN(sq_addr);
+	frame_addr = MANA_PFN(sq_addr);
 	*shmem = frame_addr & PAGE_FRAME_L48_MASK;
 	all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) <<
 		(frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS);
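
For reference, the ring-size policy introduced by the mana_set_ringparam() hunk above can be sketched in ordinary userspace C: requests below the minimum are rejected, and accepted values are rounded up to the next power of two before being programmed. This is only an illustrative sketch under stated assumptions, not driver code: the MIN_/MAX_ constants below are placeholders (the real values live in the mana headers), roundup_pow_of_two32() is a local stand-in for the kernel's roundup_pow_of_two(), and the clamp to the maximum is added here only to make the sketch self-contained, whereas the driver relies on the ethtool core to enforce the maximum advertised by get_ringparam().

#include <stdio.h>

#define MIN_RX_BUFFERS_PER_QUEUE 128u	/* placeholder, not the driver's value */
#define MAX_RX_BUFFERS_PER_QUEUE 8192u	/* placeholder, not the driver's value */

/* Local stand-in for the kernel's roundup_pow_of_two() on 32-bit values. */
static unsigned int roundup_pow_of_two32(unsigned int v)
{
	if (v <= 1)
		return 1;
	v--;
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;
	return v + 1;
}

/* Return the depth that would be programmed, or 0 if the request is too small. */
static unsigned int pick_ring_size(unsigned int requested)
{
	unsigned int rounded;

	if (requested < MIN_RX_BUFFERS_PER_QUEUE) {
		fprintf(stderr, "rx:%u less than the min:%u\n",
			requested, MIN_RX_BUFFERS_PER_QUEUE);
		return 0;
	}

	rounded = roundup_pow_of_two32(requested);

	/* The driver leaves the upper bound to the ethtool core; clamp here
	 * only so the sketch stands alone.
	 */
	if (rounded > MAX_RX_BUFFERS_PER_QUEUE)
		rounded = MAX_RX_BUFFERS_PER_QUEUE;

	return rounded;
}

int main(void)
{
	const unsigned int requests[] = { 100, 512, 600, 3000, 9000 };
	unsigned int i;

	for (i = 0; i < sizeof(requests) / sizeof(requests[0]); i++)
		printf("requested %u -> programmed %u\n",
		       requests[i], pick_ring_size(requests[i]));

	return 0;
}

Built with a plain cc, this prints the depth each request would map to, mirroring the netdev_info() message the hunk emits when it rounds a requested Tx/Rx depth to the nearest power of two.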