diff options
Diffstat (limited to 'drivers/net/ethernet/microsoft/mana')
-rw-r--r-- | drivers/net/ethernet/microsoft/mana/gdma_main.c | 187 | ||||
-rw-r--r-- | drivers/net/ethernet/microsoft/mana/hw_channel.c | 79 | ||||
-rw-r--r-- | drivers/net/ethernet/microsoft/mana/mana_en.c | 331 | ||||
-rw-r--r-- | drivers/net/ethernet/microsoft/mana/mana_ethtool.c | 189 | ||||
-rw-r--r-- | drivers/net/ethernet/microsoft/mana/shm_channel.c | 13 |
5 files changed, 605 insertions, 194 deletions
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index d33b27214539..638ef64d639f 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Copyright (c) 2021, Microsoft Corporation. */ +#include <linux/debugfs.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/utsname.h> @@ -8,6 +9,8 @@ #include <net/mana/mana.h> +struct dentry *mana_debugfs_root; + static u32 mana_gd_r32(struct gdma_context *g, u64 offset) { return readl(g->bar0_va + offset); @@ -131,9 +134,10 @@ static int mana_gd_detect_devices(struct pci_dev *pdev) struct gdma_list_devices_resp resp = {}; struct gdma_general_req req = {}; struct gdma_dev_id dev; - u32 i, max_num_devs; + int found_dev = 0; u16 dev_type; int err; + u32 i; mana_gd_init_req_hdr(&req.hdr, GDMA_LIST_DEVICES, sizeof(req), sizeof(resp)); @@ -145,12 +149,17 @@ static int mana_gd_detect_devices(struct pci_dev *pdev) return err ? err : -EPROTO; } - max_num_devs = min_t(u32, MAX_NUM_GDMA_DEVICES, resp.num_of_devs); - - for (i = 0; i < max_num_devs; i++) { + for (i = 0; i < GDMA_DEV_LIST_SIZE && + found_dev < resp.num_of_devs; i++) { dev = resp.devs[i]; dev_type = dev.type; + /* Skip empty devices */ + if (dev.as_uint32 == 0) + continue; + + found_dev++; + /* HWC is already detected in mana_hwc_create_channel(). */ if (dev_type == GDMA_DEVICE_HWC) continue; @@ -174,7 +183,7 @@ int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, return mana_hwc_send_request(hwc, req_len, req, resp_len, resp); } -EXPORT_SYMBOL_NS(mana_gd_send_request, NET_MANA); +EXPORT_SYMBOL_NS(mana_gd_send_request, "NET_MANA"); int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, struct gdma_mem_info *gmi) @@ -182,7 +191,7 @@ int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, dma_addr_t dma_handle; void *buf; - if (length < PAGE_SIZE || !is_power_of_2(length)) + if (length < MANA_PAGE_SIZE || !is_power_of_2(length)) return -EINVAL; gmi->dev = gc->dev; @@ -380,6 +389,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq) case GDMA_EQE_HWC_INIT_EQ_ID_DB: case GDMA_EQE_HWC_INIT_DATA: case GDMA_EQE_HWC_INIT_DONE: + case GDMA_EQE_RNIC_QP_FATAL: if (!eq->eq.callback) break; @@ -712,12 +722,12 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle) return 0; } -EXPORT_SYMBOL_NS(mana_gd_destroy_dma_region, NET_MANA); +EXPORT_SYMBOL_NS(mana_gd_destroy_dma_region, "NET_MANA"); static int mana_gd_create_dma_region(struct gdma_dev *gd, struct gdma_mem_info *gmi) { - unsigned int num_page = gmi->length / PAGE_SIZE; + unsigned int num_page = gmi->length / MANA_PAGE_SIZE; struct gdma_create_dma_region_req *req = NULL; struct gdma_create_dma_region_resp resp = {}; struct gdma_context *gc = gd->gdma_context; @@ -727,10 +737,10 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, int err; int i; - if (length < PAGE_SIZE || !is_power_of_2(length)) + if (length < MANA_PAGE_SIZE || !is_power_of_2(length)) return -EINVAL; - if (offset_in_page(gmi->virt_addr) != 0) + if (!MANA_PAGE_ALIGNED(gmi->virt_addr)) return -EINVAL; hwc = gc->hwc.driver_data; @@ -751,7 +761,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, req->page_addr_list_len = num_page; for (i = 0; i < num_page; i++) - req->page_addr_list[i] = gmi->dma_handle + i * PAGE_SIZE; + req->page_addr_list[i] = gmi->dma_handle + i * MANA_PAGE_SIZE; err = mana_gd_send_request(gc, req_msg_size, req, sizeof(resp), &resp); if (err) @@ -816,7 +826,7 @@ free_q: kfree(queue); return err; } -EXPORT_SYMBOL_NS(mana_gd_create_mana_eq, NET_MANA); +EXPORT_SYMBOL_NS(mana_gd_create_mana_eq, "NET_MANA"); int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, const struct gdma_queue_spec *spec, @@ -893,7 +903,7 @@ void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue) mana_gd_free_memory(gmi); kfree(queue); } -EXPORT_SYMBOL_NS(mana_gd_destroy_queue, NET_MANA); +EXPORT_SYMBOL_NS(mana_gd_destroy_queue, "NET_MANA"); int mana_gd_verify_vf_version(struct pci_dev *pdev) { @@ -970,7 +980,7 @@ int mana_gd_register_device(struct gdma_dev *gd) return 0; } -EXPORT_SYMBOL_NS(mana_gd_register_device, NET_MANA); +EXPORT_SYMBOL_NS(mana_gd_register_device, "NET_MANA"); int mana_gd_deregister_device(struct gdma_dev *gd) { @@ -1001,7 +1011,7 @@ int mana_gd_deregister_device(struct gdma_dev *gd) return err; } -EXPORT_SYMBOL_NS(mana_gd_deregister_device, NET_MANA); +EXPORT_SYMBOL_NS(mana_gd_deregister_device, "NET_MANA"); u32 mana_gd_wq_avail_space(struct gdma_queue *wq) { @@ -1249,15 +1259,47 @@ void mana_gd_free_res_map(struct gdma_resource *r) r->size = 0; } +static int irq_setup(unsigned int *irqs, unsigned int len, int node) +{ + const struct cpumask *next, *prev = cpu_none_mask; + cpumask_var_t cpus __free(free_cpumask_var); + int cpu, weight; + + if (!alloc_cpumask_var(&cpus, GFP_KERNEL)) + return -ENOMEM; + + rcu_read_lock(); + for_each_numa_hop_mask(next, node) { + weight = cpumask_weight_andnot(next, prev); + while (weight > 0) { + cpumask_andnot(cpus, next, prev); + for_each_cpu(cpu, cpus) { + if (len-- == 0) + goto done; + irq_set_affinity_and_hint(*irqs++, topology_sibling_cpumask(cpu)); + cpumask_andnot(cpus, cpus, topology_sibling_cpumask(cpu)); + --weight; + } + } + prev = next; + } +done: + rcu_read_unlock(); + return 0; +} + static int mana_gd_setup_irqs(struct pci_dev *pdev) { - unsigned int max_queues_per_port = num_online_cpus(); struct gdma_context *gc = pci_get_drvdata(pdev); + unsigned int max_queues_per_port; struct gdma_irq_context *gic; unsigned int max_irqs, cpu; - int nvec, irq; + int start_irq_index = 1; + int nvec, *irqs, irq; int err, i = 0, j; + cpus_read_lock(); + max_queues_per_port = num_online_cpus(); if (max_queues_per_port > MANA_MAX_NUM_QUEUES) max_queues_per_port = MANA_MAX_NUM_QUEUES; @@ -1265,14 +1307,24 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) max_irqs = max_queues_per_port + 1; nvec = pci_alloc_irq_vectors(pdev, 2, max_irqs, PCI_IRQ_MSIX); - if (nvec < 0) + if (nvec < 0) { + cpus_read_unlock(); return nvec; + } + if (nvec <= num_online_cpus()) + start_irq_index = 0; + + irqs = kmalloc_array((nvec - start_irq_index), sizeof(int), GFP_KERNEL); + if (!irqs) { + err = -ENOMEM; + goto free_irq_vector; + } gc->irq_contexts = kcalloc(nvec, sizeof(struct gdma_irq_context), GFP_KERNEL); if (!gc->irq_contexts) { err = -ENOMEM; - goto free_irq_vector; + goto free_irq_array; } for (i = 0; i < nvec; i++) { @@ -1294,17 +1346,42 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) goto free_irq; } - err = request_irq(irq, mana_gd_intr, 0, gic->name, gic); - if (err) - goto free_irq; - - cpu = cpumask_local_spread(i, gc->numa_node); - irq_set_affinity_and_hint(irq, cpumask_of(cpu)); + if (!i) { + err = request_irq(irq, mana_gd_intr, 0, gic->name, gic); + if (err) + goto free_irq; + + /* If number of IRQ is one extra than number of online CPUs, + * then we need to assign IRQ0 (hwc irq) and IRQ1 to + * same CPU. + * Else we will use different CPUs for IRQ0 and IRQ1. + * Also we are using cpumask_local_spread instead of + * cpumask_first for the node, because the node can be + * mem only. + */ + if (start_irq_index) { + cpu = cpumask_local_spread(i, gc->numa_node); + irq_set_affinity_and_hint(irq, cpumask_of(cpu)); + } else { + irqs[start_irq_index] = irq; + } + } else { + irqs[i - start_irq_index] = irq; + err = request_irq(irqs[i - start_irq_index], mana_gd_intr, 0, + gic->name, gic); + if (err) + goto free_irq; + } } + err = irq_setup(irqs, (nvec - start_irq_index), gc->numa_node); + if (err) + goto free_irq; + gc->max_num_msix = nvec; gc->num_msix_usable = nvec; - + cpus_read_unlock(); + kfree(irqs); return 0; free_irq: @@ -1318,7 +1395,10 @@ free_irq: kfree(gc->irq_contexts); gc->irq_contexts = NULL; +free_irq_array: + kfree(irqs); free_irq_vector: + cpus_read_unlock(); pci_free_irq_vectors(pdev); return err; } @@ -1427,11 +1507,7 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto release_region; - err = dma_set_max_seg_size(&pdev->dev, UINT_MAX); - if (err) { - dev_err(&pdev->dev, "Failed to set dma device segment size\n"); - goto release_region; - } + dma_set_max_seg_size(&pdev->dev, UINT_MAX); err = -ENOMEM; gc = vzalloc(sizeof(*gc)); @@ -1451,6 +1527,12 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) gc->bar0_va = bar0_va; gc->dev = &pdev->dev; + if (gc->is_pf) + gc->mana_pci_debugfs = debugfs_create_dir("0", mana_debugfs_root); + else + gc->mana_pci_debugfs = debugfs_create_dir(pci_slot_name(pdev->slot), + mana_debugfs_root); + err = mana_gd_setup(pdev); if (err) goto unmap_bar; @@ -1464,6 +1546,14 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) cleanup_gd: mana_gd_cleanup(pdev); unmap_bar: + /* + * at this point we know that the other debugfs child dir/files + * are either not yet created or are already cleaned up. + * The pci debugfs folder clean-up now, will only be cleaning up + * adapter-MTU file and apc->mana_pci_debugfs folder. + */ + debugfs_remove_recursive(gc->mana_pci_debugfs); + gc->mana_pci_debugfs = NULL; pci_iounmap(pdev, bar0_va); free_gc: pci_set_drvdata(pdev, NULL); @@ -1484,6 +1574,10 @@ static void mana_gd_remove(struct pci_dev *pdev) mana_gd_cleanup(pdev); + debugfs_remove_recursive(gc->mana_pci_debugfs); + + gc->mana_pci_debugfs = NULL; + pci_iounmap(pdev, gc->bar0_va); vfree(gc); @@ -1535,6 +1629,10 @@ static void mana_gd_shutdown(struct pci_dev *pdev) mana_gd_cleanup(pdev); + debugfs_remove_recursive(gc->mana_pci_debugfs); + + gc->mana_pci_debugfs = NULL; + pci_disable_device(pdev); } @@ -1554,7 +1652,32 @@ static struct pci_driver mana_driver = { .shutdown = mana_gd_shutdown, }; -module_pci_driver(mana_driver); +static int __init mana_driver_init(void) +{ + int err; + + mana_debugfs_root = debugfs_create_dir("mana", NULL); + + err = pci_register_driver(&mana_driver); + if (err) { + debugfs_remove(mana_debugfs_root); + mana_debugfs_root = NULL; + } + + return err; +} + +static void __exit mana_driver_exit(void) +{ + pci_unregister_driver(&mana_driver); + + debugfs_remove(mana_debugfs_root); + + mana_debugfs_root = NULL; +} + +module_init(mana_driver_init); +module_exit(mana_driver_exit); MODULE_DEVICE_TABLE(pci, mana_id_table); diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index 2729a2c5acf9..a00f915c5188 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -3,6 +3,7 @@ #include <net/mana/gdma.h> #include <net/mana/hw_channel.h> +#include <linux/vmalloc.h> static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id) { @@ -51,9 +52,33 @@ static int mana_hwc_verify_resp_msg(const struct hwc_caller_ctx *caller_ctx, return 0; } +static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, + struct hwc_work_request *req) +{ + struct device *dev = hwc_rxq->hwc->dev; + struct gdma_sge *sge; + int err; + + sge = &req->sge; + sge->address = (u64)req->buf_sge_addr; + sge->mem_key = hwc_rxq->msg_buf->gpa_mkey; + sge->size = req->buf_len; + + memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); + req->wqe_req.sgl = sge; + req->wqe_req.num_sge = 1; + req->wqe_req.client_data_unit = 0; + + err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL); + if (err) + dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err); + return err; +} + static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, - const struct gdma_resp_hdr *resp_msg) + struct hwc_work_request *rx_req) { + const struct gdma_resp_hdr *resp_msg = rx_req->buf_va; struct hwc_caller_ctx *ctx; int err; @@ -61,6 +86,7 @@ static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, hwc->inflight_msg_res.map)) { dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", resp_msg->response.hwc_msg_id); + mana_hwc_post_rx_wqe(hwc->rxq, rx_req); return; } @@ -74,30 +100,13 @@ static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, memcpy(ctx->output_buf, resp_msg, resp_len); out: ctx->error = err; - complete(&ctx->comp_event); -} - -static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, - struct hwc_work_request *req) -{ - struct device *dev = hwc_rxq->hwc->dev; - struct gdma_sge *sge; - int err; - - sge = &req->sge; - sge->address = (u64)req->buf_sge_addr; - sge->mem_key = hwc_rxq->msg_buf->gpa_mkey; - sge->size = req->buf_len; - memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); - req->wqe_req.sgl = sge; - req->wqe_req.num_sge = 1; - req->wqe_req.client_data_unit = 0; + /* Must post rx wqe before complete(), otherwise the next rx may + * hit no_wqe error. + */ + mana_hwc_post_rx_wqe(hwc->rxq, rx_req); - err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL); - if (err) - dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err); - return err; + complete(&ctx->comp_event); } static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self, @@ -234,14 +243,12 @@ static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id, return; } - mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, resp); + mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, rx_req); - /* Do no longer use 'resp', because the buffer is posted to the HW - * in the below mana_hwc_post_rx_wqe(). + /* Can no longer use 'resp', because the buffer is posted to the HW + * in mana_hwc_handle_resp() above. */ resp = NULL; - - mana_hwc_post_rx_wqe(hwc_rxq, rx_req); } static void mana_hwc_tx_event_handler(void *ctx, u32 gdma_txq_id, @@ -361,12 +368,12 @@ static int mana_hwc_create_cq(struct hw_channel_context *hwc, u16 q_depth, int err; eq_size = roundup_pow_of_two(GDMA_EQE_SIZE * q_depth); - if (eq_size < MINIMUM_SUPPORTED_PAGE_SIZE) - eq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + if (eq_size < MANA_MIN_QSIZE) + eq_size = MANA_MIN_QSIZE; cq_size = roundup_pow_of_two(GDMA_CQE_SIZE * q_depth); - if (cq_size < MINIMUM_SUPPORTED_PAGE_SIZE) - cq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + if (cq_size < MANA_MIN_QSIZE) + cq_size = MANA_MIN_QSIZE; hwc_cq = kzalloc(sizeof(*hwc_cq), GFP_KERNEL); if (!hwc_cq) @@ -428,7 +435,7 @@ static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, dma_buf->num_reqs = q_depth; - buf_size = PAGE_ALIGN(q_depth * max_msg_size); + buf_size = MANA_PAGE_ALIGN(q_depth * max_msg_size); gmi = &dma_buf->mem_info; err = mana_gd_alloc_memory(gc, buf_size, gmi); @@ -496,8 +503,8 @@ static int mana_hwc_create_wq(struct hw_channel_context *hwc, else queue_size = roundup_pow_of_two(GDMA_MAX_SQE_SIZE * q_depth); - if (queue_size < MINIMUM_SUPPORTED_PAGE_SIZE) - queue_size = MINIMUM_SUPPORTED_PAGE_SIZE; + if (queue_size < MANA_MIN_QSIZE) + queue_size = MANA_MIN_QSIZE; hwc_wq = kzalloc(sizeof(*hwc_wq), GFP_KERNEL); if (!hwc_wq) @@ -848,7 +855,7 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, } if (!wait_for_completion_timeout(&ctx->comp_event, - (msecs_to_jiffies(hwc->hwc_timeout) * HZ))) { + (msecs_to_jiffies(hwc->hwc_timeout)))) { dev_err(hwc->dev, "HWC: Request timed out!\n"); err = -ETIMEDOUT; goto out; diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 59287c6e6cee..ae76ecc7a5d3 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -3,6 +3,7 @@ #include <uapi/linux/bpf.h> +#include <linux/debugfs.h> #include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> @@ -30,6 +31,21 @@ static void mana_adev_idx_free(int idx) ida_free(&mana_adev_ida, idx); } +static ssize_t mana_dbg_q_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct gdma_queue *gdma_q = filp->private_data; + + return simple_read_from_buffer(buf, count, pos, gdma_q->queue_mem_ptr, + gdma_q->queue_size); +} + +static const struct file_operations mana_dbg_q_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = mana_dbg_q_read, +}; + /* Microsoft Azure Network Adapter (MANA) functions */ static int mana_open(struct net_device *ndev) @@ -481,7 +497,7 @@ static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb, struct sock *sk = skb->sk; int txq; - txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK]; + txq = apc->indir_table[hash & (apc->indir_table_sz - 1)]; if (txq != old_q && sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) @@ -511,7 +527,7 @@ static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb, } /* Release pre-allocated RX buffers */ -static void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc) +void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc) { struct device *dev; int i; @@ -599,12 +615,16 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, else *headroom = XDP_PACKET_HEADROOM; - *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; + *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); - *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN); + /* Using page pool in this case, so alloc_size is PAGE_SIZE */ + if (*alloc_size < PAGE_SIZE) + *alloc_size = PAGE_SIZE; + + *datasize = mtu + ETH_HLEN; } -static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) +int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_queues) { struct device *dev; struct page *page; @@ -618,7 +638,7 @@ static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) dev = mpc->ac->gdma_dev->gdma_context->dev; - num_rxb = mpc->num_queues * RX_BUFFERS_PER_QUEUE; + num_rxb = num_queues * mpc->rx_queue_size; WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n"); mpc->rxbufs_pre = kmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL); @@ -678,7 +698,7 @@ static int mana_change_mtu(struct net_device *ndev, int new_mtu) int err; /* Pre-allocate buffers to prevent failure in mana_attach later */ - err = mana_pre_alloc_rxbufs(mpc, new_mtu); + err = mana_pre_alloc_rxbufs(mpc, new_mtu, mpc->num_queues); if (err) { netdev_err(ndev, "Insufficient memory for new MTU\n"); return err; @@ -690,12 +710,12 @@ static int mana_change_mtu(struct net_device *ndev, int new_mtu) goto out; } - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); err = mana_attach(ndev); if (err) { netdev_err(ndev, "mana_attach failed: %d\n", err); - ndev->mtu = old_mtu; + WRITE_ONCE(ndev->mtu, old_mtu); } out: @@ -717,10 +737,23 @@ static const struct net_device_ops mana_devops = { static void mana_cleanup_port_context(struct mana_port_context *apc) { + /* + * make sure subsequent cleanup attempts don't end up removing already + * cleaned dentry pointer + */ + debugfs_remove(apc->mana_port_debugfs); + apc->mana_port_debugfs = NULL; kfree(apc->rxqs); apc->rxqs = NULL; } +static void mana_cleanup_indir_table(struct mana_port_context *apc) +{ + apc->indir_table_sz = 0; + kfree(apc->indir_table); + kfree(apc->rxobj_table); +} + static int mana_init_port_context(struct mana_port_context *apc) { apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *), @@ -932,6 +965,8 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, else gc->adapter_mtu = ETH_FRAME_LEN; + debugfs_create_u16("adapter-MTU", 0400, gc->mana_pci_debugfs, &gc->adapter_mtu); + return 0; } @@ -962,7 +997,16 @@ static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index, *max_sq = resp.max_num_sq; *max_rq = resp.max_num_rq; - *num_indir_entry = resp.num_indirection_ent; + if (resp.num_indirection_ent > 0 && + resp.num_indirection_ent <= MANA_INDIRECT_TABLE_MAX_SIZE && + is_power_of_2(resp.num_indirection_ent)) { + *num_indir_entry = resp.num_indirection_ent; + } else { + netdev_warn(apc->ndev, + "Setting indirection table size to default %d for vPort %d\n", + MANA_INDIRECT_TABLE_DEF_SIZE, apc->port_idx); + *num_indir_entry = MANA_INDIRECT_TABLE_DEF_SIZE; + } apc->port_handle = resp.vport; ether_addr_copy(apc->mac_addr, resp.mac_addr); @@ -977,7 +1021,7 @@ void mana_uncfg_vport(struct mana_port_context *apc) WARN_ON(apc->vport_use_count < 0); mutex_unlock(&apc->vport_mutex); } -EXPORT_SYMBOL_NS(mana_uncfg_vport, NET_MANA); +EXPORT_SYMBOL_NS(mana_uncfg_vport, "NET_MANA"); int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, u32 doorbell_pg_id) @@ -1047,22 +1091,20 @@ out: return err; } -EXPORT_SYMBOL_NS(mana_cfg_vport, NET_MANA); +EXPORT_SYMBOL_NS(mana_cfg_vport, "NET_MANA"); static int mana_cfg_vport_steering(struct mana_port_context *apc, enum TRI_STATE rx, bool update_default_rxobj, bool update_key, bool update_tab) { - u16 num_entries = MANA_INDIRECT_TABLE_SIZE; struct mana_cfg_rx_steer_req_v2 *req; struct mana_cfg_rx_steer_resp resp = {}; struct net_device *ndev = apc->ndev; - mana_handle_t *req_indir_tab; u32 req_buf_size; int err; - req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries; + req_buf_size = struct_size(req, indir_tab, apc->indir_table_sz); req = kzalloc(req_buf_size, GFP_KERNEL); if (!req) return -ENOMEM; @@ -1073,8 +1115,9 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, req->hdr.req.msg_version = GDMA_MESSAGE_V2; req->vport = apc->port_handle; - req->num_indir_entries = num_entries; - req->indir_tab_offset = sizeof(*req); + req->num_indir_entries = apc->indir_table_sz; + req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2, + indir_tab); req->rx_enable = rx; req->rss_enable = apc->rss_state; req->update_default_rxobj = update_default_rxobj; @@ -1086,11 +1129,9 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, if (update_key) memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE); - if (update_tab) { - req_indir_tab = (mana_handle_t *)(req + 1); - memcpy(req_indir_tab, apc->rxobj_table, - req->num_indir_entries * sizeof(mana_handle_t)); - } + if (update_tab) + memcpy(req->indir_tab, apc->rxobj_table, + flex_array_size(req, indir_tab, req->num_indir_entries)); err = mana_send_request(apc->ac, req, req_buf_size, &resp, sizeof(resp)); @@ -1113,7 +1154,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, } netdev_info(ndev, "Configured steering vPort %llu entries %u\n", - apc->port_handle, num_entries); + apc->port_handle, apc->indir_table_sz); out: kfree(req); return err; @@ -1172,7 +1213,7 @@ int mana_create_wq_obj(struct mana_port_context *apc, out: return err; } -EXPORT_SYMBOL_NS(mana_create_wq_obj, NET_MANA); +EXPORT_SYMBOL_NS(mana_create_wq_obj, "NET_MANA"); void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, mana_handle_t wq_obj) @@ -1200,7 +1241,7 @@ void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err, resp.hdr.status); } -EXPORT_SYMBOL_NS(mana_destroy_wq_obj, NET_MANA); +EXPORT_SYMBOL_NS(mana_destroy_wq_obj, "NET_MANA"); static void mana_destroy_eq(struct mana_context *ac) { @@ -1211,6 +1252,9 @@ static void mana_destroy_eq(struct mana_context *ac) if (!ac->eqs) return; + debugfs_remove_recursive(ac->mana_eqs_debugfs); + ac->mana_eqs_debugfs = NULL; + for (i = 0; i < gc->max_num_queues; i++) { eq = ac->eqs[i].eq; if (!eq) @@ -1223,6 +1267,18 @@ static void mana_destroy_eq(struct mana_context *ac) ac->eqs = NULL; } +static void mana_create_eq_debugfs(struct mana_context *ac, int i) +{ + struct mana_eq eq = ac->eqs[i]; + char eqnum[32]; + + sprintf(eqnum, "eq%d", i); + eq.mana_eq_debugfs = debugfs_create_dir(eqnum, ac->mana_eqs_debugfs); + debugfs_create_u32("head", 0400, eq.mana_eq_debugfs, &eq.eq->head); + debugfs_create_u32("tail", 0400, eq.mana_eq_debugfs, &eq.eq->tail); + debugfs_create_file("eq_dump", 0400, eq.mana_eq_debugfs, eq.eq, &mana_dbg_q_fops); +} + static int mana_create_eq(struct mana_context *ac) { struct gdma_dev *gd = ac->gdma_dev; @@ -1243,11 +1299,14 @@ static int mana_create_eq(struct mana_context *ac) spec.eq.context = ac->eqs; spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE; + ac->mana_eqs_debugfs = debugfs_create_dir("EQs", gc->mana_pci_debugfs); + for (i = 0; i < gc->max_num_queues; i++) { spec.eq.msix_index = (i + 1) % gc->num_msix_usable; err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq); if (err) goto out; + mana_create_eq_debugfs(ac, i); } return 0; @@ -1775,7 +1834,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq) static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) { struct mana_cq *cq = context; - u8 arm_bit; int w; WARN_ON_ONCE(cq->gdma_cq != gdma_queue); @@ -1786,16 +1844,23 @@ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) mana_poll_tx_cq(cq); w = cq->work_done; - - if (w < cq->budget && - napi_complete_done(&cq->napi, w)) { - arm_bit = SET_ARM_BIT; - } else { - arm_bit = 0; + cq->work_done_since_doorbell += w; + + if (w < cq->budget) { + mana_gd_ring_cq(gdma_queue, SET_ARM_BIT); + cq->work_done_since_doorbell = 0; + napi_complete_done(&cq->napi, w); + } else if (cq->work_done_since_doorbell > + cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) { + /* MANA hardware requires at least one doorbell ring every 8 + * wraparounds of CQ even if there is no need to arm the CQ. + * This driver rings the doorbell as soon as we have exceeded + * 4 wraparounds. + */ + mana_gd_ring_cq(gdma_queue, 0); + cq->work_done_since_doorbell = 0; } - mana_gd_ring_cq(gdma_queue, arm_bit); - return w; } @@ -1848,11 +1913,16 @@ static void mana_destroy_txq(struct mana_port_context *apc) return; for (i = 0; i < apc->num_queues; i++) { - napi = &apc->tx_qp[i].tx_cq.napi; - napi_synchronize(napi); - napi_disable(napi); - netif_napi_del(napi); + debugfs_remove_recursive(apc->tx_qp[i].mana_tx_debugfs); + apc->tx_qp[i].mana_tx_debugfs = NULL; + napi = &apc->tx_qp[i].tx_cq.napi; + if (apc->tx_qp[i].txq.napi_initialized) { + napi_synchronize(napi); + napi_disable(napi); + netif_napi_del(napi); + apc->tx_qp[i].txq.napi_initialized = false; + } mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq); @@ -1864,6 +1934,31 @@ static void mana_destroy_txq(struct mana_port_context *apc) apc->tx_qp = NULL; } +static void mana_create_txq_debugfs(struct mana_port_context *apc, int idx) +{ + struct mana_tx_qp *tx_qp = &apc->tx_qp[idx]; + char qnum[32]; + + sprintf(qnum, "TX-%d", idx); + tx_qp->mana_tx_debugfs = debugfs_create_dir(qnum, apc->mana_port_debugfs); + debugfs_create_u32("sq_head", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->txq.gdma_sq->head); + debugfs_create_u32("sq_tail", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->txq.gdma_sq->tail); + debugfs_create_u32("sq_pend_skb_qlen", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->txq.pending_skbs.qlen); + debugfs_create_u32("cq_head", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->tx_cq.gdma_cq->head); + debugfs_create_u32("cq_tail", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->tx_cq.gdma_cq->tail); + debugfs_create_u32("cq_budget", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->tx_cq.budget); + debugfs_create_file("txq_dump", 0400, tx_qp->mana_tx_debugfs, + tx_qp->txq.gdma_sq, &mana_dbg_q_fops); + debugfs_create_file("cq_dump", 0400, tx_qp->mana_tx_debugfs, + tx_qp->tx_cq.gdma_cq, &mana_dbg_q_fops); +} + static int mana_create_txq(struct mana_port_context *apc, struct net_device *net) { @@ -1886,15 +1981,17 @@ static int mana_create_txq(struct mana_port_context *apc, return -ENOMEM; /* The minimum size of the WQE is 32 bytes, hence - * MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs + * apc->tx_queue_size represents the maximum number of WQEs * the SQ can store. This value is then used to size other queues * to prevent overflow. + * Also note that the txq_size is always going to be MANA_PAGE_ALIGNED, + * as min val of apc->tx_queue_size is 128 and that would make + * txq_size 128*32 = 4096 and the other higher values of apc->tx_queue_size + * are always power of two */ - txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32; - BUILD_BUG_ON(!PAGE_ALIGNED(txq_size)); + txq_size = apc->tx_queue_size * 32; - cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE; - cq_size = PAGE_ALIGN(cq_size); + cq_size = apc->tx_queue_size * COMP_ENTRY_SIZE; gc = gd->gdma_context; @@ -1908,6 +2005,7 @@ static int mana_create_txq(struct mana_port_context *apc, txq->ndev = net; txq->net_txq = netdev_get_tx_queue(net, i); txq->vp_offset = apc->tx_vp_offset; + txq->napi_initialized = false; skb_queue_head_init(&txq->pending_skbs); memset(&spec, 0, sizeof(spec)); @@ -1972,8 +2070,11 @@ static int mana_create_txq(struct mana_port_context *apc, gc->cq_table[cq->gdma_id] = cq->gdma_cq; + mana_create_txq_debugfs(apc, i); + netif_napi_add_tx(net, &cq->napi, mana_poll); napi_enable(&cq->napi); + txq->napi_initialized = true; mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); } @@ -1985,7 +2086,7 @@ out: } static void mana_destroy_rxq(struct mana_port_context *apc, - struct mana_rxq *rxq, bool validate_state) + struct mana_rxq *rxq, bool napi_initialized) { struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; @@ -1998,17 +2099,20 @@ static void mana_destroy_rxq(struct mana_port_context *apc, if (!rxq) return; + debugfs_remove_recursive(rxq->mana_rx_debugfs); + rxq->mana_rx_debugfs = NULL; + napi = &rxq->rx_cq.napi; - if (validate_state) + if (napi_initialized) { napi_synchronize(napi); - napi_disable(napi); + napi_disable(napi); + netif_napi_del(napi); + } xdp_rxq_info_unreg(&rxq->xdp_rxq); - netif_napi_del(napi); - mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); mana_deinit_cq(apc, &rxq->rx_cq); @@ -2132,10 +2236,11 @@ static int mana_push_wqe(struct mana_rxq *rxq) static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc) { + struct mana_port_context *mpc = netdev_priv(rxq->ndev); struct page_pool_params pprm = {}; int ret; - pprm.pool_size = RX_BUFFERS_PER_QUEUE; + pprm.pool_size = mpc->rx_queue_size; pprm.nid = gc->numa_node; pprm.napi = &rxq->rx_cq.napi; pprm.netdev = rxq->ndev; @@ -2167,13 +2272,13 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, gc = gd->gdma_context; - rxq = kzalloc(struct_size(rxq, rx_oobs, RX_BUFFERS_PER_QUEUE), + rxq = kzalloc(struct_size(rxq, rx_oobs, apc->rx_queue_size), GFP_KERNEL); if (!rxq) return NULL; rxq->ndev = ndev; - rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE; + rxq->num_rx_buf = apc->rx_queue_size; rxq->rxq_idx = rxq_idx; rxq->rxobj = INVALID_MANA_HANDLE; @@ -2191,8 +2296,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, if (err) goto out; - rq_size = PAGE_ALIGN(rq_size); - cq_size = PAGE_ALIGN(cq_size); + rq_size = MANA_PAGE_ALIGN(rq_size); + cq_size = MANA_PAGE_ALIGN(cq_size); /* Create RQ */ memset(&spec, 0, sizeof(spec)); @@ -2278,6 +2383,28 @@ out: return NULL; } +static void mana_create_rxq_debugfs(struct mana_port_context *apc, int idx) +{ + struct mana_rxq *rxq; + char qnum[32]; + + rxq = apc->rxqs[idx]; + + sprintf(qnum, "RX-%d", idx); + rxq->mana_rx_debugfs = debugfs_create_dir(qnum, apc->mana_port_debugfs); + debugfs_create_u32("rq_head", 0400, rxq->mana_rx_debugfs, &rxq->gdma_rq->head); + debugfs_create_u32("rq_tail", 0400, rxq->mana_rx_debugfs, &rxq->gdma_rq->tail); + debugfs_create_u32("rq_nbuf", 0400, rxq->mana_rx_debugfs, &rxq->num_rx_buf); + debugfs_create_u32("cq_head", 0400, rxq->mana_rx_debugfs, + &rxq->rx_cq.gdma_cq->head); + debugfs_create_u32("cq_tail", 0400, rxq->mana_rx_debugfs, + &rxq->rx_cq.gdma_cq->tail); + debugfs_create_u32("cq_budget", 0400, rxq->mana_rx_debugfs, &rxq->rx_cq.budget); + debugfs_create_file("rxq_dump", 0400, rxq->mana_rx_debugfs, rxq->gdma_rq, &mana_dbg_q_fops); + debugfs_create_file("cq_dump", 0400, rxq->mana_rx_debugfs, rxq->rx_cq.gdma_cq, + &mana_dbg_q_fops); +} + static int mana_add_rx_queues(struct mana_port_context *apc, struct net_device *ndev) { @@ -2296,6 +2423,8 @@ static int mana_add_rx_queues(struct mana_port_context *apc, u64_stats_init(&rxq->stats.syncp); apc->rxqs[i] = rxq; + + mana_create_rxq_debugfs(apc, i); } apc->default_rxobj = apc->rxqs[0]->rxobj; @@ -2346,11 +2475,33 @@ static int mana_create_vport(struct mana_port_context *apc, return mana_create_txq(apc, net); } +static int mana_rss_table_alloc(struct mana_port_context *apc) +{ + if (!apc->indir_table_sz) { + netdev_err(apc->ndev, + "Indirection table size not set for vPort %d\n", + apc->port_idx); + return -EINVAL; + } + + apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL); + if (!apc->indir_table) + return -ENOMEM; + + apc->rxobj_table = kcalloc(apc->indir_table_sz, sizeof(mana_handle_t), GFP_KERNEL); + if (!apc->rxobj_table) { + kfree(apc->indir_table); + return -ENOMEM; + } + + return 0; +} + static void mana_rss_table_init(struct mana_port_context *apc) { int i; - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + for (i = 0; i < apc->indir_table_sz; i++) apc->indir_table[i] = ethtool_rxfh_indir_default(i, apc->num_queues); } @@ -2363,7 +2514,7 @@ int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, int i; if (update_tab) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + for (i = 0; i < apc->indir_table_sz; i++) { queue_idx = apc->indir_table[i]; apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj; } @@ -2387,6 +2538,7 @@ void mana_query_gf_stats(struct mana_port_context *apc) mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_GF_STAT, sizeof(req), sizeof(resp)); + req.hdr.resp.msg_version = GDMA_MESSAGE_V2; req.req_stats = STATISTICS_FLAGS_RX_DISCARDS_NO_WQE | STATISTICS_FLAGS_RX_ERRORS_VPORT_DISABLED | STATISTICS_FLAGS_HC_RX_BYTES | @@ -2466,17 +2618,21 @@ void mana_query_gf_stats(struct mana_port_context *apc) static int mana_init_port(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); + struct gdma_dev *gd = apc->ac->gdma_dev; u32 max_txq, max_rxq, max_queues; int port_idx = apc->port_idx; - u32 num_indirect_entries; + struct gdma_context *gc; + char vport[32]; int err; err = mana_init_port_context(apc); if (err) return err; + gc = gd->gdma_context; + err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq, - &num_indirect_entries); + &apc->indir_table_sz); if (err) { netdev_err(ndev, "Failed to query info for vPort %d\n", port_idx); @@ -2491,12 +2647,12 @@ static int mana_init_port(struct net_device *ndev) apc->num_queues = apc->max_queues; eth_hw_addr_set(ndev, apc->mac_addr); - + sprintf(vport, "vport%d", port_idx); + apc->mana_port_debugfs = debugfs_create_dir(vport, gc->mana_pci_debugfs); return 0; reset_apc: - kfree(apc->rxqs); - apc->rxqs = NULL; + mana_cleanup_port_context(apc); return err; } @@ -2701,6 +2857,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, apc->ndev = ndev; apc->max_queues = gc->max_num_queues; apc->num_queues = gc->max_num_queues; + apc->tx_queue_size = DEF_TX_BUFFERS_PER_QUEUE; + apc->rx_queue_size = DEF_RX_BUFFERS_PER_QUEUE; apc->port_handle = INVALID_MANA_HANDLE; apc->pf_filter_handle = INVALID_MANA_HANDLE; apc->port_idx = port_idx; @@ -2725,6 +2883,10 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, if (err) goto free_net; + err = mana_rss_table_alloc(apc); + if (err) + goto reset_apc; + netdev_lockdep_set_classes(ndev); ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; @@ -2741,14 +2903,15 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, err = register_netdev(ndev); if (err) { netdev_err(ndev, "Unable to register netdev.\n"); - goto reset_apc; + goto free_indir; } return 0; +free_indir: + mana_cleanup_indir_table(apc); reset_apc: - kfree(apc->rxqs); - apc->rxqs = NULL; + mana_cleanup_port_context(apc); free_net: *ndev_storage = NULL; netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err); @@ -2800,6 +2963,8 @@ static int add_adev(struct gdma_dev *gd) if (ret) goto init_fail; + /* madev is owned by the auxiliary device */ + madev = NULL; ret = auxiliary_device_add(adev); if (ret) goto add_fail; @@ -2874,16 +3039,30 @@ int mana_probe(struct gdma_dev *gd, bool resuming) if (!resuming) { for (i = 0; i < ac->num_ports; i++) { err = mana_probe_port(ac, i, &ac->ports[i]); - if (err) + /* we log the port for which the probe failed and stop + * probes for subsequent ports. + * Note that we keep running ports, for which the probes + * were successful, unless add_adev fails too + */ + if (err) { + dev_err(dev, "Probe Failed for port %d\n", i); break; + } } } else { for (i = 0; i < ac->num_ports; i++) { rtnl_lock(); err = mana_attach(ac->ports[i]); rtnl_unlock(); - if (err) + /* we log the port for which the attach failed and stop + * attach for subsequent ports + * Note that we keep running ports, for which the attach + * were successful, unless add_adev fails too + */ + if (err) { + dev_err(dev, "Attach Failed for port %d\n", i); break; + } } } @@ -2899,6 +3078,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) { struct gdma_context *gc = gd->gdma_context; struct mana_context *ac = gd->driver_data; + struct mana_port_context *apc; struct device *dev = gc->dev; struct net_device *ndev; int err; @@ -2910,6 +3090,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) for (i = 0; i < ac->num_ports; i++) { ndev = ac->ports[i]; + apc = netdev_priv(ndev); if (!ndev) { if (i == 0) dev_err(dev, "No net device to remove\n"); @@ -2933,6 +3114,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) } unregister_netdevice(ndev); + mana_cleanup_indir_table(apc); rtnl_unlock(); @@ -2950,3 +3132,22 @@ out: gd->gdma_context = NULL; kfree(ac); } + +struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index) +{ + struct net_device *ndev; + + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), + "Taking primary netdev without holding the RCU read lock"); + if (port_index >= ac->num_ports) + return NULL; + + /* When mana is used in netvsc, the upper netdevice should be returned. */ + if (ac->ports[port_index]->flags & IFF_SLAVE) + ndev = netdev_master_upper_dev_get_rcu(ac->ports[port_index]); + else + ndev = ac->ports[port_index]; + + return ndev; +} +EXPORT_SYMBOL_NS(mana_get_primary_netdev_rcu, "NET_MANA"); diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index ab2413d71f6c..c419626073f5 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -91,53 +91,34 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { struct mana_port_context *apc = netdev_priv(ndev); unsigned int num_queues = apc->num_queues; - u8 *p = data; int i; if (stringset != ETH_SS_STATS) return; - for (i = 0; i < ARRAY_SIZE(mana_eth_stats); i++) { - memcpy(p, mana_eth_stats[i].name, ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < ARRAY_SIZE(mana_eth_stats); i++) + ethtool_puts(&data, mana_eth_stats[i].name); for (i = 0; i < num_queues; i++) { - sprintf(p, "rx_%d_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_%d_bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_%d_xdp_drop", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_%d_xdp_tx", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_%d_xdp_redirect", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&data, "rx_%d_packets", i); + ethtool_sprintf(&data, "rx_%d_bytes", i); + ethtool_sprintf(&data, "rx_%d_xdp_drop", i); + ethtool_sprintf(&data, "rx_%d_xdp_tx", i); + ethtool_sprintf(&data, "rx_%d_xdp_redirect", i); } for (i = 0; i < num_queues; i++) { - sprintf(p, "tx_%d_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_xdp_xmit", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_tso_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_tso_bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_tso_inner_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_tso_inner_bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_long_pkt_fmt", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_short_pkt_fmt", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_csum_partial", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_%d_mana_map_err", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&data, "tx_%d_packets", i); + ethtool_sprintf(&data, "tx_%d_bytes", i); + ethtool_sprintf(&data, "tx_%d_xdp_xmit", i); + ethtool_sprintf(&data, "tx_%d_tso_packets", i); + ethtool_sprintf(&data, "tx_%d_tso_bytes", i); + ethtool_sprintf(&data, "tx_%d_tso_inner_packets", i); + ethtool_sprintf(&data, "tx_%d_tso_inner_bytes", i); + ethtool_sprintf(&data, "tx_%d_long_pkt_fmt", i); + ethtool_sprintf(&data, "tx_%d_short_pkt_fmt", i); + ethtool_sprintf(&data, "tx_%d_csum_partial", i); + ethtool_sprintf(&data, "tx_%d_mana_map_err", i); } } @@ -245,7 +226,9 @@ static u32 mana_get_rxfh_key_size(struct net_device *ndev) static u32 mana_rss_indir_size(struct net_device *ndev) { - return MANA_INDIRECT_TABLE_SIZE; + struct mana_port_context *apc = netdev_priv(ndev); + + return apc->indir_table_sz; } static int mana_get_rxfh(struct net_device *ndev, @@ -257,7 +240,7 @@ static int mana_get_rxfh(struct net_device *ndev, rxfh->hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */ if (rxfh->indir) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + for (i = 0; i < apc->indir_table_sz; i++) rxfh->indir[i] = apc->indir_table[i]; } @@ -273,8 +256,8 @@ static int mana_set_rxfh(struct net_device *ndev, { struct mana_port_context *apc = netdev_priv(ndev); bool update_hash = false, update_table = false; - u32 save_table[MANA_INDIRECT_TABLE_SIZE]; u8 save_key[MANA_HASH_KEY_SIZE]; + u32 *save_table; int i, err; if (!apc->port_is_up) @@ -284,13 +267,19 @@ static int mana_set_rxfh(struct net_device *ndev, rxfh->hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; + save_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL); + if (!save_table) + return -ENOMEM; + if (rxfh->indir) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) - if (rxfh->indir[i] >= apc->num_queues) - return -EINVAL; + for (i = 0; i < apc->indir_table_sz; i++) + if (rxfh->indir[i] >= apc->num_queues) { + err = -EINVAL; + goto cleanup; + } update_table = true; - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + for (i = 0; i < apc->indir_table_sz; i++) { save_table[i] = apc->indir_table[i]; apc->indir_table[i] = rxfh->indir[i]; } @@ -306,7 +295,7 @@ static int mana_set_rxfh(struct net_device *ndev, if (err) { /* recover to original values */ if (update_table) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + for (i = 0; i < apc->indir_table_sz; i++) apc->indir_table[i] = save_table[i]; } @@ -316,6 +305,9 @@ static int mana_set_rxfh(struct net_device *ndev, mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table); } +cleanup: + kfree(save_table); + return err; } @@ -334,30 +326,113 @@ static int mana_set_channels(struct net_device *ndev, struct mana_port_context *apc = netdev_priv(ndev); unsigned int new_count = channels->combined_count; unsigned int old_count = apc->num_queues; - int err, err2; + int err; + + err = mana_pre_alloc_rxbufs(apc, ndev->mtu, new_count); + if (err) { + netdev_err(ndev, "Insufficient memory for new allocations"); + return err; + } err = mana_detach(ndev, false); if (err) { netdev_err(ndev, "mana_detach failed: %d\n", err); - return err; + goto out; } apc->num_queues = new_count; err = mana_attach(ndev); - if (!err) - return 0; + if (err) { + apc->num_queues = old_count; + netdev_err(ndev, "mana_attach failed: %d\n", err); + } + +out: + mana_pre_dealloc_rxbufs(apc); + return err; +} + +static void mana_get_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct mana_port_context *apc = netdev_priv(ndev); + + ring->rx_pending = apc->rx_queue_size; + ring->tx_pending = apc->tx_queue_size; + ring->rx_max_pending = MAX_RX_BUFFERS_PER_QUEUE; + ring->tx_max_pending = MAX_TX_BUFFERS_PER_QUEUE; +} + +static int mana_set_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct mana_port_context *apc = netdev_priv(ndev); + u32 new_tx, new_rx; + u32 old_tx, old_rx; + int err; - netdev_err(ndev, "mana_attach failed: %d\n", err); + old_tx = apc->tx_queue_size; + old_rx = apc->rx_queue_size; - /* Try to roll it back to the old configuration. */ - apc->num_queues = old_count; - err2 = mana_attach(ndev); - if (err2) - netdev_err(ndev, "mana re-attach failed: %d\n", err2); + if (ring->tx_pending < MIN_TX_BUFFERS_PER_QUEUE) { + NL_SET_ERR_MSG_FMT(extack, "tx:%d less than the min:%d", ring->tx_pending, + MIN_TX_BUFFERS_PER_QUEUE); + return -EINVAL; + } + + if (ring->rx_pending < MIN_RX_BUFFERS_PER_QUEUE) { + NL_SET_ERR_MSG_FMT(extack, "rx:%d less than the min:%d", ring->rx_pending, + MIN_RX_BUFFERS_PER_QUEUE); + return -EINVAL; + } + + new_rx = roundup_pow_of_two(ring->rx_pending); + new_tx = roundup_pow_of_two(ring->tx_pending); + netdev_info(ndev, "Using nearest power of 2 values for Txq:%d Rxq:%d\n", + new_tx, new_rx); + + /* pre-allocating new buffers to prevent failures in mana_attach() later */ + apc->rx_queue_size = new_rx; + err = mana_pre_alloc_rxbufs(apc, ndev->mtu, apc->num_queues); + apc->rx_queue_size = old_rx; + if (err) { + netdev_err(ndev, "Insufficient memory for new allocations\n"); + return err; + } + + err = mana_detach(ndev, false); + if (err) { + netdev_err(ndev, "mana_detach failed: %d\n", err); + goto out; + } + apc->tx_queue_size = new_tx; + apc->rx_queue_size = new_rx; + + err = mana_attach(ndev); + if (err) { + netdev_err(ndev, "mana_attach failed: %d\n", err); + apc->tx_queue_size = old_tx; + apc->rx_queue_size = old_rx; + } +out: + mana_pre_dealloc_rxbufs(apc); return err; } +static int mana_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) +{ + cmd->base.duplex = DUPLEX_FULL; + cmd->base.port = PORT_OTHER; + + return 0; +} + const struct ethtool_ops mana_ethtool_ops = { .get_ethtool_stats = mana_get_ethtool_stats, .get_sset_count = mana_get_sset_count, @@ -369,4 +444,8 @@ const struct ethtool_ops mana_ethtool_ops = { .set_rxfh = mana_set_rxfh, .get_channels = mana_get_channels, .set_channels = mana_set_channels, + .get_ringparam = mana_get_ringparam, + .set_ringparam = mana_set_ringparam, + .get_link_ksettings = mana_get_link_ksettings, + .get_link = ethtool_op_get_link, }; diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.c b/drivers/net/ethernet/microsoft/mana/shm_channel.c index 5553af9c8085..0f1679ebad96 100644 --- a/drivers/net/ethernet/microsoft/mana/shm_channel.c +++ b/drivers/net/ethernet/microsoft/mana/shm_channel.c @@ -6,6 +6,7 @@ #include <linux/io.h> #include <linux/mm.h> +#include <net/mana/gdma.h> #include <net/mana/shm_channel.h> #define PAGE_FRAME_L48_WIDTH_BYTES 6 @@ -155,8 +156,8 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, return err; } - if (!PAGE_ALIGNED(eq_addr) || !PAGE_ALIGNED(cq_addr) || - !PAGE_ALIGNED(rq_addr) || !PAGE_ALIGNED(sq_addr)) + if (!MANA_PAGE_ALIGNED(eq_addr) || !MANA_PAGE_ALIGNED(cq_addr) || + !MANA_PAGE_ALIGNED(rq_addr) || !MANA_PAGE_ALIGNED(sq_addr)) return -EINVAL; if ((eq_msix_index & VECTOR_MASK) != eq_msix_index) @@ -183,7 +184,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* EQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(eq_addr); + frame_addr = MANA_PFN(eq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); @@ -191,7 +192,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* CQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(cq_addr); + frame_addr = MANA_PFN(cq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); @@ -199,7 +200,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* RQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(rq_addr); + frame_addr = MANA_PFN(rq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); @@ -207,7 +208,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* SQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(sq_addr); + frame_addr = MANA_PFN(sq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); |