diff options
Diffstat (limited to 'drivers/net/ethernet/microsoft/mana/mana_en.c')
-rw-r--r-- | drivers/net/ethernet/microsoft/mana/mana_en.c | 581 |
1 files changed, 458 insertions, 123 deletions
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index d8af5e7e15b4..ccd2885c939e 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -3,6 +3,7 @@ #include <uapi/linux/bpf.h> +#include <linux/debugfs.h> #include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> @@ -12,6 +13,7 @@ #include <net/checksum.h> #include <net/ip6_checksum.h> +#include <net/netdev_lock.h> #include <net/page_pool/helpers.h> #include <net/xdp.h> @@ -30,16 +32,33 @@ static void mana_adev_idx_free(int idx) ida_free(&mana_adev_ida, idx); } +static ssize_t mana_dbg_q_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct gdma_queue *gdma_q = filp->private_data; + + return simple_read_from_buffer(buf, count, pos, gdma_q->queue_mem_ptr, + gdma_q->queue_size); +} + +static const struct file_operations mana_dbg_q_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = mana_dbg_q_read, +}; + /* Microsoft Azure Network Adapter (MANA) functions */ static int mana_open(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); int err; - err = mana_alloc_queues(ndev); - if (err) + + if (err) { + netdev_err(ndev, "%s failed to allocate queues: %d\n", __func__, err); return err; + } apc->port_is_up = true; @@ -48,7 +67,7 @@ static int mana_open(struct net_device *ndev) netif_carrier_on(ndev); netif_tx_wake_all_queues(ndev); - + netdev_dbg(ndev, "%s successful\n", __func__); return 0; } @@ -160,6 +179,9 @@ static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, return 0; frag_err: + if (net_ratelimit()) + netdev_err(apc->ndev, "Failed to map skb of size %u to DMA\n", + skb->len); for (i = sg_i - 1; i >= hsg; i--) dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], DMA_TO_DEVICE); @@ -240,6 +262,9 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (skb_cow_head(skb, MANA_HEADROOM)) goto tx_drop_count; + if (unlikely(ipv6_hopopt_jumbo_remove(skb))) + goto tx_drop_count; + txq = &apc->tx_qp[txq_idx].txq; gdma_sq = txq->gdma_sq; cq = &apc->tx_qp[txq_idx].tx_cq; @@ -481,7 +506,7 @@ static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb, struct sock *sk = skb->sk; int txq; - txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK]; + txq = apc->indir_table[hash & (apc->indir_table_sz - 1)]; if (txq != old_q && sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) @@ -511,7 +536,7 @@ static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb, } /* Release pre-allocated RX buffers */ -static void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc) +void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc) { struct device *dev; int i; @@ -599,12 +624,16 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, else *headroom = XDP_PACKET_HEADROOM; - *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; + *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); + + /* Using page pool in this case, so alloc_size is PAGE_SIZE */ + if (*alloc_size < PAGE_SIZE) + *alloc_size = PAGE_SIZE; *datasize = mtu + ETH_HLEN; } -static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) +int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_queues) { struct device *dev; struct page *page; @@ -618,7 +647,7 @@ static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) dev = mpc->ac->gdma_dev->gdma_context->dev; - num_rxb = mpc->num_queues * RX_BUFFERS_PER_QUEUE; + num_rxb = num_queues * mpc->rx_queue_size; WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n"); mpc->rxbufs_pre = kmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL); @@ -632,30 +661,16 @@ static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) mpc->rxbpre_total = 0; for (i = 0; i < num_rxb; i++) { - if (mpc->rxbpre_alloc_size > PAGE_SIZE) { - va = netdev_alloc_frag(mpc->rxbpre_alloc_size); - if (!va) - goto error; - - page = virt_to_head_page(va); - /* Check if the frag falls back to single page */ - if (compound_order(page) < - get_order(mpc->rxbpre_alloc_size)) { - put_page(page); - goto error; - } - } else { - page = dev_alloc_page(); - if (!page) - goto error; + page = dev_alloc_pages(get_order(mpc->rxbpre_alloc_size)); + if (!page) + goto error; - va = page_to_virt(page); - } + va = page_to_virt(page); da = dma_map_single(dev, va + mpc->rxbpre_headroom, mpc->rxbpre_datasize, DMA_FROM_DEVICE); if (dma_mapping_error(dev, da)) { - put_page(virt_to_head_page(va)); + put_page(page); goto error; } @@ -667,6 +682,7 @@ static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) return 0; error: + netdev_err(mpc->ndev, "Failed to pre-allocate RX buffers for %d queues\n", num_queues); mana_pre_dealloc_rxbufs(mpc); return -ENOMEM; } @@ -678,7 +694,7 @@ static int mana_change_mtu(struct net_device *ndev, int new_mtu) int err; /* Pre-allocate buffers to prevent failure in mana_attach later */ - err = mana_pre_alloc_rxbufs(mpc, new_mtu); + err = mana_pre_alloc_rxbufs(mpc, new_mtu, mpc->num_queues); if (err) { netdev_err(ndev, "Insufficient memory for new MTU\n"); return err; @@ -690,12 +706,12 @@ static int mana_change_mtu(struct net_device *ndev, int new_mtu) goto out; } - ndev->mtu = new_mtu; + WRITE_ONCE(ndev->mtu, new_mtu); err = mana_attach(ndev); if (err) { netdev_err(ndev, "mana_attach failed: %d\n", err); - ndev->mtu = old_mtu; + WRITE_ONCE(ndev->mtu, old_mtu); } out: @@ -717,10 +733,23 @@ static const struct net_device_ops mana_devops = { static void mana_cleanup_port_context(struct mana_port_context *apc) { + /* + * make sure subsequent cleanup attempts don't end up removing already + * cleaned dentry pointer + */ + debugfs_remove(apc->mana_port_debugfs); + apc->mana_port_debugfs = NULL; kfree(apc->rxqs); apc->rxqs = NULL; } +static void mana_cleanup_indir_table(struct mana_port_context *apc) +{ + apc->indir_table_sz = 0; + kfree(apc->indir_table); + kfree(apc->rxobj_table); +} + static int mana_init_port_context(struct mana_port_context *apc) { apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *), @@ -892,7 +921,7 @@ static void mana_pf_deregister_filter(struct mana_port_context *apc) static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, u32 proto_minor_ver, u32 proto_micro_ver, - u16 *max_num_vports) + u16 *max_num_vports, u8 *bm_hostmode) { struct gdma_context *gc = ac->gdma_dev->gdma_context; struct mana_query_device_cfg_resp resp = {}; @@ -903,7 +932,7 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG, sizeof(req), sizeof(resp)); - req.hdr.resp.msg_version = GDMA_MESSAGE_V2; + req.hdr.resp.msg_version = GDMA_MESSAGE_V3; req.proto_major_ver = proto_major_ver; req.proto_minor_ver = proto_minor_ver; @@ -927,11 +956,18 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, *max_num_vports = resp.max_num_vports; - if (resp.hdr.response.msg_version == GDMA_MESSAGE_V2) + if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2) gc->adapter_mtu = resp.adapter_mtu; else gc->adapter_mtu = ETH_FRAME_LEN; + if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V3) + *bm_hostmode = resp.bm_hostmode; + else + *bm_hostmode = 0; + + debugfs_create_u16("adapter-MTU", 0400, gc->mana_pci_debugfs, &gc->adapter_mtu); + return 0; } @@ -962,7 +998,16 @@ static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index, *max_sq = resp.max_num_sq; *max_rq = resp.max_num_rq; - *num_indir_entry = resp.num_indirection_ent; + if (resp.num_indirection_ent > 0 && + resp.num_indirection_ent <= MANA_INDIRECT_TABLE_MAX_SIZE && + is_power_of_2(resp.num_indirection_ent)) { + *num_indir_entry = resp.num_indirection_ent; + } else { + netdev_warn(apc->ndev, + "Setting indirection table size to default %d for vPort %d\n", + MANA_INDIRECT_TABLE_DEF_SIZE, apc->port_idx); + *num_indir_entry = MANA_INDIRECT_TABLE_DEF_SIZE; + } apc->port_handle = resp.vport; ether_addr_copy(apc->mac_addr, resp.mac_addr); @@ -977,7 +1022,7 @@ void mana_uncfg_vport(struct mana_port_context *apc) WARN_ON(apc->vport_use_count < 0); mutex_unlock(&apc->vport_mutex); } -EXPORT_SYMBOL_NS(mana_uncfg_vport, NET_MANA); +EXPORT_SYMBOL_NS(mana_uncfg_vport, "NET_MANA"); int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, u32 doorbell_pg_id) @@ -1047,22 +1092,20 @@ out: return err; } -EXPORT_SYMBOL_NS(mana_cfg_vport, NET_MANA); +EXPORT_SYMBOL_NS(mana_cfg_vport, "NET_MANA"); static int mana_cfg_vport_steering(struct mana_port_context *apc, enum TRI_STATE rx, bool update_default_rxobj, bool update_key, bool update_tab) { - u16 num_entries = MANA_INDIRECT_TABLE_SIZE; struct mana_cfg_rx_steer_req_v2 *req; struct mana_cfg_rx_steer_resp resp = {}; struct net_device *ndev = apc->ndev; - mana_handle_t *req_indir_tab; u32 req_buf_size; int err; - req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries; + req_buf_size = struct_size(req, indir_tab, apc->indir_table_sz); req = kzalloc(req_buf_size, GFP_KERNEL); if (!req) return -ENOMEM; @@ -1073,8 +1116,9 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, req->hdr.req.msg_version = GDMA_MESSAGE_V2; req->vport = apc->port_handle; - req->num_indir_entries = num_entries; - req->indir_tab_offset = sizeof(*req); + req->num_indir_entries = apc->indir_table_sz; + req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2, + indir_tab); req->rx_enable = rx; req->rss_enable = apc->rss_state; req->update_default_rxobj = update_default_rxobj; @@ -1086,11 +1130,9 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, if (update_key) memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE); - if (update_tab) { - req_indir_tab = (mana_handle_t *)(req + 1); - memcpy(req_indir_tab, apc->rxobj_table, - req->num_indir_entries * sizeof(mana_handle_t)); - } + if (update_tab) + memcpy(req->indir_tab, apc->rxobj_table, + flex_array_size(req, indir_tab, req->num_indir_entries)); err = mana_send_request(apc->ac, req, req_buf_size, &resp, sizeof(resp)); @@ -1113,7 +1155,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, } netdev_info(ndev, "Configured steering vPort %llu entries %u\n", - apc->port_handle, num_entries); + apc->port_handle, apc->indir_table_sz); out: kfree(req); return err; @@ -1172,7 +1214,7 @@ int mana_create_wq_obj(struct mana_port_context *apc, out: return err; } -EXPORT_SYMBOL_NS(mana_create_wq_obj, NET_MANA); +EXPORT_SYMBOL_NS(mana_create_wq_obj, "NET_MANA"); void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, mana_handle_t wq_obj) @@ -1200,7 +1242,7 @@ void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err, resp.hdr.status); } -EXPORT_SYMBOL_NS(mana_destroy_wq_obj, NET_MANA); +EXPORT_SYMBOL_NS(mana_destroy_wq_obj, "NET_MANA"); static void mana_destroy_eq(struct mana_context *ac) { @@ -1211,6 +1253,9 @@ static void mana_destroy_eq(struct mana_context *ac) if (!ac->eqs) return; + debugfs_remove_recursive(ac->mana_eqs_debugfs); + ac->mana_eqs_debugfs = NULL; + for (i = 0; i < gc->max_num_queues; i++) { eq = ac->eqs[i].eq; if (!eq) @@ -1223,6 +1268,18 @@ static void mana_destroy_eq(struct mana_context *ac) ac->eqs = NULL; } +static void mana_create_eq_debugfs(struct mana_context *ac, int i) +{ + struct mana_eq eq = ac->eqs[i]; + char eqnum[32]; + + sprintf(eqnum, "eq%d", i); + eq.mana_eq_debugfs = debugfs_create_dir(eqnum, ac->mana_eqs_debugfs); + debugfs_create_u32("head", 0400, eq.mana_eq_debugfs, &eq.eq->head); + debugfs_create_u32("tail", 0400, eq.mana_eq_debugfs, &eq.eq->tail); + debugfs_create_file("eq_dump", 0400, eq.mana_eq_debugfs, eq.eq, &mana_dbg_q_fops); +} + static int mana_create_eq(struct mana_context *ac) { struct gdma_dev *gd = ac->gdma_dev; @@ -1243,11 +1300,16 @@ static int mana_create_eq(struct mana_context *ac) spec.eq.context = ac->eqs; spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE; + ac->mana_eqs_debugfs = debugfs_create_dir("EQs", gc->mana_pci_debugfs); + for (i = 0; i < gc->max_num_queues; i++) { spec.eq.msix_index = (i + 1) % gc->num_msix_usable; err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq); - if (err) + if (err) { + dev_err(gc->dev, "Failed to create EQ %d : %d\n", i, err); goto out; + } + mana_create_eq_debugfs(ac, i); } return 0; @@ -1488,8 +1550,12 @@ static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va, return NULL; if (xdp->data_hard_start) { + u32 metasize = xdp->data - xdp->data_meta; + skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); return skb; } @@ -1601,7 +1667,7 @@ drop: } static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, - dma_addr_t *da, bool *from_pool, bool is_napi) + dma_addr_t *da, bool *from_pool) { struct page *page; void *va; @@ -1612,21 +1678,6 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, if (rxq->xdp_save_va) { va = rxq->xdp_save_va; rxq->xdp_save_va = NULL; - } else if (rxq->alloc_size > PAGE_SIZE) { - if (is_napi) - va = napi_alloc_frag(rxq->alloc_size); - else - va = netdev_alloc_frag(rxq->alloc_size); - - if (!va) - return NULL; - - page = virt_to_head_page(va); - /* Check if the frag falls back to single page */ - if (compound_order(page) < get_order(rxq->alloc_size)) { - put_page(page); - return NULL; - } } else { page = page_pool_dev_alloc_pages(rxq->page_pool); if (!page) @@ -1659,7 +1710,7 @@ static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq, dma_addr_t da; void *va; - va = mana_get_rxfrag(rxq, dev, &da, &from_pool, true); + va = mana_get_rxfrag(rxq, dev, &da, &from_pool); if (!va) return; @@ -1775,7 +1826,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq) static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) { struct mana_cq *cq = context; - u8 arm_bit; int w; WARN_ON_ONCE(cq->gdma_cq != gdma_queue); @@ -1786,16 +1836,23 @@ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) mana_poll_tx_cq(cq); w = cq->work_done; - - if (w < cq->budget && - napi_complete_done(&cq->napi, w)) { - arm_bit = SET_ARM_BIT; - } else { - arm_bit = 0; + cq->work_done_since_doorbell += w; + + if (w < cq->budget) { + mana_gd_ring_cq(gdma_queue, SET_ARM_BIT); + cq->work_done_since_doorbell = 0; + napi_complete_done(&cq->napi, w); + } else if (cq->work_done_since_doorbell > + cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) { + /* MANA hardware requires at least one doorbell ring every 8 + * wraparounds of CQ even if there is no need to arm the CQ. + * This driver rings the doorbell as soon as we have exceeded + * 4 wraparounds. + */ + mana_gd_ring_cq(gdma_queue, 0); + cq->work_done_since_doorbell = 0; } - mana_gd_ring_cq(gdma_queue, arm_bit); - return w; } @@ -1848,11 +1905,16 @@ static void mana_destroy_txq(struct mana_port_context *apc) return; for (i = 0; i < apc->num_queues; i++) { - napi = &apc->tx_qp[i].tx_cq.napi; - napi_synchronize(napi); - napi_disable(napi); - netif_napi_del(napi); + debugfs_remove_recursive(apc->tx_qp[i].mana_tx_debugfs); + apc->tx_qp[i].mana_tx_debugfs = NULL; + napi = &apc->tx_qp[i].tx_cq.napi; + if (apc->tx_qp[i].txq.napi_initialized) { + napi_synchronize(napi); + napi_disable(napi); + netif_napi_del(napi); + apc->tx_qp[i].txq.napi_initialized = false; + } mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq); @@ -1864,6 +1926,31 @@ static void mana_destroy_txq(struct mana_port_context *apc) apc->tx_qp = NULL; } +static void mana_create_txq_debugfs(struct mana_port_context *apc, int idx) +{ + struct mana_tx_qp *tx_qp = &apc->tx_qp[idx]; + char qnum[32]; + + sprintf(qnum, "TX-%d", idx); + tx_qp->mana_tx_debugfs = debugfs_create_dir(qnum, apc->mana_port_debugfs); + debugfs_create_u32("sq_head", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->txq.gdma_sq->head); + debugfs_create_u32("sq_tail", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->txq.gdma_sq->tail); + debugfs_create_u32("sq_pend_skb_qlen", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->txq.pending_skbs.qlen); + debugfs_create_u32("cq_head", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->tx_cq.gdma_cq->head); + debugfs_create_u32("cq_tail", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->tx_cq.gdma_cq->tail); + debugfs_create_u32("cq_budget", 0400, tx_qp->mana_tx_debugfs, + &tx_qp->tx_cq.budget); + debugfs_create_file("txq_dump", 0400, tx_qp->mana_tx_debugfs, + tx_qp->txq.gdma_sq, &mana_dbg_q_fops); + debugfs_create_file("cq_dump", 0400, tx_qp->mana_tx_debugfs, + tx_qp->tx_cq.gdma_cq, &mana_dbg_q_fops); +} + static int mana_create_txq(struct mana_port_context *apc, struct net_device *net) { @@ -1886,15 +1973,17 @@ static int mana_create_txq(struct mana_port_context *apc, return -ENOMEM; /* The minimum size of the WQE is 32 bytes, hence - * MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs + * apc->tx_queue_size represents the maximum number of WQEs * the SQ can store. This value is then used to size other queues * to prevent overflow. + * Also note that the txq_size is always going to be MANA_PAGE_ALIGNED, + * as min val of apc->tx_queue_size is 128 and that would make + * txq_size 128*32 = 4096 and the other higher values of apc->tx_queue_size + * are always power of two */ - txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32; - BUILD_BUG_ON(!PAGE_ALIGNED(txq_size)); + txq_size = apc->tx_queue_size * 32; - cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE; - cq_size = PAGE_ALIGN(cq_size); + cq_size = apc->tx_queue_size * COMP_ENTRY_SIZE; gc = gd->gdma_context; @@ -1908,6 +1997,7 @@ static int mana_create_txq(struct mana_port_context *apc, txq->ndev = net; txq->net_txq = netdev_get_tx_queue(net, i); txq->vp_offset = apc->tx_vp_offset; + txq->napi_initialized = false; skb_queue_head_init(&txq->pending_skbs); memset(&spec, 0, sizeof(spec)); @@ -1972,20 +2062,25 @@ static int mana_create_txq(struct mana_port_context *apc, gc->cq_table[cq->gdma_id] = cq->gdma_cq; + mana_create_txq_debugfs(apc, i); + netif_napi_add_tx(net, &cq->napi, mana_poll); napi_enable(&cq->napi); + txq->napi_initialized = true; mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); } return 0; out: + netdev_err(net, "Failed to create %d TX queues, %d\n", + apc->num_queues, err); mana_destroy_txq(apc); return err; } static void mana_destroy_rxq(struct mana_port_context *apc, - struct mana_rxq *rxq, bool validate_state) + struct mana_rxq *rxq, bool napi_initialized) { struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; @@ -1998,17 +2093,20 @@ static void mana_destroy_rxq(struct mana_port_context *apc, if (!rxq) return; + debugfs_remove_recursive(rxq->mana_rx_debugfs); + rxq->mana_rx_debugfs = NULL; + napi = &rxq->rx_cq.napi; - if (validate_state) + if (napi_initialized) { napi_synchronize(napi); - napi_disable(napi); + napi_disable(napi); + netif_napi_del(napi); + } xdp_rxq_info_unreg(&rxq->xdp_rxq); - netif_napi_del(napi); - mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); mana_deinit_cq(apc, &rxq->rx_cq); @@ -2054,7 +2152,7 @@ static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key, if (mpc->rxbufs_pre) va = mana_get_rxbuf_pre(rxq, &da); else - va = mana_get_rxfrag(rxq, dev, &da, &from_pool, false); + va = mana_get_rxfrag(rxq, dev, &da, &from_pool); if (!va) return -ENOMEM; @@ -2132,13 +2230,15 @@ static int mana_push_wqe(struct mana_rxq *rxq) static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc) { + struct mana_port_context *mpc = netdev_priv(rxq->ndev); struct page_pool_params pprm = {}; int ret; - pprm.pool_size = RX_BUFFERS_PER_QUEUE; + pprm.pool_size = mpc->rx_queue_size; pprm.nid = gc->numa_node; pprm.napi = &rxq->rx_cq.napi; pprm.netdev = rxq->ndev; + pprm.order = get_order(rxq->alloc_size); rxq->page_pool = page_pool_create(&pprm); @@ -2167,13 +2267,13 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, gc = gd->gdma_context; - rxq = kzalloc(struct_size(rxq, rx_oobs, RX_BUFFERS_PER_QUEUE), + rxq = kzalloc(struct_size(rxq, rx_oobs, apc->rx_queue_size), GFP_KERNEL); if (!rxq) return NULL; rxq->ndev = ndev; - rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE; + rxq->num_rx_buf = apc->rx_queue_size; rxq->rxq_idx = rxq_idx; rxq->rxobj = INVALID_MANA_HANDLE; @@ -2191,8 +2291,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, if (err) goto out; - rq_size = PAGE_ALIGN(rq_size); - cq_size = PAGE_ALIGN(cq_size); + rq_size = MANA_PAGE_ALIGN(rq_size); + cq_size = MANA_PAGE_ALIGN(cq_size); /* Create RQ */ memset(&spec, 0, sizeof(spec)); @@ -2278,6 +2378,28 @@ out: return NULL; } +static void mana_create_rxq_debugfs(struct mana_port_context *apc, int idx) +{ + struct mana_rxq *rxq; + char qnum[32]; + + rxq = apc->rxqs[idx]; + + sprintf(qnum, "RX-%d", idx); + rxq->mana_rx_debugfs = debugfs_create_dir(qnum, apc->mana_port_debugfs); + debugfs_create_u32("rq_head", 0400, rxq->mana_rx_debugfs, &rxq->gdma_rq->head); + debugfs_create_u32("rq_tail", 0400, rxq->mana_rx_debugfs, &rxq->gdma_rq->tail); + debugfs_create_u32("rq_nbuf", 0400, rxq->mana_rx_debugfs, &rxq->num_rx_buf); + debugfs_create_u32("cq_head", 0400, rxq->mana_rx_debugfs, + &rxq->rx_cq.gdma_cq->head); + debugfs_create_u32("cq_tail", 0400, rxq->mana_rx_debugfs, + &rxq->rx_cq.gdma_cq->tail); + debugfs_create_u32("cq_budget", 0400, rxq->mana_rx_debugfs, &rxq->rx_cq.budget); + debugfs_create_file("rxq_dump", 0400, rxq->mana_rx_debugfs, rxq->gdma_rq, &mana_dbg_q_fops); + debugfs_create_file("cq_dump", 0400, rxq->mana_rx_debugfs, rxq->rx_cq.gdma_cq, + &mana_dbg_q_fops); +} + static int mana_add_rx_queues(struct mana_port_context *apc, struct net_device *ndev) { @@ -2290,12 +2412,15 @@ static int mana_add_rx_queues(struct mana_port_context *apc, rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev); if (!rxq) { err = -ENOMEM; + netdev_err(ndev, "Failed to create rxq %d : %d\n", i, err); goto out; } u64_stats_init(&rxq->stats.syncp); apc->rxqs[i] = rxq; + + mana_create_rxq_debugfs(apc, i); } apc->default_rxobj = apc->rxqs[0]->rxobj; @@ -2321,7 +2446,7 @@ static void mana_destroy_vport(struct mana_port_context *apc) mana_destroy_txq(apc); mana_uncfg_vport(apc); - if (gd->gdma_context->is_pf) + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) mana_pf_deregister_hw_vport(apc); } @@ -2333,7 +2458,7 @@ static int mana_create_vport(struct mana_port_context *apc, apc->default_rxobj = INVALID_MANA_HANDLE; - if (gd->gdma_context->is_pf) { + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) { err = mana_pf_register_hw_vport(apc); if (err) return err; @@ -2346,11 +2471,33 @@ static int mana_create_vport(struct mana_port_context *apc, return mana_create_txq(apc, net); } +static int mana_rss_table_alloc(struct mana_port_context *apc) +{ + if (!apc->indir_table_sz) { + netdev_err(apc->ndev, + "Indirection table size not set for vPort %d\n", + apc->port_idx); + return -EINVAL; + } + + apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL); + if (!apc->indir_table) + return -ENOMEM; + + apc->rxobj_table = kcalloc(apc->indir_table_sz, sizeof(mana_handle_t), GFP_KERNEL); + if (!apc->rxobj_table) { + kfree(apc->indir_table); + return -ENOMEM; + } + + return 0; +} + static void mana_rss_table_init(struct mana_port_context *apc) { int i; - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + for (i = 0; i < apc->indir_table_sz; i++) apc->indir_table[i] = ethtool_rxfh_indir_default(i, apc->num_queues); } @@ -2363,7 +2510,7 @@ int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, int i; if (update_tab) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + for (i = 0; i < apc->indir_table_sz; i++) { queue_idx = apc->indir_table[i]; apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj; } @@ -2387,6 +2534,7 @@ void mana_query_gf_stats(struct mana_port_context *apc) mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_GF_STAT, sizeof(req), sizeof(resp)); + req.hdr.resp.msg_version = GDMA_MESSAGE_V2; req.req_stats = STATISTICS_FLAGS_RX_DISCARDS_NO_WQE | STATISTICS_FLAGS_RX_ERRORS_VPORT_DISABLED | STATISTICS_FLAGS_HC_RX_BYTES | @@ -2466,17 +2614,21 @@ void mana_query_gf_stats(struct mana_port_context *apc) static int mana_init_port(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); + struct gdma_dev *gd = apc->ac->gdma_dev; u32 max_txq, max_rxq, max_queues; int port_idx = apc->port_idx; - u32 num_indirect_entries; + struct gdma_context *gc; + char vport[32]; int err; err = mana_init_port_context(apc); if (err) return err; + gc = gd->gdma_context; + err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq, - &num_indirect_entries); + &apc->indir_table_sz); if (err) { netdev_err(ndev, "Failed to query info for vPort %d\n", port_idx); @@ -2491,12 +2643,12 @@ static int mana_init_port(struct net_device *ndev) apc->num_queues = apc->max_queues; eth_hw_addr_set(ndev, apc->mac_addr); - + sprintf(vport, "vport%d", port_idx); + apc->mana_port_debugfs = debugfs_create_dir(vport, gc->mana_pci_debugfs); return 0; reset_apc: - kfree(apc->rxqs); - apc->rxqs = NULL; + mana_cleanup_port_context(apc); return err; } @@ -2507,12 +2659,18 @@ int mana_alloc_queues(struct net_device *ndev) int err; err = mana_create_vport(apc, ndev); - if (err) + if (err) { + netdev_err(ndev, "Failed to create vPort %u : %d\n", apc->port_idx, err); return err; + } err = netif_set_real_num_tx_queues(ndev, apc->num_queues); - if (err) + if (err) { + netdev_err(ndev, + "netif_set_real_num_tx_queues () failed for ndev with num_queues %u : %d\n", + apc->num_queues, err); goto destroy_vport; + } err = mana_add_rx_queues(apc, ndev); if (err) @@ -2521,16 +2679,22 @@ int mana_alloc_queues(struct net_device *ndev) apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE; err = netif_set_real_num_rx_queues(ndev, apc->num_queues); - if (err) + if (err) { + netdev_err(ndev, + "netif_set_real_num_rx_queues () failed for ndev with num_queues %u : %d\n", + apc->num_queues, err); goto destroy_vport; + } mana_rss_table_init(apc); err = mana_config_rss(apc, TRI_STATE_TRUE, true, true); - if (err) + if (err) { + netdev_err(ndev, "Failed to configure RSS table: %d\n", err); goto destroy_vport; + } - if (gd->gdma_context->is_pf) { + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) { err = mana_pf_register_filter(apc); if (err) goto destroy_vport; @@ -2592,7 +2756,7 @@ static int mana_dealloc_queues(struct net_device *ndev) mana_chn_setxdp(apc, NULL); - if (gd->gdma_context->is_pf) + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) mana_pf_deregister_filter(apc); /* No packet can be transmitted now since apc->port_is_up is false. @@ -2669,8 +2833,10 @@ int mana_detach(struct net_device *ndev, bool from_close) if (apc->port_st_save) { err = mana_dealloc_queues(ndev); - if (err) + if (err) { + netdev_err(ndev, "%s failed to deallocate queues: %d\n", __func__, err); return err; + } } if (!from_close) { @@ -2701,6 +2867,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, apc->ndev = ndev; apc->max_queues = gc->max_num_queues; apc->num_queues = gc->max_num_queues; + apc->tx_queue_size = DEF_TX_BUFFERS_PER_QUEUE; + apc->rx_queue_size = DEF_RX_BUFFERS_PER_QUEUE; apc->port_handle = INVALID_MANA_HANDLE; apc->pf_filter_handle = INVALID_MANA_HANDLE; apc->port_idx = port_idx; @@ -2717,6 +2885,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, ndev->dev_port = port_idx; SET_NETDEV_DEV(ndev, gc->dev); + netif_set_tso_max_size(ndev, GSO_MAX_SIZE); + netif_carrier_off(ndev); netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE); @@ -2725,6 +2895,10 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, if (err) goto free_net; + err = mana_rss_table_alloc(apc); + if (err) + goto reset_apc; + netdev_lockdep_set_classes(ndev); ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; @@ -2741,14 +2915,15 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, err = register_netdev(ndev); if (err) { netdev_err(ndev, "Unable to register netdev.\n"); - goto reset_apc; + goto free_indir; } return 0; +free_indir: + mana_cleanup_indir_table(apc); reset_apc: - kfree(apc->rxqs); - apc->rxqs = NULL; + mana_cleanup_port_context(apc); free_net: *ndev_storage = NULL; netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err); @@ -2775,7 +2950,7 @@ static void remove_adev(struct gdma_dev *gd) gd->adev = NULL; } -static int add_adev(struct gdma_dev *gd) +static int add_adev(struct gdma_dev *gd, const char *name) { struct auxiliary_device *adev; struct mana_adev *madev; @@ -2791,7 +2966,7 @@ static int add_adev(struct gdma_dev *gd) goto idx_fail; adev->id = ret; - adev->name = "rdma"; + adev->name = name; adev->dev.parent = gd->gdma_context->dev; adev->dev.release = adev_release; madev->mdev = gd; @@ -2800,11 +2975,15 @@ static int add_adev(struct gdma_dev *gd) if (ret) goto init_fail; + /* madev is owned by the auxiliary device */ + madev = NULL; ret = auxiliary_device_add(adev); if (ret) goto add_fail; gd->adev = adev; + dev_dbg(gd->gdma_context->dev, + "Auxiliary device added successfully\n"); return 0; add_fail: @@ -2819,11 +2998,76 @@ idx_fail: return ret; } +static void mana_rdma_service_handle(struct work_struct *work) +{ + struct mana_service_work *serv_work = + container_of(work, struct mana_service_work, work); + struct gdma_dev *gd = serv_work->gdma_dev; + struct device *dev = gd->gdma_context->dev; + int ret; + + if (READ_ONCE(gd->rdma_teardown)) + goto out; + + switch (serv_work->event) { + case GDMA_SERVICE_TYPE_RDMA_SUSPEND: + if (!gd->adev || gd->is_suspended) + break; + + remove_adev(gd); + gd->is_suspended = true; + break; + + case GDMA_SERVICE_TYPE_RDMA_RESUME: + if (!gd->is_suspended) + break; + + ret = add_adev(gd, "rdma"); + if (ret) + dev_err(dev, "Failed to add adev on resume: %d\n", ret); + else + gd->is_suspended = false; + break; + + default: + dev_warn(dev, "unknown adev service event %u\n", + serv_work->event); + break; + } + +out: + kfree(serv_work); +} + +int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event) +{ + struct gdma_dev *gd = &gc->mana_ib; + struct mana_service_work *serv_work; + + if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) { + /* RDMA device is not detected on pci */ + return 0; + } + + serv_work = kzalloc(sizeof(*serv_work), GFP_ATOMIC); + if (!serv_work) + return -ENOMEM; + + serv_work->event = event; + serv_work->gdma_dev = gd; + + INIT_WORK(&serv_work->work, mana_rdma_service_handle); + queue_work(gc->service_wq, &serv_work->work); + + return 0; +} + int mana_probe(struct gdma_dev *gd, bool resuming) { struct gdma_context *gc = gd->gdma_context; struct mana_context *ac = gd->driver_data; struct device *dev = gc->dev; + u8 bm_hostmode = 0; u16 num_ports = 0; int err; int i; @@ -2846,14 +3090,18 @@ int mana_probe(struct gdma_dev *gd, bool resuming) } err = mana_create_eq(ac); - if (err) + if (err) { + dev_err(dev, "Failed to create EQs: %d\n", err); goto out; + } err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, - MANA_MICRO_VERSION, &num_ports); + MANA_MICRO_VERSION, &num_ports, &bm_hostmode); if (err) goto out; + ac->bm_hostmode = bm_hostmode; + if (!resuming) { ac->num_ports = num_ports; } else { @@ -2874,23 +3122,43 @@ int mana_probe(struct gdma_dev *gd, bool resuming) if (!resuming) { for (i = 0; i < ac->num_ports; i++) { err = mana_probe_port(ac, i, &ac->ports[i]); - if (err) + /* we log the port for which the probe failed and stop + * probes for subsequent ports. + * Note that we keep running ports, for which the probes + * were successful, unless add_adev fails too + */ + if (err) { + dev_err(dev, "Probe Failed for port %d\n", i); break; + } } } else { for (i = 0; i < ac->num_ports; i++) { rtnl_lock(); err = mana_attach(ac->ports[i]); rtnl_unlock(); - if (err) + /* we log the port for which the attach failed and stop + * attach for subsequent ports + * Note that we keep running ports, for which the attach + * were successful, unless add_adev fails too + */ + if (err) { + dev_err(dev, "Attach Failed for port %d\n", i); break; + } } } - err = add_adev(gd); + err = add_adev(gd, "eth"); out: - if (err) + if (err) { mana_remove(gd, false); + } else { + dev_dbg(dev, "gd=%p, id=%u, num_ports=%d, type=%u, instance=%u\n", + gd, gd->dev_id.as_uint32, ac->num_ports, + gd->dev_id.type, gd->dev_id.instance); + dev_dbg(dev, "%s succeeded\n", __func__); + } return err; } @@ -2899,6 +3167,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) { struct gdma_context *gc = gd->gdma_context; struct mana_context *ac = gd->driver_data; + struct mana_port_context *apc; struct device *dev = gc->dev; struct net_device *ndev; int err; @@ -2910,6 +3179,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) for (i = 0; i < ac->num_ports; i++) { ndev = ac->ports[i]; + apc = netdev_priv(ndev); if (!ndev) { if (i == 0) dev_err(dev, "No net device to remove\n"); @@ -2933,6 +3203,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) } unregister_netdevice(ndev); + mana_cleanup_indir_table(apc); rtnl_unlock(); @@ -2949,4 +3220,68 @@ out: gd->driver_data = NULL; gd->gdma_context = NULL; kfree(ac); + dev_dbg(dev, "%s succeeded\n", __func__); +} + +int mana_rdma_probe(struct gdma_dev *gd) +{ + int err = 0; + + if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) { + /* RDMA device is not detected on pci */ + return err; + } + + err = mana_gd_register_device(gd); + if (err) + return err; + + err = add_adev(gd, "rdma"); + if (err) + mana_gd_deregister_device(gd); + + return err; +} + +void mana_rdma_remove(struct gdma_dev *gd) +{ + struct gdma_context *gc = gd->gdma_context; + + if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) { + /* RDMA device is not detected on pci */ + return; + } + + WRITE_ONCE(gd->rdma_teardown, true); + flush_workqueue(gc->service_wq); + + if (gd->adev) + remove_adev(gd); + + mana_gd_deregister_device(gd); +} + +struct net_device *mana_get_primary_netdev(struct mana_context *ac, + u32 port_index, + netdevice_tracker *tracker) +{ + struct net_device *ndev; + + if (port_index >= ac->num_ports) + return NULL; + + rcu_read_lock(); + + /* If mana is used in netvsc, the upper netdevice should be returned. */ + ndev = netdev_master_upper_dev_get_rcu(ac->ports[port_index]); + + /* If there is no upper device, use the parent Ethernet device */ + if (!ndev) + ndev = ac->ports[port_index]; + + netdev_hold(ndev, tracker, GFP_ATOMIC); + rcu_read_unlock(); + + return ndev; } +EXPORT_SYMBOL_NS(mana_get_primary_netdev, "NET_MANA"); |