diff options
Diffstat (limited to 'drivers/net/ethernet/cavium/thunder/nicvf_main.c')
| -rw-r--r-- | drivers/net/ethernet/cavium/thunder/nicvf_main.c | 533 |
1 files changed, 441 insertions, 92 deletions
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 49b80da51ba7..0b6e30a8feb0 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2015 Cavium, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. */ #include <linux/module.h> @@ -20,15 +17,25 @@ #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <linux/filter.h> +#include <linux/net_tstamp.h> +#include <linux/workqueue.h> #include "nic_reg.h" #include "nic.h" #include "nicvf_queues.h" #include "thunder_bgx.h" +#include "../common/cavium_ptp.h" -#define DRV_NAME "thunder-nicvf" +#define DRV_NAME "nicvf" #define DRV_VERSION "1.0" +/* NOTE: Packets bigger than 1530 are split across multiple pages and XDP needs + * the buffer to be contiguous. Allow XDP to be set up only if we don't exceed + * this value, keeping headroom for the 14 byte Ethernet header and two + * VLAN tags (for QinQ) + */ +#define MAX_XDP_MTU (1530 - ETH_HLEN - VLAN_HLEN * 2) + /* Supported devices */ static const struct pci_device_id nicvf_id_table[] = { { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, @@ -61,7 +68,7 @@ module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "Debug message level bitmap"); static int cpi_alg = CPI_ALG_NONE; -module_param(cpi_alg, int, S_IRUGO); +module_param(cpi_alg, int, 0444); MODULE_PARM_DESC(cpi_alg, "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)"); @@ -119,34 +126,39 @@ static void nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx) int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx) { - int timeout = NIC_MBOX_MSG_TIMEOUT; - int sleep = 10; + unsigned long timeout; + int ret = 0; + + mutex_lock(&nic->rx_mode_mtx); nic->pf_acked = false; nic->pf_nacked = false; nicvf_write_to_mbx(nic, mbx); + timeout = jiffies + msecs_to_jiffies(NIC_MBOX_MSG_TIMEOUT); /* Wait for previous message to be acked, timeout 2sec */ while (!nic->pf_acked) { if (nic->pf_nacked) { netdev_err(nic->netdev, "PF NACK to mbox msg 0x%02x from VF%d\n", (mbx->msg.msg & 0xFF), nic->vf_id); - return -EINVAL; + ret = -EINVAL; + break; } - msleep(sleep); + usleep_range(8000, 10000); if (nic->pf_acked) break; - timeout -= sleep; - if (!timeout) { + if (time_after(jiffies, timeout)) { netdev_err(nic->netdev, "PF didn't ACK to mbox msg 0x%02x from VF%d\n", (mbx->msg.msg & 0xFF), nic->vf_id); - return -EBUSY; + ret = -EBUSY; + break; } } - return 0; + mutex_unlock(&nic->rx_mode_mtx); + return ret; } /* Checks if VF is able to comminicate with PF @@ -166,6 +178,17 @@ static int nicvf_check_pf_ready(struct nicvf *nic) return 1; } +static void nicvf_send_cfg_done(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + + mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE; + if (nicvf_send_msg_to_pf(nic, &mbx)) { + netdev_err(nic->netdev, + "PF didn't respond to CFG DONE msg\n"); + } +} + static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx) { if (bgx->rx) @@ -198,8 +221,7 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic) nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F; nic->node = mbx.nic_cfg.node_id; if (!nic->set_mac_pending) - ether_addr_copy(nic->netdev->dev_addr, - mbx.nic_cfg.mac_addr); + eth_hw_addr_set(nic->netdev, mbx.nic_cfg.mac_addr); nic->sqs_mode = mbx.nic_cfg.sqs_mode; nic->loopback_supported = mbx.nic_cfg.loopback_supported; nic->link_up = false; @@ -222,21 +244,24 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic) break; case NIC_MBOX_MSG_BGX_LINK_CHANGE: nic->pf_acked = true; - nic->link_up = mbx.link_status.link_up; - nic->duplex = mbx.link_status.duplex; - nic->speed = mbx.link_status.speed; - nic->mac_type = mbx.link_status.mac_type; - if (nic->link_up) { - netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n", - nic->speed, - nic->duplex == DUPLEX_FULL ? - "Full" : "Half"); - netif_carrier_on(nic->netdev); - netif_tx_start_all_queues(nic->netdev); - } else { - netdev_info(nic->netdev, "Link is Down\n"); - netif_carrier_off(nic->netdev); - netif_tx_stop_all_queues(nic->netdev); + if (nic->link_up != mbx.link_status.link_up) { + nic->link_up = mbx.link_status.link_up; + nic->duplex = mbx.link_status.duplex; + nic->speed = mbx.link_status.speed; + nic->mac_type = mbx.link_status.mac_type; + if (nic->link_up) { + netdev_info(nic->netdev, + "Link is Up %d Mbps %s duplex\n", + nic->speed, + nic->duplex == DUPLEX_FULL ? + "Full" : "Half"); + netif_carrier_on(nic->netdev); + netif_tx_start_all_queues(nic->netdev); + } else { + netdev_info(nic->netdev, "Link is Down\n"); + netif_carrier_off(nic->netdev); + netif_tx_stop_all_queues(nic->netdev); + } } break; case NIC_MBOX_MSG_ALLOC_SQS: @@ -502,8 +527,9 @@ static int nicvf_init_resources(struct nicvf *nic) static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, struct cqe_rx_t *cqe_rx, struct snd_queue *sq, - struct sk_buff **skb) + struct rcv_queue *rq, struct sk_buff **skb) { + unsigned char *hard_start, *data; struct xdp_buff xdp; struct page *page; u32 action; @@ -521,18 +547,18 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, cpu_addr = (u64)phys_to_virt(cpu_addr); page = virt_to_page((void *)cpu_addr); - xdp.data_hard_start = page_address(page); - xdp.data = (void *)cpu_addr; - xdp.data_end = xdp.data + len; + xdp_init_buff(&xdp, RCV_FRAG_LEN + XDP_PACKET_HEADROOM, + &rq->xdp_rxq); + hard_start = page_address(page); + data = (unsigned char *)cpu_addr; + xdp_prepare_buff(&xdp, hard_start, data - hard_start, len, false); orig_data = xdp.data; - rcu_read_lock(); action = bpf_prog_run_xdp(prog, &xdp); - rcu_read_unlock(); + len = xdp.data_end - xdp.data; /* Check if XDP program has changed headers */ if (orig_data != xdp.data) { - len = xdp.data_end - xdp.data; offset = orig_data - xdp.data; dma_addr -= offset; } @@ -564,9 +590,11 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len); return true; default: - bpf_warn_invalid_xdp_action(action); + bpf_warn_invalid_xdp_action(nic->netdev, prog, action); + fallthrough; case XDP_ABORTED: trace_xdp_exception(nic->netdev, prog, action); + fallthrough; case XDP_DROP: /* Check if it's a recycled page, if not * unmap the DMA mapping. @@ -586,6 +614,44 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, return false; } +static void nicvf_snd_ptp_handler(struct net_device *netdev, + struct cqe_send_t *cqe_tx) +{ + struct nicvf *nic = netdev_priv(netdev); + struct skb_shared_hwtstamps ts; + u64 ns; + + nic = nic->pnicvf; + + /* Sync for 'ptp_skb' */ + smp_rmb(); + + /* New timestamp request can be queued now */ + atomic_set(&nic->tx_ptp_skbs, 0); + + /* Check for timestamp requested skb */ + if (!nic->ptp_skb) + return; + + /* Check if timestamping is timedout, which is set to 10us */ + if (cqe_tx->send_status == CQ_TX_ERROP_TSTMP_TIMEOUT || + cqe_tx->send_status == CQ_TX_ERROP_TSTMP_CONFLICT) + goto no_tstamp; + + /* Get the timestamp */ + memset(&ts, 0, sizeof(ts)); + ns = cavium_ptp_tstamp2time(nic->ptp_clock, cqe_tx->ptp_timestamp); + ts.hwtstamp = ns_to_ktime(ns); + skb_tstamp_tx(nic->ptp_skb, &ts); + +no_tstamp: + /* Free the original skb */ + dev_kfree_skb_any(nic->ptp_skb); + nic->ptp_skb = NULL; + /* Sync 'ptp_skb' */ + smp_wmb(); +} + static void nicvf_snd_pkt_handler(struct net_device *netdev, struct cqe_send_t *cqe_tx, int budget, int *subdesc_cnt, @@ -642,7 +708,12 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, prefetch(skb); (*tx_pkts)++; *tx_bytes += skb->len; - napi_consume_skb(skb, budget); + /* If timestamp is requested for this skb, don't free it */ + if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS && + !nic->pnicvf->ptp_skb) + nic->pnicvf->ptp_skb = skb; + else + napi_consume_skb(skb, budget); sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL; } else { /* In case of SW TSO on 88xx, only last segment will have @@ -681,9 +752,25 @@ static inline void nicvf_set_rxhash(struct net_device *netdev, skb_set_hash(skb, hash, hash_type); } +static inline void nicvf_set_rxtstamp(struct nicvf *nic, struct sk_buff *skb) +{ + u64 ns; + + if (!nic->ptp_clock || !nic->hw_rx_tstamp) + return; + + /* The first 8 bytes is the timestamp */ + ns = cavium_ptp_tstamp2time(nic->ptp_clock, + be64_to_cpu(*(__be64 *)skb->data)); + skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(ns); + + __skb_pull(skb, 8); +} + static void nicvf_rcv_pkt_handler(struct net_device *netdev, struct napi_struct *napi, - struct cqe_rx_t *cqe_rx, struct snd_queue *sq) + struct cqe_rx_t *cqe_rx, + struct snd_queue *sq, struct rcv_queue *rq) { struct sk_buff *skb = NULL; struct nicvf *nic = netdev_priv(netdev); @@ -709,7 +796,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, /* For XDP, ignore pkts spanning multiple pages */ if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) { /* Packet consumed by XDP */ - if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, &skb)) + if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, rq, &skb)) return; } else { skb = nicvf_get_rcv_skb(snic, cqe_rx, @@ -731,6 +818,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, return; } + nicvf_set_rxtstamp(nic, skb); nicvf_set_rxhash(netdev, cqe_rx, skb); skb_record_rx_queue(skb, rq_idx); @@ -766,6 +854,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, struct cqe_rx_t *cq_desc; struct netdev_queue *txq; struct snd_queue *sq = &qs->sq[cq_idx]; + struct rcv_queue *rq = &qs->rq[cq_idx]; unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx; spin_lock_bh(&cq->lock); @@ -796,7 +885,7 @@ loop: switch (cq_desc->cqe_type) { case CQE_TYPE_RX: - nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq); + nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq, rq); work_done++; break; case CQE_TYPE_SEND: @@ -805,10 +894,12 @@ loop: &tx_pkts, &tx_bytes); tx_done++; break; + case CQE_TYPE_SEND_PTP: + nicvf_snd_ptp_handler(netdev, (void *)cq_desc); + break; case CQE_TYPE_INVALID: case CQE_TYPE_RX_SPLIT: case CQE_TYPE_RX_TCP: - case CQE_TYPE_SEND_PTP: /* Ignore for now */ break; } @@ -891,9 +982,9 @@ static int nicvf_poll(struct napi_struct *napi, int budget) * * As of now only CQ errors are handled */ -static void nicvf_handle_qs_err(unsigned long data) +static void nicvf_handle_qs_err(struct tasklet_struct *t) { - struct nicvf *nic = (struct nicvf *)data; + struct nicvf *nic = from_tasklet(nic, t, qs_err_task); struct queue_set *qs = nic->qs; int qidx; u64 status; @@ -1132,7 +1223,7 @@ static int nicvf_register_misc_interrupt(struct nicvf *nic) if (ret < 0) { netdev_err(nic->netdev, "Req for #%d msix vectors failed\n", nic->num_vec); - return 1; + return ret; } sprintf(nic->irq_name[irq], "%s Mbox", "NICVF"); @@ -1151,7 +1242,7 @@ static int nicvf_register_misc_interrupt(struct nicvf *nic) if (!nicvf_check_pf_ready(nic)) { nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); nicvf_unregister_interrupts(nic); - return 1; + return -EIO; } return 0; @@ -1238,6 +1329,12 @@ int nicvf_stop(struct net_device *netdev) struct nicvf_cq_poll *cq_poll = NULL; union nic_mbx mbx = {}; + /* wait till all queued set_rx_mode tasks completes */ + if (nic->nicvf_rx_mode_wq) { + cancel_delayed_work_sync(&nic->link_change_work); + drain_workqueue(nic->nicvf_rx_mode_wq); + } + mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN; nicvf_send_msg_to_pf(nic, &mbx); @@ -1304,12 +1401,28 @@ int nicvf_stop(struct net_device *netdev) nicvf_free_cq_poll(nic); + /* Free any pending SKB saved to receive timestamp */ + if (nic->ptp_skb) { + dev_kfree_skb_any(nic->ptp_skb); + nic->ptp_skb = NULL; + } + /* Clear multiqset info */ nic->pnicvf = nic; return 0; } +static int nicvf_config_hw_rx_tstamp(struct nicvf *nic, bool enable) +{ + union nic_mbx mbx = {}; + + mbx.ptp.msg = NIC_MBOX_MSG_PTP_CFG; + mbx.ptp.enable = enable; + + return nicvf_send_msg_to_pf(nic, &mbx); +} + static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu) { union nic_mbx mbx = {}; @@ -1321,13 +1434,28 @@ static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu) return nicvf_send_msg_to_pf(nic, &mbx); } +static void nicvf_link_status_check_task(struct work_struct *work_arg) +{ + struct nicvf *nic = container_of(work_arg, + struct nicvf, + link_change_work.work); + union nic_mbx mbx = {}; + mbx.msg.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE; + nicvf_send_msg_to_pf(nic, &mbx); + queue_delayed_work(nic->nicvf_rx_mode_wq, + &nic->link_change_work, 2 * HZ); +} + int nicvf_open(struct net_device *netdev) { int cpu, err, qidx; struct nicvf *nic = netdev_priv(netdev); struct queue_set *qs = nic->qs; struct nicvf_cq_poll *cq_poll = NULL; - union nic_mbx mbx = {}; + + /* wait till all queued set_rx_mode tasks completes if any */ + if (nic->nicvf_rx_mode_wq) + drain_workqueue(nic->nicvf_rx_mode_wq); netif_carrier_off(netdev); @@ -1344,8 +1472,7 @@ int nicvf_open(struct net_device *netdev) } cq_poll->cq_idx = qidx; cq_poll->nicvf = nic; - netif_napi_add(netdev, &cq_poll->napi, nicvf_poll, - NAPI_POLL_WEIGHT); + netif_napi_add(netdev, &cq_poll->napi, nicvf_poll); napi_enable(&cq_poll->napi); nic->napi[qidx] = cq_poll; } @@ -1362,12 +1489,10 @@ int nicvf_open(struct net_device *netdev) } /* Init tasklet for handling Qset err interrupt */ - tasklet_init(&nic->qs_err_task, nicvf_handle_qs_err, - (unsigned long)nic); + tasklet_setup(&nic->qs_err_task, nicvf_handle_qs_err); /* Init RBDR tasklet which will refill RBDR */ - tasklet_init(&nic->rbdr_task, nicvf_rbdr_task, - (unsigned long)nic); + tasklet_setup(&nic->rbdr_task, nicvf_rbdr_task); INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work); /* Configure CPI alorithm */ @@ -1379,6 +1504,12 @@ int nicvf_open(struct net_device *netdev) if (nic->sqs_mode) nicvf_get_primary_vf_struct(nic); + /* Configure PTP timestamp */ + if (nic->ptp_clock) + nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp); + atomic_set(&nic->tx_ptp_skbs, 0); + nic->ptp_skb = NULL; + /* Configure receive side scaling and MTU */ if (!nic->sqs_mode) { nicvf_rss_init(nic); @@ -1417,8 +1548,14 @@ int nicvf_open(struct net_device *netdev) nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx); /* Send VF config done msg to PF */ - mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE; - nicvf_write_to_mbx(nic, &mbx); + nicvf_send_cfg_done(nic); + + if (nic->nicvf_rx_mode_wq) { + INIT_DELAYED_WORK(&nic->link_change_work, + nicvf_link_status_check_task); + queue_delayed_work(nic->nicvf_rx_mode_wq, + &nic->link_change_work, 0); + } return 0; cleanup: @@ -1441,18 +1578,21 @@ napi_del: static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) { struct nicvf *nic = netdev_priv(netdev); - int orig_mtu = netdev->mtu; - - netdev->mtu = new_mtu; - - if (!netif_running(netdev)) - return 0; - if (nicvf_update_hw_max_frs(nic, new_mtu)) { - netdev->mtu = orig_mtu; + /* For now just support only the usual MTU sized frames, + * plus some headroom for VLAN, QinQ. + */ + if (nic->xdp_prog && new_mtu > MAX_XDP_MTU) { + netdev_warn(netdev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", + netdev->mtu); return -EINVAL; } + if (netif_running(netdev) && nicvf_update_hw_max_frs(nic, new_mtu)) + return -EINVAL; + + WRITE_ONCE(netdev->mtu, new_mtu); + return 0; } @@ -1464,7 +1604,7 @@ static int nicvf_set_mac_address(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + eth_hw_addr_set(netdev, addr->sa_data); if (nic->pdev->msix_enabled) { if (nicvf_hw_set_mac_addr(nic, netdev)) @@ -1589,7 +1729,7 @@ static void nicvf_get_stats64(struct net_device *netdev, } -static void nicvf_tx_timeout(struct net_device *dev) +static void nicvf_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct nicvf *nic = netdev_priv(dev); @@ -1689,9 +1829,12 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) bool if_up = netif_running(nic->netdev); struct bpf_prog *old_prog; bool bpf_attached = false; + int ret = 0; - /* For now just support only the usual MTU sized frames */ - if (prog && (dev->mtu > 1500)) { + /* For now just support only the usual MTU sized frames, + * plus some headroom for VLAN, QinQ. + */ + if (prog && dev->mtu > MAX_XDP_MTU) { netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", dev->mtu); return -EOPNOTSUPP; @@ -1721,9 +1864,8 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) if (nic->xdp_prog) { /* Attach BPF program */ - nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1); - if (!IS_ERR(nic->xdp_prog)) - bpf_attached = true; + bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1); + bpf_attached = true; } /* Calculate Tx queues needed for XDP and network stack */ @@ -1735,10 +1877,10 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) netif_trans_update(nic->netdev); } - return 0; + return ret; } -static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp) +static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp) { struct nicvf *nic = netdev_priv(netdev); @@ -1752,15 +1894,186 @@ static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return nicvf_xdp_setup(nic, xdp->prog); - case XDP_QUERY_PROG: - xdp->prog_attached = !!nic->xdp_prog; - xdp->prog_id = nic->xdp_prog ? nic->xdp_prog->aux->id : 0; - return 0; default: return -EINVAL; } } +static int nicvf_hwtstamp_set(struct net_device *netdev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + struct nicvf *nic = netdev_priv(netdev); + + if (!nic->ptp_clock) { + NL_SET_ERR_MSG_MOD(extack, "HW timestamping is not supported"); + return -ENODEV; + } + + switch (config->tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + switch (config->rx_filter) { + case HWTSTAMP_FILTER_NONE: + nic->hw_rx_tstamp = false; + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + nic->hw_rx_tstamp = true; + config->rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + return -ERANGE; + } + + if (netif_running(netdev)) + nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp); + + return 0; +} + +static int nicvf_hwtstamp_get(struct net_device *netdev, + struct kernel_hwtstamp_config *config) +{ + struct nicvf *nic = netdev_priv(netdev); + + if (!nic->ptp_clock) + return -ENODEV; + + /* TX timestamping is technically always on */ + config->tx_type = HWTSTAMP_TX_ON; + config->rx_filter = nic->hw_rx_tstamp ? + HWTSTAMP_FILTER_ALL : + HWTSTAMP_FILTER_NONE; + + return 0; +} + +static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs, + struct nicvf *nic) +{ + union nic_mbx mbx = {}; + int idx; + + /* From the inside of VM code flow we have only 128 bits memory + * available to send message to host's PF, so send all mc addrs + * one by one, starting from flush command in case if kernel + * requests to configure specific MAC filtering + */ + + /* flush DMAC filters and reset RX mode */ + mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST; + if (nicvf_send_msg_to_pf(nic, &mbx) < 0) + goto free_mc; + + if (mode & BGX_XCAST_MCAST_FILTER) { + /* once enabling filtering, we need to signal to PF to add + * its' own LMAC to the filter to accept packets for it. + */ + mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST; + mbx.xcast.mac = 0; + if (nicvf_send_msg_to_pf(nic, &mbx) < 0) + goto free_mc; + } + + /* check if we have any specific MACs to be added to PF DMAC filter */ + if (mc_addrs) { + /* now go through kernel list of MACs and add them one by one */ + for (idx = 0; idx < mc_addrs->count; idx++) { + mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST; + mbx.xcast.mac = mc_addrs->mc[idx]; + if (nicvf_send_msg_to_pf(nic, &mbx) < 0) + goto free_mc; + } + } + + /* and finally set rx mode for PF accordingly */ + mbx.xcast.msg = NIC_MBOX_MSG_SET_XCAST; + mbx.xcast.mode = mode; + + nicvf_send_msg_to_pf(nic, &mbx); +free_mc: + kfree(mc_addrs); +} + +static void nicvf_set_rx_mode_task(struct work_struct *work_arg) +{ + struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work, + work); + struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work); + u8 mode; + struct xcast_addr_list *mc; + + /* Save message data locally to prevent them from + * being overwritten by next ndo_set_rx_mode call(). + */ + spin_lock_bh(&nic->rx_mode_wq_lock); + mode = vf_work->mode; + mc = vf_work->mc; + vf_work->mc = NULL; + spin_unlock_bh(&nic->rx_mode_wq_lock); + + __nicvf_set_rx_mode_task(mode, mc, nic); +} + +static void nicvf_set_rx_mode(struct net_device *netdev) +{ + struct nicvf *nic = netdev_priv(netdev); + struct netdev_hw_addr *ha; + struct xcast_addr_list *mc_list = NULL; + u8 mode = 0; + + if (netdev->flags & IFF_PROMISC) { + mode = BGX_XCAST_BCAST_ACCEPT | BGX_XCAST_MCAST_ACCEPT; + } else { + if (netdev->flags & IFF_BROADCAST) + mode |= BGX_XCAST_BCAST_ACCEPT; + + if (netdev->flags & IFF_ALLMULTI) { + mode |= BGX_XCAST_MCAST_ACCEPT; + } else if (netdev->flags & IFF_MULTICAST) { + mode |= BGX_XCAST_MCAST_FILTER; + /* here we need to copy mc addrs */ + if (netdev_mc_count(netdev)) { + mc_list = kmalloc(struct_size(mc_list, mc, + netdev_mc_count(netdev)), + GFP_ATOMIC); + if (unlikely(!mc_list)) + return; + mc_list->count = 0; + netdev_hw_addr_list_for_each(ha, &netdev->mc) { + mc_list->mc[mc_list->count] = + ether_addr_to_u64(ha->addr); + mc_list->count++; + } + } + } + } + spin_lock(&nic->rx_mode_wq_lock); + kfree(nic->rx_mode_work.mc); + nic->rx_mode_work.mc = mc_list; + nic->rx_mode_work.mode = mode; + queue_work(nic->nicvf_rx_mode_wq, &nic->rx_mode_work.work); + spin_unlock(&nic->rx_mode_wq_lock); +} + static const struct net_device_ops nicvf_netdev_ops = { .ndo_open = nicvf_open, .ndo_stop = nicvf_stop, @@ -1771,7 +2084,10 @@ static const struct net_device_ops nicvf_netdev_ops = { .ndo_tx_timeout = nicvf_tx_timeout, .ndo_fix_features = nicvf_fix_features, .ndo_set_features = nicvf_set_features, - .ndo_xdp = nicvf_xdp, + .ndo_bpf = nicvf_xdp, + .ndo_set_rx_mode = nicvf_set_rx_mode, + .ndo_hwtstamp_get = nicvf_hwtstamp_get, + .ndo_hwtstamp_set = nicvf_hwtstamp_set, }; static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -1781,31 +2097,33 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct nicvf *nic; int err, qcount; u16 sdevid; + struct cavium_ptp *ptp_clock; - err = pci_enable_device(pdev); - if (err) { - dev_err(dev, "Failed to enable PCI device\n"); - return err; + ptp_clock = cavium_ptp_get(); + if (IS_ERR(ptp_clock)) { + if (PTR_ERR(ptp_clock) == -ENODEV) + /* In virtualized environment we proceed without ptp */ + ptp_clock = NULL; + else + return PTR_ERR(ptp_clock); } + err = pci_enable_device(pdev); + if (err) + return dev_err_probe(dev, err, "Failed to enable PCI device\n"); + err = pci_request_regions(pdev, DRV_NAME); if (err) { dev_err(dev, "PCI request regions failed 0x%x\n", err); goto err_disable_device; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (err) { dev_err(dev, "Unable to get usable DMA configuration\n"); goto err_release_regions; } - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); - if (err) { - dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n"); - goto err_release_regions; - } - qcount = netif_get_num_default_rss_queues(); /* Restrict multiqset support only for host bound VFs */ @@ -1830,6 +2148,15 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) nic->pdev = pdev; nic->pnicvf = nic; nic->max_queues = qcount; + /* If no of CPUs are too low, there won't be any queues left + * for XDP_TX, hence double it. + */ + if (!nic->t88) + nic->max_queues *= 2; + nic->ptp_clock = ptp_clock; + + /* Initialize mutex that serializes usage of VF's mailbox */ + mutex_init(&nic->rx_mode_mtx); /* MAP VF's configuration registers */ nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); @@ -1890,16 +2217,32 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->netdev_ops = &nicvf_netdev_ops; netdev->watchdog_timeo = NICVF_TX_TIMEOUT; + if (!pass1_silicon(nic->pdev) && + nic->rx_queues + nic->tx_queues <= nic->max_queues) + netdev->xdp_features = NETDEV_XDP_ACT_BASIC; + /* MTU range: 64 - 9200 */ netdev->min_mtu = NIC_HW_MIN_FRS; netdev->max_mtu = NIC_HW_MAX_FRS; INIT_WORK(&nic->reset_task, nicvf_reset_task); + nic->nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_rx_mode_wq_VF%d", + WQ_MEM_RECLAIM, + nic->vf_id); + if (!nic->nicvf_rx_mode_wq) { + err = -ENOMEM; + dev_err(dev, "Failed to allocate work queue\n"); + goto err_unregister_interrupts; + } + + INIT_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task); + spin_lock_init(&nic->rx_mode_wq_lock); + err = register_netdev(netdev); if (err) { dev_err(dev, "Failed to register netdevice\n"); - goto err_unregister_interrupts; + goto err_destroy_workqueue; } nic->msg_enable = debug; @@ -1908,6 +2251,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; +err_destroy_workqueue: + destroy_workqueue(nic->nicvf_rx_mode_wq); err_unregister_interrupts: nicvf_unregister_interrupts(nic); err_free_netdev: @@ -1939,10 +2284,15 @@ static void nicvf_remove(struct pci_dev *pdev) */ if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED)) unregister_netdev(pnetdev); + if (nic->nicvf_rx_mode_wq) { + destroy_workqueue(nic->nicvf_rx_mode_wq); + nic->nicvf_rx_mode_wq = NULL; + } nicvf_unregister_interrupts(nic); pci_set_drvdata(pdev, NULL); if (nic->drv_stats) free_percpu(nic->drv_stats); + cavium_ptp_put(nic->ptp_clock); free_netdev(netdev); pci_release_regions(pdev); pci_disable_device(pdev); @@ -1964,7 +2314,6 @@ static struct pci_driver nicvf_driver = { static int __init nicvf_init_module(void) { pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION); - return pci_register_driver(&nicvf_driver); } |
