Diffstat (limited to 'drivers/net/ethernet/cavium/thunder/nicvf_main.c')
 drivers/net/ethernet/cavium/thunder/nicvf_main.c (-rw-r--r--) | 533
 1 file changed, 441 insertions(+), 92 deletions(-)
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 49b80da51ba7..0b6e30a8feb0 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2015 Cavium, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
*/
#include <linux/module.h>
@@ -20,15 +17,25 @@
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/filter.h>
+#include <linux/net_tstamp.h>
+#include <linux/workqueue.h>
#include "nic_reg.h"
#include "nic.h"
#include "nicvf_queues.h"
#include "thunder_bgx.h"
+#include "../common/cavium_ptp.h"
-#define DRV_NAME "thunder-nicvf"
+#define DRV_NAME "nicvf"
#define DRV_VERSION "1.0"
+/* NOTE: Packets bigger than 1530 are split across multiple pages and XDP needs
+ * the buffer to be contiguous. Allow XDP to be set up only if we don't exceed
+ * this value, keeping headroom for the 14 byte Ethernet header and two
+ * VLAN tags (for QinQ)
+ */
+#define MAX_XDP_MTU (1530 - ETH_HLEN - VLAN_HLEN * 2)
+
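For reference, with the standard kernel header sizes (ETH_HLEN = 14, VLAN_HLEN = 4) the limit above works out to:

    MAX_XDP_MTU = 1530 - 14 - 2 * 4 = 1508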
/* Supported devices */
static const struct pci_device_id nicvf_id_table[] = {
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
@@ -61,7 +68,7 @@ module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, "Debug message level bitmap");
static int cpi_alg = CPI_ALG_NONE;
-module_param(cpi_alg, int, S_IRUGO);
+module_param(cpi_alg, int, 0444);
MODULE_PARM_DESC(cpi_alg,
"PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
@@ -119,34 +126,39 @@ static void nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx)
int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
{
- int timeout = NIC_MBOX_MSG_TIMEOUT;
- int sleep = 10;
+ unsigned long timeout;
+ int ret = 0;
+
+ mutex_lock(&nic->rx_mode_mtx);
nic->pf_acked = false;
nic->pf_nacked = false;
nicvf_write_to_mbx(nic, mbx);
+ timeout = jiffies + msecs_to_jiffies(NIC_MBOX_MSG_TIMEOUT);
/* Wait for previous message to be acked, timeout 2sec */
while (!nic->pf_acked) {
if (nic->pf_nacked) {
netdev_err(nic->netdev,
"PF NACK to mbox msg 0x%02x from VF%d\n",
(mbx->msg.msg & 0xFF), nic->vf_id);
- return -EINVAL;
+ ret = -EINVAL;
+ break;
}
- msleep(sleep);
+ usleep_range(8000, 10000);
if (nic->pf_acked)
break;
- timeout -= sleep;
- if (!timeout) {
+ if (time_after(jiffies, timeout)) {
netdev_err(nic->netdev,
"PF didn't ACK to mbox msg 0x%02x from VF%d\n",
(mbx->msg.msg & 0xFF), nic->vf_id);
- return -EBUSY;
+ ret = -EBUSY;
+ break;
}
}
- return 0;
+ mutex_unlock(&nic->rx_mode_mtx);
+ return ret;
}
/* Checks if VF is able to communicate with PF
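The open-coded millisecond countdown is replaced by a jiffies deadline. A minimal sketch of that pattern, assuming the 2-second NIC_MBOX_MSG_TIMEOUT mentioned in the comment above (the acked flag stands in for nic->pf_acked):

    unsigned long timeout = jiffies + msecs_to_jiffies(NIC_MBOX_MSG_TIMEOUT);

    while (!acked) {
            usleep_range(8000, 10000);        /* poll every ~8-10 ms */
            if (time_after(jiffies, timeout))
                    return -EBUSY;            /* deadline passed */
    }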
@@ -166,6 +178,17 @@ static int nicvf_check_pf_ready(struct nicvf *nic)
return 1;
}
+static void nicvf_send_cfg_done(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+
+ mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
+ if (nicvf_send_msg_to_pf(nic, &mbx)) {
+ netdev_err(nic->netdev,
+ "PF didn't respond to CFG DONE msg\n");
+ }
+}
+
static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx)
{
if (bgx->rx)
@@ -198,8 +221,7 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic)
nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F;
nic->node = mbx.nic_cfg.node_id;
if (!nic->set_mac_pending)
- ether_addr_copy(nic->netdev->dev_addr,
- mbx.nic_cfg.mac_addr);
+ eth_hw_addr_set(nic->netdev, mbx.nic_cfg.mac_addr);
nic->sqs_mode = mbx.nic_cfg.sqs_mode;
nic->loopback_supported = mbx.nic_cfg.loopback_supported;
nic->link_up = false;
@@ -222,21 +244,24 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic)
break;
case NIC_MBOX_MSG_BGX_LINK_CHANGE:
nic->pf_acked = true;
- nic->link_up = mbx.link_status.link_up;
- nic->duplex = mbx.link_status.duplex;
- nic->speed = mbx.link_status.speed;
- nic->mac_type = mbx.link_status.mac_type;
- if (nic->link_up) {
- netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n",
- nic->speed,
- nic->duplex == DUPLEX_FULL ?
- "Full" : "Half");
- netif_carrier_on(nic->netdev);
- netif_tx_start_all_queues(nic->netdev);
- } else {
- netdev_info(nic->netdev, "Link is Down\n");
- netif_carrier_off(nic->netdev);
- netif_tx_stop_all_queues(nic->netdev);
+ if (nic->link_up != mbx.link_status.link_up) {
+ nic->link_up = mbx.link_status.link_up;
+ nic->duplex = mbx.link_status.duplex;
+ nic->speed = mbx.link_status.speed;
+ nic->mac_type = mbx.link_status.mac_type;
+ if (nic->link_up) {
+ netdev_info(nic->netdev,
+ "Link is Up %d Mbps %s duplex\n",
+ nic->speed,
+ nic->duplex == DUPLEX_FULL ?
+ "Full" : "Half");
+ netif_carrier_on(nic->netdev);
+ netif_tx_start_all_queues(nic->netdev);
+ } else {
+ netdev_info(nic->netdev, "Link is Down\n");
+ netif_carrier_off(nic->netdev);
+ netif_tx_stop_all_queues(nic->netdev);
+ }
}
break;
case NIC_MBOX_MSG_ALLOC_SQS:
@@ -502,8 +527,9 @@ static int nicvf_init_resources(struct nicvf *nic)
static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
- struct sk_buff **skb)
+ struct rcv_queue *rq, struct sk_buff **skb)
{
+ unsigned char *hard_start, *data;
struct xdp_buff xdp;
struct page *page;
u32 action;
@@ -521,18 +547,18 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
cpu_addr = (u64)phys_to_virt(cpu_addr);
page = virt_to_page((void *)cpu_addr);
- xdp.data_hard_start = page_address(page);
- xdp.data = (void *)cpu_addr;
- xdp.data_end = xdp.data + len;
+ xdp_init_buff(&xdp, RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
+ &rq->xdp_rxq);
+ hard_start = page_address(page);
+ data = (unsigned char *)cpu_addr;
+ xdp_prepare_buff(&xdp, hard_start, data - hard_start, len, false);
orig_data = xdp.data;
- rcu_read_lock();
action = bpf_prog_run_xdp(prog, &xdp);
- rcu_read_unlock();
+ len = xdp.data_end - xdp.data;
/* Check if XDP program has changed headers */
if (orig_data != xdp.data) {
- len = xdp.data_end - xdp.data;
offset = orig_data - xdp.data;
dma_addr -= offset;
}
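xdp_init_buff()/xdp_prepare_buff() replace the open-coded xdp_buff field assignments. A minimal sketch of how the pair is used on a receive buffer (variable names here are illustrative):

    struct xdp_buff xdp;

    /* frame size covers the whole fragment plus the XDP headroom */
    xdp_init_buff(&xdp, frag_len + XDP_PACKET_HEADROOM, &rq->xdp_rxq);
    /* headroom = offset of the packet data from the start of the page */
    xdp_prepare_buff(&xdp, hard_start, data - hard_start, pkt_len, false);

    act = bpf_prog_run_xdp(prog, &xdp);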
@@ -564,9 +590,11 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len);
return true;
default:
- bpf_warn_invalid_xdp_action(action);
+ bpf_warn_invalid_xdp_action(nic->netdev, prog, action);
+ fallthrough;
case XDP_ABORTED:
trace_xdp_exception(nic->netdev, prog, action);
+ fallthrough;
case XDP_DROP:
/* Check if it's a recycled page, if not
* unmap the DMA mapping.
@@ -586,6 +614,44 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
return false;
}
+static void nicvf_snd_ptp_handler(struct net_device *netdev,
+ struct cqe_send_t *cqe_tx)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ struct skb_shared_hwtstamps ts;
+ u64 ns;
+
+ nic = nic->pnicvf;
+
+ /* Sync for 'ptp_skb' */
+ smp_rmb();
+
+ /* New timestamp request can be queued now */
+ atomic_set(&nic->tx_ptp_skbs, 0);
+
+ /* Check for timestamp requested skb */
+ if (!nic->ptp_skb)
+ return;
+
+ /* Check if timestamping timed out; the timeout is set to 10us */
+ if (cqe_tx->send_status == CQ_TX_ERROP_TSTMP_TIMEOUT ||
+ cqe_tx->send_status == CQ_TX_ERROP_TSTMP_CONFLICT)
+ goto no_tstamp;
+
+ /* Get the timestamp */
+ memset(&ts, 0, sizeof(ts));
+ ns = cavium_ptp_tstamp2time(nic->ptp_clock, cqe_tx->ptp_timestamp);
+ ts.hwtstamp = ns_to_ktime(ns);
+ skb_tstamp_tx(nic->ptp_skb, &ts);
+
+no_tstamp:
+ /* Free the original skb */
+ dev_kfree_skb_any(nic->ptp_skb);
+ nic->ptp_skb = NULL;
+ /* Sync 'ptp_skb' */
+ smp_wmb();
+}
+
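For context, this handler completes the generic TX timestamping contract; a sketch of the usual flow (not code from this patch): the transmit path marks the skb, and the completion path reports the hardware time with skb_tstamp_tx():

    /* transmit side: a hardware timestamp was requested for this skb */
    if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
            skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;

    /* completion side: convert the raw counter and hand it to the stack */
    struct skb_shared_hwtstamps ts = { .hwtstamp = ns_to_ktime(ns) };
    skb_tstamp_tx(skb, &ts);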
static void nicvf_snd_pkt_handler(struct net_device *netdev,
struct cqe_send_t *cqe_tx,
int budget, int *subdesc_cnt,
@@ -642,7 +708,12 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev,
prefetch(skb);
(*tx_pkts)++;
*tx_bytes += skb->len;
- napi_consume_skb(skb, budget);
+ /* If timestamp is requested for this skb, don't free it */
+ if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS &&
+ !nic->pnicvf->ptp_skb)
+ nic->pnicvf->ptp_skb = skb;
+ else
+ napi_consume_skb(skb, budget);
sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
} else {
/* In case of SW TSO on 88xx, only last segment will have
@@ -681,9 +752,25 @@ static inline void nicvf_set_rxhash(struct net_device *netdev,
skb_set_hash(skb, hash, hash_type);
}
+static inline void nicvf_set_rxtstamp(struct nicvf *nic, struct sk_buff *skb)
+{
+ u64 ns;
+
+ if (!nic->ptp_clock || !nic->hw_rx_tstamp)
+ return;
+
+ /* The first 8 bytes is the timestamp */
+ ns = cavium_ptp_tstamp2time(nic->ptp_clock,
+ be64_to_cpu(*(__be64 *)skb->data));
+ skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(ns);
+
+ __skb_pull(skb, 8);
+}
+
static void nicvf_rcv_pkt_handler(struct net_device *netdev,
struct napi_struct *napi,
- struct cqe_rx_t *cqe_rx, struct snd_queue *sq)
+ struct cqe_rx_t *cqe_rx,
+ struct snd_queue *sq, struct rcv_queue *rq)
{
struct sk_buff *skb = NULL;
struct nicvf *nic = netdev_priv(netdev);
@@ -709,7 +796,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
/* For XDP, ignore pkts spanning multiple pages */
if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) {
/* Packet consumed by XDP */
- if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, &skb))
+ if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, rq, &skb))
return;
} else {
skb = nicvf_get_rcv_skb(snic, cqe_rx,
@@ -731,6 +818,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
return;
}
+ nicvf_set_rxtstamp(nic, skb);
nicvf_set_rxhash(netdev, cqe_rx, skb);
skb_record_rx_queue(skb, rq_idx);
@@ -766,6 +854,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
struct cqe_rx_t *cq_desc;
struct netdev_queue *txq;
struct snd_queue *sq = &qs->sq[cq_idx];
+ struct rcv_queue *rq = &qs->rq[cq_idx];
unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx;
spin_lock_bh(&cq->lock);
@@ -796,7 +885,7 @@ loop:
switch (cq_desc->cqe_type) {
case CQE_TYPE_RX:
- nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq);
+ nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq, rq);
work_done++;
break;
case CQE_TYPE_SEND:
@@ -805,10 +894,12 @@ loop:
&tx_pkts, &tx_bytes);
tx_done++;
break;
+ case CQE_TYPE_SEND_PTP:
+ nicvf_snd_ptp_handler(netdev, (void *)cq_desc);
+ break;
case CQE_TYPE_INVALID:
case CQE_TYPE_RX_SPLIT:
case CQE_TYPE_RX_TCP:
- case CQE_TYPE_SEND_PTP:
/* Ignore for now */
break;
}
@@ -891,9 +982,9 @@ static int nicvf_poll(struct napi_struct *napi, int budget)
*
* As of now only CQ errors are handled
*/
-static void nicvf_handle_qs_err(unsigned long data)
+static void nicvf_handle_qs_err(struct tasklet_struct *t)
{
- struct nicvf *nic = (struct nicvf *)data;
+ struct nicvf *nic = from_tasklet(nic, t, qs_err_task);
struct queue_set *qs = nic->qs;
int qidx;
u64 status;
@@ -1132,7 +1223,7 @@ static int nicvf_register_misc_interrupt(struct nicvf *nic)
if (ret < 0) {
netdev_err(nic->netdev,
"Req for #%d msix vectors failed\n", nic->num_vec);
- return 1;
+ return ret;
}
sprintf(nic->irq_name[irq], "%s Mbox", "NICVF");
@@ -1151,7 +1242,7 @@ static int nicvf_register_misc_interrupt(struct nicvf *nic)
if (!nicvf_check_pf_ready(nic)) {
nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
nicvf_unregister_interrupts(nic);
- return 1;
+ return -EIO;
}
return 0;
@@ -1238,6 +1329,12 @@ int nicvf_stop(struct net_device *netdev)
struct nicvf_cq_poll *cq_poll = NULL;
union nic_mbx mbx = {};
+ /* wait till all queued set_rx_mode tasks complete */
+ if (nic->nicvf_rx_mode_wq) {
+ cancel_delayed_work_sync(&nic->link_change_work);
+ drain_workqueue(nic->nicvf_rx_mode_wq);
+ }
+
mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
nicvf_send_msg_to_pf(nic, &mbx);
@@ -1304,12 +1401,28 @@ int nicvf_stop(struct net_device *netdev)
nicvf_free_cq_poll(nic);
+ /* Free any pending SKB saved to receive timestamp */
+ if (nic->ptp_skb) {
+ dev_kfree_skb_any(nic->ptp_skb);
+ nic->ptp_skb = NULL;
+ }
+
/* Clear multiqset info */
nic->pnicvf = nic;
return 0;
}
+static int nicvf_config_hw_rx_tstamp(struct nicvf *nic, bool enable)
+{
+ union nic_mbx mbx = {};
+
+ mbx.ptp.msg = NIC_MBOX_MSG_PTP_CFG;
+ mbx.ptp.enable = enable;
+
+ return nicvf_send_msg_to_pf(nic, &mbx);
+}
+
static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
{
union nic_mbx mbx = {};
@@ -1321,13 +1434,28 @@ static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
return nicvf_send_msg_to_pf(nic, &mbx);
}
+static void nicvf_link_status_check_task(struct work_struct *work_arg)
+{
+ struct nicvf *nic = container_of(work_arg,
+ struct nicvf,
+ link_change_work.work);
+ union nic_mbx mbx = {};
+ mbx.msg.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
+ nicvf_send_msg_to_pf(nic, &mbx);
+ queue_delayed_work(nic->nicvf_rx_mode_wq,
+ &nic->link_change_work, 2 * HZ);
+}
+
int nicvf_open(struct net_device *netdev)
{
int cpu, err, qidx;
struct nicvf *nic = netdev_priv(netdev);
struct queue_set *qs = nic->qs;
struct nicvf_cq_poll *cq_poll = NULL;
- union nic_mbx mbx = {};
+
+ /* wait till all queued set_rx_mode tasks complete, if any */
+ if (nic->nicvf_rx_mode_wq)
+ drain_workqueue(nic->nicvf_rx_mode_wq);
netif_carrier_off(netdev);
@@ -1344,8 +1472,7 @@ int nicvf_open(struct net_device *netdev)
}
cq_poll->cq_idx = qidx;
cq_poll->nicvf = nic;
- netif_napi_add(netdev, &cq_poll->napi, nicvf_poll,
- NAPI_POLL_WEIGHT);
+ netif_napi_add(netdev, &cq_poll->napi, nicvf_poll);
napi_enable(&cq_poll->napi);
nic->napi[qidx] = cq_poll;
}
@@ -1362,12 +1489,10 @@ int nicvf_open(struct net_device *netdev)
}
/* Init tasklet for handling Qset err interrupt */
- tasklet_init(&nic->qs_err_task, nicvf_handle_qs_err,
- (unsigned long)nic);
+ tasklet_setup(&nic->qs_err_task, nicvf_handle_qs_err);
/* Init RBDR tasklet which will refill RBDR */
- tasklet_init(&nic->rbdr_task, nicvf_rbdr_task,
- (unsigned long)nic);
+ tasklet_setup(&nic->rbdr_task, nicvf_rbdr_task);
INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work);
/* Configure CPI algorithm */
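Both tasklets are converted to the tasklet_setup()/from_tasklet() style, where the callback receives the tasklet pointer and recovers its container. A minimal sketch of the pattern, using the qs_err_task member shown above:

    static void nicvf_handle_qs_err(struct tasklet_struct *t)
    {
            /* from_tasklet() is container_of() on the embedded tasklet */
            struct nicvf *nic = from_tasklet(nic, t, qs_err_task);
            /* ... handle Qset errors ... */
    }

    /* no more (unsigned long) data cast at init time */
    tasklet_setup(&nic->qs_err_task, nicvf_handle_qs_err);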
@@ -1379,6 +1504,12 @@ int nicvf_open(struct net_device *netdev)
if (nic->sqs_mode)
nicvf_get_primary_vf_struct(nic);
+ /* Configure PTP timestamp */
+ if (nic->ptp_clock)
+ nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp);
+ atomic_set(&nic->tx_ptp_skbs, 0);
+ nic->ptp_skb = NULL;
+
/* Configure receive side scaling and MTU */
if (!nic->sqs_mode) {
nicvf_rss_init(nic);
@@ -1417,8 +1548,14 @@ int nicvf_open(struct net_device *netdev)
nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);
/* Send VF config done msg to PF */
- mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
- nicvf_write_to_mbx(nic, &mbx);
+ nicvf_send_cfg_done(nic);
+
+ if (nic->nicvf_rx_mode_wq) {
+ INIT_DELAYED_WORK(&nic->link_change_work,
+ nicvf_link_status_check_task);
+ queue_delayed_work(nic->nicvf_rx_mode_wq,
+ &nic->link_change_work, 0);
+ }
return 0;
cleanup:
@@ -1441,18 +1578,21 @@ napi_del:
static int nicvf_change_mtu(struct net_device *netdev, int new_mtu)
{
struct nicvf *nic = netdev_priv(netdev);
- int orig_mtu = netdev->mtu;
-
- netdev->mtu = new_mtu;
-
- if (!netif_running(netdev))
- return 0;
- if (nicvf_update_hw_max_frs(nic, new_mtu)) {
- netdev->mtu = orig_mtu;
+ /* For now just support only the usual MTU sized frames,
+ * plus some headroom for VLAN, QinQ.
+ */
+ if (nic->xdp_prog && new_mtu > MAX_XDP_MTU) {
+ netdev_warn(netdev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
+ netdev->mtu);
return -EINVAL;
}
+ if (netif_running(netdev) && nicvf_update_hw_max_frs(nic, new_mtu))
+ return -EINVAL;
+
+ WRITE_ONCE(netdev->mtu, new_mtu);
+
return 0;
}
@@ -1464,7 +1604,7 @@ static int nicvf_set_mac_address(struct net_device *netdev, void *p)
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
- memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+ eth_hw_addr_set(netdev, addr->sa_data);
if (nic->pdev->msix_enabled) {
if (nicvf_hw_set_mac_addr(nic, netdev))
@@ -1589,7 +1729,7 @@ static void nicvf_get_stats64(struct net_device *netdev,
}
-static void nicvf_tx_timeout(struct net_device *dev)
+static void nicvf_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct nicvf *nic = netdev_priv(dev);
@@ -1689,9 +1829,12 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
bool if_up = netif_running(nic->netdev);
struct bpf_prog *old_prog;
bool bpf_attached = false;
+ int ret = 0;
- /* For now just support only the usual MTU sized frames */
- if (prog && (dev->mtu > 1500)) {
+ /* For now just support only the usual MTU sized frames,
+ * plus some headroom for VLAN, QinQ.
+ */
+ if (prog && dev->mtu > MAX_XDP_MTU) {
netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
dev->mtu);
return -EOPNOTSUPP;
@@ -1721,9 +1864,8 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
if (nic->xdp_prog) {
/* Attach BPF program */
- nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
- if (!IS_ERR(nic->xdp_prog))
- bpf_attached = true;
+ bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
+ bpf_attached = true;
}
/* Calculate Tx queues needed for XDP and network stack */
@@ -1735,10 +1877,10 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
netif_trans_update(nic->netdev);
}
- return 0;
+ return ret;
}
-static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp)
+static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
{
struct nicvf *nic = netdev_priv(netdev);
@@ -1752,15 +1894,186 @@ static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp)
switch (xdp->command) {
case XDP_SETUP_PROG:
return nicvf_xdp_setup(nic, xdp->prog);
- case XDP_QUERY_PROG:
- xdp->prog_attached = !!nic->xdp_prog;
- xdp->prog_id = nic->xdp_prog ? nic->xdp_prog->aux->id : 0;
- return 0;
default:
return -EINVAL;
}
}
+static int nicvf_hwtstamp_set(struct net_device *netdev,
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+
+ if (!nic->ptp_clock) {
+ NL_SET_ERR_MSG_MOD(extack, "HW timestamping is not supported");
+ return -ENODEV;
+ }
+
+ switch (config->tx_type) {
+ case HWTSTAMP_TX_OFF:
+ case HWTSTAMP_TX_ON:
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ switch (config->rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ nic->hw_rx_tstamp = false;
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ case HWTSTAMP_FILTER_SOME:
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ nic->hw_rx_tstamp = true;
+ config->rx_filter = HWTSTAMP_FILTER_ALL;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ if (netif_running(netdev))
+ nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp);
+
+ return 0;
+}
+
+static int nicvf_hwtstamp_get(struct net_device *netdev,
+ struct kernel_hwtstamp_config *config)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+
+ if (!nic->ptp_clock)
+ return -ENODEV;
+
+ /* TX timestamping is technically always on */
+ config->tx_type = HWTSTAMP_TX_ON;
+ config->rx_filter = nic->hw_rx_tstamp ?
+ HWTSTAMP_FILTER_ALL :
+ HWTSTAMP_FILTER_NONE;
+
+ return 0;
+}
+
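These two handlers back the standard SIOCSHWTSTAMP/SIOCGHWTSTAMP interface. A minimal userspace sketch of enabling RX timestamping on the interface (the socket and interface name are illustrative):

    #include <linux/net_tstamp.h>
    #include <linux/sockios.h>
    #include <net/if.h>
    #include <string.h>
    #include <sys/ioctl.h>

    int enable_hw_tstamp(int sock_fd, const char *ifname)
    {
            struct hwtstamp_config cfg = {
                    .tx_type   = HWTSTAMP_TX_ON,
                    .rx_filter = HWTSTAMP_FILTER_ALL,
            };
            struct ifreq ifr;

            memset(&ifr, 0, sizeof(ifr));
            strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
            ifr.ifr_data = (char *)&cfg;

            return ioctl(sock_fd, SIOCSHWTSTAMP, &ifr); /* 0 on success */
    }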
+static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
+ struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+ int idx;
+
+ /* From inside a VM we have only a 128-bit mailbox available to send
+ * messages to the host's PF, so send all mc addrs one by one,
+ * starting with a flush command in case the kernel requests
+ * specific MAC filtering
+ */
+
+ /* flush DMAC filters and reset RX mode */
+ mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST;
+ if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+ goto free_mc;
+
+ if (mode & BGX_XCAST_MCAST_FILTER) {
+ /* once filtering is enabled, we need to signal the PF to add
+ * its own LMAC to the filter so packets destined to it are accepted.
+ */
+ mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
+ mbx.xcast.mac = 0;
+ if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+ goto free_mc;
+ }
+
+ /* check if we have any specific MACs to be added to PF DMAC filter */
+ if (mc_addrs) {
+ /* now go through kernel list of MACs and add them one by one */
+ for (idx = 0; idx < mc_addrs->count; idx++) {
+ mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
+ mbx.xcast.mac = mc_addrs->mc[idx];
+ if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+ goto free_mc;
+ }
+ }
+
+ /* and finally set rx mode for PF accordingly */
+ mbx.xcast.msg = NIC_MBOX_MSG_SET_XCAST;
+ mbx.xcast.mode = mode;
+
+ nicvf_send_msg_to_pf(nic, &mbx);
+free_mc:
+ kfree(mc_addrs);
+}
+
+static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
+{
+ struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work,
+ work);
+ struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work);
+ u8 mode;
+ struct xcast_addr_list *mc;
+
+ /* Save the message data locally to prevent it from being
+ * overwritten by the next ndo_set_rx_mode() call.
+ */
+ spin_lock_bh(&nic->rx_mode_wq_lock);
+ mode = vf_work->mode;
+ mc = vf_work->mc;
+ vf_work->mc = NULL;
+ spin_unlock_bh(&nic->rx_mode_wq_lock);
+
+ __nicvf_set_rx_mode_task(mode, mc, nic);
+}
+
+static void nicvf_set_rx_mode(struct net_device *netdev)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+ struct netdev_hw_addr *ha;
+ struct xcast_addr_list *mc_list = NULL;
+ u8 mode = 0;
+
+ if (netdev->flags & IFF_PROMISC) {
+ mode = BGX_XCAST_BCAST_ACCEPT | BGX_XCAST_MCAST_ACCEPT;
+ } else {
+ if (netdev->flags & IFF_BROADCAST)
+ mode |= BGX_XCAST_BCAST_ACCEPT;
+
+ if (netdev->flags & IFF_ALLMULTI) {
+ mode |= BGX_XCAST_MCAST_ACCEPT;
+ } else if (netdev->flags & IFF_MULTICAST) {
+ mode |= BGX_XCAST_MCAST_FILTER;
+ /* here we need to copy mc addrs */
+ if (netdev_mc_count(netdev)) {
+ mc_list = kmalloc(struct_size(mc_list, mc,
+ netdev_mc_count(netdev)),
+ GFP_ATOMIC);
+ if (unlikely(!mc_list))
+ return;
+ mc_list->count = 0;
+ netdev_hw_addr_list_for_each(ha, &netdev->mc) {
+ mc_list->mc[mc_list->count] =
+ ether_addr_to_u64(ha->addr);
+ mc_list->count++;
+ }
+ }
+ }
+ }
+ spin_lock(&nic->rx_mode_wq_lock);
+ kfree(nic->rx_mode_work.mc);
+ nic->rx_mode_work.mc = mc_list;
+ nic->rx_mode_work.mode = mode;
+ queue_work(nic->nicvf_rx_mode_wq, &nic->rx_mode_work.work);
+ spin_unlock(&nic->rx_mode_wq_lock);
+}
+
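struct_size() sizes the flexible-array allocation with overflow checking. A small sketch of the idiom, assuming the xcast_addr_list layout used above (an int count followed by a u64 mc[] flexible array):

    /* equivalent to sizeof(*mc_list) + n * sizeof(mc_list->mc[0]),
     * but saturating instead of wrapping on overflow
     */
    mc_list = kmalloc(struct_size(mc_list, mc, n), GFP_ATOMIC);
    mc_list->count = n;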
static const struct net_device_ops nicvf_netdev_ops = {
.ndo_open = nicvf_open,
.ndo_stop = nicvf_stop,
@@ -1771,7 +2084,10 @@ static const struct net_device_ops nicvf_netdev_ops = {
.ndo_tx_timeout = nicvf_tx_timeout,
.ndo_fix_features = nicvf_fix_features,
.ndo_set_features = nicvf_set_features,
- .ndo_xdp = nicvf_xdp,
+ .ndo_bpf = nicvf_xdp,
+ .ndo_set_rx_mode = nicvf_set_rx_mode,
+ .ndo_hwtstamp_get = nicvf_hwtstamp_get,
+ .ndo_hwtstamp_set = nicvf_hwtstamp_set,
};
static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
@@ -1781,31 +2097,33 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
struct nicvf *nic;
int err, qcount;
u16 sdevid;
+ struct cavium_ptp *ptp_clock;
- err = pci_enable_device(pdev);
- if (err) {
- dev_err(dev, "Failed to enable PCI device\n");
- return err;
+ ptp_clock = cavium_ptp_get();
+ if (IS_ERR(ptp_clock)) {
+ if (PTR_ERR(ptp_clock) == -ENODEV)
+ /* In virtualized environment we proceed without ptp */
+ ptp_clock = NULL;
+ else
+ return PTR_ERR(ptp_clock);
}
+ err = pci_enable_device(pdev);
+ if (err)
+ return dev_err_probe(dev, err, "Failed to enable PCI device\n");
+
err = pci_request_regions(pdev, DRV_NAME);
if (err) {
dev_err(dev, "PCI request regions failed 0x%x\n", err);
goto err_disable_device;
}
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
if (err) {
dev_err(dev, "Unable to get usable DMA configuration\n");
goto err_release_regions;
}
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
- if (err) {
- dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n");
- goto err_release_regions;
- }
-
qcount = netif_get_num_default_rss_queues();
/* Restrict multiqset support only for host bound VFs */
@@ -1830,6 +2148,15 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
nic->pdev = pdev;
nic->pnicvf = nic;
nic->max_queues = qcount;
+ /* If the number of CPUs is too low, there won't be any queues left
+ * for XDP_TX, hence double it.
+ */
+ if (!nic->t88)
+ nic->max_queues *= 2;
+ nic->ptp_clock = ptp_clock;
+
+ /* Initialize mutex that serializes usage of VF's mailbox */
+ mutex_init(&nic->rx_mode_mtx);
/* MAP VF's configuration registers */
nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
@@ -1890,16 +2217,32 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->netdev_ops = &nicvf_netdev_ops;
netdev->watchdog_timeo = NICVF_TX_TIMEOUT;
+ if (!pass1_silicon(nic->pdev) &&
+ nic->rx_queues + nic->tx_queues <= nic->max_queues)
+ netdev->xdp_features = NETDEV_XDP_ACT_BASIC;
+
/* MTU range: 64 - 9200 */
netdev->min_mtu = NIC_HW_MIN_FRS;
netdev->max_mtu = NIC_HW_MAX_FRS;
INIT_WORK(&nic->reset_task, nicvf_reset_task);
+ nic->nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_rx_mode_wq_VF%d",
+ WQ_MEM_RECLAIM,
+ nic->vf_id);
+ if (!nic->nicvf_rx_mode_wq) {
+ err = -ENOMEM;
+ dev_err(dev, "Failed to allocate work queue\n");
+ goto err_unregister_interrupts;
+ }
+
+ INIT_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
+ spin_lock_init(&nic->rx_mode_wq_lock);
+
err = register_netdev(netdev);
if (err) {
dev_err(dev, "Failed to register netdevice\n");
- goto err_unregister_interrupts;
+ goto err_destroy_workqueue;
}
nic->msg_enable = debug;
@@ -1908,6 +2251,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return 0;
+err_destroy_workqueue:
+ destroy_workqueue(nic->nicvf_rx_mode_wq);
err_unregister_interrupts:
nicvf_unregister_interrupts(nic);
err_free_netdev:
@@ -1939,10 +2284,15 @@ static void nicvf_remove(struct pci_dev *pdev)
*/
if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED))
unregister_netdev(pnetdev);
+ if (nic->nicvf_rx_mode_wq) {
+ destroy_workqueue(nic->nicvf_rx_mode_wq);
+ nic->nicvf_rx_mode_wq = NULL;
+ }
nicvf_unregister_interrupts(nic);
pci_set_drvdata(pdev, NULL);
if (nic->drv_stats)
free_percpu(nic->drv_stats);
+ cavium_ptp_put(nic->ptp_clock);
free_netdev(netdev);
pci_release_regions(pdev);
pci_disable_device(pdev);
@@ -1964,7 +2314,6 @@ static struct pci_driver nicvf_driver = {
static int __init nicvf_init_module(void)
{
pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
-
return pci_register_driver(&nicvf_driver);
}