summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-02 16:40:27 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-02 16:40:27 -0700
commit8d65b08debc7e62b2c6032d7fe7389d895b92cbc (patch)
tree0c3141b60c3a03cc32742b5750c5e763b9dae489 /drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
parent5a0387a8a8efb90ae7fea1e2e5c62de3efa74691 (diff)
parent5d15af6778b8e4ed1fd41b040283af278e7a9a72 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Millar: "Here are some highlights from the 2065 networking commits that happened this development cycle: 1) XDP support for IXGBE (John Fastabend) and thunderx (Sunil Kowuri) 2) Add a generic XDP driver, so that anyone can test XDP even if they lack a networking device whose driver has explicit XDP support (me). 3) Sparc64 now has an eBPF JIT too (me) 4) Add a BPF program testing framework via BPF_PROG_TEST_RUN (Alexei Starovoitov) 5) Make netfitler network namespace teardown less expensive (Florian Westphal) 6) Add symmetric hashing support to nft_hash (Laura Garcia Liebana) 7) Implement NAPI and GRO in netvsc driver (Stephen Hemminger) 8) Support TC flower offload statistics in mlxsw (Arkadi Sharshevsky) 9) Multiqueue support in stmmac driver (Joao Pinto) 10) Remove TCP timewait recycling, it never really could possibly work well in the real world and timestamp randomization really zaps any hint of usability this feature had (Soheil Hassas Yeganeh) 11) Support level3 vs level4 ECMP route hashing in ipv4 (Nikolay Aleksandrov) 12) Add socket busy poll support to epoll (Sridhar Samudrala) 13) Netlink extended ACK support (Johannes Berg, Pablo Neira Ayuso, and several others) 14) IPSEC hw offload infrastructure (Steffen Klassert)" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2065 commits) tipc: refactor function tipc_sk_recv_stream() tipc: refactor function tipc_sk_recvmsg() net: thunderx: Optimize page recycling for XDP net: thunderx: Support for XDP header adjustment net: thunderx: Add support for XDP_TX net: thunderx: Add support for XDP_DROP net: thunderx: Add basic XDP support net: thunderx: Cleanup receive buffer allocation net: thunderx: Optimize CQE_TX handling net: thunderx: Optimize RBDR descriptor handling net: thunderx: Support for page recycling ipx: call ipxitf_put() in ioctl error path net: sched: add helpers to handle extended actions qed*: Fix issues in the ptp filter config implementation. qede: Fix concurrency issue in PTP Tx path processing. stmmac: Add support for SIMATIC IOT2000 platform net: hns: fix ethtool_get_strings overflow in hns driver tcp: fix wraparound issue in tcp_lp bpf, arm64: fix jit branch offset related to ldimm64 bpf, arm64: implement jiting of BPF_XADD ...
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_rep.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c682
1 files changed, 586 insertions, 96 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index f621373bd7a5..79462c0368a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -34,10 +34,14 @@
#include <linux/mlx5/fs.h>
#include <net/switchdev.h>
#include <net/pkt_cls.h>
+#include <net/netevent.h>
+#include <net/arp.h>
#include "eswitch.h"
#include "en.h"
+#include "en_rep.h"
#include "en_tc.h"
+#include "fs_core.h"
static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
@@ -75,7 +79,8 @@ static void mlx5e_rep_get_strings(struct net_device *dev,
static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct rtnl_link_stats64 *vport_stats;
struct ifla_vf_stats vf_stats;
int err;
@@ -102,14 +107,16 @@ static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
int i, j;
memset(s, 0, sizeof(*s));
- for (i = 0; i < priv->params.num_channels; i++) {
- rq_stats = &priv->channel[i]->rq.stats;
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+
+ rq_stats = &c->rq.stats;
s->rx_packets += rq_stats->packets;
s->rx_bytes += rq_stats->bytes;
- for (j = 0; j < priv->params.num_tc; j++) {
- sq_stats = &priv->channel[i]->sq[j].stats;
+ for (j = 0; j < priv->channels.params.num_tc; j++) {
+ sq_stats = &c->sq[j].stats;
s->tx_packets += sq_stats->packets;
s->tx_bytes += sq_stats->bytes;
@@ -163,7 +170,8 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
if (esw->mode == SRIOV_NONE)
@@ -182,66 +190,426 @@ int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
}
int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
-
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5e_channel *c;
- int n, tc, err, num_sqs = 0;
+ int n, tc, num_sqs = 0;
+ int err = -ENOMEM;
u16 *sqs;
- sqs = kcalloc(priv->params.num_channels * priv->params.num_tc, sizeof(u16), GFP_KERNEL);
+ sqs = kcalloc(priv->channels.num * priv->channels.params.num_tc, sizeof(u16), GFP_KERNEL);
if (!sqs)
- return -ENOMEM;
+ goto out;
- for (n = 0; n < priv->params.num_channels; n++) {
- c = priv->channel[n];
+ for (n = 0; n < priv->channels.num; n++) {
+ c = priv->channels.c[n];
for (tc = 0; tc < c->num_tc; tc++)
sqs[num_sqs++] = c->sq[tc].sqn;
}
err = mlx5_eswitch_sqs2vport_start(esw, rep, sqs, num_sqs);
-
kfree(sqs);
+
+out:
+ if (err)
+ netdev_warn(priv->netdev, "Failed to add SQs FWD rules %d\n", err);
return err;
}
-int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
+void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
{
- struct net_device *netdev = rep->netdev;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+
+ mlx5_eswitch_sqs2vport_stop(esw, rep);
+}
+
+static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ unsigned long ipv6_interval = NEIGH_VAR(&ipv6_stub->nd_tbl->parms,
+ DELAY_PROBE_TIME);
+#else
+ unsigned long ipv6_interval = ~0UL;
+#endif
+ unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms,
+ DELAY_PROBE_TIME);
+ struct net_device *netdev = rpriv->rep->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
- if (test_bit(MLX5E_STATE_OPENED, &priv->state))
- return mlx5e_add_sqs_fwd_rules(priv);
- return 0;
+ rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
}
-void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
- mlx5_eswitch_sqs2vport_stop(esw, rep);
+ mlx5_fc_queue_stats_work(priv->mdev,
+ &neigh_update->neigh_stats_work,
+ neigh_update->min_interval);
}
-void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
- struct mlx5_eswitch_rep *rep)
+static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
{
- struct net_device *netdev = rep->netdev;
+ struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
+ neigh_update.neigh_stats_work.work);
+ struct net_device *netdev = rpriv->rep->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe;
- if (test_bit(MLX5E_STATE_OPENED, &priv->state))
- mlx5e_remove_sqs_fwd_rules(priv);
+ rtnl_lock();
+ if (!list_empty(&rpriv->neigh_update.neigh_list))
+ mlx5e_rep_queue_neigh_stats_work(priv);
- /* clean (and re-init) existing uplink offloaded TC rules */
- mlx5e_tc_cleanup(priv);
- mlx5e_tc_init(priv);
+ list_for_each_entry(nhe, &rpriv->neigh_update.neigh_list, neigh_list)
+ mlx5e_tc_update_neigh_used_value(nhe);
+
+ rtnl_unlock();
+}
+
+static void mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
+{
+ refcount_inc(&nhe->refcnt);
+}
+
+static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
+{
+ if (refcount_dec_and_test(&nhe->refcnt))
+ kfree(nhe);
+}
+
+static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ bool neigh_connected,
+ unsigned char ha[ETH_ALEN])
+{
+ struct ethhdr *eth = (struct ethhdr *)e->encap_header;
+
+ ASSERT_RTNL();
+
+ if ((!neigh_connected && (e->flags & MLX5_ENCAP_ENTRY_VALID)) ||
+ !ether_addr_equal(e->h_dest, ha))
+ mlx5e_tc_encap_flows_del(priv, e);
+
+ if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
+ ether_addr_copy(e->h_dest, ha);
+ ether_addr_copy(eth->h_dest, ha);
+
+ mlx5e_tc_encap_flows_add(priv, e);
+ }
+}
+
+static void mlx5e_rep_neigh_update(struct work_struct *work)
+{
+ struct mlx5e_neigh_hash_entry *nhe =
+ container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
+ struct neighbour *n = nhe->n;
+ struct mlx5e_encap_entry *e;
+ unsigned char ha[ETH_ALEN];
+ struct mlx5e_priv *priv;
+ bool neigh_connected;
+ bool encap_connected;
+ u8 nud_state, dead;
+
+ rtnl_lock();
+
+ /* If these parameters are changed after we release the lock,
+ * we'll receive another event letting us know about it.
+ * We use this lock to avoid inconsistency between the neigh validity
+ * and it's hw address.
+ */
+ read_lock_bh(&n->lock);
+ memcpy(ha, n->ha, ETH_ALEN);
+ nud_state = n->nud_state;
+ dead = n->dead;
+ read_unlock_bh(&n->lock);
+
+ neigh_connected = (nud_state & NUD_VALID) && !dead;
+
+ list_for_each_entry(e, &nhe->encap_list, encap_list) {
+ encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
+ priv = netdev_priv(e->out_dev);
+
+ if (encap_connected != neigh_connected ||
+ !ether_addr_equal(e->h_dest, ha))
+ mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
+ }
+ mlx5e_rep_neigh_entry_release(nhe);
+ rtnl_unlock();
+ neigh_release(n);
+}
+
+static struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh);
+
+static int mlx5e_rep_netevent_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+ neigh_update.netevent_nb);
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct net_device *netdev = rpriv->rep->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe = NULL;
+ struct mlx5e_neigh m_neigh = {};
+ struct neigh_parms *p;
+ struct neighbour *n;
+ bool found = false;
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ n = ptr;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
+#else
+ if (n->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ m_neigh.dev = n->dev;
+ m_neigh.family = n->ops->family;
+ memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+
+ /* We are in atomic context and can't take RTNL mutex, so use
+ * spin_lock_bh to lookup the neigh table. bh is used since
+ * netevent can be called from a softirq context.
+ */
+ spin_lock_bh(&neigh_update->encap_lock);
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
+ if (!nhe) {
+ spin_unlock_bh(&neigh_update->encap_lock);
+ return NOTIFY_DONE;
+ }
+
+ /* This assignment is valid as long as the the neigh reference
+ * is taken
+ */
+ nhe->n = n;
+
+ /* Take a reference to ensure the neighbour and mlx5 encap
+ * entry won't be destructed until we drop the reference in
+ * delayed work.
+ */
+ neigh_hold(n);
+ mlx5e_rep_neigh_entry_hold(nhe);
+
+ if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
+ mlx5e_rep_neigh_entry_release(nhe);
+ neigh_release(n);
+ }
+ spin_unlock_bh(&neigh_update->encap_lock);
+ break;
+
+ case NETEVENT_DELAY_PROBE_TIME_UPDATE:
+ p = ptr;
+
+ /* We check the device is present since we don't care about
+ * changes in the default table, we only care about changes
+ * done per device delay prob time parameter.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
+#else
+ if (!p->dev || p->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ /* We are in atomic context and can't take RTNL mutex,
+ * so use spin_lock_bh to walk the neigh list and look for
+ * the relevant device. bh is used since netevent can be
+ * called from a softirq context.
+ */
+ spin_lock_bh(&neigh_update->encap_lock);
+ list_for_each_entry(nhe, &neigh_update->neigh_list, neigh_list) {
+ if (p->dev == nhe->m_neigh.dev) {
+ found = true;
+ break;
+ }
+ }
+ spin_unlock_bh(&neigh_update->encap_lock);
+ if (!found)
+ return NOTIFY_DONE;
+
+ neigh_update->min_interval = min_t(unsigned long,
+ NEIGH_VAR(p, DELAY_PROBE_TIME),
+ neigh_update->min_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev,
+ neigh_update->min_interval);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static const struct rhashtable_params mlx5e_neigh_ht_params = {
+ .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
+ .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
+ .key_len = sizeof(struct mlx5e_neigh),
+ .automatic_shrinking = true,
+};
+
+static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ int err;
+
+ err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
+ if (err)
+ return err;
+
+ INIT_LIST_HEAD(&neigh_update->neigh_list);
+ spin_lock_init(&neigh_update->encap_lock);
+ INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
+ mlx5e_rep_neigh_stats_work);
+ mlx5e_rep_neigh_update_init_interval(rpriv);
+
+ rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
+ err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
+ if (err)
+ goto out_err;
+ return 0;
+
+out_err:
+ rhashtable_destroy(&neigh_update->neigh_ht);
+ return err;
+}
+
+static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct mlx5e_priv *priv = netdev_priv(rpriv->rep->netdev);
+
+ unregister_netevent_notifier(&neigh_update->netevent_nb);
+
+ flush_workqueue(priv->wq); /* flush neigh update works */
+
+ cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
+
+ rhashtable_destroy(&neigh_update->neigh_ht);
+}
+
+static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ int err;
+
+ err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ if (err)
+ return err;
+
+ list_add(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
+
+ return err;
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ spin_lock_bh(&rpriv->neigh_update.encap_lock);
+
+ list_del(&nhe->neigh_list);
+
+ rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ spin_unlock_bh(&rpriv->neigh_update.encap_lock);
+}
+
+/* This function must only be called under RTNL lock or under the
+ * representor's encap_lock in case RTNL mutex can't be held.
+ */
+static struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+
+ return rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
+ mlx5e_neigh_ht_params);
+}
+
+static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh_hash_entry **nhe)
+{
+ int err;
+
+ *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
+ if (!*nhe)
+ return -ENOMEM;
+
+ memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
+ INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
+ INIT_LIST_HEAD(&(*nhe)->encap_list);
+ refcount_set(&(*nhe)->refcnt, 1);
+
+ err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
+ if (err)
+ goto out_free;
+ return 0;
+
+out_free:
+ kfree(*nhe);
+ return err;
+}
+
+static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ /* The neigh hash entry must be removed from the hash table regardless
+ * of the reference count value, so it won't be found by the next
+ * neigh notification call. The neigh hash entry reference count is
+ * incremented only during creation and neigh notification calls and
+ * protects from freeing the nhe struct.
+ */
+ mlx5e_rep_neigh_entry_remove(priv, nhe);
+ mlx5e_rep_neigh_entry_release(nhe);
+}
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_neigh_hash_entry *nhe;
+ int err;
+
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
+ if (!nhe) {
+ err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
+ if (err)
+ return err;
+ }
+ list_add(&e->encap_list, &nhe->encap_list);
+ return 0;
+}
+
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_neigh_hash_entry *nhe;
+
+ list_del(&e->encap_list);
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
+
+ if (list_empty(&nhe->encap_list))
+ mlx5e_rep_neigh_entry_destroy(priv, nhe);
}
static int mlx5e_rep_open(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
int err;
@@ -259,7 +627,8 @@ static int mlx5e_rep_open(struct net_device *dev)
static int mlx5e_rep_close(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
(void)mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
@@ -271,7 +640,8 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
char *buf, size_t len)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5_eswitch_rep *rep = priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
int ret;
ret = snprintf(buf, len, "%d", rep->vport - 1);
@@ -314,18 +684,25 @@ static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle,
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
{
- struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep;
- if (rep && rep->vport == FDB_UPLINK_VPORT && esw->mode == SRIOV_OFFLOADS)
+ if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager))
+ return false;
+
+ rep = rpriv->rep;
+ if (esw->mode == SRIOV_OFFLOADS &&
+ rep && rep->vport == FDB_UPLINK_VPORT)
return true;
return false;
}
-bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
+static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
{
- struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
if (rep && rep->vport != FDB_UPLINK_VPORT)
return true;
@@ -397,42 +774,23 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
.ndo_get_offload_stats = mlx5e_get_offload_stats,
};
-static void mlx5e_build_rep_netdev_priv(struct mlx5_core_dev *mdev,
- struct net_device *netdev,
- const struct mlx5e_profile *profile,
- void *ppriv)
+static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
{
- struct mlx5e_priv *priv = netdev_priv(netdev);
u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
- priv->params.log_sq_size =
- MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
- priv->params.rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST;
- priv->params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
-
- priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
- BIT(priv->params.log_rq_size));
-
- priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
- mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode);
-
- priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev);
- priv->params.num_tc = 1;
-
- priv->params.lro_wqe_sz =
- MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
-
- priv->mdev = mdev;
- priv->netdev = netdev;
- priv->params.num_channels = profile->max_nch(mdev);
- priv->profile = profile;
- priv->ppriv = ppriv;
+ params->log_sq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+ params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST;
+ params->log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
- mutex_init(&priv->state_lock);
+ params->rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+ mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
- INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
+ params->tx_max_inline = mlx5e_get_max_inline_cap(mdev);
+ params->num_tc = 1;
+ params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
}
static void mlx5e_build_rep_netdev(struct net_device *netdev)
@@ -458,30 +816,39 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev,
const struct mlx5e_profile *profile,
void *ppriv)
{
- mlx5e_build_rep_netdev_priv(mdev, netdev, profile, ppriv);
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ priv->mdev = mdev;
+ priv->netdev = netdev;
+ priv->profile = profile;
+ priv->ppriv = ppriv;
+
+ mutex_init(&priv->state_lock);
+
+ INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
+
+ priv->channels.params.num_channels = profile->max_nch(mdev);
+ mlx5e_build_rep_params(mdev, &priv->channels.params);
mlx5e_build_rep_netdev(netdev);
}
static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_eswitch_rep *rep = priv->ppriv;
- struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_flow_handle *flow_rule;
int err;
- int i;
+
+ mlx5e_init_l2_addr(priv);
err = mlx5e_create_direct_rqts(priv);
- if (err) {
- mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err);
+ if (err)
return err;
- }
err = mlx5e_create_direct_tirs(priv);
- if (err) {
- mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err);
+ if (err)
goto err_destroy_direct_rqts;
- }
flow_rule = mlx5_eswitch_create_vport_rx_rule(esw,
rep->vport,
@@ -503,21 +870,19 @@ err_del_flow_rule:
err_destroy_direct_tirs:
mlx5e_destroy_direct_tirs(priv);
err_destroy_direct_rqts:
- for (i = 0; i < priv->params.num_channels; i++)
- mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
+ mlx5e_destroy_direct_rqts(priv);
return err;
}
static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
{
- struct mlx5_eswitch_rep *rep = priv->ppriv;
- int i;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
mlx5e_tc_cleanup(priv);
mlx5_del_flow_rules(rep->vport_rx_rule);
mlx5e_destroy_direct_tirs(priv);
- for (i = 0; i < priv->params.num_channels; i++)
- mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
+ mlx5e_destroy_direct_rqts(priv);
}
static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
@@ -546,56 +911,181 @@ static struct mlx5e_profile mlx5e_rep_profile = {
.cleanup_tx = mlx5e_cleanup_nic_tx,
.update_stats = mlx5e_rep_update_stats,
.max_nch = mlx5e_get_rep_max_num_channels,
+ .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
+ .rx_handlers.handle_rx_cqe_mpwqe = NULL /* Not supported */,
.max_tc = 1,
};
-int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
- struct mlx5_eswitch_rep *rep)
+/* e-Switch vport representors */
+
+static int
+mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_priv *priv = netdev_priv(rep->netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ int err;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_add_sqs_fwd_rules(priv);
+ if (err)
+ return err;
+ }
+
+ err = mlx5e_rep_neigh_init(rpriv);
+ if (err)
+ goto err_remove_sqs;
+
+ return 0;
+
+err_remove_sqs:
+ mlx5e_remove_sqs_fwd_rules(priv);
+ return err;
+}
+
+static void
+mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
{
+ struct mlx5e_priv *priv = netdev_priv(rep->netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_remove_sqs_fwd_rules(priv);
+
+ /* clean (and re-init) existing uplink offloaded TC rules */
+ mlx5e_tc_cleanup(priv);
+ mlx5e_tc_init(priv);
+
+ mlx5e_rep_neigh_cleanup(rpriv);
+}
+
+static int
+mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
struct net_device *netdev;
int err;
- netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep);
+ rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
+ if (!rpriv)
+ return -ENOMEM;
+
+ netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rpriv);
if (!netdev) {
pr_warn("Failed to create representor netdev for vport %d\n",
rep->vport);
+ kfree(rpriv);
return -EINVAL;
}
rep->netdev = netdev;
+ rpriv->rep = rep;
- err = mlx5e_attach_netdev(esw->dev, netdev);
+ err = mlx5e_attach_netdev(netdev_priv(netdev));
if (err) {
pr_warn("Failed to attach representor netdev for vport %d\n",
rep->vport);
goto err_destroy_netdev;
}
+ err = mlx5e_rep_neigh_init(rpriv);
+ if (err) {
+ pr_warn("Failed to initialized neighbours handling for vport %d\n",
+ rep->vport);
+ goto err_detach_netdev;
+ }
+
err = register_netdev(netdev);
if (err) {
pr_warn("Failed to register representor netdev for vport %d\n",
rep->vport);
- goto err_detach_netdev;
+ goto err_neigh_cleanup;
}
return 0;
+err_neigh_cleanup:
+ mlx5e_rep_neigh_cleanup(rpriv);
+
err_detach_netdev:
- mlx5e_detach_netdev(esw->dev, netdev);
+ mlx5e_detach_netdev(netdev_priv(netdev));
err_destroy_netdev:
- mlx5e_destroy_netdev(esw->dev, netdev_priv(netdev));
-
+ mlx5e_destroy_netdev(netdev_priv(netdev));
+ kfree(rpriv);
return err;
}
-void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw,
- struct mlx5_eswitch_rep *rep)
+static void
+mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
{
struct net_device *netdev = rep->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ void *ppriv = priv->ppriv;
+
+ unregister_netdev(rep->netdev);
+
+ mlx5e_rep_neigh_cleanup(rpriv);
+ mlx5e_detach_netdev(priv);
+ mlx5e_destroy_netdev(priv);
+ kfree(ppriv); /* mlx5e_rep_priv */
+}
+
+static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(mdev);
+ int vport;
+ u8 mac[ETH_ALEN];
+
+ mlx5_query_nic_vport_mac_address(mdev, 0, mac);
+
+ for (vport = 1; vport < total_vfs; vport++) {
+ struct mlx5_eswitch_rep rep;
+
+ rep.load = mlx5e_vport_rep_load;
+ rep.unload = mlx5e_vport_rep_unload;
+ rep.vport = vport;
+ ether_addr_copy(rep.hw_id, mac);
+ mlx5_eswitch_register_vport_rep(esw, vport, &rep);
+ }
+}
+
+static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++)
+ mlx5_eswitch_unregister_vport_rep(esw, vport);
+}
+
+void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ struct mlx5_eswitch_rep rep;
+
+ mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id);
+ rep.load = mlx5e_nic_rep_load;
+ rep.unload = mlx5e_nic_rep_unload;
+ rep.vport = FDB_UPLINK_VPORT;
+ rep.netdev = priv->netdev;
+ mlx5_eswitch_register_vport_rep(esw, 0, &rep); /* UPLINK PF vport*/
+
+ mlx5e_rep_register_vf_vports(priv); /* VFs vports */
+}
+
+void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
- unregister_netdev(netdev);
- mlx5e_detach_netdev(esw->dev, netdev);
- mlx5e_destroy_netdev(esw->dev, netdev_priv(netdev));
+ mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */
+ mlx5_eswitch_unregister_vport_rep(esw, 0); /* UPLINK PF*/
}