// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2020 Mellanox Technologies. */ #include #include #include #include #include #include #include #include #include "neigh.h" #include "tc.h" #include "en_rep.h" #include "fs_core.h" #include "diag/en_rep_tracepoint.h" static unsigned long mlx5e_rep_ipv6_interval(void) { if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl) return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME); return ~0UL; } static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) { unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); unsigned long ipv6_interval = mlx5e_rep_ipv6_interval(); struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval); mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval); } void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; mlx5_fc_queue_stats_work(priv->mdev, &neigh_update->neigh_stats_work, neigh_update->min_interval); } static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) { return refcount_inc_not_zero(&nhe->refcnt); } static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe); void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) { if (refcount_dec_and_test(&nhe->refcnt)) { mlx5e_rep_neigh_entry_remove(nhe); kfree_rcu(nhe, rcu); } } static struct mlx5e_neigh_hash_entry * mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv, struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_neigh_hash_entry *next = NULL; rcu_read_lock(); for (next = nhe ? list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, &nhe->neigh_list, struct mlx5e_neigh_hash_entry, neigh_list) : list_first_or_null_rcu(&rpriv->neigh_update.neigh_list, struct mlx5e_neigh_hash_entry, neigh_list); next; next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, &next->neigh_list, struct mlx5e_neigh_hash_entry, neigh_list)) if (mlx5e_rep_neigh_entry_hold(next)) break; rcu_read_unlock(); if (nhe) mlx5e_rep_neigh_entry_release(nhe); return next; } static void mlx5e_rep_neigh_stats_work(struct work_struct *work) { struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, neigh_update.neigh_stats_work.work); struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_neigh_hash_entry *nhe = NULL; rtnl_lock(); if (!list_empty(&rpriv->neigh_update.neigh_list)) mlx5e_rep_queue_neigh_stats_work(priv); while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL) mlx5e_tc_update_neigh_used_value(nhe); rtnl_unlock(); } static void mlx5e_rep_neigh_update(struct work_struct *work) { struct mlx5e_neigh_hash_entry *nhe = container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work); struct neighbour *n = nhe->n; struct mlx5e_encap_entry *e; unsigned char ha[ETH_ALEN]; struct mlx5e_priv *priv; bool neigh_connected; u8 nud_state, dead; rtnl_lock(); /* If these parameters are changed after we release the lock, * we'll receive another event letting us know about it. * We use this lock to avoid inconsistency between the neigh validity * and it's hw address. */ read_lock_bh(&n->lock); memcpy(ha, n->ha, ETH_ALEN); nud_state = n->nud_state; dead = n->dead; read_unlock_bh(&n->lock); neigh_connected = (nud_state & NUD_VALID) && !dead; trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); list_for_each_entry(e, &nhe->encap_list, encap_list) { if (!mlx5e_encap_take(e)) continue; priv = netdev_priv(e->out_dev); mlx5e_rep_update_flows(priv, e, neigh_connected, ha); mlx5e_encap_put(priv, e); } mlx5e_rep_neigh_entry_release(nhe); rtnl_unlock(); neigh_release(n); } static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv, struct mlx5e_neigh_hash_entry *nhe, struct neighbour *n) { /* Take a reference to ensure the neighbour and mlx5 encap * entry won't be destructed until we drop the reference in * delayed work. */ neigh_hold(n); /* This assignment is valid as long as the the neigh reference * is taken */ nhe->n = n; if (!queue_work(priv->wq, &nhe->neigh_update_work)) { mlx5e_rep_neigh_entry_release(nhe); neigh_release(n); } } static int mlx5e_rep_netevent_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, neigh_update.netevent_nb); struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_neigh_hash_entry *nhe = NULL; struct mlx5e_neigh m_neigh = {}; struct neigh_parms *p; struct neighbour *n; bool found = false; switch (event) { case NETEVENT_NEIGH_UPDATE: n = ptr; #if IS_ENABLED(CONFIG_IPV6) if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) #else if (n->tbl != &arp_tbl) #endif return NOTIFY_DONE; m_neigh.dev = n->dev; m_neigh.family = n->ops->family; memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); rcu_read_lock(); nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); rcu_read_unlock(); if (!nhe) return NOTIFY_DONE; mlx5e_rep_queue_neigh_update_work(priv, nhe, n); break; case NETEVENT_DELAY_PROBE_TIME_UPDATE: p = ptr; /* We check the device is present since we don't care about * changes in the default table, we only care about changes * done per device delay prob time parameter. */ #if IS_ENABLED(CONFIG_IPV6) if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) #else if (!p->dev || p->tbl != &arp_tbl) #endif return NOTIFY_DONE; rcu_read_lock(); list_for_each_entry_rcu(nhe, &neigh_update->neigh_list, neigh_list) { if (p->dev == nhe->m_neigh.dev) { found = true; break; } } rcu_read_unlock(); if (!found) return NOTIFY_DONE; neigh_update->min_interval = min_t(unsigned long, NEIGH_VAR(p, DELAY_PROBE_TIME), neigh_update->min_interval); mlx5_fc_update_sampling_interval(priv->mdev, neigh_update->min_interval); break; } return NOTIFY_DONE; } static const struct rhashtable_params mlx5e_neigh_ht_params = { .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node), .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh), .key_len = sizeof(struct mlx5e_neigh), .automatic_shrinking = true, }; int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) { struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; int err; err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params); if (err) return err; INIT_LIST_HEAD(&neigh_update->neigh_list); mutex_init(&neigh_update->encap_lock); INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, mlx5e_rep_neigh_stats_work); mlx5e_rep_neigh_update_init_interval(rpriv); rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event; err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb); if (err) goto out_err; return 0; out_err: rhashtable_destroy(&neigh_update->neigh_ht); return err; } void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) { struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); unregister_netevent_notifier(&neigh_update->netevent_nb); flush_workqueue(priv->wq); /* flush neigh update works */ cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work); mutex_destroy(&neigh_update->encap_lock); rhashtable_destroy(&neigh_update->neigh_ht); } static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_rep_priv *rpriv = priv->ppriv; int err; err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht, &nhe->rhash_node, mlx5e_neigh_ht_params); if (err) return err; list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); return err; } static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv; mutex_lock(&rpriv->neigh_update.encap_lock); list_del_rcu(&nhe->neigh_list); rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht, &nhe->rhash_node, mlx5e_neigh_ht_params); mutex_unlock(&rpriv->neigh_update.encap_lock); } /* This function must only be called under the representor's encap_lock or * inside rcu read lock section. */ struct mlx5e_neigh_hash_entry * mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, struct mlx5e_neigh *m_neigh) { struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; struct mlx5e_neigh_hash_entry *nhe; nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, mlx5e_neigh_ht_params); return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL; } int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e, struct mlx5e_neigh_hash_entry **nhe) { int err; *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL); if (!*nhe) return -ENOMEM; (*nhe)->priv = priv; memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update); spin_lock_init(&(*nhe)->encap_list_lock); INIT_LIST_HEAD(&(*nhe)->encap_list); refcount_set(&(*nhe)->refcnt, 1); err = mlx5e_rep_neigh_entry_insert(priv, *nhe); if (err) goto out_free; return 0; out_free: kfree(*nhe); return err; }