diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c')
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 7344 |
1 files changed, 5575 insertions, 1769 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 230e1f6e192b..a2033837182e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -13,14 +13,20 @@ #include <linux/socket.h> #include <linux/route.h> #include <linux/gcd.h> -#include <linux/random.h> #include <linux/if_macvlan.h> #include <linux/refcount.h> +#include <linux/jhash.h> +#include <linux/net_namespace.h> +#include <linux/mutex.h> +#include <linux/genalloc.h> +#include <linux/xarray.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> +#include <net/inet_dscp.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> +#include <net/nexthop.h> #include <net/fib_rules.h> #include <net/ip_tunnels.h> #include <net/l3mdev.h> @@ -46,45 +52,34 @@ struct mlxsw_sp_vr; struct mlxsw_sp_lpm_tree; struct mlxsw_sp_rif_ops; -struct mlxsw_sp_router { - struct mlxsw_sp *mlxsw_sp; - struct mlxsw_sp_rif **rifs; - struct mlxsw_sp_vr *vrs; - struct rhashtable neigh_ht; - struct rhashtable nexthop_group_ht; - struct rhashtable nexthop_ht; +struct mlxsw_sp_crif_key { + struct net_device *dev; +}; + +struct mlxsw_sp_crif { + struct mlxsw_sp_crif_key key; + struct rhash_head ht_node; + bool can_destroy; struct list_head nexthop_list; - struct { - /* One tree for each protocol: IPv4 and IPv6 */ - struct mlxsw_sp_lpm_tree *proto_trees[2]; - struct mlxsw_sp_lpm_tree *trees; - unsigned int tree_count; - } lpm; - struct { - struct delayed_work dw; - unsigned long interval; /* ms */ - } neighs_update; - struct delayed_work nexthop_probe_dw; -#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ - struct list_head nexthop_neighs_list; - struct list_head ipip_list; - bool aborted; - struct notifier_block fib_nb; - struct notifier_block netevent_nb; - struct notifier_block inetaddr_nb; - struct notifier_block inet6addr_nb; - const struct mlxsw_sp_rif_ops **rif_ops_arr; - const struct mlxsw_sp_ipip_ops **ipip_ops_arr; + struct mlxsw_sp_rif *rif; +}; + +static const struct rhashtable_params mlxsw_sp_crif_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_crif, key), + .key_len = sizeof_field(struct mlxsw_sp_crif, key), + .head_offset = offsetof(struct mlxsw_sp_crif, ht_node), }; struct mlxsw_sp_rif { - struct list_head nexthop_list; + struct mlxsw_sp_crif *crif; /* NULL for underlay RIF */ + netdevice_tracker dev_tracker; struct list_head neigh_list; - struct net_device *dev; /* NULL for underlay RIF */ struct mlxsw_sp_fid *fid; unsigned char addr[ETH_ALEN]; int mtu; u16 rif_index; + u8 mac_profile_id; + u8 rif_entries; u16 vr_id; const struct mlxsw_sp_rif_ops *ops; struct mlxsw_sp *mlxsw_sp; @@ -95,6 +90,13 @@ struct mlxsw_sp_rif { bool counter_egress_valid; }; +static struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) +{ + if (!rif->crif) + return NULL; + return rif->crif->key.dev; +} + struct mlxsw_sp_rif_params { struct net_device *dev; union { @@ -103,6 +105,7 @@ struct mlxsw_sp_rif_params { }; u16 vid; bool lag; + bool double_entry; }; struct mlxsw_sp_rif_subport { @@ -119,8 +122,8 @@ struct mlxsw_sp_rif_subport { struct mlxsw_sp_rif_ipip_lb { struct mlxsw_sp_rif common; struct mlxsw_sp_rif_ipip_lb_config lb_config; - u16 ul_vr_id; /* Reserved for Spectrum-2. */ - u16 ul_rif_id; /* Reserved for Spectrum. */ + u16 ul_vr_id; /* Spectrum-1. */ + u16 ul_rif_id; /* Spectrum-2+. */ }; struct mlxsw_sp_rif_params_ipip_lb { @@ -134,13 +137,29 @@ struct mlxsw_sp_rif_ops { void (*setup)(struct mlxsw_sp_rif *rif, const struct mlxsw_sp_rif_params *params); - int (*configure)(struct mlxsw_sp_rif *rif); + int (*configure)(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack); void (*deconfigure)(struct mlxsw_sp_rif *rif); struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params, struct netlink_ext_ack *extack); void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac); }; +struct mlxsw_sp_rif_mac_profile { + unsigned char mac_prefix[ETH_ALEN]; + refcount_t ref_count; + u8 id; +}; + +struct mlxsw_sp_router_ops { + int (*init)(struct mlxsw_sp *mlxsw_sp); + int (*ipips_init)(struct mlxsw_sp *mlxsw_sp); +}; + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree); static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, @@ -237,6 +256,64 @@ int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, return 0; } +struct mlxsw_sp_rif_counter_set_basic { + u64 good_unicast_packets; + u64 good_multicast_packets; + u64 good_broadcast_packets; + u64 good_unicast_bytes; + u64 good_multicast_bytes; + u64 good_broadcast_bytes; + u64 error_packets; + u64 discard_packets; + u64 error_bytes; + u64 discard_bytes; +}; + +static int +mlxsw_sp_rif_counter_fetch_clear(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir, + struct mlxsw_sp_rif_counter_set_basic *set) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + char ricnt_pl[MLXSW_REG_RICNT_LEN]; + unsigned int *p_counter_index; + int err; + + if (!mlxsw_sp_rif_counter_valid_get(rif, dir)) + return -EINVAL; + + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); + if (!p_counter_index) + return -EINVAL; + + mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index, + MLXSW_REG_RICNT_OPCODE_CLEAR); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl); + if (err) + return err; + + if (!set) + return 0; + +#define MLXSW_SP_RIF_COUNTER_EXTRACT(NAME) \ + (set->NAME = mlxsw_reg_ricnt_ ## NAME ## _get(ricnt_pl)) + + MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(error_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(discard_packets); + MLXSW_SP_RIF_COUNTER_EXTRACT(error_bytes); + MLXSW_SP_RIF_COUNTER_EXTRACT(discard_bytes); + +#undef MLXSW_SP_RIF_COUNTER_EXTRACT + + return 0; +} + static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index) { @@ -247,16 +324,20 @@ static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl); } -int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif, +int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir) { + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; unsigned int *p_counter_index; int err; + if (mlxsw_sp_rif_counter_valid_get(rif, dir)) + return 0; + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); if (!p_counter_index) return -EINVAL; + err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF, p_counter_index); if (err) @@ -280,10 +361,10 @@ err_counter_clear: return err; } -void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif, +void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir) { + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; unsigned int *p_counter_index; if (!mlxsw_sp_rif_counter_valid_get(rif, dir)) @@ -308,14 +389,12 @@ static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif) if (!devlink_dpipe_table_counter_enabled(devlink, MLXSW_SP_DPIPE_TABLE_NAME_ERIF)) return; - mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS); + mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS); } static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif) { - struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; - - mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS); + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS); } #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1) @@ -364,6 +443,8 @@ enum mlxsw_sp_fib_entry_type { MLXSW_SP_FIB_ENTRY_TYPE_REMOTE, MLXSW_SP_FIB_ENTRY_TYPE_LOCAL, MLXSW_SP_FIB_ENTRY_TYPE_TRAP, + MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE, + MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE, /* This is a special case of local delivery, where a packet should be * decapsulated on reception. Note that there is no corresponding ENCAP, @@ -375,10 +456,12 @@ enum mlxsw_sp_fib_entry_type { MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP, }; +struct mlxsw_sp_nexthop_group_info; struct mlxsw_sp_nexthop_group; +struct mlxsw_sp_fib_entry; struct mlxsw_sp_fib_node { - struct list_head entry_list; + struct mlxsw_sp_fib_entry *fib_entry; struct list_head list; struct rhash_head ht_node; struct mlxsw_sp_fib *fib; @@ -391,7 +474,6 @@ struct mlxsw_sp_fib_entry_decap { }; struct mlxsw_sp_fib_entry { - struct list_head list; struct mlxsw_sp_fib_node *fib_node; enum mlxsw_sp_fib_entry_type type; struct list_head nexthop_group_node; @@ -401,9 +483,9 @@ struct mlxsw_sp_fib_entry { struct mlxsw_sp_fib4_entry { struct mlxsw_sp_fib_entry common; + struct fib_info *fi; u32 tb_id; - u32 prio; - u8 tos; + dscp_t dscp; u8 type; }; @@ -420,7 +502,7 @@ struct mlxsw_sp_rt6 { struct mlxsw_sp_lpm_tree { u8 id; /* tree ID */ - unsigned int ref_count; + refcount_t ref_count; enum mlxsw_sp_l3proto proto; unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT]; struct mlxsw_sp_prefix_usage prefix_usage; @@ -497,7 +579,7 @@ mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp) for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) { lpm_tree = &mlxsw_sp->router->lpm.trees[i]; - if (lpm_tree->ref_count == 0) + if (refcount_read(&lpm_tree->ref_count) == 0) return lpm_tree; } return NULL; @@ -573,7 +655,7 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, sizeof(lpm_tree->prefix_usage)); memset(&lpm_tree->prefix_ref_count, 0, sizeof(lpm_tree->prefix_ref_count)); - lpm_tree->ref_count = 1; + refcount_set(&lpm_tree->ref_count, 1); return lpm_tree; err_left_struct_set: @@ -597,7 +679,7 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) { lpm_tree = &mlxsw_sp->router->lpm.trees[i]; - if (lpm_tree->ref_count != 0 && + if (refcount_read(&lpm_tree->ref_count) && lpm_tree->proto == proto && mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage, prefix_usage)) { @@ -610,14 +692,15 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree) { - lpm_tree->ref_count++; + refcount_inc(&lpm_tree->ref_count); } static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_lpm_tree *lpm_tree) { - if (--lpm_tree->ref_count == 0) - mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree); + if (!refcount_dec_and_test(&lpm_tree->ref_count)) + return; + mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree); } #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */ @@ -693,10 +776,11 @@ static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr) static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) { + int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); struct mlxsw_sp_vr *vr; int i; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + for (i = 0; i < max_vrs; i++) { vr = &mlxsw_sp->router->vrs[i]; if (!mlxsw_sp_vr_is_used(vr)) return vr; @@ -737,12 +821,13 @@ static u32 mlxsw_sp_fix_tb_id(u32 tb_id) static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id) { + int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); struct mlxsw_sp_vr *vr; int i; tb_id = mlxsw_sp_fix_tb_id(tb_id); - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + for (i = 0; i < max_vrs; i++) { vr = &mlxsw_sp->router->vrs[i]; if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id) return vr; @@ -754,13 +839,18 @@ int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id, u16 *vr_id) { struct mlxsw_sp_vr *vr; + int err = 0; + mutex_lock(&mlxsw_sp->router->lock); vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); - if (!vr) - return -ESRCH; + if (!vr) { + err = -ESRCH; + goto out; + } *vr_id = vr->id; - - return 0; +out: + mutex_unlock(&mlxsw_sp->router->lock); + return err; } static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, @@ -899,6 +989,7 @@ static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib, struct mlxsw_sp_lpm_tree *new_tree) { + int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); enum mlxsw_sp_l3proto proto = fib->proto; struct mlxsw_sp_lpm_tree *old_tree; u8 old_id, new_id = new_tree->id; @@ -908,7 +999,7 @@ static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, old_tree = mlxsw_sp->router->lpm.proto_trees[proto]; old_id = old_tree->id; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + for (i = 0; i < max_vrs; i++) { vr = &mlxsw_sp->router->vrs[i]; if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id)) continue; @@ -976,23 +1067,75 @@ static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp) kfree(mlxsw_sp->router->vrs); } -static struct net_device * -__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev) +u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) { - struct ip_tunnel *tun = netdev_priv(ol_dev); - struct net *net = dev_net(ol_dev); + struct net_device *d; + u32 tb_id; + + rcu_read_lock(); + d = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + if (d) + tb_id = l3mdev_fib_table(d) ? : RT_TABLE_MAIN; + else + tb_id = RT_TABLE_MAIN; + rcu_read_unlock(); - return __dev_get_by_index(net, tun->parms.link); + return tb_id; } -u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) +static void +mlxsw_sp_crif_init(struct mlxsw_sp_crif *crif, struct net_device *dev) { - struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + crif->key.dev = dev; + INIT_LIST_HEAD(&crif->nexthop_list); +} - if (d) - return l3mdev_fib_table(d) ? : RT_TABLE_MAIN; - else - return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN; +static struct mlxsw_sp_crif * +mlxsw_sp_crif_alloc(struct net_device *dev) +{ + struct mlxsw_sp_crif *crif; + + crif = kzalloc(sizeof(*crif), GFP_KERNEL); + if (!crif) + return NULL; + + mlxsw_sp_crif_init(crif, dev); + return crif; +} + +static void mlxsw_sp_crif_free(struct mlxsw_sp_crif *crif) +{ + if (WARN_ON(crif->rif)) + return; + + WARN_ON(!list_empty(&crif->nexthop_list)); + kfree(crif); +} + +static int mlxsw_sp_crif_insert(struct mlxsw_sp_router *router, + struct mlxsw_sp_crif *crif) +{ + return rhashtable_insert_fast(&router->crif_ht, &crif->ht_node, + mlxsw_sp_crif_ht_params); +} + +static void mlxsw_sp_crif_remove(struct mlxsw_sp_router *router, + struct mlxsw_sp_crif *crif) +{ + rhashtable_remove_fast(&router->crif_ht, &crif->ht_node, + mlxsw_sp_crif_ht_params); +} + +static struct mlxsw_sp_crif * +mlxsw_sp_crif_lookup(struct mlxsw_sp_router *router, + const struct net_device *dev) +{ + struct mlxsw_sp_crif_key key = { + .dev = (struct net_device *)dev, + }; + + return rhashtable_lookup_fast(&router->crif_ht, &key, + mlxsw_sp_crif_ht_params); } static struct mlxsw_sp_rif * @@ -1014,6 +1157,7 @@ mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp, lb_params = (struct mlxsw_sp_rif_params_ipip_lb) { .common.dev = ol_dev, .common.lag = false, + .common.double_entry = ipip_ops->double_rif_entry, .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev), }; @@ -1031,6 +1175,7 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_ipip_ops *ipip_ops; struct mlxsw_sp_ipip_entry *ipip_entry; struct mlxsw_sp_ipip_entry *ret = NULL; + int err; ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt]; ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL); @@ -1046,26 +1191,30 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp, ipip_entry->ipipt = ipipt; ipip_entry->ol_dev = ol_dev; + ipip_entry->parms = ipip_ops->parms_init(ol_dev); - switch (ipip_ops->ul_proto) { - case MLXSW_SP_L3_PROTO_IPV4: - ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); - break; - case MLXSW_SP_L3_PROTO_IPV6: - WARN_ON(1); - break; + err = ipip_ops->rem_ip_addr_set(mlxsw_sp, ipip_entry); + if (err) { + ret = ERR_PTR(err); + goto err_rem_ip_addr_set; } return ipip_entry; +err_rem_ip_addr_set: + mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common); err_ol_ipip_lb_create: kfree(ipip_entry); return ret; } -static void -mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry) +static void mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) { + const struct mlxsw_sp_ipip_ops *ipip_ops = + mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + + ipip_ops->rem_ip_addr_unset(mlxsw_sp, ipip_entry); mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common); kfree(ipip_entry); } @@ -1089,6 +1238,32 @@ mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_l3addr_eq(&tun_saddr, &saddr); } +static int mlxsw_sp_ipip_decap_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt) +{ + const struct mlxsw_sp_ipip_ops *ipip_ops; + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt]; + + /* Not all tunnels require to increase the default pasing depth + * (96 bytes). + */ + if (ipip_ops->inc_parsing_depth) + return mlxsw_sp_parsing_depth_inc(mlxsw_sp); + + return 0; +} + +static void mlxsw_sp_ipip_decap_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt) +{ + const struct mlxsw_sp_ipip_ops *ipip_ops = + mlxsw_sp->router->ipip_ops_arr[ipipt]; + + if (ipip_ops->inc_parsing_depth) + mlxsw_sp_parsing_depth_dec(mlxsw_sp); +} + static int mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, @@ -1102,18 +1277,32 @@ mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp, if (err) return err; + err = mlxsw_sp_ipip_decap_parsing_depth_inc(mlxsw_sp, + ipip_entry->ipipt); + if (err) + goto err_parsing_depth_inc; + ipip_entry->decap_fib_entry = fib_entry; fib_entry->decap.ipip_entry = ipip_entry; fib_entry->decap.tunnel_index = tunnel_index; + return 0; + +err_parsing_depth_inc: + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + fib_entry->decap.tunnel_index); + return err; } static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { + enum mlxsw_sp_ipip_type ipipt = fib_entry->decap.ipip_entry->ipipt; + /* Unlink this node from the IPIP entry that it's the decap entry of. */ fib_entry->decap.ipip_entry->decap_fib_entry = NULL; fib_entry->decap.ipip_entry = NULL; + mlxsw_sp_ipip_decap_parsing_depth_dec(mlxsw_sp, ipipt); mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, fib_entry->decap.tunnel_index); } @@ -1156,7 +1345,6 @@ mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const union mlxsw_sp_l3addr *addr, enum mlxsw_sp_fib_entry_type type) { - struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_fib_node *fib_node; unsigned char addr_prefix_len; struct mlxsw_sp_fib *fib; @@ -1177,7 +1365,11 @@ mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, addr_len = 4; addr_prefix_len = 32; break; - case MLXSW_SP_L3_PROTO_IPV6: /* fall through */ + case MLXSW_SP_L3_PROTO_IPV6: + addrp = &addr->addr6; + addr_len = 16; + addr_prefix_len = 128; + break; default: WARN_ON(1); return NULL; @@ -1185,15 +1377,10 @@ mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len, addr_prefix_len); - if (!fib_node || list_empty(&fib_node->entry_list)) - return NULL; - - fib_entry = list_first_entry(&fib_node->entry_list, - struct mlxsw_sp_fib_entry, list); - if (fib_entry->type != type) + if (!fib_node || fib_node->fib_entry->type != type) return NULL; - return fib_entry; + return fib_node->fib_entry; } /* Given an IPIP entry, find the corresponding decap route. */ @@ -1203,7 +1390,6 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, { static struct mlxsw_sp_fib_node *fib_node; const struct mlxsw_sp_ipip_ops *ipip_ops; - struct mlxsw_sp_fib_entry *fib_entry; unsigned char saddr_prefix_len; union mlxsw_sp_l3addr saddr; struct mlxsw_sp_fib *ul_fib; @@ -1232,21 +1418,22 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, saddr_prefix_len = 32; break; case MLXSW_SP_L3_PROTO_IPV6: + saddrp = &saddr.addr6; + saddr_len = 16; + saddr_prefix_len = 128; + break; + default: WARN_ON(1); return NULL; } fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len, saddr_prefix_len); - if (!fib_node || list_empty(&fib_node->entry_list)) + if (!fib_node || + fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) return NULL; - fib_entry = list_first_entry(&fib_node->entry_list, - struct mlxsw_sp_fib_entry, list); - if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) - return NULL; - - return fib_entry; + return fib_node->fib_entry; } static struct mlxsw_sp_ipip_entry * @@ -1271,7 +1458,7 @@ mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry) { list_del(&ipip_entry->ipip_list_node); - mlxsw_sp_ipip_entry_dealloc(ipip_entry); + mlxsw_sp_ipip_entry_dealloc(mlxsw_sp, ipip_entry); } static bool @@ -1293,21 +1480,33 @@ mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp, /* Given decap parameters, find the corresponding IPIP entry. */ static struct mlxsw_sp_ipip_entry * -mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, - const struct net_device *ul_dev, +mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, int ul_dev_ifindex, enum mlxsw_sp_l3proto ul_proto, union mlxsw_sp_l3addr ul_dip) { - struct mlxsw_sp_ipip_entry *ipip_entry; + struct mlxsw_sp_ipip_entry *ipip_entry = NULL; + struct net_device *ul_dev; + + rcu_read_lock(); + + ul_dev = dev_get_by_index_rcu(mlxsw_sp_net(mlxsw_sp), ul_dev_ifindex); + if (!ul_dev) + goto out_unlock; list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, ipip_list_node) if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev, ul_proto, ul_dip, ipip_entry)) - return ipip_entry; + goto out_unlock; + + rcu_read_unlock(); return NULL; + +out_unlock: + rcu_read_unlock(); + return ipip_entry; } static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, @@ -1329,8 +1528,8 @@ static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, return false; } -bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev) +static bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) { return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); } @@ -1360,8 +1559,12 @@ mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp, ipip_list_node); list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list, ipip_list_node) { - struct net_device *ipip_ul_dev = - __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); + struct net_device *ol_dev = ipip_entry->ol_dev; + struct net_device *ipip_ul_dev; + + rcu_read_lock(); + ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + rcu_read_unlock(); if (ipip_ul_dev == ul_dev) return ipip_entry; @@ -1370,8 +1573,8 @@ mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp, return NULL; } -bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev) +static bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) { return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL); } @@ -1383,19 +1586,15 @@ static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_ipip_ops *ops = mlxsw_sp->router->ipip_ops_arr[ipipt]; - /* For deciding whether decap should be offloaded, we don't care about - * overlay protocol, so ask whether either one is supported. - */ - return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) || - ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6); + return ops->can_offload(mlxsw_sp, ol_dev); } static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp, struct net_device *ol_dev) { + enum mlxsw_sp_ipip_type ipipt = MLXSW_SP_IPIP_TYPE_MAX; struct mlxsw_sp_ipip_entry *ipip_entry; enum mlxsw_sp_l3proto ul_proto; - enum mlxsw_sp_ipip_type ipipt; union mlxsw_sp_l3addr saddr; u32 ul_tb_id; @@ -1444,23 +1643,35 @@ mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id, u16 ul_rif_id, bool enable) { struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config; + struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common); + enum mlxsw_reg_ritr_loopback_ipip_options ipip_options; struct mlxsw_sp_rif *rif = &lb_rif->common; struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; char ritr_pl[MLXSW_REG_RITR_LEN]; + struct in6_addr *saddr6; u32 saddr4; + ipip_options = MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET; switch (lb_cf.ul_protocol) { case MLXSW_SP_L3_PROTO_IPV4: saddr4 = be32_to_cpu(lb_cf.saddr.addr4); mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF, - rif->rif_index, rif->vr_id, rif->dev->mtu); + rif->rif_index, rif->vr_id, dev->mtu); mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt, - MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET, - ul_vr_id, ul_rif_id, saddr4, lb_cf.okey); + ipip_options, ul_vr_id, + ul_rif_id, saddr4, + lb_cf.okey); break; case MLXSW_SP_L3_PROTO_IPV6: - return -EAFNOSUPPORT; + saddr6 = &lb_cf.saddr.addr6; + mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF, + rif->rif_index, rif->vr_id, dev->mtu); + mlxsw_reg_ritr_loopback_ipip6_pack(ritr_pl, lb_cf.lb_ipipt, + ipip_options, ul_vr_id, + ul_rif_id, saddr6, + lb_cf.okey); + break; } return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); @@ -1515,9 +1726,29 @@ static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); } -static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, +static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif); + +static void mlxsw_sp_rif_migrate_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *old_rif, - struct mlxsw_sp_rif *new_rif); + struct mlxsw_sp_rif *new_rif, + bool migrate_nhs) +{ + struct mlxsw_sp_crif *crif = old_rif->crif; + struct mlxsw_sp_crif mock_crif = {}; + + if (migrate_nhs) + mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); + + /* Plant a mock CRIF so that destroying the old RIF doesn't unoffload + * our nexthops and IPIP tunnels, and doesn't sever the crif->rif link. + */ + mlxsw_sp_crif_init(&mock_crif, crif->key.dev); + old_rif->crif = &mock_crif; + mock_crif.rif = old_rif; + mlxsw_sp_rif_destroy(old_rif); +} + static int mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, @@ -1535,26 +1766,23 @@ mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(new_lb_rif); ipip_entry->ol_lb = new_lb_rif; - if (keep_encap) - mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common, - &new_lb_rif->common); - - mlxsw_sp_rif_destroy(&old_lb_rif->common); - + mlxsw_sp_rif_migrate_destroy(mlxsw_sp, &old_lb_rif->common, + &new_lb_rif->common, keep_encap); return 0; } -static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif); - /** - * Update the offload related to an IPIP entry. This always updates decap, and - * in addition to that it also: - * @recreate_loopback: recreates the associated loopback RIF - * @keep_encap: updates next hops that use the tunnel netdevice. This is only + * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry. + * @mlxsw_sp: mlxsw_sp. + * @ipip_entry: IPIP entry. + * @recreate_loopback: Recreates the associated loopback RIF. + * @keep_encap: Updates next hops that use the tunnel netdevice. This is only * relevant when recreate_loopback is true. - * @update_nexthops: updates next hops, keeping the current loopback RIF. This + * @update_nexthops: Updates next hops, keeping the current loopback RIF. This * is only relevant when recreate_loopback is false. + * @extack: extack. + * + * Return: Non-zero value on failure. */ int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, @@ -1596,27 +1824,10 @@ static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_ipip_entry *ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); - enum mlxsw_sp_l3proto ul_proto; - union mlxsw_sp_l3addr saddr; - u32 ul_tb_id; if (!ipip_entry) return 0; - /* For flat configuration cases, moving overlay to a different VRF might - * cause local address conflict, and the conflicting tunnels need to be - * demoted. - */ - ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); - ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; - saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); - if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto, - saddr, ul_tb_id, - ipip_entry)) { - mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); - return 0; - } - return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, true, false, false, extack); } @@ -1625,8 +1836,25 @@ static int mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, struct net_device *ul_dev, + bool *demote_this, struct netlink_ext_ack *extack) { + u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN; + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr saddr; + + /* Moving underlay to a different VRF might cause local address + * conflict, and the conflicting tunnels need to be demoted. + */ + ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; + saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev); + if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto, + saddr, ul_tb_id, + ipip_entry)) { + *demote_this = true; + return 0; + } + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, true, true, false, extack); } @@ -1695,7 +1923,7 @@ void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp, /* The configuration where several tunnels have the same local address in the * same underlay table needs special treatment in the HW. That is currently not * implemented in the driver. This function finds and demotes the first tunnel - * with a given source address, except the one passed in in the argument + * with a given source address, except the one passed in the argument * `except'. */ bool @@ -1727,56 +1955,64 @@ static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp, list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list, ipip_list_node) { - struct net_device *ipip_ul_dev = - __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); + struct net_device *ol_dev = ipip_entry->ol_dev; + struct net_device *ipip_ul_dev; + rcu_read_lock(); + ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + rcu_read_unlock(); if (ipip_ul_dev == ul_dev) mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); } } -int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *ol_dev, - unsigned long event, - struct netdev_notifier_info *info) +static int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev, + unsigned long event, + struct netdev_notifier_info *info) { struct netdev_notifier_changeupper_info *chup; struct netlink_ext_ack *extack; + int err = 0; switch (event) { case NETDEV_REGISTER: - return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev); + err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev); + break; case NETDEV_UNREGISTER: mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev); - return 0; + break; case NETDEV_UP: mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev); - return 0; + break; case NETDEV_DOWN: mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev); - return 0; + break; case NETDEV_CHANGEUPPER: chup = container_of(info, typeof(*chup), info); extack = info->extack; if (netif_is_l3_master(chup->upper_dev)) - return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp, - ol_dev, - extack); - return 0; + err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp, + ol_dev, + extack); + break; case NETDEV_CHANGE: extack = info->extack; - return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp, - ol_dev, extack); + err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp, + ol_dev, extack); + break; case NETDEV_CHANGEMTU: - return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev); + err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev); + break; } - return 0; + return err; } static int __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, struct net_device *ul_dev, + bool *demote_this, unsigned long event, struct netdev_notifier_info *info) { @@ -1791,6 +2027,7 @@ __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp, ipip_entry, ul_dev, + demote_this, extack); break; @@ -1805,7 +2042,7 @@ __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, return 0; } -int +static int mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, struct net_device *ul_dev, unsigned long event, @@ -1817,13 +2054,31 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, ul_dev, ipip_entry))) { + struct mlxsw_sp_ipip_entry *prev; + bool demote_this = false; + err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry, - ul_dev, event, info); + ul_dev, &demote_this, + event, info); if (err) { mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp, ul_dev); return err; } + + if (demote_this) { + if (list_is_first(&ipip_entry->ipip_list_node, + &mlxsw_sp->router->ipip_list)) + prev = NULL; + else + /* This can't be cached from previous iteration, + * because that entry could be gone now. + */ + prev = list_prev_entry(ipip_entry, + ipip_list_node); + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + ipip_entry = prev; + } } return 0; @@ -1835,8 +2090,22 @@ int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, u32 tunnel_index) { enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + struct mlxsw_sp_router *router = mlxsw_sp->router; struct mlxsw_sp_fib_entry *fib_entry; - int err; + int err = 0; + + mutex_lock(&mlxsw_sp->router->lock); + + if (WARN_ON_ONCE(router->nve_decap_config.valid)) { + err = -EINVAL; + goto out; + } + + router->nve_decap_config.ul_tb_id = ul_tb_id; + router->nve_decap_config.tunnel_index = tunnel_index; + router->nve_decap_config.ul_proto = ul_proto; + router->nve_decap_config.ul_sip = *ul_sip; + router->nve_decap_config.valid = true; /* It is valid to create a tunnel with a local IP and only later * assign this IP address to a local interface @@ -1845,7 +2114,7 @@ int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, ul_proto, ul_sip, type); if (!fib_entry) - return 0; + goto out; fib_entry->decap.tunnel_index = tunnel_index; fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; @@ -1854,11 +2123,13 @@ int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, if (err) goto err_fib_entry_update; - return 0; + goto out; err_fib_entry_update: fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +out: + mutex_unlock(&mlxsw_sp->router->lock); return err; } @@ -1867,16 +2138,40 @@ void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, const union mlxsw_sp_l3addr *ul_sip) { enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + struct mlxsw_sp_router *router = mlxsw_sp->router; struct mlxsw_sp_fib_entry *fib_entry; + mutex_lock(&mlxsw_sp->router->lock); + + if (WARN_ON_ONCE(!router->nve_decap_config.valid)) + goto out; + + router->nve_decap_config.valid = false; + fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id, ul_proto, ul_sip, type); if (!fib_entry) - return; + goto out; fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +out: + mutex_unlock(&mlxsw_sp->router->lock); +} + +static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp, + u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + + return router->nve_decap_config.valid && + router->nve_decap_config.ul_tb_id == ul_tb_id && + router->nve_decap_config.ul_proto == ul_proto && + !memcmp(&router->nve_decap_config.ul_sip, ul_sip, + sizeof(*ul_sip)); } struct mlxsw_sp_neigh_key { @@ -1957,7 +2252,7 @@ int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp, return -EINVAL; return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index, - p_counter, NULL); + false, p_counter, NULL); } static struct mlxsw_sp_neigh_entry * @@ -2067,6 +2362,7 @@ mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) goto err_neigh_entry_insert; mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry); + atomic_inc(&mlxsw_sp->router->neighs_update.neigh_count); list_add(&neigh_entry->rif_list_node, &rif->neigh_list); return neigh_entry; @@ -2081,6 +2377,7 @@ mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry) { list_del(&neigh_entry->rif_list_node); + atomic_dec(&mlxsw_sp->router->neighs_update.neigh_count); mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry); mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); mlxsw_sp_neigh_entry_free(neigh_entry); @@ -2115,6 +2412,7 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, char *rauhtd_pl, int ent_index) { + u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); struct net_device *dev; struct neighbour *n; __be32 dipn; @@ -2123,13 +2421,15 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip); + if (WARN_ON_ONCE(rif >= max_rifs)) + return; if (!mlxsw_sp->router->rifs[rif]) { dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n"); return; } dipn = htonl(dip); - dev = mlxsw_sp->router->rifs[rif]->dev; + dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]); n = neigh_lookup(&arp_tbl, &dipn, dev); if (!n) return; @@ -2157,7 +2457,7 @@ static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, return; } - dev = mlxsw_sp->router->rifs[rif]->dev; + dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]); n = neigh_lookup(&nd_tbl, &dip, dev); if (!n) return; @@ -2249,10 +2549,8 @@ __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, int i, num_rec; int err; - /* Make sure the neighbour's netdev isn't removed in the - * process. - */ - rtnl_lock(); + /* Ensure the RIF we read from the device does not change mid-dump. */ + mutex_lock(&mlxsw_sp->router->lock); do { mlxsw_reg_rauhtd_pack(rauhtd_pl, type); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), @@ -2266,7 +2564,7 @@ __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl, i); } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl)); - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); return err; } @@ -2277,6 +2575,9 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) char *rauhtd_pl; int err; + if (!atomic_read(&mlxsw_sp->router->neighs_update.neigh_count)) + return 0; + rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); if (!rauhtd_pl) return -ENOMEM; @@ -2297,15 +2598,14 @@ static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_neigh_entry *neigh_entry; - /* Take RTNL mutex here to prevent lists from changes */ - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list, nexthop_neighs_list_node) /* If this neigh have nexthops, make the kernel think this neigh * is active regardless of the traffic. */ neigh_event_send(neigh_entry->key.n, NULL); - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); } static void @@ -2345,15 +2645,13 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) * the nexthop wouldn't get offloaded until the neighbor is resolved * but it wouldn't get resolved ever in case traffic is flowing in HW * using different nexthop. - * - * Take RTNL mutex here to prevent lists from changes. */ - rtnl_lock(); + mutex_lock(&router->lock); list_for_each_entry(neigh_entry, &router->nexthop_neighs_list, nexthop_neighs_list_node) if (!neigh_entry->connected) neigh_event_send(neigh_entry->key.n, NULL); - rtnl_unlock(); + mutex_unlock(&router->lock); mlxsw_core_schedule_dw(&router->nexthop_probe_dw, MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL); @@ -2362,7 +2660,7 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) static void mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, - bool removing); + bool removing, bool dead); static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding) { @@ -2370,7 +2668,7 @@ static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding) MLXSW_REG_RAUHT_OP_WRITE_DELETE; } -static void +static int mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, enum mlxsw_reg_rauht_op op) @@ -2384,10 +2682,10 @@ mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp, if (neigh_entry->counter_valid) mlxsw_reg_rauht_pack_counter(rauht_pl, neigh_entry->counter_index); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); } -static void +static int mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, enum mlxsw_reg_rauht_op op) @@ -2401,7 +2699,7 @@ mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp, if (neigh_entry->counter_valid) mlxsw_reg_rauht_pack_counter(rauht_pl, neigh_entry->counter_index); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); } bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry) @@ -2423,20 +2721,33 @@ mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, bool adding) { + enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding); + int err; + if (!adding && !neigh_entry->connected) return; neigh_entry->connected = adding; if (neigh_entry->key.n->tbl->family == AF_INET) { - mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry, - mlxsw_sp_rauht_op(adding)); + err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry, + op); + if (err) + return; } else if (neigh_entry->key.n->tbl->family == AF_INET6) { if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) return; - mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry, - mlxsw_sp_rauht_op(adding)); + err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry, + op); + if (err) + return; } else { WARN_ON_ONCE(1); + return; } + + if (adding) + neigh_entry->key.n->flags |= NTF_OFFLOADED; + else + neigh_entry->key.n->flags &= ~NTF_OFFLOADED; } void @@ -2478,7 +2789,7 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) dead = n->dead; read_unlock_bh(&n->lock); - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); mlxsw_sp_span_respin(mlxsw_sp); entry_connected = nud_state & NUD_VALID && !dead; @@ -2491,15 +2802,20 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) goto out; } + if (neigh_entry->connected && entry_connected && + !memcmp(neigh_entry->ha, ha, ETH_ALEN)) + goto out; + memcpy(neigh_entry->ha, ha, ETH_ALEN); mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected); - mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected); + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected, + dead); if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list)) mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); out: - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); neigh_release(n); kfree(net_work); } @@ -2529,36 +2845,61 @@ static void mlxsw_sp_router_update_priority_work(struct work_struct *work) } static int mlxsw_sp_router_schedule_work(struct net *net, - struct notifier_block *nb, + struct mlxsw_sp_router *router, + struct neighbour *n, void (*cb)(struct work_struct *)) { struct mlxsw_sp_netevent_work *net_work; - struct mlxsw_sp_router *router; - if (!net_eq(net, &init_net)) + if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp))) return NOTIFY_DONE; net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC); if (!net_work) return NOTIFY_BAD; - router = container_of(nb, struct mlxsw_sp_router, netevent_nb); INIT_WORK(&net_work->work, cb); net_work->mlxsw_sp = router->mlxsw_sp; + net_work->n = n; mlxsw_core_schedule_work(&net_work->work); return NOTIFY_DONE; } +static bool mlxsw_sp_dev_lower_is_port(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + rcu_read_lock(); + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev); + rcu_read_unlock(); + return !!mlxsw_sp_port; +} + +static int mlxsw_sp_router_schedule_neigh_work(struct mlxsw_sp_router *router, + struct neighbour *n) +{ + struct net *net; + + net = neigh_parms_net(n->parms); + + /* Take a reference to ensure the neighbour won't be destructed until we + * drop the reference in delayed work. + */ + neigh_clone(n); + return mlxsw_sp_router_schedule_work(net, router, n, + mlxsw_sp_router_neigh_event_work); +} + static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, unsigned long event, void *ptr) { - struct mlxsw_sp_netevent_work *net_work; - struct mlxsw_sp_port *mlxsw_sp_port; - struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_router *router; unsigned long interval; struct neigh_parms *p; struct neighbour *n; + router = container_of(nb, struct mlxsw_sp_router, netevent_nb); + switch (event) { case NETEVENT_DELAY_PROBE_TIME_UPDATE: p = ptr; @@ -2571,15 +2912,11 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, /* We are in atomic context and can't take RTNL mutex, * so use RCU variant to walk the device chain. */ - mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev); - if (!mlxsw_sp_port) + if (!mlxsw_sp_dev_lower_is_port(p->dev)) return NOTIFY_DONE; - mlxsw_sp = mlxsw_sp_port->mlxsw_sp; interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME)); - mlxsw_sp->router->neighs_update.interval = interval; - - mlxsw_sp_port_dev_put(mlxsw_sp_port); + router->neighs_update.interval = interval; break; case NETEVENT_NEIGH_UPDATE: n = ptr; @@ -2587,35 +2924,18 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6) return NOTIFY_DONE; - mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev); - if (!mlxsw_sp_port) + if (!mlxsw_sp_dev_lower_is_port(n->dev)) return NOTIFY_DONE; - net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC); - if (!net_work) { - mlxsw_sp_port_dev_put(mlxsw_sp_port); - return NOTIFY_BAD; - } - - INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work); - net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - net_work->n = n; + return mlxsw_sp_router_schedule_neigh_work(router, n); - /* Take a reference to ensure the neighbour won't be - * destructed until we drop the reference in delayed - * work. - */ - neigh_clone(n); - mlxsw_core_schedule_work(&net_work->work); - mlxsw_sp_port_dev_put(mlxsw_sp_port); - break; case NETEVENT_IPV4_MPATH_HASH_UPDATE: case NETEVENT_IPV6_MPATH_HASH_UPDATE: - return mlxsw_sp_router_schedule_work(ptr, nb, + return mlxsw_sp_router_schedule_work(ptr, router, NULL, mlxsw_sp_router_mp_hash_event_work); case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE: - return mlxsw_sp_router_schedule_work(ptr, nb, + return mlxsw_sp_router_schedule_work(ptr, router, NULL, mlxsw_sp_router_update_priority_work); } @@ -2641,6 +2961,7 @@ static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_router_neighs_update_work); INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw, mlxsw_sp_router_probe_unresolved_nexthops); + atomic_set(&mlxsw_sp->router->neighs_update.neigh_count, 0); mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0); mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0); return 0; @@ -2665,96 +2986,318 @@ static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, } } +struct mlxsw_sp_neigh_rif_made_sync { + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + int err; +}; + +static void mlxsw_sp_neigh_rif_made_sync_each(struct neighbour *n, void *data) +{ + struct mlxsw_sp_neigh_rif_made_sync *rms = data; + int rc; + + if (rms->err) + return; + if (n->dev != mlxsw_sp_rif_dev(rms->rif)) + return; + rc = mlxsw_sp_router_schedule_neigh_work(rms->mlxsw_sp->router, n); + if (rc != NOTIFY_DONE) + rms->err = -ENOMEM; +} + +static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_neigh_rif_made_sync rms = { + .mlxsw_sp = mlxsw_sp, + .rif = rif, + }; + + if (!mlxsw_sp_dev_lower_is_port(mlxsw_sp_rif_dev(rif))) + return 0; + + neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms); + if (rms.err) + goto err_arp; + +#if IS_ENABLED(CONFIG_IPV6) + neigh_for_each(&nd_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms); +#endif + if (rms.err) + goto err_nd; + + return 0; + +err_nd: +err_arp: + mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); + return rms.err; +} + enum mlxsw_sp_nexthop_type { MLXSW_SP_NEXTHOP_TYPE_ETH, MLXSW_SP_NEXTHOP_TYPE_IPIP, }; +enum mlxsw_sp_nexthop_action { + /* Nexthop forwards packets to an egress RIF */ + MLXSW_SP_NEXTHOP_ACTION_FORWARD, + /* Nexthop discards packets */ + MLXSW_SP_NEXTHOP_ACTION_DISCARD, + /* Nexthop traps packets */ + MLXSW_SP_NEXTHOP_ACTION_TRAP, +}; + struct mlxsw_sp_nexthop_key { struct fib_nh *fib_nh; }; +struct mlxsw_sp_nexthop_counter; + struct mlxsw_sp_nexthop { struct list_head neigh_list_node; /* member of neigh entry list */ - struct list_head rif_list_node; + struct list_head crif_list_node; struct list_head router_list_node; - struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group - * this belongs to - */ + struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group + * this nexthop belongs to + */ struct rhash_head ht_node; + struct neigh_table *neigh_tbl; struct mlxsw_sp_nexthop_key key; unsigned char gw_addr[sizeof(struct in6_addr)]; int ifindex; int nh_weight; int norm_nh_weight; int num_adj_entries; - struct mlxsw_sp_rif *rif; - u8 should_offload:1, /* set indicates this neigh is connected and - * should be put to KVD linear area of this group. + struct mlxsw_sp_crif *crif; + u8 should_offload:1, /* set indicates this nexthop should be written + * to the adjacency table. */ - offloaded:1, /* set in case the neigh is actually put into - * KVD linear area of this group. + offloaded:1, /* set indicates this nexthop was written to the + * adjacency table. */ - update:1; /* set indicates that MAC of this neigh should be - * updated in HW + update:1; /* set indicates this nexthop should be updated in the + * adjacency table (f.e., its MAC changed). */ + enum mlxsw_sp_nexthop_action action; enum mlxsw_sp_nexthop_type type; union { struct mlxsw_sp_neigh_entry *neigh_entry; struct mlxsw_sp_ipip_entry *ipip_entry; }; - unsigned int counter_index; - bool counter_valid; + struct mlxsw_sp_nexthop_counter *counter; + u32 id; /* NH ID for members of a NH object group. */ }; -struct mlxsw_sp_nexthop_group { - void *priv; - struct rhash_head ht_node; - struct list_head fib_list; /* list of fib entries that use this group */ - struct neigh_table *neigh_tbl; - u8 adj_index_valid:1, - gateway:1; /* routes using the group use a gateway */ +static struct net_device * +mlxsw_sp_nexthop_dev(const struct mlxsw_sp_nexthop *nh) +{ + if (!nh->crif) + return NULL; + return nh->crif->key.dev; +} + +enum mlxsw_sp_nexthop_group_type { + MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4, + MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6, + MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ, +}; + +struct mlxsw_sp_nexthop_group_info { + struct mlxsw_sp_nexthop_group *nh_grp; u32 adj_index; u16 ecmp_size; u16 count; int sum_norm_weight; - struct mlxsw_sp_nexthop nexthops[0]; -#define nh_rif nexthops[0].rif + u8 adj_index_valid:1, + gateway:1, /* routes using the group use a gateway */ + is_resilient:1, + hw_stats:1; + struct list_head list; /* member in nh_res_grp_list */ + struct xarray nexthop_counters; + struct mlxsw_sp_nexthop nexthops[] __counted_by(count); +}; + +static struct mlxsw_sp_rif * +mlxsw_sp_nhgi_rif(const struct mlxsw_sp_nexthop_group_info *nhgi) +{ + struct mlxsw_sp_crif *crif = nhgi->nexthops[0].crif; + + if (!crif) + return NULL; + return crif->rif; +} + +struct mlxsw_sp_nexthop_group_vr_key { + u16 vr_id; + enum mlxsw_sp_l3proto proto; }; -void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, +struct mlxsw_sp_nexthop_group_vr_entry { + struct list_head list; /* member in vr_list */ + struct rhash_head ht_node; /* member in vr_ht */ + refcount_t ref_count; + struct mlxsw_sp_nexthop_group_vr_key key; +}; + +struct mlxsw_sp_nexthop_group { + struct rhash_head ht_node; + struct list_head fib_list; /* list of fib entries that use this group */ + union { + struct { + struct fib_info *fi; + } ipv4; + struct { + u32 id; + } obj; + }; + struct mlxsw_sp_nexthop_group_info *nhgi; + struct list_head vr_list; + struct rhashtable vr_ht; + enum mlxsw_sp_nexthop_group_type type; + bool can_destroy; +}; + +struct mlxsw_sp_nexthop_counter { + unsigned int counter_index; + refcount_t ref_count; +}; + +static struct mlxsw_sp_nexthop_counter * +mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nexthop_counter *nhct; + int err; + + nhct = kzalloc(sizeof(*nhct), GFP_KERNEL); + if (!nhct) + return ERR_PTR(-ENOMEM); + + err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nhct->counter_index); + if (err) + goto err_counter_alloc; + + refcount_set(&nhct->ref_count, 1); + return nhct; + +err_counter_alloc: + kfree(nhct); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_counter *nhct) +{ + mlxsw_sp_flow_counter_free(mlxsw_sp, nhct->counter_index); + kfree(nhct); +} + +static struct mlxsw_sp_nexthop_counter * +mlxsw_sp_nexthop_sh_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp; + struct mlxsw_sp_nexthop_counter *nhct; + int err; + + nhct = xa_load(&nh_grp->nhgi->nexthop_counters, nh->id); + if (nhct) { + refcount_inc(&nhct->ref_count); + return nhct; + } + + nhct = mlxsw_sp_nexthop_counter_alloc(mlxsw_sp); + if (IS_ERR(nhct)) + return nhct; + + err = xa_err(xa_store(&nh_grp->nhgi->nexthop_counters, nh->id, nhct, + GFP_KERNEL)); + if (err) + goto err_store; + + return nhct; + +err_store: + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nhct); + return ERR_PTR(err); +} + +static void mlxsw_sp_nexthop_sh_counter_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp; + struct mlxsw_sp_nexthop_counter *nhct; + + nhct = xa_load(&nh_grp->nhgi->nexthop_counters, nh->id); + if (WARN_ON(!nhct)) + return; + + if (!refcount_dec_and_test(&nhct->ref_count)) + return; + + xa_erase(&nh_grp->nhgi->nexthop_counters, nh->id); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nhct); +} + +int mlxsw_sp_nexthop_counter_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { + const char *table_adj = MLXSW_SP_DPIPE_TABLE_NAME_ADJ; + struct mlxsw_sp_nexthop_counter *nhct; struct devlink *devlink; + bool dpipe_stats; + + if (nh->counter) + return 0; devlink = priv_to_devlink(mlxsw_sp->core); - if (!devlink_dpipe_table_counter_enabled(devlink, - MLXSW_SP_DPIPE_TABLE_NAME_ADJ)) - return; + dpipe_stats = devlink_dpipe_table_counter_enabled(devlink, table_adj); + if (!(nh->nhgi->hw_stats || dpipe_stats)) + return 0; - if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index)) - return; + if (nh->id) + nhct = mlxsw_sp_nexthop_sh_counter_get(mlxsw_sp, nh); + else + nhct = mlxsw_sp_nexthop_counter_alloc(mlxsw_sp); + if (IS_ERR(nhct)) + return PTR_ERR(nhct); - nh->counter_valid = true; + nh->counter = nhct; + return 0; } -void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +void mlxsw_sp_nexthop_counter_disable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { - if (!nh->counter_valid) + if (!nh->counter) return; - mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index); - nh->counter_valid = false; + + if (nh->id) + mlxsw_sp_nexthop_sh_counter_put(mlxsw_sp, nh); + else + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh->counter); + nh->counter = NULL; +} + +static int mlxsw_sp_nexthop_counter_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + if (nh->nhgi->hw_stats) + return mlxsw_sp_nexthop_counter_enable(mlxsw_sp, nh); + mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh); + return 0; } int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, u64 *p_counter) { - if (!nh->counter_valid) + if (!nh->counter) return -EINVAL; - return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index, - p_counter, NULL); + return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter->counter_index, + true, p_counter, NULL); } struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, @@ -2772,14 +3315,15 @@ struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, return list_next_entry(nh, router_list_node); } -bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh) +bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh) { - return nh->offloaded; + return nh->offloaded && nh->action == MLXSW_SP_NEXTHOP_ACTION_FORWARD; } unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh) { - if (!nh->offloaded) + if (nh->type != MLXSW_SP_NEXTHOP_TYPE_ETH || + !mlxsw_sp_nexthop_is_forward(nh)) return NULL; return nh->neigh_entry->ha; } @@ -2787,18 +3331,18 @@ unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh) int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, u32 *p_adj_size, u32 *p_adj_hash_index) { - struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp; + struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi; u32 adj_hash_index = 0; int i; - if (!nh->offloaded || !nh_grp->adj_index_valid) + if (!nh->offloaded || !nhgi->adj_index_valid) return -EINVAL; - *p_adj_index = nh_grp->adj_index; - *p_adj_size = nh_grp->ecmp_size; + *p_adj_index = nhgi->adj_index; + *p_adj_size = nhgi->ecmp_size; - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i]; + for (i = 0; i < nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i]; if (nh_iter == nh) break; @@ -2812,16 +3356,18 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh) { - return nh->rif; + if (WARN_ON(!nh->crif)) + return NULL; + return nh->crif->rif; } bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh) { - struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp; + struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi; int i; - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i]; + for (i = 0; i < nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i]; if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP) return true; @@ -2829,17 +3375,102 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh) return false; } -static struct fib_info * -mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp) +static const struct rhashtable_params mlxsw_sp_nexthop_group_vr_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, key), + .head_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, ht_node), + .key_len = sizeof(struct mlxsw_sp_nexthop_group_vr_key), + .automatic_shrinking = true, +}; + +static struct mlxsw_sp_nexthop_group_vr_entry * +mlxsw_sp_nexthop_group_vr_entry_lookup(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_fib *fib) { - return nh_grp->priv; + struct mlxsw_sp_nexthop_group_vr_key key; + + memset(&key, 0, sizeof(key)); + key.vr_id = fib->vr->id; + key.proto = fib->proto; + return rhashtable_lookup_fast(&nh_grp->vr_ht, &key, + mlxsw_sp_nexthop_group_vr_ht_params); +} + +static int +mlxsw_sp_nexthop_group_vr_entry_create(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_fib *fib) +{ + struct mlxsw_sp_nexthop_group_vr_entry *vr_entry; + int err; + + vr_entry = kzalloc(sizeof(*vr_entry), GFP_KERNEL); + if (!vr_entry) + return -ENOMEM; + + vr_entry->key.vr_id = fib->vr->id; + vr_entry->key.proto = fib->proto; + refcount_set(&vr_entry->ref_count, 1); + + err = rhashtable_insert_fast(&nh_grp->vr_ht, &vr_entry->ht_node, + mlxsw_sp_nexthop_group_vr_ht_params); + if (err) + goto err_hashtable_insert; + + list_add(&vr_entry->list, &nh_grp->vr_list); + + return 0; + +err_hashtable_insert: + kfree(vr_entry); + return err; +} + +static void +mlxsw_sp_nexthop_group_vr_entry_destroy(struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop_group_vr_entry *vr_entry) +{ + list_del(&vr_entry->list); + rhashtable_remove_fast(&nh_grp->vr_ht, &vr_entry->ht_node, + mlxsw_sp_nexthop_group_vr_ht_params); + kfree(vr_entry); +} + +static int +mlxsw_sp_nexthop_group_vr_link(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_fib *fib) +{ + struct mlxsw_sp_nexthop_group_vr_entry *vr_entry; + + vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib); + if (vr_entry) { + refcount_inc(&vr_entry->ref_count); + return 0; + } + + return mlxsw_sp_nexthop_group_vr_entry_create(nh_grp, fib); +} + +static void +mlxsw_sp_nexthop_group_vr_unlink(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_fib *fib) +{ + struct mlxsw_sp_nexthop_group_vr_entry *vr_entry; + + vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib); + if (WARN_ON_ONCE(!vr_entry)) + return; + + if (!refcount_dec_and_test(&vr_entry->ref_count)) + return; + + mlxsw_sp_nexthop_group_vr_entry_destroy(nh_grp, vr_entry); } struct mlxsw_sp_nexthop_group_cmp_arg { - enum mlxsw_sp_l3proto proto; + enum mlxsw_sp_nexthop_group_type type; union { struct fib_info *fi; struct mlxsw_sp_fib6_entry *fib6_entry; + u32 id; }; }; @@ -2850,10 +3481,10 @@ mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp, { int i; - for (i = 0; i < nh_grp->count; i++) { + for (i = 0; i < nh_grp->nhgi->count; i++) { const struct mlxsw_sp_nexthop *nh; - nh = &nh_grp->nexthops[i]; + nh = &nh_grp->nhgi->nexthops[i]; if (nh->ifindex == ifindex && nh->nh_weight == weight && ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr)) return true; @@ -2868,16 +3499,17 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp, { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; - if (nh_grp->count != fib6_entry->nrt6) + if (nh_grp->nhgi->count != fib6_entry->nrt6) return false; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; struct in6_addr *gw; int ifindex, weight; - ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex; - weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight; - gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw; + ifindex = fib6_nh->fib_nh_dev->ifindex; + weight = fib6_nh->fib_nh_weight; + gw = &fib6_nh->fib_nh_gw6; if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex, weight)) return false; @@ -2892,24 +3524,23 @@ mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr) const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key; const struct mlxsw_sp_nexthop_group *nh_grp = ptr; - switch (cmp_arg->proto) { - case MLXSW_SP_L3_PROTO_IPV4: - return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp); - case MLXSW_SP_L3_PROTO_IPV6: + if (nh_grp->type != cmp_arg->type) + return 1; + + switch (cmp_arg->type) { + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4: + return cmp_arg->fi != nh_grp->ipv4.fi; + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6: return !mlxsw_sp_nexthop6_group_cmp(nh_grp, cmp_arg->fib6_entry); + case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ: + return cmp_arg->id != nh_grp->obj.id; default: WARN_ON(1); return 1; } } -static int -mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp) -{ - return nh_grp->neigh_tbl->family; -} - static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed) { const struct mlxsw_sp_nexthop_group *nh_grp = data; @@ -2918,17 +3549,20 @@ static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed) unsigned int val; int i; - switch (mlxsw_sp_nexthop_group_type(nh_grp)) { - case AF_INET: - fi = mlxsw_sp_nexthop4_group_fi(nh_grp); + switch (nh_grp->type) { + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4: + fi = nh_grp->ipv4.fi; return jhash(&fi, sizeof(fi), seed); - case AF_INET6: - val = nh_grp->count; - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; - val ^= nh->ifindex; + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6: + val = nh_grp->nhgi->count; + for (i = 0; i < nh_grp->nhgi->count; i++) { + nh = &nh_grp->nhgi->nexthops[i]; + val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed); + val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed); } return jhash(&val, sizeof(val), seed); + case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ: + return jhash(&nh_grp->obj.id, sizeof(nh_grp->obj.id), seed); default: WARN_ON(1); return 0; @@ -2940,11 +3574,14 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed) { unsigned int val = fib6_entry->nrt6; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; - struct net_device *dev; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev; - val ^= dev->ifindex; + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; + struct net_device *dev = fib6_nh->fib_nh_dev; + struct in6_addr *gw = &fib6_nh->fib_nh_gw6; + + val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed); + val ^= jhash(gw, sizeof(*gw), seed); } return jhash(&val, sizeof(val), seed); @@ -2955,11 +3592,13 @@ mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed) { const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data; - switch (cmp_arg->proto) { - case MLXSW_SP_L3_PROTO_IPV4: + switch (cmp_arg->type) { + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4: return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed); - case MLXSW_SP_L3_PROTO_IPV6: + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6: return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed); + case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ: + return jhash(&cmp_arg->id, sizeof(cmp_arg->id), seed); default: WARN_ON(1); return 0; @@ -2976,8 +3615,8 @@ static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = { static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { - if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 && - !nh_grp->gateway) + if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 && + !nh_grp->nhgi->gateway) return 0; return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht, @@ -2988,8 +3627,8 @@ static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { - if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 && - !nh_grp->gateway) + if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 && + !nh_grp->nhgi->gateway) return; rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht, @@ -3003,7 +3642,7 @@ mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg; - cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4; + cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4; cmp_arg.fi = fi; return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, &cmp_arg, @@ -3016,7 +3655,7 @@ mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg; - cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6; + cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6; cmp_arg.fib6_entry = fib6_entry; return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, &cmp_arg, @@ -3052,7 +3691,8 @@ mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_fib *fib, + enum mlxsw_sp_l3proto proto, + u16 vr_id, u32 adj_index, u16 ecmp_size, u32 new_adj_index, u16 new_ecmp_size) @@ -3060,8 +3700,8 @@ static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, char raleu_pl[MLXSW_REG_RALEU_LEN]; mlxsw_reg_raleu_pack(raleu_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, - fib->vr->id, adj_index, ecmp_size, new_adj_index, + (enum mlxsw_reg_ralxx_protocol) proto, vr_id, + adj_index, ecmp_size, new_adj_index, new_ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl); } @@ -3070,52 +3710,86 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, u32 old_adj_index, u16 old_ecmp_size) { - struct mlxsw_sp_fib_entry *fib_entry; - struct mlxsw_sp_fib *fib = NULL; + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + struct mlxsw_sp_nexthop_group_vr_entry *vr_entry; int err; - list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { - if (fib == fib_entry->fib_node->fib) - continue; - fib = fib_entry->fib_node->fib; - err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib, + list_for_each_entry(vr_entry, &nh_grp->vr_list, list) { + err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, + vr_entry->key.proto, + vr_entry->key.vr_id, old_adj_index, old_ecmp_size, - nh_grp->adj_index, - nh_grp->ecmp_size); + nhgi->adj_index, + nhgi->ecmp_size); if (err) - return err; + goto err_mass_update_vr; } return 0; + +err_mass_update_vr: + list_for_each_entry_continue_reverse(vr_entry, &nh_grp->vr_list, list) + mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr_entry->key.proto, + vr_entry->key.vr_id, + nhgi->adj_index, + nhgi->ecmp_size, + old_adj_index, old_ecmp_size); + return err; } -static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) +static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, + struct mlxsw_sp_nexthop *nh, + bool force, char *ratr_pl) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; - char ratr_pl[MLXSW_REG_RATR_LEN]; + struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh); + enum mlxsw_reg_ratr_op op; + u16 rif_index; - mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, - true, MLXSW_REG_RATR_TYPE_ETHERNET, - adj_index, neigh_entry->rif); - mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); - if (nh->counter_valid) - mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true); + rif_index = rif ? rif->rif_index : + mlxsw_sp->router->lb_crif->rif->rif_index; + op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; + mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET, + adj_index, rif_index); + switch (nh->action) { + case MLXSW_SP_NEXTHOP_ACTION_FORWARD: + mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); + break; + case MLXSW_SP_NEXTHOP_ACTION_DISCARD: + mlxsw_reg_ratr_trap_action_set(ratr_pl, + MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS); + break; + case MLXSW_SP_NEXTHOP_ACTION_TRAP: + mlxsw_reg_ratr_trap_action_set(ratr_pl, + MLXSW_REG_RATR_TRAP_ACTION_TRAP); + mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0); + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + if (nh->counter) + mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter->counter_index, + true); else mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); } -int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) +int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh, bool force, + char *ratr_pl) { int i; for (i = 0; i < nh->num_adj_entries; i++) { int err; - err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh); + err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i, + nh, force, ratr_pl); if (err) return err; } @@ -3125,17 +3799,20 @@ int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) + struct mlxsw_sp_nexthop *nh, + bool force, char *ratr_pl) { const struct mlxsw_sp_ipip_ops *ipip_ops; ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt]; - return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry); + return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry, + force, ratr_pl); } static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) + struct mlxsw_sp_nexthop *nh, bool force, + char *ratr_pl) { int i; @@ -3143,7 +3820,7 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, int err; err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i, - nh); + nh, force, ratr_pl); if (err) return err; } @@ -3151,35 +3828,51 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, return 0; } +static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh, bool force, + char *ratr_pl) +{ + /* When action is discard or trap, the nexthop must be + * programmed as an Ethernet nexthop. + */ + if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH || + nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD || + nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP) + return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh, + force, ratr_pl); + else + return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh, + force, ratr_pl); +} + static int mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop_group_info *nhgi, bool reallocate) { - u32 adj_index = nh_grp->adj_index; /* base */ + char ratr_pl[MLXSW_REG_RATR_LEN]; + u32 adj_index = nhgi->adj_index; /* base */ struct mlxsw_sp_nexthop *nh; int i; - int err; - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; + for (i = 0; i < nhgi->count; i++) { + nh = &nhgi->nexthops[i]; if (!nh->should_offload) { + mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh); nh->offloaded = 0; continue; } if (nh->update || reallocate) { - switch (nh->type) { - case MLXSW_SP_NEXTHOP_TYPE_ETH: - err = mlxsw_sp_nexthop_update - (mlxsw_sp, adj_index, nh); - break; - case MLXSW_SP_NEXTHOP_TYPE_IPIP: - err = mlxsw_sp_nexthop_ipip_update - (mlxsw_sp, adj_index, nh); - break; - } + int err = 0; + + err = mlxsw_sp_nexthop_counter_update(mlxsw_sp, nh); + if (err) + return err; + + err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, + true, ratr_pl); if (err) return err; nh->update = 0; @@ -3190,10 +3883,6 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static bool -mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib_entry *fib_entry); - static int mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) @@ -3202,9 +3891,6 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, int err; list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { - if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node, - fib_entry)) - continue; err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); if (err) return err; @@ -3212,52 +3898,69 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static void -mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op, int err); +struct mlxsw_sp_adj_grp_size_range { + u16 start; /* Inclusive */ + u16 end; /* Inclusive */ +}; -static void -mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp) -{ - enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE; - struct mlxsw_sp_fib_entry *fib_entry; +/* Ordered by range start value */ +static const struct mlxsw_sp_adj_grp_size_range +mlxsw_sp1_adj_grp_size_ranges[] = { + { .start = 1, .end = 64 }, + { .start = 512, .end = 512 }, + { .start = 1024, .end = 1024 }, + { .start = 2048, .end = 2048 }, + { .start = 4096, .end = 4096 }, +}; - list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { - if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node, - fib_entry)) - continue; - mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0); - } -} +/* Ordered by range start value */ +static const struct mlxsw_sp_adj_grp_size_range +mlxsw_sp2_adj_grp_size_ranges[] = { + { .start = 1, .end = 128 }, + { .start = 256, .end = 256 }, + { .start = 512, .end = 512 }, + { .start = 1024, .end = 1024 }, + { .start = 2048, .end = 2048 }, + { .start = 4096, .end = 4096 }, +}; -static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size) +static void mlxsw_sp_adj_grp_size_round_up(const struct mlxsw_sp *mlxsw_sp, + u16 *p_adj_grp_size) { - /* Valid sizes for an adjacency group are: - * 1-64, 512, 1024, 2048 and 4096. - */ - if (*p_adj_grp_size <= 64) - return; - else if (*p_adj_grp_size <= 512) - *p_adj_grp_size = 512; - else if (*p_adj_grp_size <= 1024) - *p_adj_grp_size = 1024; - else if (*p_adj_grp_size <= 2048) - *p_adj_grp_size = 2048; - else - *p_adj_grp_size = 4096; + int i; + + for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) { + const struct mlxsw_sp_adj_grp_size_range *size_range; + + size_range = &mlxsw_sp->router->adj_grp_size_ranges[i]; + + if (*p_adj_grp_size >= size_range->start && + *p_adj_grp_size <= size_range->end) + return; + + if (*p_adj_grp_size <= size_range->end) { + *p_adj_grp_size = size_range->end; + return; + } + } } -static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size, +static void mlxsw_sp_adj_grp_size_round_down(const struct mlxsw_sp *mlxsw_sp, + u16 *p_adj_grp_size, unsigned int alloc_size) { - if (alloc_size >= 4096) - *p_adj_grp_size = 4096; - else if (alloc_size >= 2048) - *p_adj_grp_size = 2048; - else if (alloc_size >= 1024) - *p_adj_grp_size = 1024; - else if (alloc_size >= 512) - *p_adj_grp_size = 512; + int i; + + for (i = mlxsw_sp->router->adj_grp_size_ranges_count - 1; i >= 0; i--) { + const struct mlxsw_sp_adj_grp_size_range *size_range; + + size_range = &mlxsw_sp->router->adj_grp_size_ranges[i]; + + if (alloc_size >= size_range->end) { + *p_adj_grp_size = size_range->end; + return; + } + } } static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp, @@ -3269,7 +3972,7 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp, /* Round up the requested group size to the next size supported * by the device and make sure the request can be satisfied. */ - mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size); + mlxsw_sp_adj_grp_size_round_up(mlxsw_sp, p_adj_grp_size); err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, *p_adj_grp_size, &alloc_size); @@ -3279,19 +3982,19 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp, * entries than requested. Try to use as much of them as * possible. */ - mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size); + mlxsw_sp_adj_grp_size_round_down(mlxsw_sp, p_adj_grp_size, alloc_size); return 0; } static void -mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp) +mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group_info *nhgi) { int i, g = 0, sum_norm_weight = 0; struct mlxsw_sp_nexthop *nh; - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; + for (i = 0; i < nhgi->count; i++) { + nh = &nhgi->nexthops[i]; if (!nh->should_offload) continue; @@ -3301,8 +4004,8 @@ mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp) g = nh->nh_weight; } - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; + for (i = 0; i < nhgi->count; i++) { + nh = &nhgi->nexthops[i]; if (!nh->should_offload) continue; @@ -3310,18 +4013,18 @@ mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp) sum_norm_weight += nh->norm_nh_weight; } - nh_grp->sum_norm_weight = sum_norm_weight; + nhgi->sum_norm_weight = sum_norm_weight; } static void -mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp) +mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group_info *nhgi) { - int total = nh_grp->sum_norm_weight; - u16 ecmp_size = nh_grp->ecmp_size; int i, weight = 0, lower_bound = 0; + int total = nhgi->sum_norm_weight; + u16 ecmp_size = nhgi->ecmp_size; - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + for (i = 0; i < nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; int upper_bound; if (!nh->should_offload) @@ -3333,26 +4036,145 @@ mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp) } } +static struct mlxsw_sp_nexthop * +mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_rt6 *mlxsw_sp_rt6); + static void +mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + int i; + + for (i = 0; i < nh_grp->nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i]; + + if (nh->offloaded) + nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; + else + nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +__mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; + struct mlxsw_sp_nexthop *nh; + + nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); + if (nh && nh->offloaded) + fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; + else + fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + + /* Unfortunately, in IPv6 the route and the nexthop are described by + * the same struct, so we need to iterate over all the routes using the + * nexthop group and set / clear the offload indication for them. + */ + list_for_each_entry(fib6_entry, &nh_grp->fib_list, + common.nexthop_group_node) + __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry); +} + +static void +mlxsw_sp_nexthop_bucket_offload_refresh(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nexthop *nh, + u16 bucket_index) +{ + struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp; + bool offload = false, trap = false; + + if (nh->offloaded) { + if (nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP) + trap = true; + else + offload = true; + } + nexthop_bucket_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id, + bucket_index, offload, trap); +} + +static void +mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + int i; + + /* Do not update the flags if the nexthop group is being destroyed + * since: + * 1. The nexthop objects is being deleted, in which case the flags are + * irrelevant. + * 2. The nexthop group was replaced by a newer group, in which case + * the flags of the nexthop object were already updated based on the + * new group. + */ + if (nh_grp->can_destroy) + return; + + nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id, + nh_grp->nhgi->adj_index_valid, false); + + /* Update flags of individual nexthop buckets in case of a resilient + * nexthop group. + */ + if (!nh_grp->nhgi->is_resilient) + return; + + for (i = 0; i < nh_grp->nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i]; + + mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, i); + } +} + +static void +mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + switch (nh_grp->type) { + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4: + mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp); + break; + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6: + mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp); + break; + case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ: + mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, nh_grp); + break; + } +} + +static int mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; u16 ecmp_size, old_ecmp_size; struct mlxsw_sp_nexthop *nh; bool offload_change = false; u32 adj_index; bool old_adj_index_valid; u32 old_adj_index; - int i; - int err; + int i, err2, err; - if (!nh_grp->gateway) { - mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); - return; - } + if (!nhgi->gateway) + return mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; + for (i = 0; i < nhgi->count; i++) { + nh = &nhgi->nexthops[i]; if (nh->should_offload != nh->offloaded) { offload_change = true; @@ -3364,21 +4186,27 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, /* Nothing was added or removed, so no need to reallocate. Just * update MAC on existing adjacency indexes. */ - err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false); + err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, false); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); goto set_trap; } - return; + /* Flags of individual nexthop buckets might need to be + * updated. + */ + mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp); + return 0; } - mlxsw_sp_nexthop_group_normalize(nh_grp); - if (!nh_grp->sum_norm_weight) + mlxsw_sp_nexthop_group_normalize(nhgi); + if (!nhgi->sum_norm_weight) { /* No neigh of this group is connected so we just set * the trap and let everthing flow through kernel. */ + err = 0; goto set_trap; + } - ecmp_size = nh_grp->sum_norm_weight; + ecmp_size = nhgi->sum_norm_weight; err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size); if (err) /* No valid allocation size available. */ @@ -3393,19 +4221,21 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n"); goto set_trap; } - old_adj_index_valid = nh_grp->adj_index_valid; - old_adj_index = nh_grp->adj_index; - old_ecmp_size = nh_grp->ecmp_size; - nh_grp->adj_index_valid = 1; - nh_grp->adj_index = adj_index; - nh_grp->ecmp_size = ecmp_size; - mlxsw_sp_nexthop_group_rebalance(nh_grp); - err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true); + old_adj_index_valid = nhgi->adj_index_valid; + old_adj_index = nhgi->adj_index; + old_ecmp_size = nhgi->ecmp_size; + nhgi->adj_index_valid = 1; + nhgi->adj_index = adj_index; + nhgi->ecmp_size = ecmp_size; + mlxsw_sp_nexthop_group_rebalance(nhgi); + err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, true); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); goto set_trap; } + mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp); + if (!old_adj_index_valid) { /* The trap was set for fib entries, so we have to call * fib entry update to unset it and use adjacency index. @@ -3415,7 +4245,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n"); goto set_trap; } - return; + return 0; } err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp, @@ -3427,89 +4257,164 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, goto set_trap; } - /* Offload state within the group changed, so update the flags. */ - mlxsw_sp_nexthop_fib_entries_refresh(nh_grp); - - return; + return 0; set_trap: - old_adj_index_valid = nh_grp->adj_index_valid; - nh_grp->adj_index_valid = 0; - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; + old_adj_index_valid = nhgi->adj_index_valid; + nhgi->adj_index_valid = 0; + for (i = 0; i < nhgi->count; i++) { + nh = &nhgi->nexthops[i]; nh->offloaded = 0; } - err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); - if (err) + err2 = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); + if (err2) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n"); + mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp); if (old_adj_index_valid) mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, - nh_grp->ecmp_size, nh_grp->adj_index); + nhgi->ecmp_size, nhgi->adj_index); + return err; } static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, bool removing) { - if (!removing) + if (!removing) { + nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD; nh->should_offload = 1; - else + } else if (nh->nhgi->is_resilient) { + nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP; + nh->should_offload = 1; + } else { nh->should_offload = 0; + } nh->update = 1; } +static int +mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n, *old_n = neigh_entry->key.n; + struct mlxsw_sp_nexthop *nh; + struct net_device *dev; + bool entry_connected; + u8 nud_state, dead; + int err; + + nh = list_first_entry(&neigh_entry->nexthop_list, + struct mlxsw_sp_nexthop, neigh_list_node); + dev = mlxsw_sp_nexthop_dev(nh); + + n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev); + if (!n) { + n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev); + if (IS_ERR(n)) + return PTR_ERR(n); + neigh_event_send(n, NULL); + } + + mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); + neigh_entry->key.n = n; + err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + if (err) + goto err_neigh_entry_insert; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + dead = n->dead; + read_unlock_bh(&n->lock); + entry_connected = nud_state & NUD_VALID && !dead; + + list_for_each_entry(nh, &neigh_entry->nexthop_list, + neigh_list_node) { + neigh_release(old_n); + neigh_clone(n); + __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); + } + + neigh_release(n); + + return 0; + +err_neigh_entry_insert: + neigh_entry->key.n = old_n; + mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + neigh_release(n); + return err; +} + static void mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, - bool removing) + bool removing, bool dead) { struct mlxsw_sp_nexthop *nh; + if (list_empty(&neigh_entry->nexthop_list)) + return; + + if (dead) { + int err; + + err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp, + neigh_entry); + if (err) + dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n"); + return; + } + list_for_each_entry(nh, &neigh_entry->nexthop_list, neigh_list_node) { __mlxsw_sp_nexthop_neigh_update(nh, removing); - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); } } -static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh, - struct mlxsw_sp_rif *rif) +static void mlxsw_sp_nexthop_crif_init(struct mlxsw_sp_nexthop *nh, + struct mlxsw_sp_crif *crif) { - if (nh->rif) + if (nh->crif) return; - nh->rif = rif; - list_add(&nh->rif_list_node, &rif->nexthop_list); + nh->crif = crif; + list_add(&nh->crif_list_node, &crif->nexthop_list); } -static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh) +static void mlxsw_sp_nexthop_crif_fini(struct mlxsw_sp_nexthop *nh) { - if (!nh->rif) + if (!nh->crif) return; - list_del(&nh->rif_list_node); - nh->rif = NULL; + list_del(&nh->crif_list_node); + nh->crif = NULL; } static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry; + struct net_device *dev; struct neighbour *n; u8 nud_state, dead; int err; - if (!nh->nh_grp->gateway || nh->neigh_entry) + if (WARN_ON(!nh->crif->rif)) + return 0; + + if (!nh->nhgi->gateway || nh->neigh_entry) return 0; + dev = mlxsw_sp_nexthop_dev(nh); /* Take a reference of neigh here ensuring that neigh would * not be destructed before the nexthop entry is finished. * The reference is taken either in neigh_lookup() or * in neigh_create() in case n is not found. */ - n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev); + n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev); if (!n) { - n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr, - nh->rif->dev); + n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); @@ -3573,24 +4478,35 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev) { - struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + struct net_device *ul_dev; + bool is_up; - return ul_dev ? (ul_dev->flags & IFF_UP) : true; + rcu_read_lock(); + ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + is_up = ul_dev ? (ul_dev->flags & IFF_UP) : true; + rcu_read_unlock(); + + return is_up; } static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, struct mlxsw_sp_ipip_entry *ipip_entry) { + struct mlxsw_sp_crif *crif; bool removing; - if (!nh->nh_grp->gateway || nh->ipip_entry) + if (!nh->nhgi->gateway || nh->ipip_entry) + return; + + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, ipip_entry->ol_dev); + if (WARN_ON(!crif)) return; nh->ipip_entry = ipip_entry; removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev); __mlxsw_sp_nexthop_neigh_update(nh, removing); - mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common); + mlxsw_sp_nexthop_crif_init(nh, crif); } static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, @@ -3609,43 +4525,26 @@ static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp, const struct fib_nh *fib_nh, enum mlxsw_sp_ipip_type *p_ipipt) { - struct net_device *dev = fib_nh->nh_dev; + struct net_device *dev = fib_nh->fib_nh_dev; return dev && fib_nh->nh_parent->fib_type == RTN_UNICAST && mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt); } -static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) -{ - switch (nh->type) { - case MLXSW_SP_NEXTHOP_TYPE_ETH: - mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); - break; - case MLXSW_SP_NEXTHOP_TYPE_IPIP: - mlxsw_sp_nexthop_rif_fini(nh); - mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh); - break; - } -} - -static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh, - struct fib_nh *fib_nh) +static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + const struct net_device *dev) { const struct mlxsw_sp_ipip_ops *ipip_ops; - struct net_device *dev = fib_nh->nh_dev; struct mlxsw_sp_ipip_entry *ipip_entry; - struct mlxsw_sp_rif *rif; + struct mlxsw_sp_crif *crif; int err; ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); if (ipip_entry) { ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; - if (ipip_ops->can_offload(mlxsw_sp, dev, - MLXSW_SP_L3_PROTO_IPV4)) { + if (ipip_ops->can_offload(mlxsw_sp, dev)) { nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry); return 0; @@ -3653,11 +4552,15 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, } nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, dev); + if (!crif) + return 0; + + mlxsw_sp_nexthop_crif_init(nh, crif); + + if (!crif->rif) return 0; - mlxsw_sp_nexthop_rif_init(nh, rif); err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); if (err) goto err_neigh_init; @@ -3665,14 +4568,41 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, return 0; err_neigh_init: - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop_crif_fini(nh); return err; } -static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +static int mlxsw_sp_nexthop_type_rif_made(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { - mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); + switch (nh->type) { + case MLXSW_SP_NEXTHOP_TYPE_ETH: + return mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + case MLXSW_SP_NEXTHOP_TYPE_IPIP: + break; + } + + return 0; +} + +static void mlxsw_sp_nexthop_type_rif_gone(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + switch (nh->type) { + case MLXSW_SP_NEXTHOP_TYPE_ETH: + mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); + break; + case MLXSW_SP_NEXTHOP_TYPE_IPIP: + mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh); + break; + } +} + +static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh); + mlxsw_sp_nexthop_crif_fini(nh); } static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, @@ -3680,40 +4610,52 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, struct fib_nh *fib_nh) { - struct net_device *dev = fib_nh->nh_dev; + struct net_device *dev = fib_nh->fib_nh_dev; struct in_device *in_dev; int err; - nh->nh_grp = nh_grp; + nh->nhgi = nh_grp->nhgi; nh->key.fib_nh = fib_nh; #ifdef CONFIG_IP_ROUTE_MULTIPATH - nh->nh_weight = fib_nh->nh_weight; + nh->nh_weight = fib_nh->fib_nh_weight; #else nh->nh_weight = 1; #endif - memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw)); + memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4)); + nh->neigh_tbl = &arp_tbl; err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh); if (err) return err; - mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); + err = mlxsw_sp_nexthop_counter_enable(mlxsw_sp, nh); + if (err) + goto err_counter_enable; + list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); if (!dev) return 0; + nh->ifindex = dev->ifindex; - in_dev = __in_dev_get_rtnl(dev); + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - fib_nh->nh_flags & RTNH_F_LINKDOWN) + fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) { + rcu_read_unlock(); return 0; + } + rcu_read_unlock(); - err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh); + err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); if (err) goto err_nexthop_neigh_init; return 0; err_nexthop_neigh_init: + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh); +err_counter_enable: mlxsw_sp_nexthop_remove(mlxsw_sp, nh); return err; } @@ -3721,9 +4663,9 @@ err_nexthop_neigh_init: static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { - mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); list_del(&nh->router_list_node); - mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); } @@ -3733,39 +4675,37 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_key key; struct mlxsw_sp_nexthop *nh; - if (mlxsw_sp->router->aborted) - return; - key.fib_nh = fib_nh; nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key); - if (WARN_ON_ONCE(!nh)) + if (!nh) return; switch (event) { case FIB_EVENT_NH_ADD: - mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh); + mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, fib_nh->fib_nh_dev); break; case FIB_EVENT_NH_DEL: - mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); break; } - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); } static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp_nexthop *nh; bool removing; - list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) { + list_for_each_entry(nh, &rif->crif->nexthop_list, crif_list_node) { switch (nh->type) { case MLXSW_SP_NEXTHOP_TYPE_ETH: removing = false; break; case MLXSW_SP_NEXTHOP_TYPE_IPIP: - removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev); + removing = !mlxsw_sp_ipip_netdev_ul_up(dev); break; default: WARN_ON(1); @@ -3773,20 +4713,37 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, } __mlxsw_sp_nexthop_neigh_update(nh, removing); - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); } } -static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *old_rif, - struct mlxsw_sp_rif *new_rif) +static int mlxsw_sp_nexthop_rif_made_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) { - struct mlxsw_sp_nexthop *nh; + struct mlxsw_sp_nexthop *nh, *tmp; + unsigned int n = 0; + int err; + + list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list, + crif_list_node) { + err = mlxsw_sp_nexthop_type_rif_made(mlxsw_sp, nh); + if (err) + goto err_nexthop_type_rif; + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); + n++; + } + + return 0; - list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list); - list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node) - nh->rif = new_rif; - mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); +err_nexthop_type_rif: + list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list, + crif_list_node) { + if (!n--) + break; + mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); + } + return err; } static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, @@ -3794,61 +4751,1126 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_nexthop *nh, *tmp; - list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) { - mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list, + crif_list_node) { + mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); } } -static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, - const struct fib_info *fi) +static int mlxsw_sp_adj_trap_entry_init(struct mlxsw_sp *mlxsw_sp) { - return fi->fib_nh->nh_scope == RT_SCOPE_LINK || - mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL); + enum mlxsw_reg_ratr_trap_action trap_action; + char ratr_pl[MLXSW_REG_RATR_LEN]; + int err; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + &mlxsw_sp->router->adj_trap_index); + if (err) + return err; + + trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP; + mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true, + MLXSW_REG_RATR_TYPE_ETHERNET, + mlxsw_sp->router->adj_trap_index, + mlxsw_sp->router->lb_crif->rif->rif_index); + mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action); + mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); + if (err) + goto err_ratr_write; + + return 0; + +err_ratr_write: + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + mlxsw_sp->router->adj_trap_index); + return err; } -static struct mlxsw_sp_nexthop_group * -mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +static void mlxsw_sp_adj_trap_entry_fini(struct mlxsw_sp *mlxsw_sp) { - struct mlxsw_sp_nexthop_group *nh_grp; + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + mlxsw_sp->router->adj_trap_index); +} + +static int mlxsw_sp_nexthop_group_inc(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + if (refcount_inc_not_zero(&mlxsw_sp->router->num_groups)) + return 0; + + err = mlxsw_sp_adj_trap_entry_init(mlxsw_sp); + if (err) + return err; + + refcount_set(&mlxsw_sp->router->num_groups, 1); + + return 0; +} + +static void mlxsw_sp_nexthop_group_dec(struct mlxsw_sp *mlxsw_sp) +{ + if (!refcount_dec_and_test(&mlxsw_sp->router->num_groups)) + return; + + mlxsw_sp_adj_trap_entry_fini(mlxsw_sp); +} + +static void +mlxsw_sp_nh_grp_activity_get(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nexthop_group *nh_grp, + unsigned long *activity) +{ + char *ratrad_pl; + int i, err; + + ratrad_pl = kmalloc(MLXSW_REG_RATRAD_LEN, GFP_KERNEL); + if (!ratrad_pl) + return; + + mlxsw_reg_ratrad_pack(ratrad_pl, nh_grp->nhgi->adj_index, + nh_grp->nhgi->count); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratrad), ratrad_pl); + if (err) + goto out; + + for (i = 0; i < nh_grp->nhgi->count; i++) { + if (!mlxsw_reg_ratrad_activity_vector_get(ratrad_pl, i)) + continue; + bitmap_set(activity, i, 1); + } + +out: + kfree(ratrad_pl); +} + +#define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */ + +static void +mlxsw_sp_nh_grp_activity_update(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nexthop_group *nh_grp) +{ + unsigned long *activity; + + activity = bitmap_zalloc(nh_grp->nhgi->count, GFP_KERNEL); + if (!activity) + return; + + mlxsw_sp_nh_grp_activity_get(mlxsw_sp, nh_grp, activity); + nexthop_res_grp_activity_update(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id, + nh_grp->nhgi->count, activity); + + bitmap_free(activity); +} + +static void +mlxsw_sp_nh_grp_activity_work_schedule(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int interval = MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL; + + mlxsw_core_schedule_dw(&mlxsw_sp->router->nh_grp_activity_dw, + msecs_to_jiffies(interval)); +} + +static void mlxsw_sp_nh_grp_activity_work(struct work_struct *work) +{ + struct mlxsw_sp_nexthop_group_info *nhgi; + struct mlxsw_sp_router *router; + bool reschedule = false; + + router = container_of(work, struct mlxsw_sp_router, + nh_grp_activity_dw.work); + + mutex_lock(&router->lock); + + list_for_each_entry(nhgi, &router->nh_res_grp_list, list) { + mlxsw_sp_nh_grp_activity_update(router->mlxsw_sp, nhgi->nh_grp); + reschedule = true; + } + + mutex_unlock(&router->lock); + + if (!reschedule) + return; + mlxsw_sp_nh_grp_activity_work_schedule(router->mlxsw_sp); +} + +static int +mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_single_info *nh, + struct netlink_ext_ack *extack) +{ + int err = -EINVAL; + + if (nh->is_fdb) + NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported"); + else if (nh->has_encap) + NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported"); + else + err = 0; + + return err; +} + +static int +mlxsw_sp_nexthop_obj_group_entry_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_single_info *nh, + struct netlink_ext_ack *extack) +{ + int err; + + err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh, extack); + if (err) + return err; + + /* Device only nexthops with an IPIP device are programmed as + * encapsulating adjacency entries. + */ + if (!nh->gw_family && !nh->is_reject && + !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway"); + return -EINVAL; + } + + return 0; +} + +static int +mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_grp_info *nh_grp, + struct netlink_ext_ack *extack) +{ + int i; + + if (nh_grp->is_fdb) { + NL_SET_ERR_MSG_MOD(extack, "FDB nexthop groups are not supported"); + return -EINVAL; + } + + for (i = 0; i < nh_grp->num_nh; i++) { + const struct nh_notifier_single_info *nh; + int err; + + nh = &nh_grp->nh_entries[i].nh; + err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh, + extack); + if (err) + return err; + } + + return 0; +} + +static int +mlxsw_sp_nexthop_obj_res_group_size_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_res_table_info *nh_res_table, + struct netlink_ext_ack *extack) +{ + unsigned int alloc_size; + bool valid_size = false; + int err, i; + + if (nh_res_table->num_nh_buckets < 32) { + NL_SET_ERR_MSG_MOD(extack, "Minimum number of buckets is 32"); + return -EINVAL; + } + + for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) { + const struct mlxsw_sp_adj_grp_size_range *size_range; + + size_range = &mlxsw_sp->router->adj_grp_size_ranges[i]; + + if (nh_res_table->num_nh_buckets >= size_range->start && + nh_res_table->num_nh_buckets <= size_range->end) { + valid_size = true; + break; + } + } + + if (!valid_size) { + NL_SET_ERR_MSG_MOD(extack, "Invalid number of buckets"); + return -EINVAL; + } + + err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp, + MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + nh_res_table->num_nh_buckets, + &alloc_size); + if (err || nh_res_table->num_nh_buckets != alloc_size) { + NL_SET_ERR_MSG_MOD(extack, "Number of buckets does not fit allocation size of any KVDL partition"); + return -EINVAL; + } + + return 0; +} + +static int +mlxsw_sp_nexthop_obj_res_group_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_res_table_info *nh_res_table, + struct netlink_ext_ack *extack) +{ + int err; + u16 i; + + err = mlxsw_sp_nexthop_obj_res_group_size_validate(mlxsw_sp, + nh_res_table, + extack); + if (err) + return err; + + for (i = 0; i < nh_res_table->num_nh_buckets; i++) { + const struct nh_notifier_single_info *nh; + int err; + + nh = &nh_res_table->nhs[i]; + err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh, + extack); + if (err) + return err; + } + + return 0; +} + +static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp, + unsigned long event, + struct nh_notifier_info *info) +{ + struct nh_notifier_single_info *nh; + + if (event != NEXTHOP_EVENT_REPLACE && + event != NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE && + event != NEXTHOP_EVENT_BUCKET_REPLACE) + return 0; + + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: + return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh, + info->extack); + case NH_NOTIFIER_INFO_TYPE_GRP: + return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp, + info->nh_grp, + info->extack); + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: + return mlxsw_sp_nexthop_obj_res_group_validate(mlxsw_sp, + info->nh_res_table, + info->extack); + case NH_NOTIFIER_INFO_TYPE_RES_BUCKET: + nh = &info->nh_res_bucket->new_nh; + return mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh, + info->extack); + default: + NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type"); + return -EOPNOTSUPP; + } +} + +static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_info *info) +{ + const struct net_device *dev; + + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: + dev = info->nh->dev; + return info->nh->gw_family || info->nh->is_reject || + mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); + case NH_NOTIFIER_INFO_TYPE_GRP: + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: + /* Already validated earlier. */ + return true; + default: + return false; + } +} + +static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD; + nh->should_offload = 1; + /* While nexthops that discard packets do not forward packets + * via an egress RIF, they still need to be programmed using a + * valid RIF, so use the loopback RIF created during init. + */ + nh->crif = mlxsw_sp->router->lb_crif; +} + +static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + nh->crif = NULL; + nh->should_offload = 0; +} + +static int +mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + struct nh_notifier_single_info *nh_obj, int weight) +{ + struct net_device *dev = nh_obj->dev; + int err; + + nh->nhgi = nh_grp->nhgi; + nh->nh_weight = weight; + + switch (nh_obj->gw_family) { + case AF_INET: + memcpy(&nh->gw_addr, &nh_obj->ipv4, sizeof(nh_obj->ipv4)); + nh->neigh_tbl = &arp_tbl; + break; + case AF_INET6: + memcpy(&nh->gw_addr, &nh_obj->ipv6, sizeof(nh_obj->ipv6)); +#if IS_ENABLED(CONFIG_IPV6) + nh->neigh_tbl = &nd_tbl; +#endif + break; + } + + list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); + nh->ifindex = dev->ifindex; + nh->id = nh_obj->id; + + err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); + if (err) + goto err_type_init; + + if (nh_obj->is_reject) + mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh); + + /* In a resilient nexthop group, all the nexthops must be written to + * the adjacency table. Even if they do not have a valid neighbour or + * RIF. + */ + if (nh_grp->nhgi->is_resilient && !nh->should_offload) { + nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP; + nh->should_offload = 1; + } + + return 0; + +err_type_init: + list_del(&nh->router_list_node); + return err; +} + +static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD) + mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh); + nh->should_offload = 0; +} + +static int +mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct nh_notifier_info *info) +{ + struct mlxsw_sp_nexthop_group_info *nhgi; struct mlxsw_sp_nexthop *nh; - struct fib_nh *fib_nh; - size_t alloc_size; + bool is_resilient = false; + bool hw_stats = false; + unsigned int nhs; + int err, i; + + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: + nhs = 1; + break; + case NH_NOTIFIER_INFO_TYPE_GRP: + nhs = info->nh_grp->num_nh; + hw_stats = info->nh_grp->hw_stats; + break; + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: + nhs = info->nh_res_table->num_nh_buckets; + hw_stats = info->nh_res_table->hw_stats; + is_resilient = true; + break; + default: + return -EINVAL; + } + + nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL); + if (!nhgi) + return -ENOMEM; + nh_grp->nhgi = nhgi; + nhgi->nh_grp = nh_grp; + nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info); + nhgi->is_resilient = is_resilient; + nhgi->count = nhs; + nhgi->hw_stats = hw_stats; + + xa_init_flags(&nhgi->nexthop_counters, XA_FLAGS_ALLOC1); + + for (i = 0; i < nhgi->count; i++) { + struct nh_notifier_single_info *nh_obj; + int weight; + + nh = &nhgi->nexthops[i]; + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: + nh_obj = info->nh; + weight = 1; + break; + case NH_NOTIFIER_INFO_TYPE_GRP: + nh_obj = &info->nh_grp->nh_entries[i].nh; + weight = info->nh_grp->nh_entries[i].weight; + break; + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: + nh_obj = &info->nh_res_table->nhs[i]; + weight = 1; + break; + default: + err = -EINVAL; + goto err_nexthop_obj_init; + } + err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, + weight); + if (err) + goto err_nexthop_obj_init; + } + err = mlxsw_sp_nexthop_group_inc(mlxsw_sp); + if (err) + goto err_group_inc; + err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + if (err) { + NL_SET_ERR_MSG_MOD(info->extack, "Failed to write adjacency entries to the device"); + goto err_group_refresh; + } + + /* Add resilient nexthop groups to a list so that the activity of their + * nexthop buckets will be periodically queried and cleared. + */ + if (nhgi->is_resilient) { + if (list_empty(&mlxsw_sp->router->nh_res_grp_list)) + mlxsw_sp_nh_grp_activity_work_schedule(mlxsw_sp); + list_add(&nhgi->list, &mlxsw_sp->router->nh_res_grp_list); + } + + return 0; + +err_group_refresh: + mlxsw_sp_nexthop_group_dec(mlxsw_sp); +err_group_inc: + i = nhgi->count; +err_nexthop_obj_init: + for (i--; i >= 0; i--) { + nh = &nhgi->nexthops[i]; + mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh); + } + kfree(nhgi); + return err; +} + +static void +mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + struct mlxsw_sp_router *router = mlxsw_sp->router; int i; + + if (nhgi->is_resilient) { + list_del(&nhgi->list); + if (list_empty(&mlxsw_sp->router->nh_res_grp_list)) + cancel_delayed_work(&router->nh_grp_activity_dw); + } + + mlxsw_sp_nexthop_group_dec(mlxsw_sp); + for (i = nhgi->count - 1; i >= 0; i--) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; + + mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh); + } + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + WARN_ON_ONCE(nhgi->adj_index_valid); + WARN_ON(!xa_empty(&nhgi->nexthop_counters)); + xa_destroy(&nhgi->nexthop_counters); + kfree(nhgi); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop_obj_group_create(struct mlxsw_sp *mlxsw_sp, + struct nh_notifier_info *info) +{ + struct mlxsw_sp_nexthop_group *nh_grp; int err; - alloc_size = sizeof(*nh_grp) + - fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop); - nh_grp = kzalloc(alloc_size, GFP_KERNEL); + nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL); if (!nh_grp) return ERR_PTR(-ENOMEM); - nh_grp->priv = fi; + INIT_LIST_HEAD(&nh_grp->vr_list); + err = rhashtable_init(&nh_grp->vr_ht, + &mlxsw_sp_nexthop_group_vr_ht_params); + if (err) + goto err_nexthop_group_vr_ht_init; INIT_LIST_HEAD(&nh_grp->fib_list); - nh_grp->neigh_tbl = &arp_tbl; + nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ; + nh_grp->obj.id = info->id; - nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi); - nh_grp->count = fi->fib_nhs; - fib_info_hold(fi); - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; - fib_nh = &fi->fib_nh[i]; + err = mlxsw_sp_nexthop_obj_group_info_init(mlxsw_sp, nh_grp, info); + if (err) + goto err_nexthop_group_info_init; + + nh_grp->can_destroy = false; + + return nh_grp; + +err_nexthop_group_info_init: + rhashtable_destroy(&nh_grp->vr_ht); +err_nexthop_group_vr_ht_init: + kfree(nh_grp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nexthop_obj_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + if (!nh_grp->can_destroy) + return; + mlxsw_sp_nexthop_obj_group_info_fini(mlxsw_sp, nh_grp); + WARN_ON_ONCE(!list_empty(&nh_grp->fib_list)); + WARN_ON_ONCE(!list_empty(&nh_grp->vr_list)); + rhashtable_destroy(&nh_grp->vr_ht); + kfree(nh_grp); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop_obj_group_lookup(struct mlxsw_sp *mlxsw_sp, u32 id) +{ + struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg; + + cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ; + cmp_arg.id = id; + return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, + &cmp_arg, + mlxsw_sp_nexthop_group_ht_params); +} + +static int mlxsw_sp_nexthop_obj_group_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + return mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); +} + +static int +mlxsw_sp_nexthop_obj_group_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop_group *old_nh_grp, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_nexthop_group_info *old_nhgi = old_nh_grp->nhgi; + struct mlxsw_sp_nexthop_group_info *new_nhgi = nh_grp->nhgi; + int err; + + old_nh_grp->nhgi = new_nhgi; + new_nhgi->nh_grp = old_nh_grp; + nh_grp->nhgi = old_nhgi; + old_nhgi->nh_grp = nh_grp; + + if (old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) { + /* Both the old adjacency index and the new one are valid. + * Routes are currently using the old one. Tell the device to + * replace the old adjacency index with the new one. + */ + err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, old_nh_grp, + old_nhgi->adj_index, + old_nhgi->ecmp_size); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to replace old adjacency index with new one"); + goto err_out; + } + } else if (old_nhgi->adj_index_valid && !new_nhgi->adj_index_valid) { + /* The old adjacency index is valid, while the new one is not. + * Iterate over all the routes using the group and change them + * to trap packets to the CPU. + */ + err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to trap packets"); + goto err_out; + } + } else if (!old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) { + /* The old adjacency index is invalid, while the new one is. + * Iterate over all the routes using the group and change them + * to forward packets using the new valid index. + */ + err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to forward packets"); + goto err_out; + } + } + + /* Make sure the flags are set / cleared based on the new nexthop group + * information. + */ + mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, old_nh_grp); + + /* At this point 'nh_grp' is just a shell that is not used by anyone + * and its nexthop group info is the old info that was just replaced + * with the new one. Remove it. + */ + nh_grp->can_destroy = true; + mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); + + return 0; + +err_out: + old_nhgi->nh_grp = old_nh_grp; + nh_grp->nhgi = new_nhgi; + new_nhgi->nh_grp = nh_grp; + old_nh_grp->nhgi = old_nhgi; + return err; +} + +static int mlxsw_sp_nexthop_obj_res_group_pre(struct mlxsw_sp *mlxsw_sp, + struct nh_notifier_info *info) +{ + struct nh_notifier_grp_info *grp_info = info->nh_grp; + struct mlxsw_sp_nexthop_group_info *nhgi; + struct mlxsw_sp_nexthop_group *nh_grp; + int err; + int i; + + nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id); + if (!nh_grp) + return 0; + nhgi = nh_grp->nhgi; + + if (nhgi->hw_stats == grp_info->hw_stats) + return 0; + + nhgi->hw_stats = grp_info->hw_stats; + + for (i = 0; i < nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; + + if (nh->offloaded) + nh->update = 1; + } + + err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + if (err) + goto err_group_refresh; + + return 0; + +err_group_refresh: + nhgi->hw_stats = !grp_info->hw_stats; + return err; +} + +static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp, + struct nh_notifier_info *info) +{ + struct mlxsw_sp_nexthop_group *nh_grp, *old_nh_grp; + struct netlink_ext_ack *extack = info->extack; + int err; + + nh_grp = mlxsw_sp_nexthop_obj_group_create(mlxsw_sp, info); + if (IS_ERR(nh_grp)) + return PTR_ERR(nh_grp); + + old_nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id); + if (!old_nh_grp) + err = mlxsw_sp_nexthop_obj_group_add(mlxsw_sp, nh_grp); + else + err = mlxsw_sp_nexthop_obj_group_replace(mlxsw_sp, nh_grp, + old_nh_grp, extack); + + if (err) { + nh_grp->can_destroy = true; + mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); + } + + return err; +} + +static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp, + struct nh_notifier_info *info) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id); + if (!nh_grp) + return; + + nh_grp->can_destroy = true; + mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); + + /* If the group still has routes using it, then defer the delete + * operation until the last route using it is deleted. + */ + if (!list_empty(&nh_grp->fib_list)) + return; + mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); +} + +static int mlxsw_sp_nexthop_obj_bucket_query(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, char *ratr_pl) +{ + MLXSW_REG_ZERO(ratr, ratr_pl); + mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ); + mlxsw_reg_ratr_adjacency_index_low_set(ratr_pl, adj_index); + mlxsw_reg_ratr_adjacency_index_high_set(ratr_pl, adj_index >> 16); + + return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); +} + +static int mlxsw_sp_nexthop_obj_bucket_compare(char *ratr_pl, char *ratr_pl_new) +{ + /* Clear the opcode and activity on both the old and new payload as + * they are irrelevant for the comparison. + */ + mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ); + mlxsw_reg_ratr_a_set(ratr_pl, 0); + mlxsw_reg_ratr_op_set(ratr_pl_new, MLXSW_REG_RATR_OP_QUERY_READ); + mlxsw_reg_ratr_a_set(ratr_pl_new, 0); + + /* If the contents of the adjacency entry are consistent with the + * replacement request, then replacement was successful. + */ + if (!memcmp(ratr_pl, ratr_pl_new, MLXSW_REG_RATR_LEN)) + return 0; + + return -EINVAL; +} + +static int +mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct nh_notifier_info *info) +{ + u16 bucket_index = info->nh_res_bucket->bucket_index; + struct netlink_ext_ack *extack = info->extack; + bool force = info->nh_res_bucket->force; + char ratr_pl_new[MLXSW_REG_RATR_LEN]; + char ratr_pl[MLXSW_REG_RATR_LEN]; + u32 adj_index; + int err; + + /* No point in trying an atomic replacement if the idle timer interval + * is smaller than the interval in which we query and clear activity. + */ + if (!force && info->nh_res_bucket->idle_timer_ms < + MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL) + force = true; + + adj_index = nh->nhgi->adj_index + bucket_index; + err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to overwrite nexthop bucket"); + return err; + } + + if (!force) { + err = mlxsw_sp_nexthop_obj_bucket_query(mlxsw_sp, adj_index, + ratr_pl_new); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to query nexthop bucket state after replacement. State might be inconsistent"); + return err; + } + + err = mlxsw_sp_nexthop_obj_bucket_compare(ratr_pl, ratr_pl_new); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket was not replaced because it was active during replacement"); + return err; + } + } + + nh->update = 0; + nh->offloaded = 1; + mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, bucket_index); + + return 0; +} + +static int mlxsw_sp_nexthop_obj_bucket_replace(struct mlxsw_sp *mlxsw_sp, + struct nh_notifier_info *info) +{ + u16 bucket_index = info->nh_res_bucket->bucket_index; + struct netlink_ext_ack *extack = info->extack; + struct mlxsw_sp_nexthop_group_info *nhgi; + struct nh_notifier_single_info *nh_obj; + struct mlxsw_sp_nexthop_group *nh_grp; + struct mlxsw_sp_nexthop *nh; + int err; + + nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id); + if (!nh_grp) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop group was not found"); + return -EINVAL; + } + + nhgi = nh_grp->nhgi; + + if (bucket_index >= nhgi->count) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket index out of range"); + return -EINVAL; + } + + nh = &nhgi->nexthops[bucket_index]; + mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh); + + nh_obj = &info->nh_res_bucket->new_nh; + err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to initialize nexthop object for nexthop bucket replacement"); + goto err_nexthop_obj_init; + } + + err = mlxsw_sp_nexthop_obj_bucket_adj_update(mlxsw_sp, nh, info); + if (err) + goto err_nexthop_obj_bucket_adj_update; + + return 0; + +err_nexthop_obj_bucket_adj_update: + mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh); +err_nexthop_obj_init: + nh_obj = &info->nh_res_bucket->old_nh; + mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1); + /* The old adjacency entry was not overwritten */ + nh->update = 0; + nh->offloaded = 1; + return err; +} + +static void +mlxsw_sp_nexthop_obj_mp_hw_stats_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group_info *nhgi, + struct nh_notifier_grp_hw_stats_info *info) +{ + int nhi; + + for (nhi = 0; nhi < info->num_nh; nhi++) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[nhi]; + u64 packets; + int err; + + err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &packets); + if (err) + continue; + + nh_grp_hw_stats_report_delta(info, nhi, packets); + } +} + +static void +mlxsw_sp_nexthop_obj_res_hw_stats_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group_info *nhgi, + struct nh_notifier_grp_hw_stats_info *info) +{ + int nhi = -1; + int bucket; + + for (bucket = 0; bucket < nhgi->count; bucket++) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[bucket]; + u64 packets; + int err; + + if (nhi == -1 || info->stats[nhi].id != nh->id) { + for (nhi = 0; nhi < info->num_nh; nhi++) + if (info->stats[nhi].id == nh->id) + break; + if (WARN_ON_ONCE(nhi == info->num_nh)) { + nhi = -1; + continue; + } + } + + err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &packets); + if (err) + continue; + + nh_grp_hw_stats_report_delta(info, nhi, packets); + } +} + +static void mlxsw_sp_nexthop_obj_hw_stats_get(struct mlxsw_sp *mlxsw_sp, + struct nh_notifier_info *info) +{ + struct mlxsw_sp_nexthop_group_info *nhgi; + struct mlxsw_sp_nexthop_group *nh_grp; + + if (info->type != NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS) + return; + + nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id); + if (!nh_grp) + return; + nhgi = nh_grp->nhgi; + + if (nhgi->is_resilient) + mlxsw_sp_nexthop_obj_res_hw_stats_get(mlxsw_sp, nhgi, + info->nh_grp_hw_stats); + else + mlxsw_sp_nexthop_obj_mp_hw_stats_get(mlxsw_sp, nhgi, + info->nh_grp_hw_stats); +} + +static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct nh_notifier_info *info = ptr; + struct mlxsw_sp_router *router; + int err = 0; + + router = container_of(nb, struct mlxsw_sp_router, nexthop_nb); + err = mlxsw_sp_nexthop_obj_validate(router->mlxsw_sp, event, info); + if (err) + goto out; + + mutex_lock(&router->lock); + + switch (event) { + case NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE: + err = mlxsw_sp_nexthop_obj_res_group_pre(router->mlxsw_sp, + info); + break; + case NEXTHOP_EVENT_REPLACE: + err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info); + break; + case NEXTHOP_EVENT_DEL: + mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info); + break; + case NEXTHOP_EVENT_BUCKET_REPLACE: + err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp, + info); + break; + case NEXTHOP_EVENT_HW_STATS_REPORT_DELTA: + mlxsw_sp_nexthop_obj_hw_stats_get(router->mlxsw_sp, info); + break; + default: + break; + } + + mutex_unlock(&router->lock); + +out: + return notifier_from_errno(err); +} + +static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, + struct fib_info *fi) +{ + const struct fib_nh *nh = fib_info_nh(fi, 0); + + return nh->fib_nh_gw_family || + mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL); +} + +static int +mlxsw_sp_nexthop4_group_info_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + unsigned int nhs = fib_info_num_path(nh_grp->ipv4.fi); + struct mlxsw_sp_nexthop_group_info *nhgi; + struct mlxsw_sp_nexthop *nh; + int err, i; + + nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL); + if (!nhgi) + return -ENOMEM; + nh_grp->nhgi = nhgi; + nhgi->nh_grp = nh_grp; + nhgi->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, nh_grp->ipv4.fi); + nhgi->count = nhs; + for (i = 0; i < nhgi->count; i++) { + struct fib_nh *fib_nh; + + nh = &nhgi->nexthops[i]; + fib_nh = fib_info_nh(nh_grp->ipv4.fi, i); err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh); if (err) goto err_nexthop4_init; } - err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); + err = mlxsw_sp_nexthop_group_inc(mlxsw_sp); if (err) - goto err_nexthop_group_insert; - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); - return nh_grp; + goto err_group_inc; + err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + if (err) + goto err_group_refresh; -err_nexthop_group_insert: + return 0; + +err_group_refresh: + mlxsw_sp_nexthop_group_dec(mlxsw_sp); +err_group_inc: + i = nhgi->count; err_nexthop4_init: for (i--; i >= 0; i--) { - nh = &nh_grp->nexthops[i]; + nh = &nhgi->nexthops[i]; + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); + } + kfree(nhgi); + return err; +} + +static void +mlxsw_sp_nexthop4_group_info_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + int i; + + mlxsw_sp_nexthop_group_dec(mlxsw_sp); + for (i = nhgi->count - 1; i >= 0; i--) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); } + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + WARN_ON_ONCE(nhgi->adj_index_valid); + kfree(nhgi); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + int err; + + nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->vr_list); + err = rhashtable_init(&nh_grp->vr_ht, + &mlxsw_sp_nexthop_group_vr_ht_params); + if (err) + goto err_nexthop_group_vr_ht_init; + INIT_LIST_HEAD(&nh_grp->fib_list); + nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4; + nh_grp->ipv4.fi = fi; + fib_info_hold(fi); + + err = mlxsw_sp_nexthop4_group_info_init(mlxsw_sp, nh_grp); + if (err) + goto err_nexthop_group_info_init; + + err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); + if (err) + goto err_nexthop_group_insert; + + nh_grp->can_destroy = true; + + return nh_grp; + +err_nexthop_group_insert: + mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp); +err_nexthop_group_info_init: fib_info_put(fi); + rhashtable_destroy(&nh_grp->vr_ht); +err_nexthop_group_vr_ht_init: kfree(nh_grp); return ERR_PTR(err); } @@ -3857,17 +5879,13 @@ static void mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { - struct mlxsw_sp_nexthop *nh; - int i; - + if (!nh_grp->can_destroy) + return; mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); - } - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); - WARN_ON_ONCE(nh_grp->adj_index_valid); - fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp)); + mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp); + fib_info_put(nh_grp->ipv4.fi); + WARN_ON_ONCE(!list_empty(&nh_grp->vr_list)); + rhashtable_destroy(&nh_grp->vr_ht); kfree(nh_grp); } @@ -3877,12 +5895,21 @@ static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_nexthop_group *nh_grp; + if (fi->nh) { + nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, + fi->nh->id); + if (WARN_ON_ONCE(!nh_grp)) + return -EINVAL; + goto out; + } + nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi); if (!nh_grp) { nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi); if (IS_ERR(nh_grp)) return PTR_ERR(nh_grp); } +out: list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list); fib_entry->nh_group = nh_grp; return 0; @@ -3896,6 +5923,12 @@ static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp, list_del(&fib_entry->nexthop_group_node); if (!list_empty(&nh_grp->fib_list)) return; + + if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) { + mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); + return; + } + mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp); } @@ -3906,7 +5939,7 @@ mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, common); - return !fib4_entry->tos; + return !fib4_entry->dscp; } static bool @@ -3925,9 +5958,10 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: - return !!nh_group->adj_index_valid; + return !!nh_group->nhgi->adj_index_valid; case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: - return !!nh_group->nh_rif; + return !!mlxsw_sp_nhgi_rif(nh_group->nhgi); + case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE: case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: return true; @@ -3942,143 +5976,203 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, { int i; - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + for (i = 0; i < nh_grp->nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i]; + struct net_device *dev = mlxsw_sp_nexthop_dev(nh); struct fib6_info *rt = mlxsw_sp_rt6->rt; - if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev && + if (dev && dev == rt->fib6_nh->fib_nh_dev && ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, - &rt->fib6_nh.nh_gw)) + &rt->fib6_nh->fib_nh_gw6)) return nh; - continue; } return NULL; } static void -mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib4_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp, + struct fib_entry_notifier_info *fen_info) { - struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; - int i; + u32 *p_dst = (u32 *) &fen_info->dst; + struct fib_rt_info fri; - if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) { - nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; - return; - } + fri.fi = fen_info->fi; + fri.tb_id = fen_info->tb_id; + fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = fen_info->dst_len; + fri.dscp = fen_info->dscp; + fri.type = fen_info->type; + fri.offload = false; + fri.trap = false; + fri.offload_failed = true; + fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); +} - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; +static void +mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr; + int dst_len = fib_entry->fib_node->key.prefix_len; + struct mlxsw_sp_fib4_entry *fib4_entry; + struct fib_rt_info fri; + bool should_offload; - if (nh->offloaded) - nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; - else - nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD; - } + should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry); + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + fri.fi = fib4_entry->fi; + fri.tb_id = fib4_entry->tb_id; + fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = dst_len; + fri.dscp = fib4_entry->dscp; + fri.type = fib4_entry->type; + fri.offload = should_offload; + fri.trap = !should_offload; + fri.offload_failed = false; + fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); } static void -mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { - struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; - int i; + u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr; + int dst_len = fib_entry->fib_node->key.prefix_len; + struct mlxsw_sp_fib4_entry *fib4_entry; + struct fib_rt_info fri; - if (!list_is_singular(&nh_grp->fib_list)) - return; + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + fri.fi = fib4_entry->fi; + fri.tb_id = fib4_entry->tb_id; + fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = dst_len; + fri.dscp = fib4_entry->dscp; + fri.type = fib4_entry->type; + fri.offload = false; + fri.trap = false; + fri.offload_failed = false; + fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); +} - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; +#if IS_ENABLED(CONFIG_IPV6) +static void +mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) +{ + int i; - nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD; - } + /* In IPv6 a multipath route is represented using multiple routes, so + * we need to set the flags on all of them. + */ + for (i = 0; i < nrt6; i++) + fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), rt_arr[i], + false, false, true); +} +#else +static void +mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) +{ } +#endif +#if IS_ENABLED(CONFIG_IPV6) static void -mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + bool should_offload; + + should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry); + /* In IPv6 a multipath route is represented using multiple routes, so + * we need to set the flags on all of them. + */ fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, common); - - if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) { - list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, - list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD; - return; - } - - list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; - struct mlxsw_sp_nexthop *nh; - - nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); - if (nh && nh->offloaded) - mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD; - else - mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD; - } + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) + fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt, + should_offload, !should_offload, false); +} +#else +static void +mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ } +#endif +#if IS_ENABLED(CONFIG_IPV6) static void -mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, common); - list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - struct fib6_info *rt = mlxsw_sp_rt6->rt; - - rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD; - } + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) + fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt, + false, false, false); } +#else +static void +mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ +} +#endif -static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +static void +mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: - mlxsw_sp_fib4_entry_offload_set(fib_entry); + mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - mlxsw_sp_fib6_entry_offload_set(fib_entry); + mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry); break; } } static void -mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: - mlxsw_sp_fib4_entry_offload_unset(fib_entry); + mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - mlxsw_sp_fib6_entry_offload_unset(fib_entry); + mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry); break; } } static void -mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op, int err) +mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { switch (op) { - case MLXSW_REG_RALUE_OP_WRITE_DELETE: - return mlxsw_sp_fib_entry_offload_unset(fib_entry); case MLXSW_REG_RALUE_OP_WRITE_WRITE: - if (err) - return; - if (mlxsw_sp_fib_entry_should_offload(fib_entry)) - mlxsw_sp_fib_entry_offload_set(fib_entry); - else - mlxsw_sp_fib_entry_offload_unset(fib_entry); - return; + mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry); + break; + case MLXSW_REG_RALUE_OP_WRITE_DELETE: + mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry); + break; default: - return; + break; } } @@ -4112,6 +6206,8 @@ static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { + struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group; + struct mlxsw_sp_nexthop_group_info *nhgi = nh_group->nhgi; char ralue_pl[MLXSW_REG_RALUE_LEN]; enum mlxsw_reg_ralue_trap_action trap_action; u16 trap_id = 0; @@ -4124,8 +6220,13 @@ static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, */ if (mlxsw_sp_fib_entry_should_offload(fib_entry)) { trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; - adjacency_index = fib_entry->nh_group->adj_index; - ecmp_size = fib_entry->nh_group->ecmp_size; + adjacency_index = nhgi->adj_index; + ecmp_size = nhgi->ecmp_size; + } else if (!nhgi->adj_index_valid && nhgi->count && + mlxsw_sp_nhgi_rif(nhgi)) { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; + adjacency_index = mlxsw_sp->router->adj_trap_index; + ecmp_size = 1; } else { trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; @@ -4141,7 +6242,7 @@ static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { - struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif; + struct mlxsw_sp_rif *rif = mlxsw_sp_nhgi_rif(fib_entry->nh_group->nhgi); enum mlxsw_reg_ralue_trap_action trap_action; char ralue_pl[MLXSW_REG_RALUE_LEN]; u16 trap_id = 0; @@ -4172,6 +6273,36 @@ static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } +static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + enum mlxsw_reg_ralue_trap_action trap_action; + char ralue_pl[MLXSW_REG_RALUE_LEN]; + + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR; + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int +mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + enum mlxsw_reg_ralue_trap_action trap_action; + char ralue_pl[MLXSW_REG_RALUE_LEN]; + u16 trap_id; + + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; + trap_id = MLXSW_TRAP_ID_RTR_INGRESS1; + + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + static int mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, @@ -4179,13 +6310,22 @@ mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry; const struct mlxsw_sp_ipip_ops *ipip_ops; + char ralue_pl[MLXSW_REG_RALUE_LEN]; + int err; if (WARN_ON(!ipip_entry)) return -EINVAL; ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; - return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op, - fib_entry->decap.tunnel_index); + err = ipip_ops->decap_config(mlxsw_sp, ipip_entry, + fib_entry->decap.tunnel_index); + if (err) + return err; + + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, + fib_entry->decap.tunnel_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp, @@ -4211,6 +6351,11 @@ static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE: + return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE: + return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry, + op); case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, fib_entry, op); @@ -4226,7 +6371,10 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, { int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); - mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err); + if (err) + return err; + + mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op); return err; } @@ -4250,46 +6398,50 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info, struct mlxsw_sp_fib_entry *fib_entry) { + struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi; union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; + struct mlxsw_sp_router *router = mlxsw_sp->router; u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id); - struct net_device *dev = fen_info->fi->fib_dev; + int ifindex = nhgi->nexthops[0].ifindex; struct mlxsw_sp_ipip_entry *ipip_entry; - struct fib_info *fi = fen_info->fi; switch (fen_info->type) { case RTN_LOCAL: - ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev, - MLXSW_SP_L3_PROTO_IPV4, dip); + ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex, + MLXSW_SP_L3_PROTO_IPV4, dip); if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) { fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, fib_entry, ipip_entry); } - if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id, - dip.addr4)) { - u32 t_index; + if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id, + MLXSW_SP_L3_PROTO_IPV4, + &dip)) { + u32 tunnel_index; - t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp); - fib_entry->decap.tunnel_index = t_index; + tunnel_index = router->nve_decap_config.tunnel_index; + fib_entry->decap.tunnel_index = tunnel_index; fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; return 0; } - /* fall through */ + fallthrough; case RTN_BROADCAST: fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; return 0; - case RTN_UNREACHABLE: /* fall through */ - case RTN_BLACKHOLE: /* fall through */ + case RTN_BLACKHOLE: + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE; + return 0; + case RTN_UNREACHABLE: case RTN_PROHIBIT: /* Packets hitting these routes need to be trapped, but * can do so with a lower priority than packets directed * at the host, so use action type local instead of trap. */ - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE; return 0; case RTN_UNICAST: - if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi)) + if (nhgi->gateway) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; else fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; @@ -4299,6 +6451,26 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, } } +static void +mlxsw_sp_fib_entry_type_unset(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + switch (fib_entry->type) { + case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry); + break; + default: + break; + } +} + +static void +mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib4_entry *fib4_entry) +{ + mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib4_entry->common); +} + static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, @@ -4313,25 +6485,34 @@ mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, return ERR_PTR(-ENOMEM); fib_entry = &fib4_entry->common; - err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry); - if (err) - goto err_fib4_entry_type_set; - err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi); if (err) goto err_nexthop4_group_get; - fib4_entry->prio = fen_info->fi->fib_priority; + err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group, + fib_node->fib); + if (err) + goto err_nexthop_group_vr_link; + + err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry); + if (err) + goto err_fib4_entry_type_set; + + fib4_entry->fi = fen_info->fi; + fib_info_hold(fib4_entry->fi); fib4_entry->tb_id = fen_info->tb_id; fib4_entry->type = fen_info->type; - fib4_entry->tos = fen_info->tos; + fib4_entry->dscp = fen_info->dscp; fib_entry->fib_node = fib_node; return fib4_entry; -err_nexthop4_group_get: err_fib4_entry_type_set: + mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib); +err_nexthop_group_vr_link: + mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); +err_nexthop4_group_get: kfree(fib4_entry); return ERR_PTR(err); } @@ -4339,6 +6520,12 @@ err_fib4_entry_type_set: static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib4_entry *fib4_entry) { + struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; + + fib_info_put(fib4_entry->fi); + mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib4_entry); + mlxsw_sp_nexthop_group_vr_unlink(fib4_entry->common.nh_group, + fib_node->fib); mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); kfree(fib4_entry); } @@ -4363,15 +6550,13 @@ mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp, if (!fib_node) return NULL; - list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { - if (fib4_entry->tb_id == fen_info->tb_id && - fib4_entry->tos == fen_info->tos && - fib4_entry->type == fen_info->type && - mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) == - fen_info->fi) { - return fib4_entry; - } - } + fib4_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib4_entry, common); + if (fib4_entry->tb_id == fen_info->tb_id && + fib4_entry->dscp == fen_info->dscp && + fib4_entry->type == fen_info->type && + fib4_entry->fi == fen_info->fi) + return fib4_entry; return NULL; } @@ -4419,7 +6604,6 @@ mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr, if (!fib_node) return NULL; - INIT_LIST_HEAD(&fib_node->entry_list); list_add(&fib_node->list, &fib->node_list); memcpy(fib_node->key.addr, addr, addr_len); fib_node->key.prefix_len = prefix_len; @@ -4430,18 +6614,9 @@ mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr, static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node) { list_del(&fib_node->list); - WARN_ON(!list_empty(&fib_node->entry_list)); kfree(fib_node); } -static bool -mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib_entry *fib_entry) -{ - return list_first_entry(&fib_node->entry_list, - struct mlxsw_sp_fib_entry, list) == fib_entry; -} - static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { @@ -4581,204 +6756,70 @@ static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_vr *vr = fib_node->fib->vr; - if (!list_empty(&fib_node->entry_list)) + if (fib_node->fib_entry) return; mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node); mlxsw_sp_fib_node_destroy(fib_node); mlxsw_sp_vr_put(mlxsw_sp, vr); } -static struct mlxsw_sp_fib4_entry * -mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib4_entry *new4_entry) -{ - struct mlxsw_sp_fib4_entry *fib4_entry; - - list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { - if (fib4_entry->tb_id > new4_entry->tb_id) - continue; - if (fib4_entry->tb_id != new4_entry->tb_id) - break; - if (fib4_entry->tos > new4_entry->tos) - continue; - if (fib4_entry->prio >= new4_entry->prio || - fib4_entry->tos < new4_entry->tos) - return fib4_entry; - } - - return NULL; -} - -static int -mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry, - struct mlxsw_sp_fib4_entry *new4_entry) -{ - struct mlxsw_sp_fib_node *fib_node; - - if (WARN_ON(!fib4_entry)) - return -EINVAL; - - fib_node = fib4_entry->common.fib_node; - list_for_each_entry_from(fib4_entry, &fib_node->entry_list, - common.list) { - if (fib4_entry->tb_id != new4_entry->tb_id || - fib4_entry->tos != new4_entry->tos || - fib4_entry->prio != new4_entry->prio) - break; - } - - list_add_tail(&new4_entry->common.list, &fib4_entry->common.list); - return 0; -} - -static int -mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry, - bool replace, bool append) -{ - struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node; - struct mlxsw_sp_fib4_entry *fib4_entry; - - fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry); - - if (append) - return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry); - if (replace && WARN_ON(!fib4_entry)) - return -EINVAL; - - /* Insert new entry before replaced one, so that we can later - * remove the second. - */ - if (fib4_entry) { - list_add_tail(&new4_entry->common.list, - &fib4_entry->common.list); - } else { - struct mlxsw_sp_fib4_entry *last; - - list_for_each_entry(last, &fib_node->entry_list, common.list) { - if (new4_entry->tb_id > last->tb_id) - break; - fib4_entry = last; - } - - if (fib4_entry) - list_add(&new4_entry->common.list, - &fib4_entry->common.list); - else - list_add(&new4_entry->common.list, - &fib_node->entry_list); - } - - return 0; -} - -static void -mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry) -{ - list_del(&fib4_entry->common.list); -} - -static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) -{ - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - - if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) - return 0; - - /* To prevent packet loss, overwrite the previously offloaded - * entry. - */ - if (!list_is_singular(&fib_node->entry_list)) { - enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE; - struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list); - - mlxsw_sp_fib_entry_offload_refresh(n, op, 0); - } - - return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); -} - -static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp, +static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - - if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) - return; - - /* Promote the next entry by overwriting the deleted entry */ - if (!list_is_singular(&fib_node->entry_list)) { - struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list); - enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE; - - mlxsw_sp_fib_entry_update(mlxsw_sp, n); - mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0); - return; - } - - mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); -} - -static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry, - bool replace, bool append) -{ int err; - err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append); - if (err) - return err; + fib_node->fib_entry = fib_entry; - err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common); + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); if (err) - goto err_fib_node_entry_add; + goto err_fib_entry_update; return 0; -err_fib_node_entry_add: - mlxsw_sp_fib4_node_list_remove(fib4_entry); +err_fib_entry_update: + fib_node->fib_entry = NULL; return err; } static void -mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry) +mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { - mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common); - mlxsw_sp_fib4_node_list_remove(fib4_entry); + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) - mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common); + mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); + fib_node->fib_entry = NULL; } -static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry, - bool replace) +static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry) { struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; - struct mlxsw_sp_fib4_entry *replaced; + struct mlxsw_sp_fib4_entry *fib4_replaced; - if (!replace) - return; + if (!fib_node->fib_entry) + return true; - /* We inserted the new entry before replaced one */ - replaced = list_next_entry(fib4_entry, common.list); + fib4_replaced = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib4_entry, common); + if (fib4_entry->tb_id == RT_TABLE_MAIN && + fib4_replaced->tb_id == RT_TABLE_LOCAL) + return false; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return true; } static int -mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info, - bool replace, bool append) +mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, + const struct fib_entry_notifier_info *fen_info) { - struct mlxsw_sp_fib4_entry *fib4_entry; + struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced; + struct mlxsw_sp_fib_entry *replaced; struct mlxsw_sp_fib_node *fib_node; int err; - if (mlxsw_sp->router->aborted) + if (fen_info->fi->nh && + !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id)) return 0; fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id, @@ -4797,18 +6838,32 @@ mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, goto err_fib4_entry_create; } - err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace, - append); + if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) { + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return 0; + } + + replaced = fib_node->fib_entry; + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); - goto err_fib4_node_entry_link; + goto err_fib_node_entry_link; } - mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace); + /* Nothing to replace */ + if (!replaced) + return 0; + + mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced); + fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry, + common); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced); return 0; -err_fib4_node_entry_link: +err_fib_node_entry_link: + fib_node->fib_entry = replaced; mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); err_fib4_entry_create: mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); @@ -4821,28 +6876,18 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; - if (mlxsw_sp->router->aborted) - return; - fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); - if (WARN_ON(!fib4_entry)) + if (!fib4_entry) return; fib_node = fib4_entry->common.fib_node; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common); mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt) { - /* Packets with link-local destination IP arriving to the router - * are trapped to the CPU, so no need to program specific routes - * for them. - */ - if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL) - return true; - /* Multicast routes aren't supported, so ignore them. Neighbour * Discovery packets are specifically trapped. */ @@ -4887,16 +6932,14 @@ static void mlxsw_sp_rt6_release(struct fib6_info *rt) static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) { + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; + + if (!mlxsw_sp_rt6->rt->nh) + fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt); kfree(mlxsw_sp_rt6); } -static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt) -{ - /* RTF_CACHE routes are ignored */ - return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY; -} - static struct fib6_info * mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) { @@ -4904,37 +6947,6 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) list)->rt; } -static struct mlxsw_sp_fib6_entry * -mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct fib6_info *nrt, bool replace) -{ - struct mlxsw_sp_fib6_entry *fib6_entry; - - if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace) - return NULL; - - list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - - /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same - * virtual router. - */ - if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id) - continue; - if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id) - break; - if (rt->fib6_metric < nrt->fib6_metric) - continue; - if (rt->fib6_metric == nrt->fib6_metric && - mlxsw_sp_fib6_rt_can_mp(rt)) - return fib6_entry; - if (rt->fib6_metric > nrt->fib6_metric) - break; - } - - return NULL; -} - static struct mlxsw_sp_rt6 * mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, const struct fib6_info *rt) @@ -4953,53 +6965,8 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt, enum mlxsw_sp_ipip_type *ret) { - return rt->fib6_nh.nh_dev && - mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret); -} - -static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp, - struct mlxsw_sp_nexthop *nh, - const struct fib6_info *rt) -{ - const struct mlxsw_sp_ipip_ops *ipip_ops; - struct mlxsw_sp_ipip_entry *ipip_entry; - struct net_device *dev = rt->fib6_nh.nh_dev; - struct mlxsw_sp_rif *rif; - int err; - - ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); - if (ipip_entry) { - ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; - if (ipip_ops->can_offload(mlxsw_sp, dev, - MLXSW_SP_L3_PROTO_IPV6)) { - nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry); - return 0; - } - } - - nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) - return 0; - mlxsw_sp_nexthop_rif_init(nh, rif); - - err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); - if (err) - goto err_nexthop_neigh_init; - - return 0; - -err_nexthop_neigh_init: - mlxsw_sp_nexthop_rif_fini(nh); - return err; -} - -static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) -{ - mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); + return rt->fib6_nh->fib_nh_dev && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret); } static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, @@ -5007,12 +6974,19 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, const struct fib6_info *rt) { - struct net_device *dev = rt->fib6_nh.nh_dev; + struct net_device *dev = rt->fib6_nh->fib_nh_dev; + int err; + + nh->nhgi = nh_grp->nhgi; + nh->nh_weight = rt->fib6_nh->fib_nh_weight; + memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr)); +#if IS_ENABLED(CONFIG_IPV6) + nh->neigh_tbl = &nd_tbl; +#endif - nh->nh_grp = nh_grp; - nh->nh_weight = rt->fib6_nh.nh_weight; - memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr)); - mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); + err = mlxsw_sp_nexthop_counter_enable(mlxsw_sp, nh); + if (err) + return err; list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); @@ -5020,71 +6994,138 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, return 0; nh->ifindex = dev->ifindex; - return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt); + err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); + if (err) + goto err_nexthop_type_init; + + return 0; + +err_nexthop_type_init: + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh); + return err; } static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { - mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); list_del(&nh->router_list_node); - mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh); } static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt) { - return rt->fib6_flags & RTF_GATEWAY || + return rt->fib6_nh->fib_nh_gw_family || mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); } -static struct mlxsw_sp_nexthop_group * -mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry) +static int +mlxsw_sp_nexthop6_group_info_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_fib6_entry *fib6_entry) { - struct mlxsw_sp_nexthop_group *nh_grp; + struct mlxsw_sp_nexthop_group_info *nhgi; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; struct mlxsw_sp_nexthop *nh; - size_t alloc_size; - int i = 0; - int err; + int err, i; - alloc_size = sizeof(*nh_grp) + - fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop); - nh_grp = kzalloc(alloc_size, GFP_KERNEL); - if (!nh_grp) - return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&nh_grp->fib_list); -#if IS_ENABLED(CONFIG_IPV6) - nh_grp->neigh_tbl = &nd_tbl; -#endif + nhgi = kzalloc(struct_size(nhgi, nexthops, fib6_entry->nrt6), + GFP_KERNEL); + if (!nhgi) + return -ENOMEM; + nh_grp->nhgi = nhgi; + nhgi->nh_grp = nh_grp; mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, list); - nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt); - nh_grp->count = fib6_entry->nrt6; - for (i = 0; i < nh_grp->count; i++) { + nhgi->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt); + nhgi->count = fib6_entry->nrt6; + for (i = 0; i < nhgi->count; i++) { struct fib6_info *rt = mlxsw_sp_rt6->rt; - nh = &nh_grp->nexthops[i]; + nh = &nhgi->nexthops[i]; err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt); if (err) goto err_nexthop6_init; mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list); } + nh_grp->nhgi = nhgi; + err = mlxsw_sp_nexthop_group_inc(mlxsw_sp); + if (err) + goto err_group_inc; + err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + if (err) + goto err_group_refresh; + + return 0; + +err_group_refresh: + mlxsw_sp_nexthop_group_dec(mlxsw_sp); +err_group_inc: + i = nhgi->count; +err_nexthop6_init: + for (i--; i >= 0; i--) { + nh = &nhgi->nexthops[i]; + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + kfree(nhgi); + return err; +} + +static void +mlxsw_sp_nexthop6_group_info_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + int i; + + mlxsw_sp_nexthop_group_dec(mlxsw_sp); + for (i = nhgi->count - 1; i >= 0; i--) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; + + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + WARN_ON_ONCE(nhgi->adj_index_valid); + kfree(nhgi); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + int err; + + nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->vr_list); + err = rhashtable_init(&nh_grp->vr_ht, + &mlxsw_sp_nexthop_group_vr_ht_params); + if (err) + goto err_nexthop_group_vr_ht_init; + INIT_LIST_HEAD(&nh_grp->fib_list); + nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6; + + err = mlxsw_sp_nexthop6_group_info_init(mlxsw_sp, nh_grp, fib6_entry); + if (err) + goto err_nexthop_group_info_init; err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); if (err) goto err_nexthop_group_insert; - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + nh_grp->can_destroy = true; + return nh_grp; err_nexthop_group_insert: -err_nexthop6_init: - for (i--; i >= 0; i--) { - nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); - } + mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp); +err_nexthop_group_info_init: + rhashtable_destroy(&nh_grp->vr_ht); +err_nexthop_group_vr_ht_init: kfree(nh_grp); return ERR_PTR(err); } @@ -5093,24 +7134,29 @@ static void mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { - struct mlxsw_sp_nexthop *nh; - int i = nh_grp->count; - + if (!nh_grp->can_destroy) + return; mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); - for (i--; i >= 0; i--) { - nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); - } - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); - WARN_ON(nh_grp->adj_index_valid); + mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp); + WARN_ON_ONCE(!list_empty(&nh_grp->vr_list)); + rhashtable_destroy(&nh_grp->vr_ht); kfree(nh_grp); } static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry) { + struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); struct mlxsw_sp_nexthop_group *nh_grp; + if (rt->nh) { + nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, + rt->nh->id); + if (WARN_ON_ONCE(!nh_grp)) + return -EINVAL; + goto out; + } + nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry); if (!nh_grp) { nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry); @@ -5118,6 +7164,12 @@ static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(nh_grp); } + /* The route and the nexthop are described by the same struct, so we + * need to the update the nexthop offload indication for the new route. + */ + __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry); + +out: list_add_tail(&fib6_entry->common.nexthop_group_node, &nh_grp->fib_list); fib6_entry->common.nh_group = nh_grp; @@ -5133,6 +7185,12 @@ static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp, list_del(&fib_entry->nexthop_group_node); if (!list_empty(&nh_grp->fib_list)) return; + + if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) { + mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); + return; + } + mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp); } @@ -5141,8 +7199,10 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry) { struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group; + struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node; int err; + mlxsw_sp_nexthop_group_vr_unlink(old_nh_grp, fib_node->fib); fib6_entry->common.nh_group = NULL; list_del(&fib6_entry->common.nexthop_group_node); @@ -5150,91 +7210,148 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, if (err) goto err_nexthop6_group_get; + err = mlxsw_sp_nexthop_group_vr_link(fib6_entry->common.nh_group, + fib_node->fib); + if (err) + goto err_nexthop_group_vr_link; + /* In case this entry is offloaded, then the adjacency index * currently associated with it in the device's table is that * of the old group. Start using the new one instead. */ - err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); + err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common); if (err) - goto err_fib_node_entry_add; + goto err_fib_entry_update; if (list_empty(&old_nh_grp->fib_list)) mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp); return 0; -err_fib_node_entry_add: +err_fib_entry_update: + mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group, + fib_node->fib); +err_nexthop_group_vr_link: mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); err_nexthop6_group_get: list_add_tail(&fib6_entry->common.nexthop_group_node, &old_nh_grp->fib_list); fib6_entry->common.nh_group = old_nh_grp; + mlxsw_sp_nexthop_group_vr_link(old_nh_grp, fib_node->fib); return err; } static int mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct fib6_info *rt) + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; - int err; + int err, i; - mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); - if (IS_ERR(mlxsw_sp_rt6)) - return PTR_ERR(mlxsw_sp_rt6); + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_unwind; + } - list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); - fib6_entry->nrt6++; + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; + } err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); if (err) - goto err_nexthop6_group_update; + goto err_rt6_unwind; return 0; -err_nexthop6_group_update: - fib6_entry->nrt6--; - list_del(&mlxsw_sp_rt6->list); - mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); +err_rt6_unwind: + for (; i > 0; i--) { + fib6_entry->nrt6--; + mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } return err; } static void mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct fib6_info *rt) + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int i; - mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt); - if (WARN_ON(!mlxsw_sp_rt6)) - return; + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, + rt_arr[i]); + if (WARN_ON_ONCE(!mlxsw_sp_rt6)) + continue; + + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } - fib6_entry->nrt6--; - list_del(&mlxsw_sp_rt6->list); mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); - mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); } -static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - const struct fib6_info *rt) +static int +mlxsw_sp_fib6_entry_type_set_local(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + const struct fib6_info *rt) { - /* Packets hitting RTF_REJECT routes need to be discarded by the - * stack. We can rely on their destination device not having a - * RIF (it's the loopback device) and can thus use action type - * local, which will cause them to be trapped with a lower - * priority than packets that need to be locally received. - */ - if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) + struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi; + union mlxsw_sp_l3addr dip = { .addr6 = rt->fib6_dst.addr }; + u32 tb_id = mlxsw_sp_fix_tb_id(rt->fib6_table->tb6_id); + struct mlxsw_sp_router *router = mlxsw_sp->router; + int ifindex = nhgi->nexthops[0].ifindex; + struct mlxsw_sp_ipip_entry *ipip_entry; + + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex, + MLXSW_SP_L3_PROTO_IPV6, + dip); + + if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; + return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, fib_entry, + ipip_entry); + } + if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id, + MLXSW_SP_L3_PROTO_IPV6, &dip)) { + u32 tunnel_index; + + tunnel_index = router->nve_decap_config.tunnel_index; + fib_entry->decap.tunnel_index = tunnel_index; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + } + + return 0; +} + +static int mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + const struct fib6_info *rt) +{ + if (rt->fib6_flags & RTF_LOCAL) + return mlxsw_sp_fib6_entry_type_set_local(mlxsw_sp, fib_entry, + rt); + if (rt->fib6_flags & RTF_ANYCAST) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + else if (rt->fib6_type == RTN_BLACKHOLE) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE; else if (rt->fib6_flags & RTF_REJECT) - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; - else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt)) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE; + else if (fib_entry->nh_group->nhgi->gateway) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; else fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + + return 0; } static void @@ -5253,48 +7370,78 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry) static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, - struct fib6_info *rt) + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; - int err; + int err, i; fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL); if (!fib6_entry) return ERR_PTR(-ENOMEM); fib_entry = &fib6_entry->common; - mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); - if (IS_ERR(mlxsw_sp_rt6)) { - err = PTR_ERR(mlxsw_sp_rt6); - goto err_rt6_create; - } + INIT_LIST_HEAD(&fib6_entry->rt6_list); - mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt); + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_unwind; + } + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; + } - INIT_LIST_HEAD(&fib6_entry->rt6_list); - list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); - fib6_entry->nrt6 = 1; err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); if (err) - goto err_nexthop6_group_get; + goto err_rt6_unwind; + + err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group, + fib_node->fib); + if (err) + goto err_nexthop_group_vr_link; + + err = mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]); + if (err) + goto err_fib6_entry_type_set; fib_entry->fib_node = fib_node; return fib6_entry; -err_nexthop6_group_get: - list_del(&mlxsw_sp_rt6->list); - mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); -err_rt6_create: +err_fib6_entry_type_set: + mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib); +err_nexthop_group_vr_link: + mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry); +err_rt6_unwind: + for (; i > 0; i--) { + fib6_entry->nrt6--; + mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } kfree(fib6_entry); return ERR_PTR(err); } +static void +mlxsw_sp_fib6_entry_type_unset(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib6_entry->common); +} + static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry) { + struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node; + + mlxsw_sp_fib6_entry_type_unset(mlxsw_sp, fib6_entry); + mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group, + fib_node->fib); mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry); WARN_ON(fib6_entry->nrt6); @@ -5302,112 +7449,13 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_fib6_entry * -mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct fib6_info *nrt, bool replace) -{ - struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL; - - list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - - if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id) - continue; - if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id) - break; - if (replace && rt->fib6_metric == nrt->fib6_metric) { - if (mlxsw_sp_fib6_rt_can_mp(rt) == - mlxsw_sp_fib6_rt_can_mp(nrt)) - return fib6_entry; - if (mlxsw_sp_fib6_rt_can_mp(nrt)) - fallback = fallback ?: fib6_entry; - } - if (rt->fib6_metric > nrt->fib6_metric) - return fallback ?: fib6_entry; - } - - return fallback; -} - -static int -mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, - bool replace) -{ - struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; - struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); - struct mlxsw_sp_fib6_entry *fib6_entry; - - fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace); - - if (replace && WARN_ON(!fib6_entry)) - return -EINVAL; - - if (fib6_entry) { - list_add_tail(&new6_entry->common.list, - &fib6_entry->common.list); - } else { - struct mlxsw_sp_fib6_entry *last; - - list_for_each_entry(last, &fib_node->entry_list, common.list) { - struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last); - - if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id) - break; - fib6_entry = last; - } - - if (fib6_entry) - list_add(&new6_entry->common.list, - &fib6_entry->common.list); - else - list_add(&new6_entry->common.list, - &fib_node->entry_list); - } - - return 0; -} - -static void -mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry) -{ - list_del(&fib6_entry->common.list); -} - -static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry, - bool replace) -{ - int err; - - err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace); - if (err) - return err; - - err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); - if (err) - goto err_fib_node_entry_add; - - return 0; - -err_fib_node_entry_add: - mlxsw_sp_fib6_node_list_remove(fib6_entry); - return err; -} - -static void -mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry) -{ - mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common); - mlxsw_sp_fib6_node_list_remove(fib6_entry); -} - -static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; struct mlxsw_sp_fib *fib; + struct fib6_info *cmp_rt; struct mlxsw_sp_vr *vr; vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id); @@ -5421,51 +7469,57 @@ mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, if (!fib_node) return NULL; - list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - - if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id && - rt->fib6_metric == iter_rt->fib6_metric && - mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt)) - return fib6_entry; - } + fib6_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, common); + cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id && + rt->fib6_metric == cmp_rt->fib6_metric && + mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt)) + return fib6_entry; return NULL; } -static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry, - bool replace) +static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry) { struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node; - struct mlxsw_sp_fib6_entry *replaced; + struct mlxsw_sp_fib6_entry *fib6_replaced; + struct fib6_info *rt, *rt_replaced; - if (!replace) - return; + if (!fib_node->fib_entry) + return true; - replaced = list_next_entry(fib6_entry, common.list); + fib6_replaced = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, + common); + rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced); + if (rt->fib6_table->tb6_id == RT_TABLE_MAIN && + rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL) + return false; - mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced); - mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return true; } -static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, - struct fib6_info *rt, bool replace) +static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) { - struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced; + struct mlxsw_sp_fib_entry *replaced; struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; int err; - if (mlxsw_sp->router->aborted) - return 0; - if (rt->fib6_src.plen) return -EINVAL; if (mlxsw_sp_fib6_rt_should_ignore(rt)) return 0; + if (rt->nh && !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, rt->nh->id)) + return 0; + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id, &rt->fib6_dst.addr, sizeof(rt->fib6_dst.addr), @@ -5474,109 +7528,119 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, if (IS_ERR(fib_node)) return PTR_ERR(fib_node); - /* Before creating a new entry, try to append route to an existing - * multipath entry. - */ - fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace); - if (fib6_entry) { - err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt); - if (err) - goto err_fib6_entry_nexthop_add; - return 0; - } - - fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt); + fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr, + nrt6); if (IS_ERR(fib6_entry)) { err = PTR_ERR(fib6_entry); goto err_fib6_entry_create; } - err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace); + if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) { + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return 0; + } + + replaced = fib_node->fib_entry; + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common); if (err) - goto err_fib6_node_entry_link; + goto err_fib_node_entry_link; - mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace); + /* Nothing to replace */ + if (!replaced) + return 0; + + mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced); + fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry, + common); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced); return 0; -err_fib6_node_entry_link: +err_fib_node_entry_link: + fib_node->fib_entry = replaced; mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); err_fib6_entry_create: -err_fib6_entry_nexthop_add: mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); return err; } -static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, - struct fib6_info *rt) +static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; + int err; - if (mlxsw_sp->router->aborted) - return; + if (rt->fib6_src.plen) + return -EINVAL; if (mlxsw_sp_fib6_rt_should_ignore(rt)) - return; + return 0; - fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt); - if (WARN_ON(!fib6_entry)) - return; + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id, + &rt->fib6_dst.addr, + sizeof(rt->fib6_dst.addr), + rt->fib6_dst.plen, + MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(fib_node)) + return PTR_ERR(fib_node); - /* If route is part of a multipath entry, but not the last one - * removed, then only reduce its nexthop group. - */ - if (!list_is_singular(&fib6_entry->rt6_list)) { - mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt); - return; + if (WARN_ON_ONCE(!fib_node->fib_entry)) { + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return -EINVAL; } - fib_node = fib6_entry->common.fib_node; + fib6_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, common); + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr, + nrt6); + if (err) + goto err_fib6_entry_nexthop_add; - mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); - mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + return 0; + +err_fib6_entry_nexthop_add: mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return err; } -static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_reg_ralxx_protocol proto, - u8 tree_id) +static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) { - char ralta_pl[MLXSW_REG_RALTA_LEN]; - char ralst_pl[MLXSW_REG_RALST_LEN]; - int i, err; - - mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); - if (err) - return err; - - mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); - if (err) - return err; + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { - struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i]; - char raltb_pl[MLXSW_REG_RALTB_LEN]; - char ralue_pl[MLXSW_REG_RALUE_LEN]; + if (mlxsw_sp_fib6_rt_should_ignore(rt)) + return; - mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), - raltb_pl); - if (err) - return err; + /* Multipath routes are first added to the FIB trie and only then + * notified. If we vetoed the addition, we will get a delete + * notification for a route we do not have. Therefore, do not warn if + * route was not found. + */ + fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt); + if (!fib6_entry) + return; - mlxsw_reg_ralue_pack(ralue_pl, proto, - MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0); - mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), - ralue_pl); - if (err) - return err; + /* If not all the nexthops are deleted, then only reduce the nexthop + * group. + */ + if (nrt6 != fib6_entry->nrt6) { + mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr, + nrt6); + return; } - return 0; + fib_node = fib6_entry->common.fib_node; + + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } static struct mlxsw_sp_mr_table * @@ -5595,9 +7659,6 @@ static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_mr_table *mrt; struct mlxsw_sp_vr *vr; - if (mlxsw_sp->router->aborted) - return 0; - vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL); if (IS_ERR(vr)) return PTR_ERR(vr); @@ -5612,9 +7673,6 @@ static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_mr_table *mrt; struct mlxsw_sp_vr *vr; - if (mlxsw_sp->router->aborted) - return; - vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id); if (WARN_ON(!vr)) return; @@ -5632,9 +7690,6 @@ mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif; struct mlxsw_sp_vr *vr; - if (mlxsw_sp->router->aborted) - return 0; - vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL); if (IS_ERR(vr)) return PTR_ERR(vr); @@ -5653,9 +7708,6 @@ mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_mr_table *mrt; struct mlxsw_sp_vr *vr; - if (mlxsw_sp->router->aborted) - return; - vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id); if (WARN_ON(!vr)) return; @@ -5665,61 +7717,28 @@ mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_vr_put(mlxsw_sp, vr); } -static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) -{ - enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4; - int err; - - err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, - MLXSW_SP_LPM_TREE_MIN); - if (err) - return err; - - /* The multicast router code does not need an abort trap as by default, - * packets that don't match any routes are trapped to the CPU. - */ - - proto = MLXSW_REG_RALXX_PROTOCOL_IPV6; - return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, - MLXSW_SP_LPM_TREE_MIN + 1); -} - static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_fib4_entry *fib4_entry, *tmp; - - list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list, - common.list) { - bool do_break = &tmp->common.list == &fib_node->entry_list; + struct mlxsw_sp_fib4_entry *fib4_entry; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); - /* Break when entry list is empty and node was freed. - * Otherwise, we'll access freed memory in the next - * iteration. - */ - if (do_break) - break; - } + fib4_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib4_entry, common); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_fib6_entry *fib6_entry, *tmp; - - list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list, - common.list) { - bool do_break = &tmp->common.list == &fib_node->entry_list; + struct mlxsw_sp_fib6_entry *fib6_entry; - mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); - mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); - if (do_break) - break; - } + fib6_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, common); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, @@ -5753,9 +7772,10 @@ static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) { + int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); int i, j; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + for (i = 0; i < max_vrs; i++) { struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i]; if (!mlxsw_sp_vr_is_used(vr)) @@ -5774,24 +7794,16 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) } } -static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) -{ - int err; - - if (mlxsw_sp->router->aborted) - return; - dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n"); - mlxsw_sp_router_fib_flush(mlxsw_sp); - mlxsw_sp->router->aborted = true; - err = mlxsw_sp_router_set_abort_trap(mlxsw_sp); - if (err) - dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); -} +struct mlxsw_sp_fib6_event_work { + struct fib6_info **rt_arr; + unsigned int nrt6; +}; struct mlxsw_sp_fib_event_work { struct work_struct work; + netdevice_tracker dev_tracker; union { - struct fib6_entry_notifier_info fen6_info; + struct mlxsw_sp_fib6_event_work fib6_work; struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; @@ -5802,85 +7814,134 @@ struct mlxsw_sp_fib_event_work { unsigned long event; }; +static int +mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work, + struct fib6_entry_notifier_info *fen6_info) +{ + struct fib6_info *rt = fen6_info->rt; + struct fib6_info **rt_arr; + struct fib6_info *iter; + unsigned int nrt6; + int i = 0; + + nrt6 = fen6_info->nsiblings + 1; + + rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC); + if (!rt_arr) + return -ENOMEM; + + fib6_work->rt_arr = rt_arr; + fib6_work->nrt6 = nrt6; + + rt_arr[0] = rt; + fib6_info_hold(rt); + + if (!fen6_info->nsiblings) + return 0; + + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { + if (i == fen6_info->nsiblings) + break; + + rt_arr[i + 1] = iter; + fib6_info_hold(iter); + i++; + } + WARN_ON_ONCE(i != fen6_info->nsiblings); + + return 0; +} + +static void +mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work) +{ + int i; + + for (i = 0; i < fib6_work->nrt6; i++) + mlxsw_sp_rt6_release(fib6_work->rt_arr[i]); + kfree(fib6_work->rt_arr); +} + static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) { struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; - bool replace, append; int err; - /* Protect internal structures from changes */ - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); mlxsw_sp_span_respin(mlxsw_sp); switch (fib_work->event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: - replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; - append = fib_work->event == FIB_EVENT_ENTRY_APPEND; - err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info, - replace, append); - if (err) - mlxsw_sp_router_fib_abort(mlxsw_sp); + case FIB_EVENT_ENTRY_REPLACE: + err = mlxsw_sp_router_fib4_replace(mlxsw_sp, + &fib_work->fen_info); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n"); + mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp, + &fib_work->fen_info); + } fib_info_put(fib_work->fen_info.fi); break; case FIB_EVENT_ENTRY_DEL: mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info); fib_info_put(fib_work->fen_info.fi); break; - case FIB_EVENT_RULE_ADD: - /* if we get here, a rule was added that we do not support. - * just do the fib_abort - */ - mlxsw_sp_router_fib_abort(mlxsw_sp); - break; - case FIB_EVENT_NH_ADD: /* fall through */ + case FIB_EVENT_NH_ADD: case FIB_EVENT_NH_DEL: mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event, fib_work->fnh_info.fib_nh); fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); break; } - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); kfree(fib_work); } static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) { struct mlxsw_sp_fib_event_work *fib_work = - container_of(work, struct mlxsw_sp_fib_event_work, work); + container_of(work, struct mlxsw_sp_fib_event_work, work); + struct mlxsw_sp_fib6_event_work *fib6_work = &fib_work->fib6_work; struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; - bool replace; int err; - rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); mlxsw_sp_span_respin(mlxsw_sp); switch (fib_work->event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: - replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; - err = mlxsw_sp_router_fib6_add(mlxsw_sp, - fib_work->fen6_info.rt, replace); - if (err) - mlxsw_sp_router_fib_abort(mlxsw_sp); - mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + case FIB_EVENT_ENTRY_REPLACE: + err = mlxsw_sp_router_fib6_replace(mlxsw_sp, + fib6_work->rt_arr, + fib6_work->nrt6); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n"); + mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp, + fib6_work->rt_arr, + fib6_work->nrt6); + } + mlxsw_sp_router_fib6_work_fini(fib6_work); break; - case FIB_EVENT_ENTRY_DEL: - mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt); - mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + case FIB_EVENT_ENTRY_APPEND: + err = mlxsw_sp_router_fib6_append(mlxsw_sp, + fib6_work->rt_arr, + fib6_work->nrt6); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "FIB append failed.\n"); + mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp, + fib6_work->rt_arr, + fib6_work->nrt6); + } + mlxsw_sp_router_fib6_work_fini(fib6_work); break; - case FIB_EVENT_RULE_ADD: - /* if we get here, a rule was added that we do not support. - * just do the fib_abort - */ - mlxsw_sp_router_fib_abort(mlxsw_sp); + case FIB_EVENT_ENTRY_DEL: + mlxsw_sp_router_fib6_del(mlxsw_sp, + fib6_work->rt_arr, + fib6_work->nrt6); + mlxsw_sp_router_fib6_work_fini(fib6_work); break; } - rtnl_unlock(); + mutex_unlock(&mlxsw_sp->router->lock); kfree(fib_work); } @@ -5893,15 +7954,16 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) int err; rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); switch (fib_work->event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_ADD: replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info, replace); if (err) - mlxsw_sp_router_fib_abort(mlxsw_sp); + dev_warn(mlxsw_sp->bus_info->dev, "MR entry add failed.\n"); mr_cache_put(fib_work->men_info.mfc); break; case FIB_EVENT_ENTRY_DEL: @@ -5912,21 +7974,16 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp, &fib_work->ven_info); if (err) - mlxsw_sp_router_fib_abort(mlxsw_sp); - dev_put(fib_work->ven_info.dev); + dev_warn(mlxsw_sp->bus_info->dev, "MR VIF add failed.\n"); + netdev_put(fib_work->ven_info.dev, &fib_work->dev_tracker); break; case FIB_EVENT_VIF_DEL: mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, &fib_work->ven_info); - dev_put(fib_work->ven_info.dev); - break; - case FIB_EVENT_RULE_ADD: - /* if we get here, a rule was added that we do not support. - * just do the fib_abort - */ - mlxsw_sp_router_fib_abort(mlxsw_sp); + netdev_put(fib_work->ven_info.dev, &fib_work->dev_tracker); break; } + mutex_unlock(&mlxsw_sp->router->lock); rtnl_unlock(); kfree(fib_work); } @@ -5938,9 +7995,7 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_nh_notifier_info *fnh_info; switch (fib_work->event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_DEL: fen_info = container_of(info, struct fib_entry_notifier_info, info); @@ -5950,7 +8005,7 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, */ fib_info_hold(fib_work->fen_info.fi); break; - case FIB_EVENT_NH_ADD: /* fall through */ + case FIB_EVENT_NH_ADD: case FIB_EVENT_NH_DEL: fnh_info = container_of(info, struct fib_nh_notifier_info, info); @@ -5960,22 +8015,26 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, } } -static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, - struct fib_notifier_info *info) +static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) { struct fib6_entry_notifier_info *fen6_info; + int err; switch (fib_work->event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_APPEND: case FIB_EVENT_ENTRY_DEL: fen6_info = container_of(info, struct fib6_entry_notifier_info, info); - fib_work->fen6_info = *fen6_info; - fib6_info_hold(fib_work->fen6_info.rt); + err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work, + fen6_info); + if (err) + return err; break; } + + return 0; } static void @@ -5983,16 +8042,17 @@ mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { switch (fib_work->event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_ADD: case FIB_EVENT_ENTRY_DEL: memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info)); mr_cache_hold(fib_work->men_info.mfc); break; - case FIB_EVENT_VIF_ADD: /* fall through */ + case FIB_EVENT_VIF_ADD: case FIB_EVENT_VIF_DEL: memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info)); - dev_hold(fib_work->ven_info.dev); + netdev_hold(fib_work->ven_info.dev, &fib_work->dev_tracker, + GFP_ATOMIC); break; } } @@ -6010,12 +8070,13 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event, if (event == FIB_EVENT_RULE_DEL) return 0; - if (mlxsw_sp->router->aborted) - return 0; - fr_info = container_of(info, struct fib_rule_notifier_info, info); rule = fr_info->rule; + /* Rule only affects locally generated traffic */ + if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex) + return 0; + switch (info->family) { case AF_INET: if (!fib4_rule_default(rule) && !rule->l3mdev) @@ -6050,8 +8111,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, struct mlxsw_sp_router *router; int err; - if (!net_eq(info->net, &init_net) || - (info->family != AF_INET && info->family != AF_INET6 && + if ((info->family != AF_INET && info->family != AF_INET6 && info->family != RTNL_FAMILY_IPMR && info->family != RTNL_FAMILY_IP6MR)) return NOTIFY_DONE; @@ -6059,23 +8119,27 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, router = container_of(nb, struct mlxsw_sp_router, fib_nb); switch (event) { - case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_ADD: case FIB_EVENT_RULE_DEL: err = mlxsw_sp_router_fib_rule_event(event, info, router->mlxsw_sp); - if (!err || info->extack) - return notifier_from_errno(err); - break; + return notifier_from_errno(err); case FIB_EVENT_ENTRY_ADD: - if (router->aborted) { - NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route"); - return notifier_from_errno(-EINVAL); + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_APPEND: + if (info->family == AF_INET) { + struct fib_entry_notifier_info *fen_info = ptr; + + if (fen_info->fi->fib_nh_is_v6) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported"); + return notifier_from_errno(-EINVAL); + } } break; } fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); - if (WARN_ON(!fib_work)) + if (!fib_work) return NOTIFY_BAD; fib_work->mlxsw_sp = router->mlxsw_sp; @@ -6088,7 +8152,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, break; case AF_INET6: INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); - mlxsw_sp_router_fib6_event(fib_work, info); + err = mlxsw_sp_router_fib6_event(fib_work, info); + if (err) + goto err_fib_event; break; case RTNL_FAMILY_IP6MR: case RTNL_FAMILY_IPMR: @@ -6100,17 +8166,22 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, mlxsw_core_schedule_work(&fib_work->work); return NOTIFY_DONE; + +err_fib_event: + kfree(fib_work); + return NOTIFY_BAD; } -struct mlxsw_sp_rif * +static struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) { + int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); int i; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) + for (i = 0; i < max_rifs; i++) if (mlxsw_sp->router->rifs[i] && - mlxsw_sp->router->rifs[i]->dev == dev) + mlxsw_sp_rif_dev_is(mlxsw_sp->router->rifs[i], dev)) return mlxsw_sp->router->rifs[i]; return NULL; @@ -6123,41 +8194,82 @@ static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) mlxsw_reg_ritr_rif_pack(ritr_pl, rif); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); - if (WARN_ON_ONCE(err)) + if (err) return err; mlxsw_reg_ritr_enable_set(ritr_pl, false); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); } +static int mlxsw_sp_router_rif_made_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) +{ + int err; + + err = mlxsw_sp_neigh_rif_made_sync(mlxsw_sp, rif); + if (err) + return err; + + err = mlxsw_sp_nexthop_rif_made_sync(mlxsw_sp, rif); + if (err) + goto err_nexthop; + + return 0; + +err_nexthop: + mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); + return err; +} + static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { + /* Signal to nexthop cleanup that the RIF is going away. */ + rif->crif->rif = NULL; + mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index); mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif); mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); } +static bool __mlxsw_sp_dev_addr_list_empty(const struct net_device *dev) +{ + struct inet6_dev *inet6_dev; + struct in_device *idev; + + idev = __in_dev_get_rcu(dev); + if (idev && idev->ifa_list) + return false; + + inet6_dev = __in6_dev_get(dev); + if (inet6_dev && !list_empty(&inet6_dev->addr_list)) + return false; + + return true; +} + +static bool mlxsw_sp_dev_addr_list_empty(const struct net_device *dev) +{ + bool addr_list_empty; + + rcu_read_lock(); + addr_list_empty = __mlxsw_sp_dev_addr_list_empty(dev); + rcu_read_unlock(); + + return addr_list_empty; +} + static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, unsigned long event) { - struct inet6_dev *inet6_dev; - bool addr_list_empty = true; - struct in_device *idev; + bool addr_list_empty; switch (event) { case NETDEV_UP: return rif == NULL; case NETDEV_DOWN: - idev = __in_dev_get_rtnl(dev); - if (idev && idev->ifa_list) - addr_list_empty = false; - - inet6_dev = __in6_dev_get(dev); - if (addr_list_empty && inet6_dev && - !list_empty(&inet6_dev->addr_list)) - addr_list_empty = false; + addr_list_empty = mlxsw_sp_dev_addr_list_empty(dev); /* macvlans do not have a RIF, but rather piggy back on the * RIF of their lower device. @@ -6166,7 +8278,7 @@ mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, return true; if (rif && addr_list_empty && - !netif_is_l3_slave(rif->dev)) + !netif_is_l3_slave(mlxsw_sp_rif_dev(rif))) return true; /* It is possible we already removed the RIF ourselves * if it was assigned to a netdev that is now a bridge @@ -6200,43 +8312,63 @@ mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type); } -static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index) +static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index, + u8 rif_entries) { - int i; + *p_rif_index = gen_pool_alloc(mlxsw_sp->router->rifs_table, + rif_entries); + if (*p_rif_index == 0) + return -ENOBUFS; + *p_rif_index -= MLXSW_SP_ROUTER_GENALLOC_OFFSET; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { - if (!mlxsw_sp->router->rifs[i]) { - *p_rif_index = i; - return 0; - } - } + /* RIF indexes must be aligned to the allocation size. */ + WARN_ON_ONCE(*p_rif_index % rif_entries); - return -ENOBUFS; + return 0; +} + +static void mlxsw_sp_rif_index_free(struct mlxsw_sp *mlxsw_sp, u16 rif_index, + u8 rif_entries) +{ + gen_pool_free(mlxsw_sp->router->rifs_table, + MLXSW_SP_ROUTER_GENALLOC_OFFSET + rif_index, rif_entries); } static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, u16 vr_id, - struct net_device *l3_dev) + struct mlxsw_sp_crif *crif) { + struct net_device *l3_dev = crif ? crif->key.dev : NULL; struct mlxsw_sp_rif *rif; rif = kzalloc(rif_size, GFP_KERNEL); if (!rif) return NULL; - INIT_LIST_HEAD(&rif->nexthop_list); INIT_LIST_HEAD(&rif->neigh_list); if (l3_dev) { ether_addr_copy(rif->addr, l3_dev->dev_addr); rif->mtu = l3_dev->mtu; - rif->dev = l3_dev; } rif->vr_id = vr_id; rif->rif_index = rif_index; + if (crif) { + rif->crif = crif; + crif->rif = rif; + } return rif; } +static void mlxsw_sp_rif_free(struct mlxsw_sp_rif *rif) +{ + WARN_ON(!list_empty(&rif->neigh_list)); + + if (rif->crif) + rif->crif->rif = NULL; + kfree(rif); +} + struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 rif_index) { @@ -6253,36 +8385,196 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif) return lb_rif->common.rif_index; } -u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) +u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) { - u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev); - struct mlxsw_sp_vr *ul_vr; + return lb_rif->ul_rif_id; +} + +static bool +mlxsw_sp_router_port_l3_stats_enabled(struct mlxsw_sp_rif *rif) +{ + return mlxsw_sp_rif_counter_valid_get(rif, + MLXSW_SP_RIF_COUNTER_EGRESS) && + mlxsw_sp_rif_counter_valid_get(rif, + MLXSW_SP_RIF_COUNTER_INGRESS); +} + +static int +mlxsw_sp_router_port_l3_stats_enable(struct mlxsw_sp_rif *rif) +{ + int err; + + err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_INGRESS); + if (err) + return err; - ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL); - if (WARN_ON(IS_ERR(ul_vr))) + /* Clear stale data. */ + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_INGRESS, + NULL); + if (err) + goto err_clear_ingress; + + err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS); + if (err) + goto err_alloc_egress; + + /* Clear stale data. */ + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_EGRESS, + NULL); + if (err) + goto err_clear_egress; + + return 0; + +err_clear_egress: + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS); +err_alloc_egress: +err_clear_ingress: + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS); + return err; +} + +static void +mlxsw_sp_router_port_l3_stats_disable(struct mlxsw_sp_rif *rif) +{ + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS); + mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS); +} + +static void +mlxsw_sp_router_port_l3_stats_report_used(struct mlxsw_sp_rif *rif, + struct netdev_notifier_offload_xstats_info *info) +{ + if (!mlxsw_sp_router_port_l3_stats_enabled(rif)) + return; + netdev_offload_xstats_report_used(info->report_used); +} + +static int +mlxsw_sp_router_port_l3_stats_fetch(struct mlxsw_sp_rif *rif, + struct rtnl_hw_stats64 *p_stats) +{ + struct mlxsw_sp_rif_counter_set_basic ingress; + struct mlxsw_sp_rif_counter_set_basic egress; + int err; + + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_INGRESS, + &ingress); + if (err) + return err; + + err = mlxsw_sp_rif_counter_fetch_clear(rif, + MLXSW_SP_RIF_COUNTER_EGRESS, + &egress); + if (err) + return err; + +#define MLXSW_SP_ROUTER_ALL_GOOD(SET, SFX) \ + ((SET.good_unicast_ ## SFX) + \ + (SET.good_multicast_ ## SFX) + \ + (SET.good_broadcast_ ## SFX)) + + p_stats->rx_packets = MLXSW_SP_ROUTER_ALL_GOOD(ingress, packets); + p_stats->tx_packets = MLXSW_SP_ROUTER_ALL_GOOD(egress, packets); + p_stats->rx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(ingress, bytes); + p_stats->tx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(egress, bytes); + p_stats->rx_errors = ingress.error_packets; + p_stats->tx_errors = egress.error_packets; + p_stats->rx_dropped = ingress.discard_packets; + p_stats->tx_dropped = egress.discard_packets; + p_stats->multicast = ingress.good_multicast_packets + + ingress.good_broadcast_packets; + +#undef MLXSW_SP_ROUTER_ALL_GOOD + + return 0; +} + +static int +mlxsw_sp_router_port_l3_stats_report_delta(struct mlxsw_sp_rif *rif, + struct netdev_notifier_offload_xstats_info *info) +{ + struct rtnl_hw_stats64 stats = {}; + int err; + + if (!mlxsw_sp_router_port_l3_stats_enabled(rif)) return 0; - return ul_vr->id; + err = mlxsw_sp_router_port_l3_stats_fetch(rif, &stats); + if (err) + return err; + + netdev_offload_xstats_report_delta(info->report_delta, &stats); + return 0; } -u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) +struct mlxsw_sp_router_hwstats_notify_work { + struct work_struct work; + struct net_device *dev; + netdevice_tracker dev_tracker; +}; + +static void mlxsw_sp_router_hwstats_notify_work(struct work_struct *work) { - return lb_rif->ul_rif_id; + struct mlxsw_sp_router_hwstats_notify_work *hws_work = + container_of(work, struct mlxsw_sp_router_hwstats_notify_work, + work); + + rtnl_lock(); + rtnl_offload_xstats_notify(hws_work->dev); + rtnl_unlock(); + netdev_put(hws_work->dev, &hws_work->dev_tracker); + kfree(hws_work); +} + +static void +mlxsw_sp_router_hwstats_notify_schedule(struct net_device *dev) +{ + struct mlxsw_sp_router_hwstats_notify_work *hws_work; + + /* To collect notification payload, the core ends up sending another + * notifier block message, which would deadlock on the attempt to + * acquire the router lock again. Just postpone the notification until + * later. + */ + + hws_work = kzalloc(sizeof(*hws_work), GFP_KERNEL); + if (!hws_work) + return; + + INIT_WORK(&hws_work->work, mlxsw_sp_router_hwstats_notify_work); + netdev_hold(dev, &hws_work->dev_tracker, GFP_KERNEL); + hws_work->dev = dev; + mlxsw_core_schedule_work(&hws_work->work); } int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) { - return rif->dev->ifindex; + return mlxsw_sp_rif_dev(rif)->ifindex; +} + +bool mlxsw_sp_rif_has_dev(const struct mlxsw_sp_rif *rif) +{ + return !!mlxsw_sp_rif_dev(rif); } -const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) +bool mlxsw_sp_rif_dev_is(const struct mlxsw_sp_rif *rif, + const struct net_device *dev) { - return rif->dev; + return mlxsw_sp_rif_dev(rif) == dev; } -struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif) +static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif) { - return rif->fid; + struct rtnl_hw_stats64 stats = {}; + + if (!mlxsw_sp_router_port_l3_stats_fetch(rif, &stats)) + netdev_offload_xstats_push_delta(mlxsw_sp_rif_dev(rif), + NETDEV_OFFLOAD_XSTATS_TYPE_L3, + &stats); } static struct mlxsw_sp_rif * @@ -6290,41 +8582,53 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_params *params, struct netlink_ext_ack *extack) { + u8 rif_entries = params->double_entry ? 2 : 1; u32 tb_id = l3mdev_fib_table(params->dev); const struct mlxsw_sp_rif_ops *ops; struct mlxsw_sp_fid *fid = NULL; enum mlxsw_sp_rif_type type; + struct mlxsw_sp_crif *crif; struct mlxsw_sp_rif *rif; struct mlxsw_sp_vr *vr; u16 rif_index; int i, err; type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev); - ops = mlxsw_sp->rif_ops_arr[type]; + ops = mlxsw_sp->router->rif_ops_arr[type]; vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack); if (IS_ERR(vr)) return ERR_CAST(vr); vr->rif_count++; - err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); + err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries); if (err) { NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces"); goto err_rif_index_alloc; } - rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev); + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, params->dev); + if (WARN_ON(!crif)) { + err = -ENOENT; + goto err_crif_lookup; + } + + rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, crif); if (!rif) { err = -ENOMEM; goto err_rif_alloc; } - dev_hold(rif->dev); + netdev_hold(params->dev, &rif->dev_tracker, GFP_KERNEL); mlxsw_sp->router->rifs[rif_index] = rif; rif->mlxsw_sp = mlxsw_sp; rif->ops = ops; + rif->rif_entries = rif_entries; + + if (ops->setup) + ops->setup(rif, params); if (ops->fid_get) { - fid = ops->fid_get(rif, extack); + fid = ops->fid_get(rif, params, extack); if (IS_ERR(fid)) { err = PTR_ERR(fid); goto err_fid_get; @@ -6332,10 +8636,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, rif->fid = fid; } - if (ops->setup) - ops->setup(rif, params); - - err = ops->configure(rif); + err = ops->configure(rif, extack); if (err) goto err_configure; @@ -6345,10 +8646,26 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, goto err_mr_rif_add; } - mlxsw_sp_rif_counters_alloc(rif); + err = mlxsw_sp_router_rif_made_sync(mlxsw_sp, rif); + if (err) + goto err_rif_made_sync; + + if (netdev_offload_xstats_enabled(params->dev, + NETDEV_OFFLOAD_XSTATS_TYPE_L3)) { + err = mlxsw_sp_router_port_l3_stats_enable(rif); + if (err) + goto err_stats_enable; + mlxsw_sp_router_hwstats_notify_schedule(params->dev); + } else { + mlxsw_sp_rif_counters_alloc(rif); + } + atomic_add(rif_entries, &mlxsw_sp->router->rifs_count); return rif; +err_stats_enable: + mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); +err_rif_made_sync: err_mr_rif_add: for (i--; i >= 0; i--) mlxsw_sp_mr_rif_del(vr->mr_table[i], rif); @@ -6358,9 +8675,11 @@ err_configure: mlxsw_sp_fid_put(fid); err_fid_get: mlxsw_sp->router->rifs[rif_index] = NULL; - dev_put(rif->dev); - kfree(rif); + netdev_put(params->dev, &rif->dev_tracker); + mlxsw_sp_rif_free(rif); err_rif_alloc: +err_crif_lookup: + mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); err_rif_index_alloc: vr->rif_count--; mlxsw_sp_vr_put(mlxsw_sp, vr); @@ -6369,16 +8688,28 @@ err_rif_index_alloc: static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); const struct mlxsw_sp_rif_ops *ops = rif->ops; struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_crif *crif = rif->crif; struct mlxsw_sp_fid *fid = rif->fid; + u8 rif_entries = rif->rif_entries; + u16 rif_index = rif->rif_index; struct mlxsw_sp_vr *vr; int i; + atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count); mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); vr = &mlxsw_sp->router->vrs[rif->vr_id]; - mlxsw_sp_rif_counters_free(rif); + if (netdev_offload_xstats_enabled(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3)) { + mlxsw_sp_rif_push_l3_stats(rif); + mlxsw_sp_router_port_l3_stats_disable(rif); + mlxsw_sp_router_hwstats_notify_schedule(dev); + } else { + mlxsw_sp_rif_counters_free(rif); + } + for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) mlxsw_sp_mr_rif_del(vr->mr_table[i], rif); ops->deconfigure(rif); @@ -6386,10 +8717,14 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) /* Loopback RIFs are not associated with a FID. */ mlxsw_sp_fid_put(fid); mlxsw_sp->router->rifs[rif->rif_index] = NULL; - dev_put(rif->dev); - kfree(rif); + netdev_put(dev, &rif->dev_tracker); + mlxsw_sp_rif_free(rif); + mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); vr->rif_count--; mlxsw_sp_vr_put(mlxsw_sp, vr); + + if (crif->can_destroy) + mlxsw_sp_crif_free(crif); } void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, @@ -6397,10 +8732,117 @@ void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_rif *rif; + mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!rif) - return; + goto out; mlxsw_sp_rif_destroy(rif); +out: + mutex_unlock(&mlxsw_sp->router->lock); +} + +static void mlxsw_sp_rif_destroy_vlan_upper(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev, + u16 vid) +{ + struct net_device *upper_dev; + struct mlxsw_sp_crif *crif; + + rcu_read_lock(); + upper_dev = __vlan_find_dev_deep_rcu(br_dev, htons(ETH_P_8021Q), vid); + rcu_read_unlock(); + + if (!upper_dev) + return; + + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, upper_dev); + if (!crif || !crif->rif) + return; + + mlxsw_sp_rif_destroy(crif->rif); +} + +static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + int lower_pvid, + unsigned long event, + struct netlink_ext_ack *extack); + +int mlxsw_sp_router_bridge_vlan_add(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev, + u16 new_vid, bool is_pvid, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif *old_rif; + struct mlxsw_sp_rif *new_rif; + struct net_device *upper_dev; + u16 old_pvid = 0; + u16 new_pvid; + int err = 0; + + mutex_lock(&mlxsw_sp->router->lock); + old_rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, br_dev); + if (old_rif) { + /* If the RIF on the bridge is not a VLAN RIF, we shouldn't have + * gotten a PVID notification. + */ + if (WARN_ON(old_rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)) + old_rif = NULL; + else + old_pvid = mlxsw_sp_fid_8021q_vid(old_rif->fid); + } + + if (is_pvid) + new_pvid = new_vid; + else if (old_pvid == new_vid) + new_pvid = 0; + else + goto out; + + if (old_pvid == new_pvid) + goto out; + + if (new_pvid) { + struct mlxsw_sp_rif_params params = { + .dev = br_dev, + .vid = new_pvid, + }; + + /* If there is a VLAN upper with the same VID as the new PVID, + * kill its RIF, if there is one. + */ + mlxsw_sp_rif_destroy_vlan_upper(mlxsw_sp, br_dev, new_pvid); + + if (mlxsw_sp_dev_addr_list_empty(br_dev)) + goto out; + new_rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms, extack); + if (IS_ERR(new_rif)) { + err = PTR_ERR(new_rif); + goto out; + } + + if (old_pvid) + mlxsw_sp_rif_migrate_destroy(mlxsw_sp, old_rif, new_rif, + true); + } else { + mlxsw_sp_rif_destroy(old_rif); + } + + if (old_pvid) { + rcu_read_lock(); + upper_dev = __vlan_find_dev_deep_rcu(br_dev, htons(ETH_P_8021Q), + old_pvid); + rcu_read_unlock(); + if (upper_dev) + err = mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, + upper_dev, + new_pvid, + NETDEV_UP, extack); + } + +out: + mutex_unlock(&mlxsw_sp->router->lock); + return err; } static void @@ -6423,6 +8865,20 @@ mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif) return container_of(rif, struct mlxsw_sp_rif_subport, common); } +int mlxsw_sp_rif_subport_port(const struct mlxsw_sp_rif *rif, + u16 *port, bool *is_lag) +{ + struct mlxsw_sp_rif_subport *rif_subport; + + if (WARN_ON(rif->ops->type != MLXSW_SP_RIF_TYPE_SUBPORT)) + return -EINVAL; + + rif_subport = mlxsw_sp_rif_subport_rif(rif); + *is_lag = rif_subport->lag; + *port = *is_lag ? rif_subport->lag_id : rif_subport->system_port; + return 0; +} + static struct mlxsw_sp_rif * mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_params *params, @@ -6451,28 +8907,230 @@ static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif) mlxsw_sp_rif_destroy(rif); } +static int mlxsw_sp_rif_mac_profile_index_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif_mac_profile *profile, + struct netlink_ext_ack *extack) +{ + u8 max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile; + struct mlxsw_sp_router *router = mlxsw_sp->router; + int id; + + id = idr_alloc(&router->rif_mac_profiles_idr, profile, 0, + max_rif_mac_profiles, GFP_KERNEL); + + if (id >= 0) { + profile->id = id; + return 0; + } + + if (id == -ENOSPC) + NL_SET_ERR_MSG_MOD(extack, + "Exceeded number of supported router interface MAC profiles"); + + return id; +} + +static struct mlxsw_sp_rif_mac_profile * +mlxsw_sp_rif_mac_profile_index_free(struct mlxsw_sp *mlxsw_sp, u8 mac_profile) +{ + struct mlxsw_sp_rif_mac_profile *profile; + + profile = idr_remove(&mlxsw_sp->router->rif_mac_profiles_idr, + mac_profile); + WARN_ON(!profile); + return profile; +} + +static struct mlxsw_sp_rif_mac_profile * +mlxsw_sp_rif_mac_profile_alloc(const char *mac) +{ + struct mlxsw_sp_rif_mac_profile *profile; + + profile = kzalloc(sizeof(*profile), GFP_KERNEL); + if (!profile) + return NULL; + + ether_addr_copy(profile->mac_prefix, mac); + refcount_set(&profile->ref_count, 1); + return profile; +} + +static struct mlxsw_sp_rif_mac_profile * +mlxsw_sp_rif_mac_profile_find(const struct mlxsw_sp *mlxsw_sp, const char *mac) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct mlxsw_sp_rif_mac_profile *profile; + int id; + + idr_for_each_entry(&router->rif_mac_profiles_idr, profile, id) { + if (ether_addr_equal_masked(profile->mac_prefix, mac, + mlxsw_sp->mac_mask)) + return profile; + } + + return NULL; +} + +static u64 mlxsw_sp_rif_mac_profiles_occ_get(void *priv) +{ + const struct mlxsw_sp *mlxsw_sp = priv; + + return atomic_read(&mlxsw_sp->router->rif_mac_profiles_count); +} + +static u64 mlxsw_sp_rifs_occ_get(void *priv) +{ + const struct mlxsw_sp *mlxsw_sp = priv; + + return atomic_read(&mlxsw_sp->router->rifs_count); +} + +static struct mlxsw_sp_rif_mac_profile * +mlxsw_sp_rif_mac_profile_create(struct mlxsw_sp *mlxsw_sp, const char *mac, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif_mac_profile *profile; + int err; + + profile = mlxsw_sp_rif_mac_profile_alloc(mac); + if (!profile) + return ERR_PTR(-ENOMEM); + + err = mlxsw_sp_rif_mac_profile_index_alloc(mlxsw_sp, profile, extack); + if (err) + goto profile_index_alloc_err; + + atomic_inc(&mlxsw_sp->router->rif_mac_profiles_count); + return profile; + +profile_index_alloc_err: + kfree(profile); + return ERR_PTR(err); +} + +static void mlxsw_sp_rif_mac_profile_destroy(struct mlxsw_sp *mlxsw_sp, + u8 mac_profile) +{ + struct mlxsw_sp_rif_mac_profile *profile; + + atomic_dec(&mlxsw_sp->router->rif_mac_profiles_count); + profile = mlxsw_sp_rif_mac_profile_index_free(mlxsw_sp, mac_profile); + kfree(profile); +} + +static int mlxsw_sp_rif_mac_profile_get(struct mlxsw_sp *mlxsw_sp, + const char *mac, u8 *p_mac_profile, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif_mac_profile *profile; + + profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, mac); + if (profile) { + refcount_inc(&profile->ref_count); + goto out; + } + + profile = mlxsw_sp_rif_mac_profile_create(mlxsw_sp, mac, extack); + if (IS_ERR(profile)) + return PTR_ERR(profile); + +out: + *p_mac_profile = profile->id; + return 0; +} + +static void mlxsw_sp_rif_mac_profile_put(struct mlxsw_sp *mlxsw_sp, + u8 mac_profile) +{ + struct mlxsw_sp_rif_mac_profile *profile; + + profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr, + mac_profile); + if (WARN_ON(!profile)) + return; + + if (!refcount_dec_and_test(&profile->ref_count)) + return; + + mlxsw_sp_rif_mac_profile_destroy(mlxsw_sp, mac_profile); +} + +static bool mlxsw_sp_rif_mac_profile_is_shared(const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif_mac_profile *profile; + + profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr, + rif->mac_profile_id); + if (WARN_ON(!profile)) + return false; + + return refcount_read(&profile->ref_count) > 1; +} + +static int mlxsw_sp_rif_mac_profile_edit(struct mlxsw_sp_rif *rif, + const char *new_mac) +{ + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif_mac_profile *profile; + + profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr, + rif->mac_profile_id); + if (WARN_ON(!profile)) + return -EINVAL; + + ether_addr_copy(profile->mac_prefix, new_mac); + return 0; +} + static int -mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, - struct net_device *l3_dev, - struct netlink_ext_ack *extack) +mlxsw_sp_rif_mac_profile_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + const char *new_mac, + struct netlink_ext_ack *extack) +{ + u8 mac_profile; + int err; + + if (!mlxsw_sp_rif_mac_profile_is_shared(rif) && + !mlxsw_sp_rif_mac_profile_find(mlxsw_sp, new_mac)) + return mlxsw_sp_rif_mac_profile_edit(rif, new_mac); + + err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, new_mac, + &mac_profile, extack); + if (err) + return err; + + mlxsw_sp_rif_mac_profile_put(mlxsw_sp, rif->mac_profile_id); + rif->mac_profile_id = mac_profile; + return 0; +} + +static int +__mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, + struct net_device *l3_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_rif_params params = { - .dev = l3_dev, - }; + struct mlxsw_sp_rif_params params; u16 vid = mlxsw_sp_port_vlan->vid; struct mlxsw_sp_rif *rif; struct mlxsw_sp_fid *fid; int err; + params = (struct mlxsw_sp_rif_params) { + .dev = l3_dev, + .vid = vid, + }; + mlxsw_sp_rif_subport_params_init(¶ms, mlxsw_sp_port_vlan); rif = mlxsw_sp_rif_subport_get(mlxsw_sp, ¶ms, extack); if (IS_ERR(rif)) return PTR_ERR(rif); /* FID was already created, just take a reference */ - fid = rif->ops->fid_get(rif, extack); + fid = rif->ops->fid_get(rif, ¶ms, extack); err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid); if (err) goto err_fid_port_vid_map; @@ -6500,8 +9158,8 @@ err_fid_port_vid_map: return err; } -void -mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) +static void +__mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; @@ -6519,6 +9177,32 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) mlxsw_sp_rif_subport_put(rif); } +static int +mlxsw_sp_port_vlan_router_join_existing(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, + struct net_device *l3_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp; + + lockdep_assert_held(&mlxsw_sp->router->lock); + + if (!mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev)) + return 0; + + return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev, + extack); +} + +void +mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp; + + mutex_lock(&mlxsw_sp->router->lock); + __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); + mutex_unlock(&mlxsw_sp->router->lock); +} + static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, struct net_device *port_dev, unsigned long event, u16 vid, @@ -6533,10 +9217,10 @@ static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, switch (event) { case NETDEV_UP: - return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, - l3_dev, extack); + return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, + l3_dev, extack); case NETDEV_DOWN: - mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); + __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); break; } @@ -6544,12 +9228,11 @@ static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, } static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev, - unsigned long event, + unsigned long event, bool nomaster, struct netlink_ext_ack *extack) { - if (netif_is_bridge_port(port_dev) || - netif_is_lag_port(port_dev) || - netif_is_ovs_port(port_dev)) + if (!nomaster && (netif_is_any_bridge_port(port_dev) || + netif_is_lag_port(port_dev))) return 0; return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, @@ -6580,10 +9263,10 @@ static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, } static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, - unsigned long event, + unsigned long event, bool nomaster, struct netlink_ext_ack *extack) { - if (netif_is_bridge_port(lag_dev)) + if (!nomaster && netif_is_bridge_port(lag_dev)) return 0; return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, @@ -6592,6 +9275,7 @@ static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp, struct net_device *l3_dev, + int lower_pvid, unsigned long event, struct netlink_ext_ack *extack) { @@ -6599,9 +9283,33 @@ static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp, .dev = l3_dev, }; struct mlxsw_sp_rif *rif; + int err; switch (event) { case NETDEV_UP: + if (netif_is_bridge_master(l3_dev) && br_vlan_enabled(l3_dev)) { + u16 proto; + + br_vlan_get_proto(l3_dev, &proto); + if (proto == ETH_P_8021AD) { + NL_SET_ERR_MSG_MOD(extack, "Adding an IP address to 802.1ad bridge is not supported"); + return -EOPNOTSUPP; + } + err = br_vlan_get_pvid(l3_dev, ¶ms.vid); + if (err) + return err; + if (!params.vid) + return 0; + } else if (is_vlan_dev(l3_dev)) { + params.vid = vlan_dev_vlan_id(l3_dev); + + /* If the VID matches PVID of the bridge below, the + * bridge owns the RIF for this VLAN. Don't do anything. + */ + if ((int)params.vid == lower_pvid) + return 0; + } + rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms, extack); if (IS_ERR(rif)) return PTR_ERR(rif); @@ -6617,24 +9325,32 @@ static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp, struct net_device *vlan_dev, - unsigned long event, + unsigned long event, bool nomaster, struct netlink_ext_ack *extack) { struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); u16 vid = vlan_dev_vlan_id(vlan_dev); + u16 lower_pvid; + int err; - if (netif_is_bridge_port(vlan_dev)) + if (!nomaster && netif_is_bridge_port(vlan_dev)) return 0; - if (mlxsw_sp_port_dev_check(real_dev)) + if (mlxsw_sp_port_dev_check(real_dev)) { return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev, event, vid, extack); - else if (netif_is_lag_master(real_dev)) + } else if (netif_is_lag_master(real_dev)) { return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event, vid, extack); - else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev)) - return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event, + } else if (netif_is_bridge_master(real_dev) && + br_vlan_enabled(real_dev)) { + err = br_vlan_get_pvid(real_dev, &lower_pvid); + if (err) + return err; + return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, + lower_pvid, event, extack); + } return 0; } @@ -6688,10 +9404,8 @@ static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp, int err; rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev); - if (!rif) { - NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); - return -EOPNOTSUPP; - } + if (!rif) + return 0; err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr, mlxsw_sp_fid_index(rif->fid), true); @@ -6717,8 +9431,8 @@ err_rif_vrrp_add: return err; } -void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, - const struct net_device *macvlan_dev) +static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, + const struct net_device *macvlan_dev) { struct macvlan_dev *vlan = netdev_priv(macvlan_dev); struct mlxsw_sp_rif *rif; @@ -6735,6 +9449,14 @@ void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fid_index(rif->fid), false); } +void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, + const struct net_device *macvlan_dev) +{ + mutex_lock(&mlxsw_sp->router->lock); + __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev); + mutex_unlock(&mlxsw_sp->router->lock); +} + static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp, struct net_device *macvlan_dev, unsigned long event, @@ -6744,55 +9466,30 @@ static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp, case NETDEV_UP: return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack); case NETDEV_DOWN: - mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev); + __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev); break; } return 0; } -static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp, - struct net_device *dev, - const unsigned char *dev_addr, - struct netlink_ext_ack *extack) -{ - struct mlxsw_sp_rif *rif; - int i; - - /* A RIF is not created for macvlan netdevs. Their MAC is used to - * populate the FDB - */ - if (netif_is_macvlan(dev)) - return 0; - - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { - rif = mlxsw_sp->router->rifs[i]; - if (rif && rif->dev && rif->dev != dev && - !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr, - mlxsw_sp->mac_mask)) { - NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix"); - return -EINVAL; - } - } - - return 0; -} - static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, - unsigned long event, + unsigned long event, bool nomaster, struct netlink_ext_ack *extack) { if (mlxsw_sp_port_dev_check(dev)) - return mlxsw_sp_inetaddr_port_event(dev, event, extack); + return mlxsw_sp_inetaddr_port_event(dev, event, nomaster, + extack); else if (netif_is_lag_master(dev)) - return mlxsw_sp_inetaddr_lag_event(dev, event, extack); + return mlxsw_sp_inetaddr_lag_event(dev, event, nomaster, + extack); else if (netif_is_bridge_master(dev)) - return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event, + return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, -1, event, extack); else if (is_vlan_dev(dev)) return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event, - extack); + nomaster, extack); else if (netif_is_macvlan(dev)) return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event, extack); @@ -6811,20 +9508,23 @@ static int mlxsw_sp_inetaddr_event(struct notifier_block *nb, /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */ if (event == NETDEV_UP) - goto out; + return NOTIFY_DONE; router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb); + mutex_lock(&router->lock); rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev); if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL); + err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, false, + NULL); out: + mutex_unlock(&router->lock); return notifier_from_errno(err); } -int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr) { struct in_validator_info *ivi = (struct in_validator_info *) ptr; struct net_device *dev = ivi->ivi_dev->dev; @@ -6834,19 +9534,17 @@ int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, mlxsw_sp = mlxsw_sp_lower_get(dev); if (!mlxsw_sp) - goto out; + return NOTIFY_DONE; + mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr, - ivi->extack); - if (err) - goto out; - - err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack); + err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false, + ivi->extack); out: + mutex_unlock(&mlxsw_sp->router->lock); return notifier_from_errno(err); } @@ -6854,6 +9552,7 @@ struct mlxsw_sp_inet6addr_event_work { struct work_struct work; struct mlxsw_sp *mlxsw_sp; struct net_device *dev; + netdevice_tracker dev_tracker; unsigned long event; }; @@ -6867,15 +9566,17 @@ static void mlxsw_sp_inet6addr_event_work(struct work_struct *work) struct mlxsw_sp_rif *rif; rtnl_lock(); + mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL); + __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false, NULL); out: + mutex_unlock(&mlxsw_sp->router->lock); rtnl_unlock(); - dev_put(dev); + netdev_put(dev, &inet6addr_work->dev_tracker); kfree(inet6addr_work); } @@ -6901,14 +9602,14 @@ static int mlxsw_sp_inet6addr_event(struct notifier_block *nb, inet6addr_work->mlxsw_sp = router->mlxsw_sp; inet6addr_work->dev = dev; inet6addr_work->event = event; - dev_hold(dev); + netdev_hold(dev, &inet6addr_work->dev_tracker, GFP_ATOMIC); mlxsw_core_schedule_work(&inet6addr_work->work); return NOTIFY_DONE; } -int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr) { struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr; struct net_device *dev = i6vi->i6vi_dev->dev; @@ -6918,24 +9619,22 @@ int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, mlxsw_sp = mlxsw_sp_lower_get(dev); if (!mlxsw_sp) - goto out; + return NOTIFY_DONE; + mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr, - i6vi->extack); - if (err) - goto out; - - err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack); + err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false, + i6vi->extack); out: + mutex_unlock(&mlxsw_sp->router->lock); return notifier_from_errno(err); } static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index, - const char *mac, int mtu) + const char *mac, int mtu, u8 mac_profile) { char ritr_pl[MLXSW_REG_RITR_LEN]; int err; @@ -6947,15 +9646,18 @@ static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index, mlxsw_reg_ritr_mtu_set(ritr_pl, mtu); mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac); + mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, mac_profile); mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); } static int mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif) + struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) { - struct net_device *dev = rif->dev; + struct net_device *dev = mlxsw_sp_rif_dev(rif); + u8 old_mac_profile; u16 fid_index; int err; @@ -6965,8 +9667,14 @@ mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp, if (err) return err; + old_mac_profile = rif->mac_profile_id; + err = mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, dev->dev_addr, + extack); + if (err) + goto err_rif_mac_profile_replace; + err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr, - dev->mtu); + dev->mtu, rif->mac_profile_id); if (err) goto err_rif_edit; @@ -6996,8 +9704,11 @@ mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp, return 0; err_rif_fdb_op: - mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu); + mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu, + old_mac_profile); err_rif_edit: + mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, rif->addr, extack); +err_rif_mac_profile_replace: mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true); return err; } @@ -7005,16 +9716,201 @@ err_rif_edit: static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif, struct netdev_notifier_pre_changeaddr_info *info) { + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif_mac_profile *profile; struct netlink_ext_ack *extack; + u8 max_rif_mac_profiles; + u64 occ; extack = netdev_notifier_info_to_extack(&info->info); - return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev, - info->dev_addr, extack); + + profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, info->dev_addr); + if (profile) + return 0; + + max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile; + occ = mlxsw_sp_rif_mac_profiles_occ_get(mlxsw_sp); + if (occ < max_rif_mac_profiles) + return 0; + + if (!mlxsw_sp_rif_mac_profile_is_shared(rif)) + return 0; + + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interface MAC profiles"); + return -ENOBUFS; } -int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, - unsigned long event, void *ptr) +static bool mlxsw_sp_router_netdevice_interesting(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + struct vlan_dev_priv *vlan; + + if (netif_is_lag_master(dev) || + netif_is_bridge_master(dev) || + mlxsw_sp_port_dev_check(dev) || + mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev) || + netif_is_l3_master(dev)) + return true; + + if (!is_vlan_dev(dev)) + return false; + + vlan = vlan_dev_priv(dev); + return netif_is_lag_master(vlan->real_dev) || + netif_is_bridge_master(vlan->real_dev) || + mlxsw_sp_port_dev_check(vlan->real_dev); +} + +static struct mlxsw_sp_crif * +mlxsw_sp_crif_register(struct mlxsw_sp_router *router, struct net_device *dev) { + struct mlxsw_sp_crif *crif; + int err; + + if (WARN_ON(mlxsw_sp_crif_lookup(router, dev))) + return NULL; + + crif = mlxsw_sp_crif_alloc(dev); + if (!crif) + return ERR_PTR(-ENOMEM); + + err = mlxsw_sp_crif_insert(router, crif); + if (err) + goto err_netdev_insert; + + return crif; + +err_netdev_insert: + mlxsw_sp_crif_free(crif); + return ERR_PTR(err); +} + +static void mlxsw_sp_crif_unregister(struct mlxsw_sp_router *router, + struct mlxsw_sp_crif *crif) +{ + struct mlxsw_sp_nexthop *nh, *tmp; + + mlxsw_sp_crif_remove(router, crif); + + list_for_each_entry_safe(nh, tmp, &crif->nexthop_list, crif_list_node) + mlxsw_sp_nexthop_type_fini(router->mlxsw_sp, nh); + + if (crif->rif) + crif->can_destroy = true; + else + mlxsw_sp_crif_free(crif); +} + +static int mlxsw_sp_netdevice_register(struct mlxsw_sp_router *router, + struct net_device *dev) +{ + struct mlxsw_sp_crif *crif; + + if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev)) + return 0; + + crif = mlxsw_sp_crif_register(router, dev); + return PTR_ERR_OR_ZERO(crif); +} + +static void mlxsw_sp_netdevice_unregister(struct mlxsw_sp_router *router, + struct net_device *dev) +{ + struct mlxsw_sp_crif *crif; + + if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev)) + return; + + /* netdev_run_todo(), by way of netdev_wait_allrefs_any(), rebroadcasts + * the NETDEV_UNREGISTER message, so we can get here twice. If that's + * what happened, the netdevice state is NETREG_UNREGISTERED. In that + * case, we expect to have collected the CRIF already, and warn if it + * still exists. Otherwise we expect the CRIF to exist. + */ + crif = mlxsw_sp_crif_lookup(router, dev); + if (dev->reg_state == NETREG_UNREGISTERED) { + if (!WARN_ON(crif)) + return; + } + if (WARN_ON(!crif)) + return; + + mlxsw_sp_crif_unregister(router, crif); +} + +static bool mlxsw_sp_is_offload_xstats_event(unsigned long event) +{ + switch (event) { + case NETDEV_OFFLOAD_XSTATS_ENABLE: + case NETDEV_OFFLOAD_XSTATS_DISABLE: + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + return true; + } + + return false; +} + +static int +mlxsw_sp_router_port_offload_xstats_cmd(struct mlxsw_sp_rif *rif, + unsigned long event, + struct netdev_notifier_offload_xstats_info *info) +{ + switch (info->type) { + case NETDEV_OFFLOAD_XSTATS_TYPE_L3: + break; + default: + return 0; + } + + switch (event) { + case NETDEV_OFFLOAD_XSTATS_ENABLE: + return mlxsw_sp_router_port_l3_stats_enable(rif); + case NETDEV_OFFLOAD_XSTATS_DISABLE: + mlxsw_sp_router_port_l3_stats_disable(rif); + return 0; + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + mlxsw_sp_router_port_l3_stats_report_used(rif, info); + return 0; + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + return mlxsw_sp_router_port_l3_stats_report_delta(rif, info); + } + + WARN_ON_ONCE(1); + return 0; +} + +static int +mlxsw_sp_netdevice_offload_xstats_cmd(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, + unsigned long event, + struct netdev_notifier_offload_xstats_info *info) +{ + struct mlxsw_sp_rif *rif; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return 0; + + return mlxsw_sp_router_port_offload_xstats_cmd(rif, event, info); +} + +static bool mlxsw_sp_is_router_event(unsigned long event) +{ + switch (event) { + case NETDEV_PRE_CHANGEADDR: + case NETDEV_CHANGEADDR: + case NETDEV_CHANGEMTU: + return true; + default: + return false; + } +} + +static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct mlxsw_sp *mlxsw_sp; struct mlxsw_sp_rif *rif; @@ -7027,11 +9923,14 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, return 0; switch (event) { - case NETDEV_CHANGEMTU: /* fall through */ + case NETDEV_CHANGEMTU: case NETDEV_CHANGEADDR: - return mlxsw_sp_router_port_change_event(mlxsw_sp, rif); + return mlxsw_sp_router_port_change_event(mlxsw_sp, rif, extack); case NETDEV_PRE_CHANGEADDR: return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr); + default: + WARN_ON_ONCE(1); + break; } return 0; @@ -7048,10 +9947,11 @@ static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp, */ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); if (rif) - __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, + __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, false, extack); - return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack); + return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, false, + extack); } static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp, @@ -7062,11 +9962,21 @@ static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp, rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); if (!rif) return; - __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL); + __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, false, NULL); +} + +static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) +{ + struct netdev_notifier_changeupper_info *info = ptr; + + if (event != NETDEV_PRECHANGEUPPER && event != NETDEV_CHANGEUPPER) + return false; + return netif_is_l3_master(info->upper_dev); } -int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, - struct netdev_notifier_changeupper_info *info) +static int +mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, + struct netdev_notifier_changeupper_info *info) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev); int err = 0; @@ -7079,7 +9989,7 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, switch (event) { case NETDEV_PRECHANGEUPPER: - return 0; + break; case NETDEV_CHANGEUPPER: if (info->linking) { struct netlink_ext_ack *extack; @@ -7095,9 +10005,316 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, return err; } -static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data) +struct mlxsw_sp_router_replay_inetaddr_up { + struct mlxsw_sp *mlxsw_sp; + struct netlink_ext_ack *extack; + unsigned int done; + bool deslavement; +}; + +static int mlxsw_sp_router_replay_inetaddr_up(struct net_device *dev, + struct netdev_nested_priv *priv) +{ + struct mlxsw_sp_router_replay_inetaddr_up *ctx = priv->data; + bool nomaster = ctx->deslavement; + struct mlxsw_sp_crif *crif; + int err; + + if (mlxsw_sp_dev_addr_list_empty(dev)) + return 0; + + crif = mlxsw_sp_crif_lookup(ctx->mlxsw_sp->router, dev); + if (!crif || crif->rif) + return 0; + + if (!mlxsw_sp_rif_should_config(crif->rif, dev, NETDEV_UP)) + return 0; + + err = __mlxsw_sp_inetaddr_event(ctx->mlxsw_sp, dev, NETDEV_UP, + nomaster, ctx->extack); + if (err) + return err; + + ctx->done++; + return 0; +} + +static int mlxsw_sp_router_unreplay_inetaddr_up(struct net_device *dev, + struct netdev_nested_priv *priv) +{ + struct mlxsw_sp_router_replay_inetaddr_up *ctx = priv->data; + bool nomaster = ctx->deslavement; + struct mlxsw_sp_crif *crif; + + if (!ctx->done) + return 0; + + if (mlxsw_sp_dev_addr_list_empty(dev)) + return 0; + + crif = mlxsw_sp_crif_lookup(ctx->mlxsw_sp->router, dev); + if (!crif || !crif->rif) + return 0; + + /* We are rolling back NETDEV_UP, so ask for that. */ + if (!mlxsw_sp_rif_should_config(crif->rif, dev, NETDEV_UP)) + return 0; + + __mlxsw_sp_inetaddr_event(ctx->mlxsw_sp, dev, NETDEV_DOWN, nomaster, + NULL); + + ctx->done--; + return 0; +} + +int mlxsw_sp_netdevice_enslavement_replay(struct mlxsw_sp *mlxsw_sp, + struct net_device *upper_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_router_replay_inetaddr_up ctx = { + .mlxsw_sp = mlxsw_sp, + .extack = extack, + .deslavement = false, + }; + struct netdev_nested_priv priv = { + .data = &ctx, + }; + int err; + + err = mlxsw_sp_router_replay_inetaddr_up(upper_dev, &priv); + if (err) + return err; + + err = netdev_walk_all_upper_dev_rcu(upper_dev, + mlxsw_sp_router_replay_inetaddr_up, + &priv); + if (err) + goto err_replay_up; + + return 0; + +err_replay_up: + netdev_walk_all_upper_dev_rcu(upper_dev, + mlxsw_sp_router_unreplay_inetaddr_up, + &priv); + mlxsw_sp_router_unreplay_inetaddr_up(upper_dev, &priv); + return err; +} + +void mlxsw_sp_netdevice_deslavement_replay(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + struct mlxsw_sp_router_replay_inetaddr_up ctx = { + .mlxsw_sp = mlxsw_sp, + .deslavement = true, + }; + struct netdev_nested_priv priv = { + .data = &ctx, + }; + + mlxsw_sp_router_replay_inetaddr_up(dev, &priv); +} + +static int +mlxsw_sp_port_vid_router_join_existing(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid, struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, + vid); + if (WARN_ON(!mlxsw_sp_port_vlan)) + return -EINVAL; + + return mlxsw_sp_port_vlan_router_join_existing(mlxsw_sp_port_vlan, + dev, extack); +} + +static void +mlxsw_sp_port_vid_router_leave(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, + struct net_device *dev) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, + vid); + if (WARN_ON(!mlxsw_sp_port_vlan)) + return; + + __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); +} + +static int __mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev, + struct netlink_ext_ack *extack) +{ + u16 default_vid = MLXSW_SP_DEFAULT_VID; + struct net_device *upper_dev; + struct list_head *iter; + int done = 0; + u16 vid; + int err; + + err = mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port, default_vid, + lag_dev, extack); + if (err) + return err; + + netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) { + if (!is_vlan_dev(upper_dev)) + continue; + + vid = vlan_dev_vlan_id(upper_dev); + err = mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port, vid, + upper_dev, extack); + if (err) + goto err_router_join_dev; + + ++done; + } + + return 0; + +err_router_join_dev: + netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) { + if (!is_vlan_dev(upper_dev)) + continue; + if (!done--) + break; + + vid = vlan_dev_vlan_id(upper_dev); + mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, vid, upper_dev); + } + + mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, default_vid, lag_dev); + return err; +} + +static void +__mlxsw_sp_router_port_leave_lag(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + u16 default_vid = MLXSW_SP_DEFAULT_VID; + struct net_device *upper_dev; + struct list_head *iter; + u16 vid; + + netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) { + if (!is_vlan_dev(upper_dev)) + continue; + + vid = vlan_dev_vlan_id(upper_dev); + mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, vid, upper_dev); + } + + mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, default_vid, lag_dev); +} + +int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev, + struct netlink_ext_ack *extack) +{ + int err; + + mutex_lock(&mlxsw_sp_port->mlxsw_sp->router->lock); + err = __mlxsw_sp_router_port_join_lag(mlxsw_sp_port, lag_dev, extack); + mutex_unlock(&mlxsw_sp_port->mlxsw_sp->router->lock); + + return err; +} + +void mlxsw_sp_router_port_leave_lag(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + mutex_lock(&mlxsw_sp_port->mlxsw_sp->router->lock); + __mlxsw_sp_router_port_leave_lag(mlxsw_sp_port, lag_dev); + mutex_unlock(&mlxsw_sp_port->mlxsw_sp->router->lock); +} + +static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct mlxsw_sp_router *router; + struct mlxsw_sp *mlxsw_sp; + int err = 0; + + router = container_of(nb, struct mlxsw_sp_router, netdevice_nb); + mlxsw_sp = router->mlxsw_sp; + + mutex_lock(&mlxsw_sp->router->lock); + + if (event == NETDEV_REGISTER) { + err = mlxsw_sp_netdevice_register(router, dev); + if (err) + /* No need to roll this back, UNREGISTER will collect it + * anyhow. + */ + goto out; + } + + if (mlxsw_sp_is_offload_xstats_event(event)) + err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev, + event, ptr); + else if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev)) + err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev, + event, ptr); + else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev)) + err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev, + event, ptr); + else if (mlxsw_sp_is_router_event(event)) + err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr); + else if (mlxsw_sp_is_vrf_event(event, ptr)) + err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); + + if (event == NETDEV_UNREGISTER) + mlxsw_sp_netdevice_unregister(router, dev); + +out: + mutex_unlock(&mlxsw_sp->router->lock); + + return notifier_from_errno(err); +} + +struct mlxsw_sp_macvlan_replay { + struct mlxsw_sp *mlxsw_sp; + struct netlink_ext_ack *extack; +}; + +static int mlxsw_sp_macvlan_replay_upper(struct net_device *dev, + struct netdev_nested_priv *priv) +{ + const struct mlxsw_sp_macvlan_replay *rms = priv->data; + struct netlink_ext_ack *extack = rms->extack; + struct mlxsw_sp *mlxsw_sp = rms->mlxsw_sp; + + if (!netif_is_macvlan(dev)) + return 0; + + return mlxsw_sp_rif_macvlan_add(mlxsw_sp, dev, extack); +} + +static int mlxsw_sp_macvlan_replay(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_macvlan_replay rms = { + .mlxsw_sp = rif->mlxsw_sp, + .extack = extack, + }; + struct netdev_nested_priv priv = { + .data = &rms, + }; + + return netdev_walk_all_upper_dev_rcu(mlxsw_sp_rif_dev(rif), + mlxsw_sp_macvlan_replay_upper, + &priv); +} + +static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, + struct netdev_nested_priv *priv) { - struct mlxsw_sp_rif *rif = data; + struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data; if (!netif_is_macvlan(dev)) return 0; @@ -7108,12 +10325,16 @@ static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data) static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif) { - if (!netif_is_macvlan_port(rif->dev)) + struct net_device *dev = mlxsw_sp_rif_dev(rif); + struct netdev_nested_priv priv = { + .data = (void *)rif, + }; + + if (!netif_is_macvlan_port(dev)) return 0; - netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n"); - return netdev_walk_all_upper_dev_rcu(rif->dev, - __mlxsw_sp_rif_macvlan_flush, rif); + return netdev_walk_all_upper_dev_rcu(dev, + __mlxsw_sp_rif_macvlan_flush, &priv); } static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif, @@ -7133,56 +10354,85 @@ static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif, static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_rif_subport *rif_subport; char ritr_pl[MLXSW_REG_RITR_LEN]; + u16 efid; rif_subport = mlxsw_sp_rif_subport_rif(rif); mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF, - rif->rif_index, rif->vr_id, rif->dev->mtu); - mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr); + rif->rif_index, rif->vr_id, dev->mtu); + mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr); + mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id); + efid = mlxsw_sp_fid_index(rif->fid); mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag, rif_subport->lag ? rif_subport->lag_id : rif_subport->system_port, - rif_subport->vid); - + efid, 0); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); } -static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif) +static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); + u8 mac_profile; int err; - err = mlxsw_sp_rif_subport_op(rif, true); + err = mlxsw_sp_rif_mac_profile_get(rif->mlxsw_sp, rif->addr, + &mac_profile, extack); if (err) return err; + rif->mac_profile_id = mac_profile; + + err = mlxsw_sp_rif_subport_op(rif, true); + if (err) + goto err_rif_subport_op; - err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + err = mlxsw_sp_macvlan_replay(rif, extack); + if (err) + goto err_macvlan_replay; + + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), true); if (err) goto err_rif_fdb_op; - mlxsw_sp_fid_rif_set(rif->fid, rif); + err = mlxsw_sp_fid_rif_set(rif->fid, rif); + if (err) + goto err_fid_rif_set; + return 0; +err_fid_rif_set: + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), false); err_rif_fdb_op: + mlxsw_sp_rif_macvlan_flush(rif); +err_macvlan_replay: mlxsw_sp_rif_subport_op(rif, false); +err_rif_subport_op: + mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, mac_profile); return err; } static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp_fid *fid = rif->fid; - mlxsw_sp_fid_rif_set(fid, NULL); - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_rif_unset(fid); + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(fid), false); mlxsw_sp_rif_macvlan_flush(rif); mlxsw_sp_rif_subport_op(rif, false); + mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id); } static struct mlxsw_sp_fid * mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params, struct netlink_ext_ack *extack) { return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index); @@ -7197,35 +10447,45 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = { .fid_get = mlxsw_sp_rif_subport_fid_get, }; -static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif, - enum mlxsw_reg_ritr_if_type type, - u16 vid_fid, bool enable) +static int mlxsw_sp_rif_fid_op(struct mlxsw_sp_rif *rif, u16 fid, bool enable) { + enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_FID_IF; + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; char ritr_pl[MLXSW_REG_RITR_LEN]; mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id, - rif->dev->mtu); - mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr); - mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid); + dev->mtu); + mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr); + mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id); + mlxsw_reg_ritr_fid_if_fid_set(ritr_pl, fid); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); } -u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) +u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) { return mlxsw_core_max_ports(mlxsw_sp->core) + 1; } -static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif) +static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; - u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); + u16 fid_index = mlxsw_sp_fid_index(rif->fid); + u8 mac_profile; int err; - err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true); + err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr, + &mac_profile, extack); if (err) return err; + rif->mac_profile_id = mac_profile; + + err = mlxsw_sp_rif_fid_op(rif, fid_index, true); + if (err) + goto err_rif_fid_op; err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, mlxsw_sp_router_port(mlxsw_sp), true); @@ -7237,74 +10497,121 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif) if (err) goto err_fid_bc_flood_set; - err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + err = mlxsw_sp_macvlan_replay(rif, extack); + if (err) + goto err_macvlan_replay; + + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), true); if (err) goto err_rif_fdb_op; - mlxsw_sp_fid_rif_set(rif->fid, rif); + err = mlxsw_sp_fid_rif_set(rif->fid, rif); + if (err) + goto err_fid_rif_set; + return 0; +err_fid_rif_set: + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), false); err_rif_fdb_op: + mlxsw_sp_rif_macvlan_flush(rif); +err_macvlan_replay: mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), false); err_fid_bc_flood_set: mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, mlxsw_sp_router_port(mlxsw_sp), false); err_fid_mc_flood_set: - mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); + mlxsw_sp_rif_fid_op(rif, fid_index, false); +err_rif_fid_op: + mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile); return err; } -static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif) +static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) { - u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); + struct net_device *dev = mlxsw_sp_rif_dev(rif); + u16 fid_index = mlxsw_sp_fid_index(rif->fid); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_fid *fid = rif->fid; - mlxsw_sp_fid_rif_set(fid, NULL); - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_rif_unset(fid); + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(fid), false); mlxsw_sp_rif_macvlan_flush(rif); mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), false); mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, mlxsw_sp_router_port(mlxsw_sp), false); - mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); + mlxsw_sp_rif_fid_op(rif, fid_index, false); + mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id); } static struct mlxsw_sp_fid * +mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params, + struct netlink_ext_ack *extack) +{ + int rif_ifindex = mlxsw_sp_rif_dev_ifindex(rif); + + return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif_ifindex); +} + +static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) +{ + struct switchdev_notifier_fdb_info info = {}; + struct net_device *dev; + + dev = br_fdb_find_port(mlxsw_sp_rif_dev(rif), mac, 0); + if (!dev) + return; + + info.addr = mac; + info.vid = 0; + call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info, + NULL); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = { + .type = MLXSW_SP_RIF_TYPE_FID, + .rif_size = sizeof(struct mlxsw_sp_rif), + .configure = mlxsw_sp_rif_fid_configure, + .deconfigure = mlxsw_sp_rif_fid_deconfigure, + .fid_get = mlxsw_sp_rif_fid_fid_get, + .fdb_del = mlxsw_sp_rif_fid_fdb_del, +}; + +static struct mlxsw_sp_fid * mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params, struct netlink_ext_ack *extack) { - struct net_device *br_dev = rif->dev; - u16 vid; - int err; + struct net_device *dev = mlxsw_sp_rif_dev(rif); + struct net_device *br_dev; - if (is_vlan_dev(rif->dev)) { - vid = vlan_dev_vlan_id(rif->dev); - br_dev = vlan_dev_real_dev(rif->dev); + if (WARN_ON(!params->vid)) + return ERR_PTR(-EINVAL); + + if (is_vlan_dev(dev)) { + br_dev = vlan_dev_real_dev(dev); if (WARN_ON(!netif_is_bridge_master(br_dev))) return ERR_PTR(-EINVAL); - } else { - err = br_vlan_get_pvid(rif->dev, &vid); - if (err < 0 || !vid) { - NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID"); - return ERR_PTR(-EINVAL); - } } - return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, br_dev, vid, extack); + return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, params->vid); } static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) { + struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); + struct switchdev_notifier_fdb_info info = {}; u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); - struct switchdev_notifier_fdb_info info; struct net_device *br_dev; struct net_device *dev; - br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev; + br_dev = is_vlan_dev(rif_dev) ? vlan_dev_real_dev(rif_dev) : rif_dev; dev = br_fdb_find_port(br_dev, mac, vid); if (!dev) return; @@ -7315,25 +10622,38 @@ static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) NULL); } -static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = { - .type = MLXSW_SP_RIF_TYPE_VLAN, - .rif_size = sizeof(struct mlxsw_sp_rif), - .configure = mlxsw_sp_rif_vlan_configure, - .deconfigure = mlxsw_sp_rif_vlan_deconfigure, - .fid_get = mlxsw_sp_rif_vlan_fid_get, - .fdb_del = mlxsw_sp_rif_vlan_fdb_del, -}; +static int mlxsw_sp_rif_vlan_op(struct mlxsw_sp_rif *rif, u16 vid, u16 efid, + bool enable) +{ + struct net_device *dev = mlxsw_sp_rif_dev(rif); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + char ritr_pl[MLXSW_REG_RITR_LEN]; + + mlxsw_reg_ritr_vlan_if_pack(ritr_pl, enable, rif->rif_index, rif->vr_id, + dev->mtu, dev->dev_addr, + rif->mac_profile_id, vid, efid); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} -static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif) +static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid, + struct netlink_ext_ack *extack) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); + u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; - u16 fid_index = mlxsw_sp_fid_index(rif->fid); + u8 mac_profile; int err; - err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, - true); + err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr, + &mac_profile, extack); if (err) return err; + rif->mac_profile_id = mac_profile; + + err = mlxsw_sp_rif_vlan_op(rif, vid, efid, true); + if (err) + goto err_rif_vlan_fid_op; err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, mlxsw_sp_router_port(mlxsw_sp), true); @@ -7345,78 +10665,85 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif) if (err) goto err_fid_bc_flood_set; - err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + err = mlxsw_sp_macvlan_replay(rif, extack); + if (err) + goto err_macvlan_replay; + + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), true); if (err) goto err_rif_fdb_op; - mlxsw_sp_fid_rif_set(rif->fid, rif); + err = mlxsw_sp_fid_rif_set(rif->fid, rif); + if (err) + goto err_fid_rif_set; + return 0; +err_fid_rif_set: + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), false); err_rif_fdb_op: + mlxsw_sp_rif_macvlan_flush(rif); +err_macvlan_replay: mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), false); err_fid_bc_flood_set: mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, mlxsw_sp_router_port(mlxsw_sp), false); err_fid_mc_flood_set: - mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); + mlxsw_sp_rif_vlan_op(rif, vid, 0, false); +err_rif_vlan_fid_op: + mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile); return err; } -static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) +static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif) { - u16 fid_index = mlxsw_sp_fid_index(rif->fid); + struct net_device *dev = mlxsw_sp_rif_dev(rif); + u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; - struct mlxsw_sp_fid *fid = rif->fid; - mlxsw_sp_fid_rif_set(fid, NULL); - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, - mlxsw_sp_fid_index(fid), false); + mlxsw_sp_fid_rif_unset(rif->fid); + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), false); mlxsw_sp_rif_macvlan_flush(rif); mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), false); mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, mlxsw_sp_router_port(mlxsw_sp), false); - mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); + mlxsw_sp_rif_vlan_op(rif, vid, 0, false); + mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id); } -static struct mlxsw_sp_fid * -mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif, - struct netlink_ext_ack *extack) +static int mlxsw_sp1_rif_vlan_configure(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) { - return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, rif->dev, 0, extack); + return mlxsw_sp_rif_vlan_configure(rif, 0, extack); } -static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) -{ - struct switchdev_notifier_fdb_info info; - struct net_device *dev; +static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_vlan_ops = { + .type = MLXSW_SP_RIF_TYPE_VLAN, + .rif_size = sizeof(struct mlxsw_sp_rif), + .configure = mlxsw_sp1_rif_vlan_configure, + .deconfigure = mlxsw_sp_rif_vlan_deconfigure, + .fid_get = mlxsw_sp_rif_vlan_fid_get, + .fdb_del = mlxsw_sp_rif_vlan_fdb_del, +}; - dev = br_fdb_find_port(rif->dev, mac, 0); - if (!dev) - return; +static int mlxsw_sp2_rif_vlan_configure(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) +{ + u16 efid = mlxsw_sp_fid_index(rif->fid); - info.addr = mac; - info.vid = 0; - call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info, - NULL); + return mlxsw_sp_rif_vlan_configure(rif, efid, extack); } -static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = { - .type = MLXSW_SP_RIF_TYPE_FID, - .rif_size = sizeof(struct mlxsw_sp_rif), - .configure = mlxsw_sp_rif_fid_configure, - .deconfigure = mlxsw_sp_rif_fid_deconfigure, - .fid_get = mlxsw_sp_rif_fid_fid_get, - .fdb_del = mlxsw_sp_rif_fid_fdb_del, -}; - -static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = { +static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_vlan_ops = { .type = MLXSW_SP_RIF_TYPE_VLAN, .rif_size = sizeof(struct mlxsw_sp_rif), - .configure = mlxsw_sp_rif_fid_configure, - .deconfigure = mlxsw_sp_rif_fid_deconfigure, + .configure = mlxsw_sp2_rif_vlan_configure, + .deconfigure = mlxsw_sp_rif_vlan_deconfigure, .fid_get = mlxsw_sp_rif_vlan_fid_get, .fdb_del = mlxsw_sp_rif_vlan_fdb_del, }; @@ -7441,15 +10768,17 @@ mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif, } static int -mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) +mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) { struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); - u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); + struct net_device *dev = mlxsw_sp_rif_dev(rif); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_vr *ul_vr; int err; - ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL); + ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, extack); if (IS_ERR(ul_vr)) return PTR_ERR(ul_vr); @@ -7488,9 +10817,9 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = { .deconfigure = mlxsw_sp1_rif_ipip_lb_deconfigure, }; -const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = { +static const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = { [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, - [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops, + [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp1_rif_vlan_ops, [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp1_rif_ipip_lb_ops, }; @@ -7511,47 +10840,61 @@ mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable) static struct mlxsw_sp_rif * mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, + struct mlxsw_sp_crif *ul_crif, struct netlink_ext_ack *extack) { struct mlxsw_sp_rif *ul_rif; + u8 rif_entries = 1; u16 rif_index; int err; - err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); + err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries); if (err) { NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces"); return ERR_PTR(err); } - ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL); - if (!ul_rif) - return ERR_PTR(-ENOMEM); + ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, + ul_crif); + if (!ul_rif) { + err = -ENOMEM; + goto err_rif_alloc; + } mlxsw_sp->router->rifs[rif_index] = ul_rif; ul_rif->mlxsw_sp = mlxsw_sp; + ul_rif->rif_entries = rif_entries; err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true); if (err) goto ul_rif_op_err; + atomic_add(rif_entries, &mlxsw_sp->router->rifs_count); return ul_rif; ul_rif_op_err: mlxsw_sp->router->rifs[rif_index] = NULL; - kfree(ul_rif); + mlxsw_sp_rif_free(ul_rif); +err_rif_alloc: + mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); return ERR_PTR(err); } static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif) { struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + u8 rif_entries = ul_rif->rif_entries; + u16 rif_index = ul_rif->rif_index; + atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count); mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false); mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL; - kfree(ul_rif); + mlxsw_sp_rif_free(ul_rif); + mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); } static struct mlxsw_sp_rif * mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + struct mlxsw_sp_crif *ul_crif, struct netlink_ext_ack *extack) { struct mlxsw_sp_vr *vr; @@ -7564,7 +10907,7 @@ mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, if (refcount_inc_not_zero(&vr->ul_rif_refcnt)) return vr->ul_rif; - vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack); + vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, ul_crif, extack); if (IS_ERR(vr->ul_rif)) { err = PTR_ERR(vr->ul_rif); goto err_ul_rif_create; @@ -7599,40 +10942,46 @@ int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, u16 *ul_rif_index) { struct mlxsw_sp_rif *ul_rif; + int err = 0; - ASSERT_RTNL(); - - ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); - if (IS_ERR(ul_rif)) - return PTR_ERR(ul_rif); + mutex_lock(&mlxsw_sp->router->lock); + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, NULL); + if (IS_ERR(ul_rif)) { + err = PTR_ERR(ul_rif); + goto out; + } *ul_rif_index = ul_rif->rif_index; - - return 0; +out: + mutex_unlock(&mlxsw_sp->router->lock); + return err; } void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index) { struct mlxsw_sp_rif *ul_rif; - ASSERT_RTNL(); - + mutex_lock(&mlxsw_sp->router->lock); ul_rif = mlxsw_sp->router->rifs[ul_rif_index]; if (WARN_ON(!ul_rif)) - return; + goto out; mlxsw_sp_ul_rif_put(ul_rif); +out: + mutex_unlock(&mlxsw_sp->router->lock); } static int -mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) +mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif, + struct netlink_ext_ack *extack) { struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); - u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); + struct net_device *dev = mlxsw_sp_rif_dev(rif); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_rif *ul_rif; int err; - ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, extack); if (IS_ERR(ul_rif)) return PTR_ERR(ul_rif); @@ -7669,16 +11018,55 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = { .deconfigure = mlxsw_sp2_rif_ipip_lb_deconfigure, }; -const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = { +static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = { [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, - [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops, + [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp2_rif_vlan_ops, [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops, }; +static int mlxsw_sp_rifs_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct gen_pool *rifs_table; + int err; + + rifs_table = gen_pool_create(0, -1); + if (!rifs_table) + return -ENOMEM; + + gen_pool_set_algo(rifs_table, gen_pool_first_fit_order_align, + NULL); + + err = gen_pool_add(rifs_table, MLXSW_SP_ROUTER_GENALLOC_OFFSET, + MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS), -1); + if (err) + goto err_gen_pool_add; + + mlxsw_sp->router->rifs_table = rifs_table; + + return 0; + +err_gen_pool_add: + gen_pool_destroy(rifs_table); + return err; +} + +static void mlxsw_sp_rifs_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + gen_pool_destroy(mlxsw_sp->router->rifs_table); +} + static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) { u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_core *core = mlxsw_sp->core; + int err; + + if (!MLXSW_CORE_RES_VALID(core, MAX_RIF_MAC_PROFILES)) + return -EIO; + mlxsw_sp->router->max_rif_mac_profile = + MLXSW_CORE_RES_GET(core, MAX_RIF_MAC_PROFILES); mlxsw_sp->router->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *), @@ -7686,16 +11074,45 @@ static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp->router->rifs) return -ENOMEM; + err = mlxsw_sp_rifs_table_init(mlxsw_sp); + if (err) + goto err_rifs_table_init; + + idr_init(&mlxsw_sp->router->rif_mac_profiles_idr); + atomic_set(&mlxsw_sp->router->rif_mac_profiles_count, 0); + atomic_set(&mlxsw_sp->router->rifs_count, 0); + devl_resource_occ_get_register(devlink, + MLXSW_SP_RESOURCE_RIF_MAC_PROFILES, + mlxsw_sp_rif_mac_profiles_occ_get, + mlxsw_sp); + devl_resource_occ_get_register(devlink, + MLXSW_SP_RESOURCE_RIFS, + mlxsw_sp_rifs_occ_get, + mlxsw_sp); + return 0; + +err_rifs_table_init: + kfree(mlxsw_sp->router->rifs); + return err; } static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp) { + int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); int i; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) + WARN_ON_ONCE(atomic_read(&mlxsw_sp->router->rifs_count)); + for (i = 0; i < max_rifs; i++) WARN_ON_ONCE(mlxsw_sp->router->rifs[i]); + devl_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIFS); + devl_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_RIF_MAC_PROFILES); + WARN_ON(!idr_is_empty(&mlxsw_sp->router->rif_mac_profiles_idr)); + idr_destroy(&mlxsw_sp->router->rif_mac_profiles_idr); + mlxsw_sp_rifs_table_fini(mlxsw_sp); kfree(mlxsw_sp->router->rifs); } @@ -7710,11 +11127,32 @@ mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp) { - mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr; + int err; + INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list); + + err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp); + if (err) + return err; + err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp); + if (err) + return err; + return mlxsw_sp_ipip_config_tigcr(mlxsw_sp); } +static int mlxsw_sp1_ipips_init(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp->router->ipip_ops_arr = mlxsw_sp1_ipip_ops_arr; + return mlxsw_sp_ipips_init(mlxsw_sp); +} + +static int mlxsw_sp2_ipips_init(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp->router->ipip_ops_arr = mlxsw_sp2_ipip_ops_arr; + return mlxsw_sp_ipips_init(mlxsw_sp); +} + static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp) { WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list)); @@ -7734,73 +11172,295 @@ static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb) } #ifdef CONFIG_IP_ROUTE_MULTIPATH -static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header) +struct mlxsw_sp_mp_hash_config { + DECLARE_BITMAP(headers, __MLXSW_REG_RECR2_HEADER_CNT); + DECLARE_BITMAP(fields, __MLXSW_REG_RECR2_FIELD_CNT); + DECLARE_BITMAP(inner_headers, __MLXSW_REG_RECR2_HEADER_CNT); + DECLARE_BITMAP(inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT); + bool inc_parsing_depth; +}; + +#define MLXSW_SP_MP_HASH_HEADER_SET(_headers, _header) \ + bitmap_set(_headers, MLXSW_REG_RECR2_##_header, 1) + +#define MLXSW_SP_MP_HASH_FIELD_SET(_fields, _field) \ + bitmap_set(_fields, MLXSW_REG_RECR2_##_field, 1) + +#define MLXSW_SP_MP_HASH_FIELD_RANGE_SET(_fields, _field, _nr) \ + bitmap_set(_fields, MLXSW_REG_RECR2_##_field, _nr) + +static void mlxsw_sp_mp_hash_inner_l3(struct mlxsw_sp_mp_hash_config *config) { - mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true); + unsigned long *inner_headers = config->inner_headers; + unsigned long *inner_fields = config->inner_fields; + + /* IPv4 inner */ + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4); + /* IPv6 inner */ + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP); + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8); + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8); + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER); + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL); } -static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field) +static void mlxsw_sp_mp4_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config) { - mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true); + unsigned long *headers = config->headers; + unsigned long *fields = config->fields; + + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4); +} + +static void +mlxsw_sp_mp_hash_inner_custom(struct mlxsw_sp_mp_hash_config *config, + u32 hash_fields) +{ + unsigned long *inner_headers = config->inner_headers; + unsigned long *inner_fields = config->inner_fields; + + /* IPv4 Inner */ + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV4_PROTOCOL); + /* IPv6 inner */ + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) { + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8); + } + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) { + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8); + } + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL); + /* L4 inner */ + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV4); + MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV6); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_SPORT); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) + MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_DPORT); +} + +static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mp_hash_config *config) +{ + struct net *net = mlxsw_sp_net(mlxsw_sp); + unsigned long *headers = config->headers; + unsigned long *fields = config->fields; + u32 hash_fields; + + switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) { + case 0: + mlxsw_sp_mp4_hash_outer_addr(config); + break; + case 1: + mlxsw_sp_mp4_hash_outer_addr(config); + MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL); + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT); + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT); + break; + case 2: + /* Outer */ + mlxsw_sp_mp4_hash_outer_addr(config); + /* Inner */ + mlxsw_sp_mp_hash_inner_l3(config); + break; + case 3: + hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); + /* Outer */ + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT); + /* Inner */ + mlxsw_sp_mp_hash_inner_custom(config, hash_fields); + break; + } } -static void mlxsw_sp_mp4_hash_init(char *recr2_pl) +static void mlxsw_sp_mp6_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config) { - bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy; + unsigned long *headers = config->headers; + unsigned long *fields = config->fields; - mlxsw_sp_mp_hash_header_set(recr2_pl, - MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP); - mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP); - mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl); - mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl); - if (only_l3) - return; - mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4); - mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL); - mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT); - mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT); + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8); } -static void mlxsw_sp_mp6_hash_init(char *recr2_pl) +static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mp_hash_config *config) { - bool only_l3 = !ip6_multipath_hash_policy(&init_net); + u32 hash_fields = ip6_multipath_hash_fields(mlxsw_sp_net(mlxsw_sp)); + unsigned long *headers = config->headers; + unsigned long *fields = config->fields; - mlxsw_sp_mp_hash_header_set(recr2_pl, - MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP); - mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP); - mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl); - mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl); - mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER); - if (only_l3) { - mlxsw_sp_mp_hash_field_set(recr2_pl, - MLXSW_REG_RECR2_IPV6_FLOW_LABEL); - } else { - mlxsw_sp_mp_hash_header_set(recr2_pl, - MLXSW_REG_RECR2_TCP_UDP_EN_IPV6); - mlxsw_sp_mp_hash_field_set(recr2_pl, - MLXSW_REG_RECR2_TCP_UDP_SPORT); - mlxsw_sp_mp_hash_field_set(recr2_pl, - MLXSW_REG_RECR2_TCP_UDP_DPORT); + switch (ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp))) { + case 0: + mlxsw_sp_mp6_hash_outer_addr(config); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL); + break; + case 1: + mlxsw_sp_mp6_hash_outer_addr(config); + MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER); + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT); + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT); + break; + case 2: + /* Outer */ + mlxsw_sp_mp6_hash_outer_addr(config); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER); + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL); + /* Inner */ + mlxsw_sp_mp_hash_inner_l3(config); + config->inc_parsing_depth = true; + break; + case 3: + /* Outer */ + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP); + MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) { + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8); + } + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) { + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7); + MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8); + } + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL) + MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT); + /* Inner */ + mlxsw_sp_mp_hash_inner_custom(config, hash_fields); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK) + config->inc_parsing_depth = true; + break; + } +} + +static int mlxsw_sp_mp_hash_parsing_depth_adjust(struct mlxsw_sp *mlxsw_sp, + bool old_inc_parsing_depth, + bool new_inc_parsing_depth) +{ + int err; + + if (!old_inc_parsing_depth && new_inc_parsing_depth) { + err = mlxsw_sp_parsing_depth_inc(mlxsw_sp); + if (err) + return err; + mlxsw_sp->router->inc_parsing_depth = true; + } else if (old_inc_parsing_depth && !new_inc_parsing_depth) { + mlxsw_sp_parsing_depth_dec(mlxsw_sp); + mlxsw_sp->router->inc_parsing_depth = false; } + + return 0; } static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) { + bool old_inc_parsing_depth, new_inc_parsing_depth; + struct mlxsw_sp_mp_hash_config config = {}; + struct net *net = mlxsw_sp_net(mlxsw_sp); char recr2_pl[MLXSW_REG_RECR2_LEN]; + unsigned long bit; u32 seed; + int err; + + seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).user_seed; + if (!seed) + seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0); - get_random_bytes(&seed, sizeof(seed)); mlxsw_reg_recr2_pack(recr2_pl, seed); - mlxsw_sp_mp4_hash_init(recr2_pl); - mlxsw_sp_mp6_hash_init(recr2_pl); + mlxsw_sp_mp4_hash_init(mlxsw_sp, &config); + mlxsw_sp_mp6_hash_init(mlxsw_sp, &config); + + old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth; + new_inc_parsing_depth = config.inc_parsing_depth; + err = mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, + old_inc_parsing_depth, + new_inc_parsing_depth); + if (err) + return err; + + for_each_set_bit(bit, config.headers, __MLXSW_REG_RECR2_HEADER_CNT) + mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, bit, 1); + for_each_set_bit(bit, config.fields, __MLXSW_REG_RECR2_FIELD_CNT) + mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, bit, 1); + for_each_set_bit(bit, config.inner_headers, __MLXSW_REG_RECR2_HEADER_CNT) + mlxsw_reg_recr2_inner_header_enables_set(recr2_pl, bit, 1); + for_each_set_bit(bit, config.inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT) + mlxsw_reg_recr2_inner_header_fields_enable_set(recr2_pl, bit, 1); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl); + if (err) + goto err_reg_write; + + return 0; + +err_reg_write: + mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, new_inc_parsing_depth, + old_inc_parsing_depth); + return err; +} + +static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp) +{ + bool old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth; + + mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, old_inc_parsing_depth, + false); } #else static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) { return 0; } + +static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp) +{ +} #endif static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp) @@ -7823,22 +11483,20 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp) static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { - bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority; + struct net *net = mlxsw_sp_net(mlxsw_sp); char rgcr_pl[MLXSW_REG_RGCR_LEN]; u64 max_rifs; - int err; + bool usp; if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS)) return -EIO; max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority); mlxsw_reg_rgcr_pack(rgcr_pl, true, true); mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); mlxsw_reg_rgcr_usp_set(rgcr_pl, usp); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); - if (err) - return err; - return 0; + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) @@ -7849,39 +11507,113 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } -int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct mlxsw_sp_rif *lb_rif; + int err; + + router->lb_crif = mlxsw_sp_crif_alloc(NULL); + if (!router->lb_crif) + return -ENOMEM; + + /* Create a generic loopback RIF associated with the main table + * (default VRF). Any table can be used, but the main table exists + * anyway, so we do not waste resources. Loopback RIFs are usually + * created with a NULL CRIF, but this RIF is used as a fallback RIF + * for blackhole nexthops, and nexthops expect to have a valid CRIF. + */ + lb_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, router->lb_crif, + extack); + if (IS_ERR(lb_rif)) { + err = PTR_ERR(lb_rif); + goto err_ul_rif_get; + } + + return 0; + +err_ul_rif_get: + mlxsw_sp_crif_free(router->lb_crif); + return err; +} + +static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_ul_rif_put(mlxsw_sp->router->lb_crif->rif); + mlxsw_sp_crif_free(mlxsw_sp->router->lb_crif); +} + +static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp) +{ + size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp1_adj_grp_size_ranges); + + mlxsw_sp->router->rif_ops_arr = mlxsw_sp1_rif_ops_arr; + mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp1_adj_grp_size_ranges; + mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count; + + return 0; +} + +const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops = { + .init = mlxsw_sp1_router_init, + .ipips_init = mlxsw_sp1_ipips_init, +}; + +static int mlxsw_sp2_router_init(struct mlxsw_sp *mlxsw_sp) +{ + size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp2_adj_grp_size_ranges); + + mlxsw_sp->router->rif_ops_arr = mlxsw_sp2_rif_ops_arr; + mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp2_adj_grp_size_ranges; + mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count; + + return 0; +} + +const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops = { + .init = mlxsw_sp2_router_init, + .ipips_init = mlxsw_sp2_ipips_init, +}; + +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, + struct netlink_ext_ack *extack) { struct mlxsw_sp_router *router; + struct notifier_block *nb; int err; router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL); if (!router) return -ENOMEM; + mutex_init(&router->lock); mlxsw_sp->router = router; router->mlxsw_sp = mlxsw_sp; - router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event; - err = register_inetaddr_notifier(&router->inetaddr_nb); + err = mlxsw_sp->router_ops->init(mlxsw_sp); if (err) - goto err_register_inetaddr_notifier; - - router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event; - err = register_inet6addr_notifier(&router->inet6addr_nb); - if (err) - goto err_register_inet6addr_notifier; + goto err_router_ops_init; + INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list); + INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw, + mlxsw_sp_nh_grp_activity_work); INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list); err = __mlxsw_sp_router_init(mlxsw_sp); if (err) goto err_router_init; - err = mlxsw_sp_rifs_init(mlxsw_sp); + err = mlxsw_sp->router_ops->ipips_init(mlxsw_sp); if (err) - goto err_rifs_init; + goto err_ipips_init; - err = mlxsw_sp_ipips_init(mlxsw_sp); + err = rhashtable_init(&mlxsw_sp->router->crif_ht, + &mlxsw_sp_crif_ht_params); if (err) - goto err_ipips_init; + goto err_crif_ht_init; + + err = mlxsw_sp_rifs_init(mlxsw_sp); + if (err) + goto err_rifs_init; err = rhashtable_init(&mlxsw_sp->router->nexthop_ht, &mlxsw_sp_nexthop_ht_params); @@ -7906,39 +11638,98 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_vrs_init; + err = mlxsw_sp_lb_rif_init(mlxsw_sp, extack); + if (err) + goto err_lb_rif_init; + err = mlxsw_sp_neigh_init(mlxsw_sp); if (err) goto err_neigh_init; + err = mlxsw_sp_mp_hash_init(mlxsw_sp); + if (err) + goto err_mp_hash_init; + + err = mlxsw_sp_dscp_init(mlxsw_sp); + if (err) + goto err_dscp_init; + + router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event; + err = register_inetaddr_notifier(&router->inetaddr_nb); + if (err) + goto err_register_inetaddr_notifier; + + router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event; + err = register_inet6addr_notifier(&router->inet6addr_nb); + if (err) + goto err_register_inet6addr_notifier; + + router->inetaddr_valid_nb.notifier_call = mlxsw_sp_inetaddr_valid_event; + err = register_inetaddr_validator_notifier(&router->inetaddr_valid_nb); + if (err) + goto err_register_inetaddr_valid_notifier; + + nb = &router->inet6addr_valid_nb; + nb->notifier_call = mlxsw_sp_inet6addr_valid_event; + err = register_inet6addr_validator_notifier(nb); + if (err) + goto err_register_inet6addr_valid_notifier; + mlxsw_sp->router->netevent_nb.notifier_call = mlxsw_sp_router_netevent_event; err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb); if (err) goto err_register_netevent_notifier; - err = mlxsw_sp_mp_hash_init(mlxsw_sp); + mlxsw_sp->router->netdevice_nb.notifier_call = + mlxsw_sp_router_netdevice_event; + err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->netdevice_nb); if (err) - goto err_mp_hash_init; + goto err_register_netdev_notifier; - err = mlxsw_sp_dscp_init(mlxsw_sp); + mlxsw_sp->router->nexthop_nb.notifier_call = + mlxsw_sp_nexthop_obj_event; + err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->nexthop_nb, + extack); if (err) - goto err_dscp_init; + goto err_register_nexthop_notifier; mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event; - err = register_fib_notifier(&mlxsw_sp->router->fib_nb, - mlxsw_sp_router_fib_dump_flush); + err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->fib_nb, + mlxsw_sp_router_fib_dump_flush, extack); if (err) goto err_register_fib_notifier; return 0; err_register_fib_notifier: -err_dscp_init: -err_mp_hash_init: + unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->nexthop_nb); +err_register_nexthop_notifier: + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &router->netdevice_nb); +err_register_netdev_notifier: unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); err_register_netevent_notifier: + unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb); +err_register_inet6addr_valid_notifier: + unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb); +err_register_inetaddr_valid_notifier: + unregister_inet6addr_notifier(&router->inet6addr_nb); +err_register_inet6addr_notifier: + unregister_inetaddr_notifier(&router->inetaddr_nb); +err_register_inetaddr_notifier: + mlxsw_core_flush_owq(); +err_dscp_init: + mlxsw_sp_mp_hash_fini(mlxsw_sp); +err_mp_hash_init: mlxsw_sp_neigh_fini(mlxsw_sp); err_neigh_init: + mlxsw_sp_lb_rif_fini(mlxsw_sp); +err_lb_rif_init: mlxsw_sp_vrs_fini(mlxsw_sp); err_vrs_init: mlxsw_sp_mr_fini(mlxsw_sp); @@ -7949,34 +11740,49 @@ err_lpm_init: err_nexthop_group_ht_init: rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); err_nexthop_ht_init: - mlxsw_sp_ipips_fini(mlxsw_sp); -err_ipips_init: mlxsw_sp_rifs_fini(mlxsw_sp); err_rifs_init: + rhashtable_destroy(&mlxsw_sp->router->crif_ht); +err_crif_ht_init: + mlxsw_sp_ipips_fini(mlxsw_sp); +err_ipips_init: __mlxsw_sp_router_fini(mlxsw_sp); err_router_init: - unregister_inet6addr_notifier(&router->inet6addr_nb); -err_register_inet6addr_notifier: - unregister_inetaddr_notifier(&router->inetaddr_nb); -err_register_inetaddr_notifier: + cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw); +err_router_ops_init: + mutex_destroy(&mlxsw_sp->router->lock); kfree(mlxsw_sp->router); return err; } void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { - unregister_fib_notifier(&mlxsw_sp->router->fib_nb); - unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); + struct mlxsw_sp_router *router = mlxsw_sp->router; + + unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), &router->fib_nb); + unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), + &router->nexthop_nb); + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &router->netdevice_nb); + unregister_netevent_notifier(&router->netevent_nb); + unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb); + unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb); + unregister_inet6addr_notifier(&router->inet6addr_nb); + unregister_inetaddr_notifier(&router->inetaddr_nb); + mlxsw_core_flush_owq(); + mlxsw_sp_mp_hash_fini(mlxsw_sp); mlxsw_sp_neigh_fini(mlxsw_sp); + mlxsw_sp_lb_rif_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); mlxsw_sp_mr_fini(mlxsw_sp); mlxsw_sp_lpm_fini(mlxsw_sp); - rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); - rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); - mlxsw_sp_ipips_fini(mlxsw_sp); + rhashtable_destroy(&router->nexthop_group_ht); + rhashtable_destroy(&router->nexthop_ht); mlxsw_sp_rifs_fini(mlxsw_sp); + rhashtable_destroy(&mlxsw_sp->router->crif_ht); + mlxsw_sp_ipips_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); - unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb); - unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb); - kfree(mlxsw_sp->router); + cancel_delayed_work_sync(&router->nh_grp_activity_dw); + mutex_destroy(&router->lock); + kfree(router); } |
