Diffstat (limited to 'drivers/net/ethernet/mellanox/mlxsw/spectrum.c')
-rw-r--r--	drivers/net/ethernet/mellanox/mlxsw/spectrum.c	| 996
1 file changed, 560 insertions(+), 436 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 02a327744a61..9a2d64a0a858 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -107,82 +107,16 @@ static const unsigned char mlxsw_sp2_mac_mask[ETH_ALEN] = {
 	0xff, 0xff, 0xff, 0xff, 0xf0, 0x00
 };
 
-/* tx_hdr_version
- * Tx header version.
- * Must be set to 1.
- */
-MLXSW_ITEM32(tx, hdr, version, 0x00, 28, 4);
-
-/* tx_hdr_ctl
- * Packet control type.
- * 0 - Ethernet control (e.g. EMADs, LACP)
- * 1 - Ethernet data
- */
-MLXSW_ITEM32(tx, hdr, ctl, 0x00, 26, 2);
-
-/* tx_hdr_proto
- * Packet protocol type. Must be set to 1 (Ethernet).
- */
-MLXSW_ITEM32(tx, hdr, proto, 0x00, 21, 3);
-
-/* tx_hdr_rx_is_router
- * Packet is sent from the router. Valid for data packets only.
- */
-MLXSW_ITEM32(tx, hdr, rx_is_router, 0x00, 19, 1);
-
-/* tx_hdr_fid_valid
- * Indicates if the 'fid' field is valid and should be used for
- * forwarding lookup. Valid for data packets only.
- */
-MLXSW_ITEM32(tx, hdr, fid_valid, 0x00, 16, 1);
-
-/* tx_hdr_swid
- * Switch partition ID. Must be set to 0.
- */
-MLXSW_ITEM32(tx, hdr, swid, 0x00, 12, 3);
-
-/* tx_hdr_control_tclass
- * Indicates if the packet should use the control TClass and not one
- * of the data TClasses.
- */
-MLXSW_ITEM32(tx, hdr, control_tclass, 0x00, 6, 1);
-
-/* tx_hdr_etclass
- * Egress TClass to be used on the egress device on the egress port.
- */
-MLXSW_ITEM32(tx, hdr, etclass, 0x00, 0, 4);
-
-/* tx_hdr_port_mid
- * Destination local port for unicast packets.
- * Destination multicast ID for multicast packets.
- *
- * Control packets are directed to a specific egress port, while data
- * packets are transmitted through the CPU port (0) into the switch partition,
- * where forwarding rules are applied.
- */
-MLXSW_ITEM32(tx, hdr, port_mid, 0x04, 16, 16);
-
-/* tx_hdr_fid
- * Forwarding ID used for L2 forwarding lookup. Valid only if 'fid_valid' is
- * set, otherwise calculated based on the packet's VID using VID to FID mapping.
- * Valid for data packets only.
- */
-MLXSW_ITEM32(tx, hdr, fid, 0x08, 16, 16);
-
-/* tx_hdr_type
- * 0 - Data packets
- * 6 - Control packets
- */
-MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4);
-
 int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
-			      unsigned int counter_index, u64 *packets,
-			      u64 *bytes)
+			      unsigned int counter_index, bool clear,
+			      u64 *packets, u64 *bytes)
 {
+	enum mlxsw_reg_mgpc_opcode op = clear ? MLXSW_REG_MGPC_OPCODE_CLEAR :
+						MLXSW_REG_MGPC_OPCODE_NOP;
 	char mgpc_pl[MLXSW_REG_MGPC_LEN];
 	int err;
 
-	mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_NOP,
+	mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, op,
 			    MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES);
 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl);
 	if (err)
@@ -231,61 +165,6 @@ void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp,
 			counter_index);
 }
 
-void mlxsw_sp_txhdr_construct(struct sk_buff *skb,
-			      const struct mlxsw_tx_info *tx_info)
-{
-	char *txhdr = skb_push(skb, MLXSW_TXHDR_LEN);
-
-	memset(txhdr, 0, MLXSW_TXHDR_LEN);
-
-	mlxsw_tx_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1);
-	mlxsw_tx_hdr_ctl_set(txhdr, MLXSW_TXHDR_ETH_CTL);
-	mlxsw_tx_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH);
-	mlxsw_tx_hdr_swid_set(txhdr, 0);
-	mlxsw_tx_hdr_control_tclass_set(txhdr, 1);
-	mlxsw_tx_hdr_port_mid_set(txhdr, tx_info->local_port);
-	mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL);
-}
-
-int
-mlxsw_sp_txhdr_ptp_data_construct(struct mlxsw_core *mlxsw_core,
-				  struct mlxsw_sp_port *mlxsw_sp_port,
-				  struct sk_buff *skb,
-				  const struct mlxsw_tx_info *tx_info)
-{
-	char *txhdr;
-	u16 max_fid;
-	int err;
-
-	if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) {
-		err = -ENOMEM;
-		goto err_skb_cow_head;
-	}
-
-	if (!MLXSW_CORE_RES_VALID(mlxsw_core, FID)) {
-		err = -EIO;
-		goto err_res_valid;
-	}
-	max_fid = MLXSW_CORE_RES_GET(mlxsw_core, FID);
-
-	txhdr = skb_push(skb, MLXSW_TXHDR_LEN);
-	memset(txhdr, 0, MLXSW_TXHDR_LEN);
-
-	mlxsw_tx_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1);
-	mlxsw_tx_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH);
-	mlxsw_tx_hdr_rx_is_router_set(txhdr, true);
-	mlxsw_tx_hdr_fid_valid_set(txhdr, true);
-	mlxsw_tx_hdr_fid_set(txhdr, max_fid + tx_info->local_port - 1);
-	mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_DATA);
-	return 0;
-
-err_res_valid:
-err_skb_cow_head:
-	this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped);
-	dev_kfree_skb_any(skb);
-	return err;
-}
-
 static bool mlxsw_sp_skb_requires_ts(struct sk_buff *skb)
 {
 	unsigned int type;
@@ -297,30 +176,49 @@ static bool mlxsw_sp_skb_requires_ts(struct sk_buff *skb)
 	return !!ptp_parse_header(skb, type);
 }
 
-static int mlxsw_sp_txhdr_handle(struct mlxsw_core *mlxsw_core,
-				 struct mlxsw_sp_port *mlxsw_sp_port,
-				 struct sk_buff *skb,
-				 const struct mlxsw_tx_info *tx_info)
+static void mlxsw_sp_txhdr_info_data_init(struct mlxsw_core *mlxsw_core,
+					  struct sk_buff *skb,
+					  struct mlxsw_txhdr_info *txhdr_info)
 {
-	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	/* Resource validation was done as part of PTP init. */
+	u16 max_fid = MLXSW_CORE_RES_GET(mlxsw_core, FID);
 
-	/* In Spectrum-2 and Spectrum-3, PTP events that require a time stamp
-	 * need special handling and cannot be transmitted as regular control
-	 * packets.
+	txhdr_info->data = true;
+	txhdr_info->max_fid = max_fid;
+}
+
+static struct sk_buff *
+mlxsw_sp_vlan_tag_push(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb)
+{
+	/* In some Spectrum ASICs, in order for PTP event packets to have their
+	 * correction field correctly set on the egress port they must be
+	 * transmitted as data packets. Such packets ingress the ASIC via the
+	 * CPU port and must have a VLAN tag, as the CPU port is not configured
+	 * with a PVID. Push the default VLAN (4095), which is configured as
+	 * egress untagged on all the ports.
 	 */
-	if (unlikely(mlxsw_sp_skb_requires_ts(skb)))
-		return mlxsw_sp->ptp_ops->txhdr_construct(mlxsw_core,
-							  mlxsw_sp_port, skb,
-							  tx_info);
+	if (skb_vlan_tagged(skb))
+		return skb;
 
-	if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) {
-		this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped);
-		dev_kfree_skb_any(skb);
-		return -ENOMEM;
-	}
+	return vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
+					 MLXSW_SP_DEFAULT_VID);
+}
 
-	mlxsw_sp_txhdr_construct(skb, tx_info);
-	return 0;
+static struct sk_buff *
+mlxsw_sp_txhdr_preparations(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+			    struct mlxsw_txhdr_info *txhdr_info)
+{
+	if (likely(!mlxsw_sp_skb_requires_ts(skb)))
+		return skb;
+
+	if (!mlxsw_sp->ptp_ops->tx_as_data)
+		return skb;
+
+	/* Special handling for PTP events that require a time stamp and cannot
+	 * be transmitted as regular control packets.
	 */
+	mlxsw_sp_txhdr_info_data_init(mlxsw_sp->core, skb, txhdr_info);
+	return mlxsw_sp_vlan_tag_push(mlxsw_sp, skb);
 }
 
 enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 state)
@@ -403,29 +301,12 @@ static int mlxsw_sp_port_dev_addr_init(struct mlxsw_sp_port *mlxsw_sp_port)
 					    mlxsw_sp_port->dev->dev_addr);
 }
 
-static int mlxsw_sp_port_max_mtu_get(struct mlxsw_sp_port *mlxsw_sp_port, int *p_max_mtu)
-{
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	char pmtu_pl[MLXSW_REG_PMTU_LEN];
-	int err;
-
-	mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, 0);
-	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl);
-	if (err)
-		return err;
-
-	*p_max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl);
-	return 0;
-}
-
 static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	char pmtu_pl[MLXSW_REG_PMTU_LEN];
 
-	mtu += MLXSW_TXHDR_LEN + ETH_HLEN;
-	if (mtu > mlxsw_sp_port->max_mtu)
-		return -EINVAL;
+	mtu += MLXSW_PORT_ETH_FRAME_HDR;
 
 	mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, mtu);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl);
@@ -736,16 +617,16 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb,
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct mlxsw_sp_port_pcpu_stats *pcpu_stats;
-	const struct mlxsw_tx_info tx_info = {
-		.local_port = mlxsw_sp_port->local_port,
-		.is_emad = false,
+	struct mlxsw_txhdr_info txhdr_info = {
+		.tx_info.local_port = mlxsw_sp_port->local_port,
+		.tx_info.is_emad = false,
 	};
 	u64 len;
 	int err;
 
 	memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb));
 
-	if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info))
+	if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &txhdr_info.tx_info))
 		return NETDEV_TX_BUSY;
 
 	if (eth_skb_pad(skb)) {
@@ -753,10 +634,11 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb,
 		return NETDEV_TX_OK;
 	}
 
-	err = mlxsw_sp_txhdr_handle(mlxsw_sp->core, mlxsw_sp_port, skb,
-				    &tx_info);
-	if (err)
+	skb = mlxsw_sp_txhdr_preparations(mlxsw_sp, skb, &txhdr_info);
+	if (!skb) {
+		this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped);
 		return NETDEV_TX_OK;
+	}
 
 	/* TX header is consumed by HW on the way so we shouldn't count its
 	 * bytes as being sent.
@@ -766,7 +648,7 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb,
 	/* Due to a race we might fail here because of a full queue. In that
 	 * unlikely case we simply drop the packet.
	 */
-	err = mlxsw_core_skb_transmit(mlxsw_sp->core, skb, &tx_info);
+	err = mlxsw_core_skb_transmit(mlxsw_sp->core, skb, &txhdr_info);
 
 	if (!err) {
 		pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats);
@@ -823,7 +705,7 @@ static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu)
 	err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu);
 	if (err)
 		goto err_port_mtu_set;
-	dev->mtu = mtu;
+	WRITE_ONCE(dev->mtu, mtu);
 	return 0;
 
 err_port_mtu_set:
@@ -1132,8 +1014,8 @@ static int mlxsw_sp_port_add_vid(struct net_device *dev,
 	return PTR_ERR_OR_ZERO(mlxsw_sp_port_vlan_create(mlxsw_sp_port, vid));
 }
 
-static int mlxsw_sp_port_kill_vid(struct net_device *dev,
-				  __be16 __always_unused proto, u16 vid)
+int mlxsw_sp_port_kill_vid(struct net_device *dev,
+			   __be16 __always_unused proto, u16 vid)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
@@ -1277,63 +1159,31 @@ static int mlxsw_sp_set_features(struct net_device *dev,
 	return 0;
 }
 
-static int mlxsw_sp_port_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
-				      struct ifreq *ifr)
+static int mlxsw_sp_port_hwtstamp_set(struct net_device *dev,
+				      struct kernel_hwtstamp_config *config,
+				      struct netlink_ext_ack *extack)
 {
-	struct hwtstamp_config config;
-	int err;
-
-	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
-		return -EFAULT;
-
-	err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port,
-							     &config);
-	if (err)
-		return err;
-
-	if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
-		return -EFAULT;
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 
-	return 0;
+	return mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port,
+							      config, extack);
 }
 
-static int mlxsw_sp_port_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
-				      struct ifreq *ifr)
+static int mlxsw_sp_port_hwtstamp_get(struct net_device *dev,
+				      struct kernel_hwtstamp_config *config)
 {
-	struct hwtstamp_config config;
-	int err;
-
-	err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port,
-							     &config);
-	if (err)
-		return err;
-
-	if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
-		return -EFAULT;
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 
-	return 0;
+	return mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port,
							      config);
 }
 
 static inline void mlxsw_sp_port_ptp_clear(struct mlxsw_sp_port *mlxsw_sp_port)
 {
-	struct hwtstamp_config config = {0};
-
-	mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config);
-}
+	struct kernel_hwtstamp_config config = {};
 
-static int
-mlxsw_sp_port_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-{
-	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
-
-	switch (cmd) {
-	case SIOCSHWTSTAMP:
-		return mlxsw_sp_port_hwtstamp_set(mlxsw_sp_port, ifr);
-	case SIOCGHWTSTAMP:
-		return mlxsw_sp_port_hwtstamp_get(mlxsw_sp_port, ifr);
-	default:
-		return -EOPNOTSUPP;
-	}
+	mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config,
						       NULL);
 }
 
 static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
@@ -1350,7 +1200,8 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
	.ndo_vlan_rx_add_vid	= mlxsw_sp_port_add_vid,
	.ndo_vlan_rx_kill_vid	= mlxsw_sp_port_kill_vid,
	.ndo_set_features	= mlxsw_sp_set_features,
-	.ndo_eth_ioctl		= mlxsw_sp_port_ioctl,
+	.ndo_hwtstamp_get	= mlxsw_sp_port_hwtstamp_get,
+	.ndo_hwtstamp_set	= mlxsw_sp_port_hwtstamp_set,
 };
 
 static int
@@ -1691,12 +1542,16 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port,
 
 	netif_carrier_off(dev);
 
-	dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_LLTX | NETIF_F_SG |
-			 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC;
-	dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK;
+	dev->features |= NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_FILTER |
+			 NETIF_F_HW_TC | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK |
+			    NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	dev->vlan_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	dev->lltx = true;
+	dev->netns_immutable = true;
 
-	dev->min_mtu = 0;
-	dev->max_mtu = ETH_MAX_MTU;
+	dev->min_mtu = ETH_MIN_MTU;
+	dev->max_mtu = MLXSW_PORT_MAX_MTU - MLXSW_PORT_ETH_FRAME_HDR;
 
 	/* Each packet needs to have a Tx header (metadata) on top all other
 	 * headers.
@@ -1725,13 +1580,6 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port,
 		goto err_max_speed_get;
 	}
 
-	err = mlxsw_sp_port_max_mtu_get(mlxsw_sp_port, &mlxsw_sp_port->max_mtu);
-	if (err) {
-		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to get maximum MTU\n",
-			mlxsw_sp_port->local_port);
-		goto err_port_max_mtu_get;
-	}
-
 	err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, ETH_DATA_LEN);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set MTU\n",
@@ -1875,7 +1723,6 @@ err_port_ets_init:
 err_port_buffers_init:
 err_port_admin_status_set:
 err_port_mtu_set:
-err_port_max_mtu_get:
 err_max_speed_get:
 err_port_speed_by_width_set:
 err_port_system_port_mapping_set:
@@ -2470,7 +2317,7 @@ void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
 	u64_stats_update_end(&pcpu_stats->syncp);
 
 	skb->protocol = eth_type_trans(skb, skb->dev);
-	netif_receive_skb(skb);
+	napi_gro_receive(mlxsw_skb_cb(skb)->rx_md_info.napi, skb);
 }
 
 static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u16 local_port,
@@ -2528,11 +2375,11 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
			  ROUTER_EXP, false),
	MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_DIP_LINK_LOCAL, FORWARD,
			     ROUTER_EXP, false),
+	MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_LINK_LOCAL, FORWARD,
+			     ROUTER_EXP, false),
	/* Multicast Router Traps */
	MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
	MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
-	/* NVE traps */
-	MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, NEIGH_DISCOVERY, false),
 };
 
 static const struct mlxsw_listener mlxsw_sp1_listener[] = {
@@ -2692,12 +2539,62 @@ static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
	kfree(mlxsw_sp->trap);
 }
 
+static int mlxsw_sp_lag_pgt_init(struct mlxsw_sp *mlxsw_sp)
+{
+	char sgcr_pl[MLXSW_REG_SGCR_LEN];
+	int err;
+
+	if (mlxsw_core_lag_mode(mlxsw_sp->core) !=
+	    MLXSW_CMD_MBOX_CONFIG_PROFILE_LAG_MODE_SW)
+		return 0;
+
+	/* In DDD mode, which we by default use, each LAG entry is 8 PGT
+	 * entries. The LAG table address needs to be 8-aligned, but that ought
+	 * to be the case, since the LAG table is allocated first.
	 */
+	err = mlxsw_sp_pgt_mid_alloc_range(mlxsw_sp, &mlxsw_sp->lag_pgt_base,
+					   mlxsw_sp->max_lag * 8);
+	if (err)
+		return err;
+	if (WARN_ON_ONCE(mlxsw_sp->lag_pgt_base % 8)) {
+		err = -EINVAL;
+		goto err_mid_alloc_range;
+	}
+
+	mlxsw_reg_sgcr_pack(sgcr_pl, mlxsw_sp->lag_pgt_base);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sgcr), sgcr_pl);
+	if (err)
+		goto err_mid_alloc_range;
+
+	return 0;
+
+err_mid_alloc_range:
+	mlxsw_sp_pgt_mid_free_range(mlxsw_sp, mlxsw_sp->lag_pgt_base,
+				    mlxsw_sp->max_lag * 8);
+	return err;
+}
+
+static void mlxsw_sp_lag_pgt_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	if (mlxsw_core_lag_mode(mlxsw_sp->core) !=
+	    MLXSW_CMD_MBOX_CONFIG_PROFILE_LAG_MODE_SW)
+		return;
+
+	mlxsw_sp_pgt_mid_free_range(mlxsw_sp, mlxsw_sp->lag_pgt_base,
+				    mlxsw_sp->max_lag * 8);
+}
+
 #define MLXSW_SP_LAG_SEED_INIT 0xcafecafe
 
+struct mlxsw_sp_lag {
+	struct net_device *dev;
+	refcount_t ref_count;
+	u16 lag_id;
+};
+
 static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
 {
	char slcr_pl[MLXSW_REG_SLCR_LEN];
-	u16 max_lag;
	u32 seed;
	int err;
 
@@ -2716,23 +2613,34 @@ static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
	if (err)
		return err;
 
-	err = mlxsw_core_max_lag(mlxsw_sp->core, &max_lag);
+	err = mlxsw_core_max_lag(mlxsw_sp->core, &mlxsw_sp->max_lag);
	if (err)
		return err;
 
	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LAG_MEMBERS))
		return -EIO;
 
-	mlxsw_sp->lags = kcalloc(max_lag, sizeof(struct mlxsw_sp_upper),
+	err = mlxsw_sp_lag_pgt_init(mlxsw_sp);
+	if (err)
+		return err;
+
+	mlxsw_sp->lags = kcalloc(mlxsw_sp->max_lag, sizeof(struct mlxsw_sp_lag),
				 GFP_KERNEL);
-	if (!mlxsw_sp->lags)
-		return -ENOMEM;
+	if (!mlxsw_sp->lags) {
+		err = -ENOMEM;
+		goto err_kcalloc;
+	}
 
	return 0;
+
+err_kcalloc:
+	mlxsw_sp_lag_pgt_fini(mlxsw_sp);
+	return err;
 }
 
 static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp)
 {
+	mlxsw_sp_lag_pgt_fini(mlxsw_sp);
	kfree(mlxsw_sp->lags);
 }
 
@@ -2746,11 +2654,12 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = {
	.hwtstamp_get	= mlxsw_sp1_ptp_hwtstamp_get,
	.hwtstamp_set	= mlxsw_sp1_ptp_hwtstamp_set,
	.shaper_work	= mlxsw_sp1_ptp_shaper_work,
+#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK)
	.get_ts_info	= mlxsw_sp1_ptp_get_ts_info,
+#endif
	.get_stats_count = mlxsw_sp1_get_stats_count,
	.get_stats_strings = mlxsw_sp1_get_stats_strings,
	.get_stats	= mlxsw_sp1_get_stats,
-	.txhdr_construct = mlxsw_sp_ptp_txhdr_construct,
 };
 
 static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = {
@@ -2763,11 +2672,13 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = {
	.hwtstamp_get	= mlxsw_sp2_ptp_hwtstamp_get,
	.hwtstamp_set	= mlxsw_sp2_ptp_hwtstamp_set,
	.shaper_work	= mlxsw_sp2_ptp_shaper_work,
+#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK)
	.get_ts_info	= mlxsw_sp2_ptp_get_ts_info,
+#endif
	.get_stats_count = mlxsw_sp2_get_stats_count,
	.get_stats_strings = mlxsw_sp2_get_stats_strings,
	.get_stats	= mlxsw_sp2_get_stats,
-	.txhdr_construct = mlxsw_sp2_ptp_txhdr_construct,
+	.tx_as_data	= true,
 };
 
 static const struct mlxsw_sp_ptp_ops mlxsw_sp4_ptp_ops = {
@@ -2780,11 +2691,12 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp4_ptp_ops = {
	.hwtstamp_get	= mlxsw_sp2_ptp_hwtstamp_get,
	.hwtstamp_set	= mlxsw_sp2_ptp_hwtstamp_set,
	.shaper_work	= mlxsw_sp2_ptp_shaper_work,
+#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK)
	.get_ts_info	= mlxsw_sp2_ptp_get_ts_info,
+#endif
	.get_stats_count = mlxsw_sp2_get_stats_count,
	.get_stats_strings = mlxsw_sp2_get_stats_strings,
	.get_stats	= mlxsw_sp2_get_stats,
-	.txhdr_construct = mlxsw_sp_ptp_txhdr_construct,
 };
 
 struct mlxsw_sp_sample_trigger_node {
@@ -3113,10 +3025,19 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
		goto err_pgt_init;
	}
 
-	err = mlxsw_sp_fids_init(mlxsw_sp);
+	/* Initialize before FIDs so that the LAG table is at the start of PGT
+	 * and 8-aligned without overallocation.
+	 */
+	err = mlxsw_sp_lag_init(mlxsw_sp);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize LAG\n");
+		goto err_lag_init;
+	}
+
+	err = mlxsw_sp->fid_core_ops->init(mlxsw_sp);
	if (err) {
		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize FIDs\n");
-		goto err_fids_init;
+		goto err_fid_core_init;
	}
 
	err = mlxsw_sp_policers_init(mlxsw_sp);
@@ -3143,12 +3064,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
		goto err_buffers_init;
	}
 
-	err = mlxsw_sp_lag_init(mlxsw_sp);
-	if (err) {
-		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize LAG\n");
-		goto err_lag_init;
-	}
-
	/* Initialize SPAN before router and switchdev, so that those components
	 * can call mlxsw_sp_span_respin().
	 */
@@ -3188,6 +3103,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
		goto err_nve_init;
	}
 
+	err = mlxsw_sp_port_range_init(mlxsw_sp);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize port ranges\n");
+		goto err_port_range_init;
+	}
+
	err = mlxsw_sp_acl_init(mlxsw_sp);
	if (err) {
		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n");
@@ -3280,6 +3201,8 @@ err_ptp_clock_init:
 err_router_init:
	mlxsw_sp_acl_fini(mlxsw_sp);
 err_acl_init:
+	mlxsw_sp_port_range_fini(mlxsw_sp);
+err_port_range_init:
	mlxsw_sp_nve_fini(mlxsw_sp);
 err_nve_init:
	mlxsw_sp_ipv6_addr_ht_fini(mlxsw_sp);
@@ -3292,8 +3215,6 @@ err_counter_pool_init:
 err_switchdev_init:
	mlxsw_sp_span_fini(mlxsw_sp);
 err_span_init:
-	mlxsw_sp_lag_fini(mlxsw_sp);
-err_lag_init:
	mlxsw_sp_buffers_fini(mlxsw_sp);
 err_buffers_init:
	mlxsw_sp_devlink_traps_fini(mlxsw_sp);
@@ -3302,8 +3223,10 @@ err_devlink_traps_init:
 err_traps_init:
	mlxsw_sp_policers_fini(mlxsw_sp);
 err_policers_init:
-	mlxsw_sp_fids_fini(mlxsw_sp);
-err_fids_init:
+	mlxsw_sp->fid_core_ops->fini(mlxsw_sp);
+err_fid_core_init:
+	mlxsw_sp_lag_fini(mlxsw_sp);
+err_lag_init:
	mlxsw_sp_pgt_fini(mlxsw_sp);
 err_pgt_init:
	mlxsw_sp_kvdl_fini(mlxsw_sp);
@@ -3337,7 +3260,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
	mlxsw_sp->router_ops = &mlxsw_sp1_router_ops;
	mlxsw_sp->listeners = mlxsw_sp1_listener;
	mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener);
-	mlxsw_sp->fid_family_arr = mlxsw_sp1_fid_family_arr;
+	mlxsw_sp->fid_core_ops = &mlxsw_sp1_fid_core_ops;
	mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1;
	mlxsw_sp->pgt_smpe_index_valid = true;
 
@@ -3371,7 +3294,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
	mlxsw_sp->router_ops = &mlxsw_sp2_router_ops;
	mlxsw_sp->listeners = mlxsw_sp2_listener;
	mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp2_listener);
-	mlxsw_sp->fid_family_arr = mlxsw_sp2_fid_family_arr;
+	mlxsw_sp->fid_core_ops = &mlxsw_sp2_fid_core_ops;
	mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2;
	mlxsw_sp->pgt_smpe_index_valid = false;
 
@@ -3405,7 +3328,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core,
	mlxsw_sp->router_ops = &mlxsw_sp2_router_ops;
	mlxsw_sp->listeners = mlxsw_sp2_listener;
	mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp2_listener);
-	mlxsw_sp->fid_family_arr = mlxsw_sp2_fid_family_arr;
+	mlxsw_sp->fid_core_ops = &mlxsw_sp2_fid_core_ops;
	mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3;
	mlxsw_sp->pgt_smpe_index_valid = false;
 
@@ -3439,7 +3362,7 @@ static int mlxsw_sp4_init(struct mlxsw_core *mlxsw_core,
	mlxsw_sp->router_ops = &mlxsw_sp2_router_ops;
	mlxsw_sp->listeners = mlxsw_sp2_listener;
	mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp2_listener);
-	mlxsw_sp->fid_family_arr = mlxsw_sp2_fid_family_arr;
+	mlxsw_sp->fid_core_ops = &mlxsw_sp2_fid_core_ops;
	mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP4;
	mlxsw_sp->pgt_smpe_index_valid = false;
 
@@ -3462,18 +3385,19 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
	}
	mlxsw_sp_router_fini(mlxsw_sp);
	mlxsw_sp_acl_fini(mlxsw_sp);
+	mlxsw_sp_port_range_fini(mlxsw_sp);
	mlxsw_sp_nve_fini(mlxsw_sp);
	mlxsw_sp_ipv6_addr_ht_fini(mlxsw_sp);
	mlxsw_sp_afa_fini(mlxsw_sp);
	mlxsw_sp_counter_pool_fini(mlxsw_sp);
	mlxsw_sp_switchdev_fini(mlxsw_sp);
	mlxsw_sp_span_fini(mlxsw_sp);
-	mlxsw_sp_lag_fini(mlxsw_sp);
	mlxsw_sp_buffers_fini(mlxsw_sp);
	mlxsw_sp_devlink_traps_fini(mlxsw_sp);
	mlxsw_sp_traps_fini(mlxsw_sp);
	mlxsw_sp_policers_fini(mlxsw_sp);
-	mlxsw_sp_fids_fini(mlxsw_sp);
+	mlxsw_sp->fid_core_ops->fini(mlxsw_sp);
+	mlxsw_sp_lag_fini(mlxsw_sp);
	mlxsw_sp_pgt_fini(mlxsw_sp);
	mlxsw_sp_kvdl_fini(mlxsw_sp);
	mlxsw_sp_parsing_fini(mlxsw_sp);
@@ -3517,6 +3441,8 @@ static const struct mlxsw_config_profile mlxsw_sp2_config_profile = {
	},
	.used_cqe_time_stamp_type = 1,
	.cqe_time_stamp_type = MLXSW_CMD_MBOX_CONFIG_PROFILE_CQE_TIME_STAMP_TYPE_UTC,
+	.lag_mode_prefer_sw = true,
+	.flood_mode_prefer_cff = true,
 };
 
 /* Reduce number of LAGs from full capacity (256) to the maximum supported LAGs
@@ -3544,6 +3470,8 @@ static const struct mlxsw_config_profile mlxsw_sp4_config_profile = {
	},
	.used_cqe_time_stamp_type = 1,
	.cqe_time_stamp_type = MLXSW_CMD_MBOX_CONFIG_PROFILE_CQE_TIME_STAMP_TYPE_UTC,
+	.lag_mode_prefer_sw = true,
+	.flood_mode_prefer_cff = true,
 };
 
 static void
@@ -3730,6 +3658,26 @@ static int mlxsw_sp_resources_rifs_register(struct mlxsw_core *mlxsw_core)
					  &size_params);
 }
 
+static int
+mlxsw_sp_resources_port_range_register(struct mlxsw_core *mlxsw_core)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_core);
+	struct devlink_resource_size_params size_params;
+	u64 max;
+
+	if (!MLXSW_CORE_RES_VALID(mlxsw_core, ACL_MAX_L4_PORT_RANGE))
+		return -EIO;
+
+	max = MLXSW_CORE_RES_GET(mlxsw_core, ACL_MAX_L4_PORT_RANGE);
+	devlink_resource_size_params_init(&size_params, max, max, 1,
+					  DEVLINK_RESOURCE_UNIT_ENTRY);
+
+	return devl_resource_register(devlink, "port_range_registers", max,
+				      MLXSW_SP_RESOURCE_PORT_RANGE_REGISTERS,
+				      DEVLINK_RESOURCE_ID_PARENT_TOP,
+				      &size_params);
+}
+
 static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core)
 {
	int err;
@@ -3758,8 +3706,13 @@ static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core)
	if (err)
		goto err_resources_rifs_register;
 
+	err = mlxsw_sp_resources_port_range_register(mlxsw_core);
+	if (err)
+		goto err_resources_port_range_register;
+
	return 0;
 
+err_resources_port_range_register:
 err_resources_rifs_register:
 err_resources_rif_mac_profile_register:
 err_policer_resources_register:
@@ -3797,8 +3750,13 @@ static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core)
	if (err)
		goto err_resources_rifs_register;
 
+	err = mlxsw_sp_resources_port_range_register(mlxsw_core);
+	if (err)
+		goto err_resources_port_range_register;
+
	return 0;
 
+err_resources_port_range_register:
 err_resources_rifs_register:
 err_resources_rif_mac_profile_register:
 err_policer_resources_register:
@@ -3900,11 +3858,9 @@ static struct mlxsw_driver mlxsw_sp1_driver = {
	.trap_policer_fini = mlxsw_sp_trap_policer_fini,
	.trap_policer_set = mlxsw_sp_trap_policer_set,
	.trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get,
-	.txhdr_construct = mlxsw_sp_txhdr_construct,
	.resources_register = mlxsw_sp1_resources_register,
	.kvd_sizes_get = mlxsw_sp_kvd_sizes_get,
	.ptp_transmitted = mlxsw_sp_ptp_transmitted,
-	.txhdr_len = MLXSW_TXHDR_LEN,
	.profile = &mlxsw_sp1_config_profile,
	.sdq_supports_cqe_v2 = false,
 };
@@ -3938,10 +3894,8 @@ static struct mlxsw_driver mlxsw_sp2_driver = {
	.trap_policer_fini = mlxsw_sp_trap_policer_fini,
	.trap_policer_set = mlxsw_sp_trap_policer_set,
	.trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get,
-	.txhdr_construct = mlxsw_sp_txhdr_construct,
	.resources_register = mlxsw_sp2_resources_register,
	.ptp_transmitted = mlxsw_sp_ptp_transmitted,
-	.txhdr_len = MLXSW_TXHDR_LEN,
	.profile = &mlxsw_sp2_config_profile,
	.sdq_supports_cqe_v2 = true,
 };
@@ -3975,10 +3929,8 @@ static struct mlxsw_driver mlxsw_sp3_driver = {
	.trap_policer_fini = mlxsw_sp_trap_policer_fini,
	.trap_policer_set = mlxsw_sp_trap_policer_set,
	.trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get,
-	.txhdr_construct = mlxsw_sp_txhdr_construct,
	.resources_register = mlxsw_sp2_resources_register,
	.ptp_transmitted = mlxsw_sp_ptp_transmitted,
-	.txhdr_len = MLXSW_TXHDR_LEN,
	.profile = &mlxsw_sp2_config_profile,
	.sdq_supports_cqe_v2 = true,
 };
@@ -4010,10 +3962,8 @@ static struct mlxsw_driver mlxsw_sp4_driver = {
	.trap_policer_fini = mlxsw_sp_trap_policer_fini,
	.trap_policer_set = mlxsw_sp_trap_policer_set,
	.trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get,
-	.txhdr_construct = mlxsw_sp_txhdr_construct,
	.resources_register = mlxsw_sp2_resources_register,
	.ptp_transmitted = mlxsw_sp_ptp_transmitted,
-	.txhdr_len = MLXSW_TXHDR_LEN,
	.profile = &mlxsw_sp4_config_profile,
	.sdq_supports_cqe_v2 = true,
 };
@@ -4073,23 +4023,6 @@ struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev)
	return (struct mlxsw_sp_port *)priv.data;
 }
 
-struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev)
-{
-	struct mlxsw_sp_port *mlxsw_sp_port;
-
-	rcu_read_lock();
-	mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev);
-	if (mlxsw_sp_port)
-		dev_hold(mlxsw_sp_port->dev);
-	rcu_read_unlock();
-	return mlxsw_sp_port;
-}
-
-void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port)
-{
-	dev_put(mlxsw_sp_port->dev);
-}
-
 int mlxsw_sp_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp)
 {
	char mprs_pl[MLXSW_REG_MPRS_LEN];
@@ -4172,19 +4105,48 @@ mlxsw_sp_port_lag_uppers_cleanup(struct mlxsw_sp_port *mlxsw_sp_port,
	}
 }
 
-static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
+static struct mlxsw_sp_lag *
+mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, struct net_device *lag_dev,
+		    struct netlink_ext_ack *extack)
 {
	char sldr_pl[MLXSW_REG_SLDR_LEN];
+	struct mlxsw_sp_lag *lag;
+	u16 lag_id;
+	int i, err;
+
+	for (i = 0; i < mlxsw_sp->max_lag; i++) {
+		if (!mlxsw_sp->lags[i].dev)
+			break;
+	}
+
+	if (i == mlxsw_sp->max_lag) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Exceeded number of supported LAG devices");
+		return ERR_PTR(-EBUSY);
+	}
 
+	lag_id = i;
	mlxsw_reg_sldr_lag_create_pack(sldr_pl, lag_id);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+	if (err)
+		return ERR_PTR(err);
+
+	lag = &mlxsw_sp->lags[lag_id];
+	lag->lag_id = lag_id;
+	lag->dev = lag_dev;
+	refcount_set(&lag->ref_count, 1);
+
+	return lag;
 }
 
-static int mlxsw_sp_lag_destroy(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
+static int
+mlxsw_sp_lag_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_lag *lag)
 {
	char sldr_pl[MLXSW_REG_SLDR_LEN];
 
-	mlxsw_reg_sldr_lag_destroy_pack(sldr_pl, lag_id);
+	lag->dev = NULL;
+
+	mlxsw_reg_sldr_lag_destroy_pack(sldr_pl, lag->lag_id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
 }
 
@@ -4232,34 +4194,44 @@ static int mlxsw_sp_lag_col_port_disable(struct mlxsw_sp_port *mlxsw_sp_port,
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl);
 }
 
-static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp,
-				  struct net_device *lag_dev,
-				  u16 *p_lag_id)
+static struct mlxsw_sp_lag *
+mlxsw_sp_lag_find(struct mlxsw_sp *mlxsw_sp, struct net_device *lag_dev)
 {
-	struct mlxsw_sp_upper *lag;
-	int free_lag_id = -1;
-	u16 max_lag;
-	int err, i;
+	int i;
 
-	err = mlxsw_core_max_lag(mlxsw_sp->core, &max_lag);
-	if (err)
-		return err;
+	for (i = 0; i < mlxsw_sp->max_lag; i++) {
+		if (!mlxsw_sp->lags[i].dev)
+			continue;
 
-	for (i = 0; i < max_lag; i++) {
-		lag = mlxsw_sp_lag_get(mlxsw_sp, i);
-		if (lag->ref_count) {
-			if (lag->dev == lag_dev) {
-				*p_lag_id = i;
-				return 0;
-			}
-		} else if (free_lag_id < 0) {
-			free_lag_id = i;
-		}
+		if (mlxsw_sp->lags[i].dev == lag_dev)
+			return &mlxsw_sp->lags[i];
	}
-	if (free_lag_id < 0)
-		return -EBUSY;
-	*p_lag_id = free_lag_id;
-	return 0;
+
+	return NULL;
+}
+
+static struct mlxsw_sp_lag *
+mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, struct net_device *lag_dev,
+		 struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_lag *lag;
+
+	lag = mlxsw_sp_lag_find(mlxsw_sp, lag_dev);
+	if (lag) {
+		refcount_inc(&lag->ref_count);
+		return lag;
+	}
+
+	return mlxsw_sp_lag_create(mlxsw_sp, lag_dev, extack);
+}
+
+static void
+mlxsw_sp_lag_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_lag *lag)
+{
+	if (!refcount_dec_and_test(&lag->ref_count))
+		return;
+
+	mlxsw_sp_lag_destroy(mlxsw_sp, lag);
 }
 
 static bool
@@ -4268,12 +4240,6 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp,
			  struct netdev_lag_upper_info *lag_upper_info,
			  struct netlink_ext_ack *extack)
 {
-	u16 lag_id;
-
-	if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) {
-		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported LAG devices");
-		return false;
-	}
	if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
		NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type");
		return false;
@@ -4298,30 +4264,112 @@ static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp,
	return -EBUSY;
 }
 
+static int mlxsw_sp_lag_uppers_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
+					   struct net_device *lag_dev,
+					   struct netlink_ext_ack *extack)
+{
+	struct net_device *upper_dev;
+	struct net_device *master;
+	struct list_head *iter;
+	int done = 0;
+	int err;
+
+	master = netdev_master_upper_dev_get(lag_dev);
+	if (master && netif_is_bridge_master(master)) {
+		err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, lag_dev, master,
+						extack);
+		if (err)
+			return err;
+	}
+
+	netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
+		if (!is_vlan_dev(upper_dev))
+			continue;
+
+		master = netdev_master_upper_dev_get(upper_dev);
+		if (master && netif_is_bridge_master(master)) {
+			err = mlxsw_sp_port_bridge_join(mlxsw_sp_port,
+							upper_dev, master,
+							extack);
+			if (err)
+				goto err_port_bridge_join;
+		}
+
+		++done;
+	}
+
+	return 0;
+
+err_port_bridge_join:
+	netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
+		if (!is_vlan_dev(upper_dev))
+			continue;
+
+		master = netdev_master_upper_dev_get(upper_dev);
+		if (!master || !netif_is_bridge_master(master))
+			continue;
+
+		if (!done--)
+			break;
+
+		mlxsw_sp_port_bridge_leave(mlxsw_sp_port, upper_dev, master);
+	}
+
+	master = netdev_master_upper_dev_get(lag_dev);
+	if (master && netif_is_bridge_master(master))
+		mlxsw_sp_port_bridge_leave(mlxsw_sp_port, lag_dev, master);
+
+	return err;
+}
+
+static void
+mlxsw_sp_lag_uppers_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
+				 struct net_device *lag_dev)
+{
+	struct net_device *upper_dev;
+	struct net_device *master;
+	struct list_head *iter;
+
+	netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
+		if (!is_vlan_dev(upper_dev))
+			continue;
+
+		master = netdev_master_upper_dev_get(upper_dev);
+		if (!master)
+			continue;
+
+		mlxsw_sp_port_bridge_leave(mlxsw_sp_port, upper_dev, master);
+	}
+
+	master = netdev_master_upper_dev_get(lag_dev);
+	if (master)
+		mlxsw_sp_port_bridge_leave(mlxsw_sp_port, lag_dev, master);
+}
+
 static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
				  struct net_device *lag_dev,
				  struct netlink_ext_ack *extack)
 {
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	struct mlxsw_sp_upper *lag;
+	struct mlxsw_sp_lag *lag;
	u16 lag_id;
	u8 port_index;
	int err;
 
-	err = mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id);
-	if (err)
-		return err;
-	lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id);
-	if (!lag->ref_count) {
-		err = mlxsw_sp_lag_create(mlxsw_sp, lag_id);
-		if (err)
-			return err;
-		lag->dev = lag_dev;
-	}
+	lag = mlxsw_sp_lag_get(mlxsw_sp, lag_dev, extack);
+	if (IS_ERR(lag))
+		return PTR_ERR(lag);
 
+	lag_id = lag->lag_id;
	err = mlxsw_sp_port_lag_index_get(mlxsw_sp, lag_id, &port_index);
	if (err)
		return err;
+
+	err = mlxsw_sp_lag_uppers_bridge_join(mlxsw_sp_port, lag_dev,
+					      extack);
+	if (err)
+		goto err_lag_uppers_bridge_join;
+
	err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index);
	if (err)
		goto err_col_port_add;
@@ -4330,29 +4378,40 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
			     mlxsw_sp_port->local_port);
	mlxsw_sp_port->lag_id = lag_id;
	mlxsw_sp_port->lagged = 1;
-	lag->ref_count++;
+
+	err = mlxsw_sp_fid_port_join_lag(mlxsw_sp_port);
+	if (err)
+		goto err_fid_port_join_lag;
 
	/* Port is no longer usable as a router interface */
	if (mlxsw_sp_port->default_vlan->fid)
		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port->default_vlan);
 
	/* Join a router interface configured on the LAG, if exists */
-	err = mlxsw_sp_port_vlan_router_join(mlxsw_sp_port->default_vlan,
-					     lag_dev, extack);
+	err = mlxsw_sp_router_port_join_lag(mlxsw_sp_port, lag_dev,
+					    extack);
	if (err)
		goto err_router_join;
 
+	err = mlxsw_sp_netdevice_enslavement_replay(mlxsw_sp, lag_dev, extack);
+	if (err)
+		goto err_replay;
+
	return 0;
 
+err_replay:
+	mlxsw_sp_router_port_leave_lag(mlxsw_sp_port, lag_dev);
 err_router_join:
-	lag->ref_count--;
+	mlxsw_sp_fid_port_leave_lag(mlxsw_sp_port);
+err_fid_port_join_lag:
	mlxsw_sp_port->lagged = 0;
	mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id,
				     mlxsw_sp_port->local_port);
	mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id);
 err_col_port_add:
-	if (!lag->ref_count)
-		mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
+	mlxsw_sp_lag_uppers_bridge_leave(mlxsw_sp_port, lag_dev);
+err_lag_uppers_bridge_join:
+	mlxsw_sp_lag_put(mlxsw_sp, lag);
	return err;
 }
 
@@ -4361,12 +4420,11 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
 {
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	u16 lag_id = mlxsw_sp_port->lag_id;
-	struct mlxsw_sp_upper *lag;
+	struct mlxsw_sp_lag *lag;
 
	if (!mlxsw_sp_port->lagged)
		return;
-	lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id);
-	WARN_ON(lag->ref_count == 0);
+	lag = &mlxsw_sp->lags[lag_id];
 
	mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id);
 
@@ -4378,13 +4436,13 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
	 */
	mlxsw_sp_port_lag_uppers_cleanup(mlxsw_sp_port, lag_dev);
 
-	if (lag->ref_count == 1)
-		mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
+	mlxsw_sp_fid_port_leave_lag(mlxsw_sp_port);
+
+	mlxsw_sp_lag_put(mlxsw_sp, lag);
 
	mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id,
				     mlxsw_sp_port->local_port);
	mlxsw_sp_port->lagged = 0;
-	lag->ref_count--;
 
	/* Make sure untagged frames are allowed to ingress */
	mlxsw_sp_port_pvid_set(mlxsw_sp_port, MLXSW_SP_DEFAULT_VID,
@@ -4600,9 +4658,62 @@ static bool mlxsw_sp_bridge_vxlan_is_valid(struct net_device *br_dev,
	return true;
 }
 
+static bool mlxsw_sp_netdev_is_master(struct net_device *upper_dev,
+				      struct net_device *dev)
+{
+	return upper_dev == netdev_master_upper_dev_get(dev);
+}
+
+static int __mlxsw_sp_netdevice_event(struct mlxsw_sp *mlxsw_sp,
+				      unsigned long event, void *ptr,
+				      bool process_foreign);
+
+static int mlxsw_sp_netdevice_validate_uppers(struct mlxsw_sp *mlxsw_sp,
+					      struct net_device *dev,
+					      struct netlink_ext_ack *extack)
+{
+	struct net_device *upper_dev;
+	struct list_head *iter;
+	int err;
+
+	netdev_for_each_upper_dev_rcu(dev, upper_dev, iter) {
+		struct netdev_notifier_changeupper_info info = {
+			.info = {
+				.dev = dev,
+				.extack = extack,
+			},
+			.master = mlxsw_sp_netdev_is_master(upper_dev, dev),
+			.upper_dev = upper_dev,
+			.linking = true,
+
+			/* upper_info is relevant for LAG devices. But we would
+			 * only need this if LAG were a valid upper above
+			 * another upper (e.g. a bridge that is a member of a
+			 * LAG), and that is never a valid configuration. So we
+			 * can keep this as NULL.
+			 */
+			.upper_info = NULL,
+		};
+
+		err = __mlxsw_sp_netdevice_event(mlxsw_sp,
+						 NETDEV_PRECHANGEUPPER,
+						 &info, true);
+		if (err)
+			return err;
+
+		err = mlxsw_sp_netdevice_validate_uppers(mlxsw_sp, upper_dev,
+							 extack);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
					       struct net_device *dev,
-					       unsigned long event, void *ptr)
+					       unsigned long event, void *ptr,
+					       bool replay_deslavement)
 {
	struct netdev_notifier_changeupper_info *info;
	struct mlxsw_sp_port *mlxsw_sp_port;
@@ -4640,8 +4751,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
		    (!netif_is_bridge_master(upper_dev) ||
		     !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
							  upper_dev))) {
-			NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
-			return -EINVAL;
+			err = mlxsw_sp_netdevice_validate_uppers(mlxsw_sp,
+								 upper_dev,
+								 extack);
+			if (err)
+				return err;
		}
		if (netif_is_lag_master(upper_dev) &&
		    !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev,
@@ -4656,11 +4770,6 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
			NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on a LAG port");
			return -EINVAL;
		}
-		if (netif_is_macvlan(upper_dev) &&
-		    !mlxsw_sp_rif_exists(mlxsw_sp, lower_dev)) {
-			NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
-			return -EOPNOTSUPP;
-		}
		if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) {
			NL_SET_ERR_MSG_MOD(extack, "Master device is an OVS master and this device has a VLAN");
			return -EINVAL;
@@ -4707,15 +4816,20 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
	case NETDEV_CHANGEUPPER:
		upper_dev = info->upper_dev;
		if (netif_is_bridge_master(upper_dev)) {
-			if (info->linking)
+			if (info->linking) {
				err = mlxsw_sp_port_bridge_join(mlxsw_sp_port,
								lower_dev,
								upper_dev,
								extack);
-			else
+			} else {
				mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
							   lower_dev,
							   upper_dev);
+				if (!replay_deslavement)
+					break;
+				mlxsw_sp_netdevice_deslavement_replay(mlxsw_sp,
+								      lower_dev);
+			}
		} else if (netif_is_lag_master(upper_dev)) {
			if (info->linking) {
				err = mlxsw_sp_port_lag_join(mlxsw_sp_port,
@@ -4724,6 +4838,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
				mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port);
				mlxsw_sp_port_lag_leave(mlxsw_sp_port,
							upper_dev);
+				mlxsw_sp_netdevice_deslavement_replay(mlxsw_sp,
+								      dev);
			}
		} else if (netif_is_ovs_master(upper_dev)) {
			if (info->linking)
@@ -4776,13 +4892,15 @@ static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev,
 
 static int mlxsw_sp_netdevice_port_event(struct net_device *lower_dev,
					 struct net_device *port_dev,
-					 unsigned long event, void *ptr)
+					 unsigned long event, void *ptr,
+					 bool replay_deslavement)
 {
	switch (event) {
	case NETDEV_PRECHANGEUPPER:
	case NETDEV_CHANGEUPPER:
		return mlxsw_sp_netdevice_port_upper_event(lower_dev, port_dev,
-							   event, ptr);
+							   event, ptr,
+							   replay_deslavement);
	case NETDEV_CHANGELOWERSTATE:
		return mlxsw_sp_netdevice_port_lower_event(port_dev, event,
							   ptr);
@@ -4791,6 +4909,30 @@ static int mlxsw_sp_netdevice_port_event(struct net_device *lower_dev,
	return 0;
 }
 
+/* Called for LAG or its upper VLAN after the per-LAG-lower processing was done,
+ * to do any per-LAG / per-LAG-upper processing.
+ */
+static int mlxsw_sp_netdevice_post_lag_event(struct net_device *dev,
+					     unsigned long event,
+					     void *ptr)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(dev);
+	struct netdev_notifier_changeupper_info *info = ptr;
+
+	if (!mlxsw_sp)
+		return 0;
+
+	switch (event) {
+	case NETDEV_CHANGEUPPER:
+		if (info->linking)
+			break;
+		if (netif_is_bridge_master(info->upper_dev))
+			mlxsw_sp_netdevice_deslavement_replay(mlxsw_sp, dev);
+		break;
+	}
+
+	return 0;
+}
+
 static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev,
					unsigned long event, void *ptr)
 {
@@ -4801,19 +4943,19 @@ static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev,
	netdev_for_each_lower_dev(lag_dev, dev, iter) {
		if (mlxsw_sp_port_dev_check(dev)) {
			ret = mlxsw_sp_netdevice_port_event(lag_dev, dev, event,
-							    ptr);
+							    ptr, false);
			if (ret)
				return ret;
		}
	}
 
-	return 0;
+	return mlxsw_sp_netdevice_post_lag_event(lag_dev, event, ptr);
 }
 
 static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
					      struct net_device *dev,
					      unsigned long event, void *ptr,
-					      u16 vid)
+					      u16 vid, bool replay_deslavement)
 {
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
@@ -4844,27 +4986,30 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
		    (!netif_is_bridge_master(upper_dev) ||
		     !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
							  upper_dev))) {
-			NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
-			return -EINVAL;
-		}
-		if (netif_is_macvlan(upper_dev) &&
-		    !mlxsw_sp_rif_exists(mlxsw_sp, vlan_dev)) {
-			NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
-			return -EOPNOTSUPP;
+			err = mlxsw_sp_netdevice_validate_uppers(mlxsw_sp,
+								 upper_dev,
+								 extack);
+			if (err)
+				return err;
		}
		break;
	case NETDEV_CHANGEUPPER:
		upper_dev = info->upper_dev;
		if (netif_is_bridge_master(upper_dev)) {
-			if (info->linking)
+			if (info->linking) {
				err = mlxsw_sp_port_bridge_join(mlxsw_sp_port,
								vlan_dev,
								upper_dev,
								extack);
-			else
+			} else {
				mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
							   vlan_dev,
							   upper_dev);
+				if (!replay_deslavement)
+					break;
+				mlxsw_sp_netdevice_deslavement_replay(mlxsw_sp,
+								      vlan_dev);
+			}
		} else if (netif_is_macvlan(upper_dev)) {
			if (!info->linking)
				mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);
@@ -4888,26 +5033,26 @@ static int mlxsw_sp_netdevice_lag_port_vlan_event(struct net_device *vlan_dev,
		if (mlxsw_sp_port_dev_check(dev)) {
			ret = mlxsw_sp_netdevice_port_vlan_event(vlan_dev, dev,
								 event, ptr,
-								 vid);
+								 vid, false);
			if (ret)
				return ret;
		}
	}
 
-	return 0;
+	return mlxsw_sp_netdevice_post_lag_event(vlan_dev, event, ptr);
 }
 
-static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev,
+static int mlxsw_sp_netdevice_bridge_vlan_event(struct mlxsw_sp *mlxsw_sp,
+						struct net_device *vlan_dev,
						struct net_device *br_dev,
						unsigned long event, void *ptr,
-						u16 vid)
+						u16 vid, bool process_foreign)
 {
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
	struct netdev_notifier_changeupper_info *info = ptr;
	struct netlink_ext_ack *extack;
	struct net_device *upper_dev;
 
-	if (!mlxsw_sp)
+	if (!process_foreign && !mlxsw_sp_lower_get(vlan_dev))
		return 0;
 
	extack = netdev_notifier_info_to_extack(&info->info);
@@ -4920,13 +5065,6 @@ static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev,
			NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
			return -EOPNOTSUPP;
		}
-		if (!info->linking)
-			break;
-		if (netif_is_macvlan(upper_dev) &&
-		    !mlxsw_sp_rif_exists(mlxsw_sp, vlan_dev)) {
-			NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
-			return -EOPNOTSUPP;
-		}
		break;
	case NETDEV_CHANGEUPPER:
		upper_dev = info->upper_dev;
@@ -4940,36 +5078,42 @@ static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev,
	return 0;
 }
 
-static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev,
-					 unsigned long event, void *ptr)
+static int mlxsw_sp_netdevice_vlan_event(struct mlxsw_sp *mlxsw_sp,
+					 struct net_device *vlan_dev,
+					 unsigned long event, void *ptr,
+					 bool process_foreign)
 {
	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
	u16 vid = vlan_dev_vlan_id(vlan_dev);
 
	if (mlxsw_sp_port_dev_check(real_dev))
		return mlxsw_sp_netdevice_port_vlan_event(vlan_dev, real_dev,
-							  event, ptr, vid);
+							  event, ptr, vid,
+							  true);
	else if (netif_is_lag_master(real_dev))
		return mlxsw_sp_netdevice_lag_port_vlan_event(vlan_dev,
							      real_dev, event,
							      ptr, vid);
	else if (netif_is_bridge_master(real_dev))
-		return mlxsw_sp_netdevice_bridge_vlan_event(vlan_dev, real_dev,
-							    event, ptr, vid);
+		return mlxsw_sp_netdevice_bridge_vlan_event(mlxsw_sp, vlan_dev,
+							    real_dev, event,
+							    ptr, vid,
+							    process_foreign);
 
	return 0;
 }
 
-static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
-					   unsigned long event, void *ptr)
+static int mlxsw_sp_netdevice_bridge_event(struct mlxsw_sp *mlxsw_sp,
+					   struct net_device *br_dev,
+					   unsigned long event, void *ptr,
+					   bool process_foreign)
 {
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(br_dev);
	struct netdev_notifier_changeupper_info *info = ptr;
	struct netlink_ext_ack *extack;
	struct net_device *upper_dev;
	u16 proto;
 
-	if (!mlxsw_sp)
+	if (!process_foreign && !mlxsw_sp_lower_get(br_dev))
		return 0;
 
	extack = netdev_notifier_info_to_extack(&info->info);
@@ -4997,11 +5141,6 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
			NL_SET_ERR_MSG_MOD(extack, "VLAN uppers are only supported with 802.1q VLAN protocol");
			return -EOPNOTSUPP;
		}
-		if (netif_is_macvlan(upper_dev) &&
-		    !mlxsw_sp_rif_exists(mlxsw_sp, br_dev)) {
-			NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
-			return -EOPNOTSUPP;
-		}
		break;
	case NETDEV_CHANGEUPPER:
		upper_dev = info->upper_dev;
@@ -5062,25 +5201,13 @@ static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp,
			return 0;
		if (!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
			return -EOPNOTSUPP;
-		if (cu_info->linking) {
-			if (!netif_running(dev))
-				return 0;
-			/* When the bridge is VLAN-aware, the VNI of the VxLAN
-			 * device needs to be mapped to a VLAN, but at this
-			 * point no VLANs are configured on the VxLAN device
-			 */
-			if (br_vlan_enabled(upper_dev))
-				return 0;
+		if (!netif_running(dev))
+			return 0;
+		if (cu_info->linking)
			return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev,
							  dev, 0, extack);
-		} else {
-			/* VLANs were already flushed, which triggered the
-			 * necessary cleanup
-			 */
-			if (br_vlan_enabled(upper_dev))
-				return 0;
+		else
			mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, dev);
-		}
		break;
	case NETDEV_PRE_UP:
		upper_dev = netdev_master_upper_dev_get(dev);
@@ -5107,45 +5234,50 @@ static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp,
	return 0;
 }
 
-static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
-				    unsigned long event, void *ptr)
+static int __mlxsw_sp_netdevice_event(struct mlxsw_sp *mlxsw_sp,
+				      unsigned long event, void *ptr,
+				      bool process_foreign)
 {
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct mlxsw_sp_span_entry *span_entry;
-	struct mlxsw_sp *mlxsw_sp;
	int err = 0;
 
-	mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
	if (event == NETDEV_UNREGISTER) {
		span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev);
		if (span_entry)
			mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry);
	}
-	mlxsw_sp_span_respin(mlxsw_sp);
 
	if (netif_is_vxlan(dev))
		err = mlxsw_sp_netdevice_vxlan_event(mlxsw_sp, dev, event, ptr);
	else if (mlxsw_sp_port_dev_check(dev))
-		err = mlxsw_sp_netdevice_port_event(dev, dev, event, ptr);
+		err = mlxsw_sp_netdevice_port_event(dev, dev, event, ptr, true);
	else if (netif_is_lag_master(dev))
		err = mlxsw_sp_netdevice_lag_event(dev, event, ptr);
	else if (is_vlan_dev(dev))
-		err = mlxsw_sp_netdevice_vlan_event(dev, event, ptr);
+		err = mlxsw_sp_netdevice_vlan_event(mlxsw_sp, dev, event, ptr,
+						    process_foreign);
	else if (netif_is_bridge_master(dev))
-		err = mlxsw_sp_netdevice_bridge_event(dev, event, ptr);
+		err = mlxsw_sp_netdevice_bridge_event(mlxsw_sp, dev, event, ptr,
+						      process_foreign);
	else if (netif_is_macvlan(dev))
		err = mlxsw_sp_netdevice_macvlan_event(dev, event, ptr);
 
-	return notifier_from_errno(err);
+	return err;
 }
 
-static struct notifier_block mlxsw_sp_inetaddr_valid_nb __read_mostly = {
-	.notifier_call = mlxsw_sp_inetaddr_valid_event,
-};
+static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
+				    unsigned long event, void *ptr)
+{
+	struct mlxsw_sp *mlxsw_sp;
+	int err;
 
-static struct notifier_block mlxsw_sp_inet6addr_valid_nb __read_mostly = {
-	.notifier_call = mlxsw_sp_inet6addr_valid_event,
-};
+	mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
+	mlxsw_sp_span_respin(mlxsw_sp);
+	err = __mlxsw_sp_netdevice_event(mlxsw_sp, event, ptr, false);
+
+	return notifier_from_errno(err);
+}
 
 static const struct pci_device_id mlxsw_sp1_pci_id_table[] = {
	{PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0},
@@ -5191,12 +5323,9 @@ static int __init mlxsw_sp_module_init(void)
 {
	int err;
 
-	register_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb);
-	register_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
-
	err = mlxsw_core_driver_register(&mlxsw_sp1_driver);
	if (err)
-		goto err_sp1_core_driver_register;
+		return err;
 
	err = mlxsw_core_driver_register(&mlxsw_sp2_driver);
	if (err)
@@ -5242,9 +5371,6 @@ err_sp3_core_driver_register:
	mlxsw_core_driver_unregister(&mlxsw_sp2_driver);
 err_sp2_core_driver_register:
	mlxsw_core_driver_unregister(&mlxsw_sp1_driver);
-err_sp1_core_driver_register:
-	unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
-	unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb);
	return err;
 }
 
@@ -5258,8 +5384,6 @@ static void __exit mlxsw_sp_module_exit(void)
	mlxsw_core_driver_unregister(&mlxsw_sp3_driver);
	mlxsw_core_driver_unregister(&mlxsw_sp2_driver);
	mlxsw_core_driver_unregister(&mlxsw_sp1_driver);
-	unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
-	unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb);
 }
 
 module_init(mlxsw_sp_module_init);
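
A note on the first hunk above: mlxsw_sp_flow_counter_get() gains a "clear" flag that selects MLXSW_REG_MGPC_OPCODE_CLEAR instead of MLXSW_REG_MGPC_OPCODE_NOP, so a flow counter can be read and reset in a single MGPC register query. Below is a minimal sketch of a caller, assuming the driver's internal spectrum.h declarations are in scope; the wrapper function itself is hypothetical and only the mlxsw_sp_flow_counter_get() prototype is taken from the diff:

/* Hypothetical helper: read a flow counter and clear it in the same
 * query, so the next read returns the delta since this one.
 * Passing clear=false would keep the old read-only (NOP opcode)
 * behaviour.
 */
static int example_flow_counter_read_and_clear(struct mlxsw_sp *mlxsw_sp,
					       unsigned int counter_index,
					       u64 *packets, u64 *bytes)
{
	return mlxsw_sp_flow_counter_get(mlxsw_sp, counter_index, true,
					 packets, bytes);
}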
