diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox')
46 files changed, 1788 insertions, 624 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 09dd3776db76..85fe17e4dcfb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -146,16 +146,25 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, if (err) goto free_eq; - cq->mcq.comp = cq->type != RX ? mlx4_en_tx_irq : mlx4_en_rx_irq; cq->mcq.event = mlx4_en_cq_event; - if (cq->type != RX) + switch (cq->type) { + case TX: + cq->mcq.comp = mlx4_en_tx_irq; netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, NAPI_POLL_WEIGHT); - else + napi_enable(&cq->napi); + break; + case RX: + cq->mcq.comp = mlx4_en_rx_irq; netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64); - - napi_enable(&cq->napi); + napi_enable(&cq->napi); + break; + case TX_XDP: + /* nothing regarding napi, it's shared with rx ring */ + cq->xdp_busy = false; + break; + } return 0; @@ -184,8 +193,10 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) { - napi_disable(&cq->napi); - netif_napi_del(&cq->napi); + if (cq->type != TX_XDP) { + napi_disable(&cq->napi); + netif_napi_del(&cq->napi); + } mlx4_cq_free(priv->mdev->dev, &cq->mcq); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index d94f981eafc4..56cdf38d150e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -125,9 +125,9 @@ void mlx4_en_update_loopback_state(struct net_device *dev, priv->flags |= MLX4_EN_FLAG_ENABLE_HW_LOOPBACK; mutex_lock(&priv->mdev->state_lock); - if (priv->mdev->dev->caps.flags2 & - MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB && - priv->rss_map.indir_qp.qpn) { + if ((priv->mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB) && + priv->rss_map.indir_qp && priv->rss_map.indir_qp->qpn) { int i; int err = 0; int loopback = !!(features & NETIF_F_LOOPBACK); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index c1de75fc399a..18252a79a074 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -596,6 +596,8 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv) return err; } + en_info(priv, "Steering Mode %d\n", dev->caps.steering_mode); + if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { int base_qpn = mlx4_get_base_qpn(dev, priv->port); *qpn = base_qpn + index; @@ -1010,7 +1012,7 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv, memcpy(&mc_list[10], mclist->addr, ETH_ALEN); mc_list[5] = priv->port; err = mlx4_multicast_detach(mdev->dev, - &priv->rss_map.indir_qp, + priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, mclist->reg_id); @@ -1032,7 +1034,7 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv, /* needed for B0 steering support */ mc_list[5] = priv->port; err = mlx4_multicast_attach(mdev->dev, - &priv->rss_map.indir_qp, + priv->rss_map.indir_qp, mc_list, priv->port, 0, MLX4_PROT_ETH, @@ -1677,13 +1679,15 @@ int mlx4_en_start_port(struct net_device *dev) if (t != TX_XDP) { tx_ring->tx_queue = netdev_get_tx_queue(dev, i); tx_ring->recycle_ring = NULL; + + /* Arm CQ for TX completions */ + mlx4_en_arm_cq(priv, cq); + } else { mlx4_en_init_recycle_ring(priv, i); + /* XDP TX CQ should never be armed */ } - /* Arm CQ for TX completions */ - mlx4_en_arm_cq(priv, cq); - /* Set initial ownership of all Tx TXBBs to SW (1) */ for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) *((u32 *)(tx_ring->buf + j)) = 0xffffffff; @@ -1742,7 +1746,7 @@ int mlx4_en_start_port(struct net_device *dev) /* Attach rx QP to bradcast address */ eth_broadcast_addr(&mc_list[10]); mc_list[5] = priv->port; /* needed for B0 steering support */ - if (mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list, + if (mlx4_multicast_attach(mdev->dev, priv->rss_map.indir_qp, mc_list, priv->port, 0, MLX4_PROT_ETH, &priv->broadcast_id)) mlx4_warn(mdev, "Failed Attaching Broadcast\n"); @@ -1866,12 +1870,12 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) /* Detach All multicasts */ eth_broadcast_addr(&mc_list[10]); mc_list[5] = priv->port; /* needed for B0 steering support */ - mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, + mlx4_multicast_detach(mdev->dev, priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, priv->broadcast_id); list_for_each_entry(mclist, &priv->curr_list, list) { memcpy(&mc_list[10], mclist->addr, ETH_ALEN); mc_list[5] = priv->port; - mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, + mlx4_multicast_detach(mdev->dev, priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, mclist->reg_id); if (mclist->tunnel_reg_id) mlx4_flow_detach(mdev->dev, mclist->tunnel_reg_id); @@ -2821,11 +2825,25 @@ out: return err; } -static bool mlx4_xdp_attached(struct net_device *dev) +static u32 mlx4_xdp_query(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + const struct bpf_prog *xdp_prog; + u32 prog_id = 0; + + if (!priv->tx_ring_num[TX_XDP]) + return prog_id; + + mutex_lock(&mdev->state_lock); + xdp_prog = rcu_dereference_protected( + priv->rx_ring[0]->xdp_prog, + lockdep_is_held(&mdev->state_lock)); + if (xdp_prog) + prog_id = xdp_prog->aux->id; + mutex_unlock(&mdev->state_lock); - return !!priv->tx_ring_num[TX_XDP]; + return prog_id; } static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp) @@ -2834,7 +2852,8 @@ static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp) case XDP_SETUP_PROG: return mlx4_xdp_set(dev, xdp->prog); case XDP_QUERY_PROG: - xdp->prog_attached = mlx4_xdp_attached(dev); + xdp->prog_id = mlx4_xdp_query(dev); + xdp->prog_attached = !!xdp->prog_id; return 0; default: return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 77abd1813047..e5fb89505a13 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -134,10 +134,11 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index, gfp_t gfp) { - struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride); + struct mlx4_en_rx_desc *rx_desc = ring->buf + + (index << ring->log_stride); struct mlx4_en_rx_alloc *frags = ring->rx_info + (index << priv->log_rx_info); - if (ring->page_cache.index > 0) { + if (likely(ring->page_cache.index > 0)) { /* XDP uses a single page per frame */ if (!frags->page) { ring->page_cache.index--; @@ -178,6 +179,7 @@ static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv, } } +/* Function not in fast-path */ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) { struct mlx4_en_rx_ring *ring; @@ -539,14 +541,14 @@ static void validate_loopback(struct mlx4_en_priv *priv, void *va) priv->loopback_ok = 1; } -static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, +static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { u32 missing = ring->actual_size - (ring->prod - ring->cons); /* Try to batch allocations, but not too much. */ if (missing < 8) - return false; + return; do { if (mlx4_en_prepare_rx_desc(priv, ring, ring->prod & ring->size_mask, @@ -554,9 +556,9 @@ static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, __GFP_MEMALLOC)) break; ring->prod++; - } while (--missing); + } while (likely(--missing)); - return true; + mlx4_en_update_rx_prod_db(ring); } /* When hardware doesn't strip the vlan, we need to calculate the checksum @@ -637,21 +639,14 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_dev *mdev = priv->mdev; - struct mlx4_cqe *cqe; - struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; - struct mlx4_en_rx_alloc *frags; + int factor = priv->cqe_factor; + struct mlx4_en_rx_ring *ring; struct bpf_prog *xdp_prog; - int doorbell_pending; - struct sk_buff *skb; - int index; - int nr; - unsigned int length; + int cq_ring = cq->ring; + bool doorbell_pending; + struct mlx4_cqe *cqe; int polled = 0; - int ip_summed; - int factor = priv->cqe_factor; - u64 timestamp; - bool l2_tunnel; + int index; if (unlikely(!priv->port_up)) return 0; @@ -659,6 +654,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (unlikely(budget <= 0)) return polled; + ring = priv->rx_ring[cq_ring]; + /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */ rcu_read_lock(); xdp_prog = rcu_dereference(ring->xdp_prog); @@ -673,10 +670,17 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cq->mcq.cons_index & cq->size)) { + struct mlx4_en_rx_alloc *frags; + enum pkt_hash_types hash_type; + struct sk_buff *skb; + unsigned int length; + int ip_summed; void *va; + int nr; frags = ring->rx_info + (index << priv->log_rx_info); va = page_address(frags[0].page) + frags[0].page_offset; + prefetchw(va); /* * make sure we read the CQE after we read the ownership bit */ @@ -768,7 +772,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud break; case XDP_TX: if (likely(!mlx4_en_xmit_frame(ring, frags, dev, - length, cq->ring, + length, cq_ring, &doorbell_pending))) { frags[0].page = NULL; goto next; @@ -790,24 +794,27 @@ xdp_drop_no_cnt: ring->packets++; skb = napi_get_frags(&cq->napi); - if (!skb) + if (unlikely(!skb)) goto next; if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) { - timestamp = mlx4_en_get_cqe_ts(cqe); - mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb), + u64 timestamp = mlx4_en_get_cqe_ts(cqe); + + mlx4_en_fill_hwtstamps(priv->mdev, skb_hwtstamps(skb), timestamp); } - skb_record_rx_queue(skb, cq->ring); + skb_record_rx_queue(skb, cq_ring); if (likely(dev->features & NETIF_F_RXCSUM)) { if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) { if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && cqe->checksum == cpu_to_be16(0xffff)) { - ip_summed = CHECKSUM_UNNECESSARY; - l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && + bool l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); + + ip_summed = CHECKSUM_UNNECESSARY; + hash_type = PKT_HASH_TYPE_L4; if (l2_tunnel) skb->csum_level = 1; ring->csum_ok++; @@ -822,6 +829,7 @@ xdp_drop_no_cnt: goto csum_none; } else { ip_summed = CHECKSUM_COMPLETE; + hash_type = PKT_HASH_TYPE_L3; ring->csum_complete++; } } else { @@ -831,16 +839,14 @@ xdp_drop_no_cnt: } else { csum_none: ip_summed = CHECKSUM_NONE; + hash_type = PKT_HASH_TYPE_L3; ring->csum_none++; } skb->ip_summed = ip_summed; if (dev->features & NETIF_F_RXHASH) skb_set_hash(skb, be32_to_cpu(cqe->immed_rss_invalid), - (ip_summed == CHECKSUM_UNNECESSARY) ? - PKT_HASH_TYPE_L4 : - PKT_HASH_TYPE_L3); - + hash_type); if ((cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) && @@ -867,15 +873,17 @@ next: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor; - if (++polled == budget) + if (unlikely(++polled == budget)) break; } rcu_read_unlock(); - if (polled) { - if (doorbell_pending) - mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]); + if (likely(polled)) { + if (doorbell_pending) { + priv->tx_cq[TX_XDP][cq_ring]->xdp_busy = true; + mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq_ring]); + } mlx4_cq_set_ci(&cq->mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ @@ -883,8 +891,7 @@ next: } AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); - if (mlx4_en_refill_rx_buffers(priv, ring)) - mlx4_en_update_rx_prod_db(ring); + mlx4_en_refill_rx_buffers(priv, ring); return polled; } @@ -907,16 +914,30 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); struct net_device *dev = cq->dev; struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_cq *xdp_tx_cq = NULL; + bool clean_complete = true; int done; + if (priv->tx_ring_num[TX_XDP]) { + xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring]; + if (xdp_tx_cq->xdp_busy) { + clean_complete = mlx4_en_process_tx_cq(dev, xdp_tx_cq, + budget); + xdp_tx_cq->xdp_busy = !clean_complete; + } + } + done = mlx4_en_process_rx_cq(dev, cq, budget); /* If we used up all the quota - we're probably not done yet... */ - if (done == budget) { + if (done == budget || !clean_complete) { const struct cpumask *aff; struct irq_data *idata; int cpu_curr; + /* in case we got here because of !clean_complete */ + done = budget; + INC_PERF_COUNTER(priv->pstats.napi_quota); cpu_curr = smp_processor_id(); @@ -936,7 +957,7 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) done--; } /* Done for now */ - if (napi_complete_done(napi, done)) + if (likely(napi_complete_done(napi, done))) mlx4_en_arm_cq(priv, cq); return done; } @@ -1099,11 +1120,14 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) int i, qpn; int err = 0; int good_qps = 0; + u8 flags; en_dbg(DRV, priv, "Configuring rss steering\n"); + + flags = priv->rx_ring_num == 1 ? MLX4_RESERVE_A0_QP : 0; err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num, priv->rx_ring_num, - &rss_map->base_qpn, 0); + &rss_map->base_qpn, flags); if (err) { en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num); return err; @@ -1120,13 +1144,28 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) ++good_qps; } + if (priv->rx_ring_num == 1) { + rss_map->indir_qp = &rss_map->qps[0]; + priv->base_qpn = rss_map->indir_qp->qpn; + en_info(priv, "Optimized Non-RSS steering\n"); + return 0; + } + + rss_map->indir_qp = kzalloc(sizeof(*rss_map->indir_qp), GFP_KERNEL); + if (!rss_map->indir_qp) { + err = -ENOMEM; + goto rss_err; + } + /* Configure RSS indirection qp */ - err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp, GFP_KERNEL); + err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp, + GFP_KERNEL); if (err) { en_err(priv, "Failed to allocate RSS indirection QP\n"); goto rss_err; } - rss_map->indir_qp.event = mlx4_en_sqp_event; + + rss_map->indir_qp->event = mlx4_en_sqp_event; mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn, priv->rx_ring[0]->cqn, -1, &context); @@ -1164,8 +1203,9 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) err = -EINVAL; goto indir_err; } + err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, - &rss_map->indir_qp, &rss_map->indir_state); + rss_map->indir_qp, &rss_map->indir_state); if (err) goto indir_err; @@ -1173,9 +1213,11 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) indir_err: mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, - MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); - mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); - mlx4_qp_free(mdev->dev, &rss_map->indir_qp); + MLX4_QP_STATE_RST, NULL, 0, 0, rss_map->indir_qp); + mlx4_qp_remove(mdev->dev, rss_map->indir_qp); + mlx4_qp_free(mdev->dev, rss_map->indir_qp); + kfree(rss_map->indir_qp); + rss_map->indir_qp = NULL; rss_err: for (i = 0; i < good_qps; i++) { mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], @@ -1193,10 +1235,15 @@ void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv) struct mlx4_en_rss_map *rss_map = &priv->rss_map; int i; - mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, - MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); - mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); - mlx4_qp_free(mdev->dev, &rss_map->indir_qp); + if (priv->rx_ring_num > 1) { + mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, + MLX4_QP_STATE_RST, NULL, 0, 0, + rss_map->indir_qp); + mlx4_qp_remove(mdev->dev, rss_map->indir_qp); + mlx4_qp_free(mdev->dev, rss_map->indir_qp); + kfree(rss_map->indir_qp); + rss_map->indir_qp = NULL; + } for (i = 0; i < priv->rx_ring_num; i++) { mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c index 17112faafbcc..88699b181946 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c @@ -63,8 +63,8 @@ static int mlx4_en_test_loopback_xmit(struct mlx4_en_priv *priv) skb_reserve(skb, NET_IP_ALIGN); - ethh = (struct ethhdr *)skb_put(skb, sizeof(struct ethhdr)); - packet = (unsigned char *)skb_put(skb, packet_size); + ethh = skb_put(skb, sizeof(struct ethhdr)); + packet = skb_put(skb, packet_size); memcpy(ethh->h_dest, priv->dev->dev_addr, ETH_ALEN); eth_zero_addr(ethh->h_source); ethh->h_proto = htons(ETH_P_ARP); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 6ffd1849a604..7d69d939ee2d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -234,23 +234,24 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, u8 owner) { __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); - struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; + struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE); struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; void *end = ring->buf + ring->buf_size; __be32 *ptr = (__be32 *)tx_desc; int i; /* Optimize the common case when there are no wraparounds */ - if (likely((void *)tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { + if (likely((void *)tx_desc + + (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) { /* Stamp the freed descriptor */ - for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; + for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE; i += STAMP_STRIDE) { *ptr = stamp; ptr += STAMP_DWORDS; } } else { /* Stamp the freed descriptor */ - for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; + for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE; i += STAMP_STRIDE) { *ptr = stamp; ptr += STAMP_DWORDS; @@ -265,11 +266,11 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp, + int index, u64 timestamp, int napi_mode) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; - struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; + struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE); struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset; void *end = ring->buf + ring->buf_size; struct sk_buff *skb = tx_info->skb; @@ -288,19 +289,20 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, skb_tstamp_tx(skb, &hwts); } - /* Optimize the common case when there are no wraparounds */ - if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { - if (!tx_info->inl) { - if (tx_info->linear) - dma_unmap_single(priv->ddev, - tx_info->map0_dma, - tx_info->map0_byte_count, - PCI_DMA_TODEVICE); - else - dma_unmap_page(priv->ddev, - tx_info->map0_dma, - tx_info->map0_byte_count, - PCI_DMA_TODEVICE); + if (!tx_info->inl) { + if (tx_info->linear) + dma_unmap_single(priv->ddev, + tx_info->map0_dma, + tx_info->map0_byte_count, + PCI_DMA_TODEVICE); + else + dma_unmap_page(priv->ddev, + tx_info->map0_dma, + tx_info->map0_byte_count, + PCI_DMA_TODEVICE); + /* Optimize the common case when there are no wraparounds */ + if (likely((void *)tx_desc + + (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) { for (i = 1; i < nr_maps; i++) { data++; dma_unmap_page(priv->ddev, @@ -308,23 +310,10 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, be32_to_cpu(data->byte_count), PCI_DMA_TODEVICE); } - } - } else { - if (!tx_info->inl) { - if ((void *) data >= end) { + } else { + if ((void *)data >= end) data = ring->buf + ((void *)data - end); - } - if (tx_info->linear) - dma_unmap_single(priv->ddev, - tx_info->map0_dma, - tx_info->map0_byte_count, - PCI_DMA_TODEVICE); - else - dma_unmap_page(priv->ddev, - tx_info->map0_dma, - tx_info->map0_byte_count, - PCI_DMA_TODEVICE); for (i = 1; i < nr_maps; i++) { data++; /* Check for wraparound before unmapping */ @@ -344,7 +333,7 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp, + int index, u64 timestamp, int napi_mode) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; @@ -381,8 +370,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) while (ring->cons != ring->prod) { ring->last_nr_txbb = ring->free_tx_desc(priv, ring, ring->cons & ring->size_mask, - !!(ring->cons & ring->size), 0, - 0 /* Non-NAPI caller */); + 0, 0 /* Non-NAPI caller */); ring->cons += ring->last_nr_txbb; cnt++; } @@ -396,15 +384,14 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) return cnt; } -static bool mlx4_en_process_tx_cq(struct net_device *dev, - struct mlx4_en_cq *cq, int napi_budget) +bool mlx4_en_process_tx_cq(struct net_device *dev, + struct mlx4_en_cq *cq, int napi_budget) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring]; struct mlx4_cqe *cqe; - u16 index; - u16 new_index, ring_index, stamp_index; + u16 index, ring_index, stamp_index; u32 txbbs_skipped = 0; u32 txbbs_stamp = 0; u32 cons_index = mcq->cons_index; @@ -419,7 +406,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, u32 last_nr_txbb; u32 ring_cons; - if (!priv->port_up) + if (unlikely(!priv->port_up)) return true; netdev_txq_bql_complete_prefetchw(ring->tx_queue); @@ -434,6 +421,8 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cons_index & size) && (done < budget)) { + u16 new_index; + /* * make sure we read the CQE after we read the * ownership bit @@ -464,8 +453,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, /* free next descriptor */ last_nr_txbb = ring->free_tx_desc( priv, ring, ring_index, - !!((ring_cons + txbbs_skipped) & - ring->size), timestamp, napi_budget); + timestamp, napi_budget); mlx4_en_stamp_wqe(priv, ring, stamp_index, !!((ring_cons + txbbs_stamp) & @@ -481,7 +469,6 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor; } - /* * To prevent CQ overflow we first update CQ consumer and only then * the ring consumer. @@ -494,7 +481,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb; ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped; - if (ring->free_tx_desc == mlx4_en_recycle_tx_desc) + if (cq->type == TX_XDP) return done < budget; netdev_tx_completed_queue(ring->tx_queue, packets, bytes); @@ -506,6 +493,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, netif_tx_wake_queue(ring->tx_queue); ring->wake_queue++; } + return done < budget; } @@ -526,7 +514,7 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget) struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); struct net_device *dev = cq->dev; struct mlx4_en_priv *priv = netdev_priv(dev); - int clean_complete; + bool clean_complete; clean_complete = mlx4_en_process_tx_cq(dev, cq, budget); if (!clean_complete) @@ -543,7 +531,7 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, u32 index, unsigned int desc_size) { - u32 copy = (ring->size - index) * TXBB_SIZE; + u32 copy = (ring->size - index) << LOG_TXBB_SIZE; int i; for (i = desc_size - copy - 4; i >= 0; i -= 4) { @@ -558,12 +546,12 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, if ((i & (TXBB_SIZE - 1)) == 0) wmb(); - *((u32 *) (ring->buf + index * TXBB_SIZE + i)) = + *((u32 *)(ring->buf + (index << LOG_TXBB_SIZE) + i)) = *((u32 *) (ring->bounce_buf + i)); } /* Return real descriptor location */ - return ring->buf + index * TXBB_SIZE; + return ring->buf + (index << LOG_TXBB_SIZE); } /* Decide if skb can be inlined in tx descriptor to avoid dma mapping @@ -775,37 +763,101 @@ static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring, } } +static bool mlx4_en_build_dma_wqe(struct mlx4_en_priv *priv, + struct skb_shared_info *shinfo, + struct mlx4_wqe_data_seg *data, + struct sk_buff *skb, + int lso_header_size, + __be32 mr_key, + struct mlx4_en_tx_info *tx_info) +{ + struct device *ddev = priv->ddev; + dma_addr_t dma = 0; + u32 byte_count = 0; + int i_frag; + + /* Map fragments if any */ + for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) { + const struct skb_frag_struct *frag; + + frag = &shinfo->frags[i_frag]; + byte_count = skb_frag_size(frag); + dma = skb_frag_dma_map(ddev, frag, + 0, byte_count, + DMA_TO_DEVICE); + if (dma_mapping_error(ddev, dma)) + goto tx_drop_unmap; + + data->addr = cpu_to_be64(dma); + data->lkey = mr_key; + dma_wmb(); + data->byte_count = cpu_to_be32(byte_count); + --data; + } + + /* Map linear part if needed */ + if (tx_info->linear) { + byte_count = skb_headlen(skb) - lso_header_size; + + dma = dma_map_single(ddev, skb->data + + lso_header_size, byte_count, + PCI_DMA_TODEVICE); + if (dma_mapping_error(ddev, dma)) + goto tx_drop_unmap; + + data->addr = cpu_to_be64(dma); + data->lkey = mr_key; + dma_wmb(); + data->byte_count = cpu_to_be32(byte_count); + } + /* tx completion can avoid cache line miss for common cases */ + tx_info->map0_dma = dma; + tx_info->map0_byte_count = byte_count; + + return true; + +tx_drop_unmap: + en_err(priv, "DMA mapping error\n"); + + while (++i_frag < shinfo->nr_frags) { + ++data; + dma_unmap_page(ddev, (dma_addr_t)be64_to_cpu(data->addr), + be32_to_cpu(data->byte_count), + PCI_DMA_TODEVICE); + } + + return false; +} + netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct mlx4_en_priv *priv = netdev_priv(dev); union mlx4_wqe_qpn_vlan qpn_vlan = {}; - struct device *ddev = priv->ddev; struct mlx4_en_tx_ring *ring; struct mlx4_en_tx_desc *tx_desc; struct mlx4_wqe_data_seg *data; struct mlx4_en_tx_info *tx_info; - int tx_ind = 0; + int tx_ind; int nr_txbb; int desc_size; int real_size; u32 index, bf_index; __be32 op_own; - u16 vlan_proto = 0; - int i_frag; int lso_header_size; void *fragptr = NULL; bool bounce = false; bool send_doorbell; bool stop_queue; bool inline_ok; + u8 data_offset; u32 ring_cons; bool bf_ok; tx_ind = skb_get_queue_mapping(skb); ring = priv->tx_ring[TX][tx_ind]; - if (!priv->port_up) + if (unlikely(!priv->port_up)) goto tx_drop; /* fetch ring->cons far ahead before needing it to avoid stall */ @@ -818,7 +870,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) /* Align descriptor to TXBB size */ desc_size = ALIGN(real_size, TXBB_SIZE); - nr_txbb = desc_size / TXBB_SIZE; + nr_txbb = desc_size >> LOG_TXBB_SIZE; if (unlikely(nr_txbb > MAX_DESC_TXBBS)) { if (netif_msg_tx_err(priv)) en_warn(priv, "Oversized header or SG list\n"); @@ -827,6 +879,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) bf_ok = ring->bf_enabled; if (skb_vlan_tag_present(skb)) { + u16 vlan_proto; + qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb)); vlan_proto = be16_to_cpu(skb->vlan_proto); if (vlan_proto == ETH_P_8021AD) @@ -851,7 +905,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) /* See if we have enough space for whole descriptor TXBB for setting * SW ownership on next descriptor; if not, use a bounce buffer. */ if (likely(index + nr_txbb <= ring->size)) - tx_desc = ring->buf + index * TXBB_SIZE; + tx_desc = ring->buf + (index << LOG_TXBB_SIZE); else { tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; bounce = true; @@ -863,64 +917,31 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) tx_info->skb = skb; tx_info->nr_txbb = nr_txbb; - data = &tx_desc->data; - if (lso_header_size) - data = ((void *)&tx_desc->lso + ALIGN(lso_header_size + 4, - DS_SIZE)); + if (!lso_header_size) { + data = &tx_desc->data; + data_offset = offsetof(struct mlx4_en_tx_desc, data); + } else { + int lso_align = ALIGN(lso_header_size + 4, DS_SIZE); + + data = (void *)&tx_desc->lso + lso_align; + data_offset = offsetof(struct mlx4_en_tx_desc, lso) + lso_align; + } /* valid only for none inline segments */ - tx_info->data_offset = (void *)data - (void *)tx_desc; + tx_info->data_offset = data_offset; tx_info->inl = inline_ok; - tx_info->linear = (lso_header_size < skb_headlen(skb) && - !inline_ok) ? 1 : 0; + tx_info->linear = lso_header_size < skb_headlen(skb) && !inline_ok; tx_info->nr_maps = shinfo->nr_frags + tx_info->linear; data += tx_info->nr_maps - 1; - if (!tx_info->inl) { - dma_addr_t dma = 0; - u32 byte_count = 0; - - /* Map fragments if any */ - for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) { - const struct skb_frag_struct *frag; - - frag = &shinfo->frags[i_frag]; - byte_count = skb_frag_size(frag); - dma = skb_frag_dma_map(ddev, frag, - 0, byte_count, - DMA_TO_DEVICE); - if (dma_mapping_error(ddev, dma)) - goto tx_drop_unmap; - - data->addr = cpu_to_be64(dma); - data->lkey = ring->mr_key; - dma_wmb(); - data->byte_count = cpu_to_be32(byte_count); - --data; - } - - /* Map linear part if needed */ - if (tx_info->linear) { - byte_count = skb_headlen(skb) - lso_header_size; - - dma = dma_map_single(ddev, skb->data + - lso_header_size, byte_count, - PCI_DMA_TODEVICE); - if (dma_mapping_error(ddev, dma)) - goto tx_drop_unmap; - - data->addr = cpu_to_be64(dma); - data->lkey = ring->mr_key; - dma_wmb(); - data->byte_count = cpu_to_be32(byte_count); - } - /* tx completion can avoid cache line miss for common cases */ - tx_info->map0_dma = dma; - tx_info->map0_byte_count = byte_count; - } + if (!tx_info->inl) + if (!mlx4_en_build_dma_wqe(priv, shinfo, data, skb, + lso_header_size, ring->mr_key, + tx_info)) + goto tx_drop_count; /* * For timestamping add flag to skb_shinfo and @@ -1056,16 +1077,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) } return NETDEV_TX_OK; -tx_drop_unmap: - en_err(priv, "DMA mapping error\n"); - - while (++i_frag < shinfo->nr_frags) { - ++data; - dma_unmap_page(ddev, (dma_addr_t) be64_to_cpu(data->addr), - be32_to_cpu(data->byte_count), - PCI_DMA_TODEVICE); - } - tx_drop_count: ring->tx_dropped++; tx_drop: @@ -1073,52 +1084,41 @@ tx_drop: return NETDEV_TX_OK; } +#define MLX4_EN_XDP_TX_NRTXBB 1 +#define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \ + / 16) & 0x3f) + netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_rx_alloc *frame, struct net_device *dev, unsigned int length, - int tx_ind, int *doorbell_pending) + int tx_ind, bool *doorbell_pending) { struct mlx4_en_priv *priv = netdev_priv(dev); union mlx4_wqe_qpn_vlan qpn_vlan = {}; - struct mlx4_en_tx_ring *ring; struct mlx4_en_tx_desc *tx_desc; - struct mlx4_wqe_data_seg *data; struct mlx4_en_tx_info *tx_info; - int index, bf_index; - bool send_doorbell; - int nr_txbb = 1; - bool stop_queue; + struct mlx4_wqe_data_seg *data; + struct mlx4_en_tx_ring *ring; dma_addr_t dma; - int real_size; __be32 op_own; - u32 ring_cons; - bool bf_ok; + int index; - BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE, - "mlx4_en_xmit_frame requires minimum size tx desc"); + if (unlikely(!priv->port_up)) + goto tx_drop; ring = priv->tx_ring[TX_XDP][tx_ind]; - if (!priv->port_up) - goto tx_drop; - - if (mlx4_en_is_tx_ring_full(ring)) + if (unlikely(mlx4_en_is_tx_ring_full(ring))) goto tx_drop_count; - /* fetch ring->cons far ahead before needing it to avoid stall */ - ring_cons = READ_ONCE(ring->cons); - index = ring->prod & ring->size_mask; tx_info = &ring->tx_info[index]; - bf_ok = ring->bf_enabled; - /* Track current inflight packets for performance analysis */ AVG_PERF_COUNTER(priv->pstats.inflight_avg, - (u32)(ring->prod - ring_cons - 1)); + (u32)(ring->prod - READ_ONCE(ring->cons) - 1)); - bf_index = ring->prod; - tx_desc = ring->buf + index * TXBB_SIZE; + tx_desc = ring->buf + (index << LOG_TXBB_SIZE); data = &tx_desc->data; dma = frame->dma; @@ -1127,9 +1127,9 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, frame->page = NULL; tx_info->map0_dma = dma; tx_info->map0_byte_count = PAGE_SIZE; - tx_info->nr_txbb = nr_txbb; + tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB; tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN); - tx_info->data_offset = (void *)data - (void *)tx_desc; + tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data); tx_info->ts_requested = 0; tx_info->nr_maps = 1; tx_info->linear = 1; @@ -1153,28 +1153,19 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, rx_ring->xdp_tx++; AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length); - ring->prod += nr_txbb; - - stop_queue = mlx4_en_is_tx_ring_full(ring); - send_doorbell = stop_queue || - *doorbell_pending > MLX4_EN_DOORBELL_BUDGET; - bf_ok &= send_doorbell; + ring->prod += MLX4_EN_XDP_TX_NRTXBB; - real_size = ((CTRL_SIZE + nr_txbb * DS_SIZE) / 16) & 0x3f; + qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ; - if (bf_ok) - qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); - else - qpn_vlan.fence_size = real_size; - - mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, bf_index, - op_own, bf_ok, send_doorbell); - *doorbell_pending = send_doorbell ? 0 : *doorbell_pending + 1; + mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, 0, + op_own, false, false); + *doorbell_pending = true; return NETDEV_TX_OK; tx_drop_count: rx_ring->xdp_tx_full++; + *doorbell_pending = true; tx_drop: return NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index ccae3c6593c4..457e070bca46 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2356,8 +2356,8 @@ static int mlx4_init_hca(struct mlx4_dev *dev) MLX4_A0_STEERING_TABLE_SIZE; } - mlx4_dbg(dev, "DMFS high rate steer mode is: %s\n", - dmfs_high_rate_steering_mode_str( + mlx4_info(dev, "DMFS high rate steer mode is: %s\n", + dmfs_high_rate_steering_mode_str( dev->caps.dmfs_high_steer_mode)); } } else { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 8c4f63946b14..963b77d51b48 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -72,7 +72,8 @@ #define DEF_RX_RINGS 16 #define MAX_RX_RINGS 128 #define MIN_RX_RINGS 4 -#define TXBB_SIZE 64 +#define LOG_TXBB_SIZE 6 +#define TXBB_SIZE BIT(LOG_TXBB_SIZE) #define HEADROOM (2048 / TXBB_SIZE + 1) #define STAMP_STRIDE 64 #define STAMP_DWORDS (STAMP_STRIDE / 4) @@ -115,13 +116,12 @@ #define MLX4_EN_MIN_TX_RING_P_UP 1 #define MLX4_EN_MAX_TX_RING_P_UP 32 #define MLX4_EN_NUM_UP 8 -#define MLX4_EN_DEF_TX_RING_SIZE 512 #define MLX4_EN_DEF_RX_RING_SIZE 1024 +#define MLX4_EN_DEF_TX_RING_SIZE MLX4_EN_DEF_RX_RING_SIZE #define MAX_TX_RINGS (MLX4_EN_MAX_TX_RING_P_UP * \ MLX4_EN_NUM_UP) #define MLX4_EN_DEFAULT_TX_WORK 256 -#define MLX4_EN_DOORBELL_BUDGET 8 /* Target number of packets to coalesce with interrupt moderation */ #define MLX4_EN_RX_COAL_TARGET 44 @@ -276,7 +276,7 @@ struct mlx4_en_tx_ring { struct netdev_queue *tx_queue; u32 (*free_tx_desc)(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int index, u8 owner, + int index, u64 timestamp, int napi_mode); struct mlx4_en_rx_ring *recycle_ring; @@ -359,7 +359,10 @@ struct mlx4_en_cq { struct mlx4_hwq_resources wqres; int ring; struct net_device *dev; - struct napi_struct napi; + union { + struct napi_struct napi; + bool xdp_busy; + }; int size; int buf_size; int vector; @@ -431,7 +434,7 @@ struct mlx4_en_rss_map { int base_qpn; struct mlx4_qp qps[MAX_RX_RINGS]; enum mlx4_qp_state state[MAX_RX_RINGS]; - struct mlx4_qp indir_qp; + struct mlx4_qp *indir_qp; enum mlx4_qp_state indir_state; }; @@ -689,7 +692,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_rx_alloc *frame, struct net_device *dev, unsigned int length, - int tx_ind, int *doorbell_pending); + int tx_ind, bool *doorbell_pending); void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring); bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_alloc *frame); @@ -721,13 +724,15 @@ int mlx4_en_process_rx_cq(struct net_device *dev, int budget); int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget); int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget); +bool mlx4_en_process_tx_cq(struct net_device *dev, + struct mlx4_en_cq *cq, int napi_budget); u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp, + int index, u64 timestamp, int napi_mode); u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp, + int index, u64 timestamp, int napi_mode); void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, int is_tx, int rss, int qpn, int cqn, int user_prio, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 12556c03eec4..5ad093a21a6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -15,4 +15,4 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o -mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib.o +mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 66bd213f35ce..3c95f7f53802 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -274,7 +274,6 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) } EXPORT_SYMBOL_GPL(mlx5_db_free); - void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas) { u64 addr; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 10d282841f5b..4d5bd01f1ebb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -217,7 +217,6 @@ static void free_cmd(struct mlx5_cmd_work_ent *ent) kfree(ent); } - static int verify_signature(struct mlx5_cmd_work_ent *ent) { struct mlx5_cmd_mailbox *next = ent->out->next; @@ -786,6 +785,8 @@ static void cmd_work_handler(struct work_struct *work) struct mlx5_cmd_layout *lay; struct semaphore *sem; unsigned long flags; + bool poll_cmd = ent->polling; + sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); @@ -846,7 +847,7 @@ static void cmd_work_handler(struct work_struct *work) iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell); mmiowb(); /* if not in polling don't use ent after this point */ - if (cmd->mode == CMD_MODE_POLLING) { + if (cmd->mode == CMD_MODE_POLLING || poll_cmd) { poll_timeout(ent); /* make sure we read the descriptor after ownership is SW */ rmb(); @@ -874,7 +875,7 @@ static const char *deliv_status_to_str(u8 status) case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: return "command input length error"; case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: - return "command ouput length error"; + return "command output length error"; case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: return "reserved fields not cleared"; case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: @@ -890,7 +891,7 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) struct mlx5_cmd *cmd = &dev->cmd; int err; - if (cmd->mode == CMD_MODE_POLLING) { + if (cmd->mode == CMD_MODE_POLLING || ent->polling) { wait_for_completion(&ent->done); } else if (!wait_for_completion_timeout(&ent->done, timeout)) { ent->ret = -ETIMEDOUT; @@ -918,7 +919,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, struct mlx5_cmd_msg *out, void *uout, int uout_size, mlx5_cmd_cbk_t callback, void *context, int page_queue, u8 *status, - u8 token) + u8 token, bool force_polling) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -936,6 +937,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, return PTR_ERR(ent); ent->token = token; + ent->polling = force_polling; if (!callback) init_completion(&ent->done); @@ -1001,7 +1003,6 @@ static ssize_t dbg_write(struct file *filp, const char __user *buf, return err ? err : count; } - static const struct file_operations fops = { .owner = THIS_MODULE, .open = simple_open, @@ -1153,7 +1154,7 @@ err_alloc: } static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, - struct mlx5_cmd_msg *msg) + struct mlx5_cmd_msg *msg) { struct mlx5_cmd_mailbox *head = msg->next; struct mlx5_cmd_mailbox *next; @@ -1537,7 +1538,8 @@ static int is_manage_pages(void *in) } static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, - int out_size, mlx5_cmd_cbk_t callback, void *context) + int out_size, mlx5_cmd_cbk_t callback, void *context, + bool force_polling) { struct mlx5_cmd_msg *inb; struct mlx5_cmd_msg *outb; @@ -1582,7 +1584,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, } err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, - pages_queue, &status, token); + pages_queue, &status, token, force_polling); if (err) goto out_out; @@ -1610,7 +1612,7 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, { int err; - err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL); + err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); return err ? : mlx5_cmd_check(dev, in, out); } EXPORT_SYMBOL(mlx5_cmd_exec); @@ -1619,10 +1621,22 @@ int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size, mlx5_cmd_cbk_t callback, void *context) { - return cmd_exec(dev, in, in_size, out, out_size, callback, context); + return cmd_exec(dev, in, in_size, out, out_size, callback, context, + false); } EXPORT_SYMBOL(mlx5_cmd_exec_cb); +int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, + void *out, int out_size) +{ + int err; + + err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); + + return err ? : mlx5_cmd_check(dev, in, out); +} +EXPORT_SYMBOL(mlx5_cmd_exec_polling); + static void destroy_msg_cache(struct mlx5_core_dev *dev) { struct cmd_msg_cache *ch; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index de40b6cfee95..7ecadb501743 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -168,7 +168,6 @@ static ssize_t average_read(struct file *filp, char __user *buf, size_t count, return ret; } - static ssize_t average_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { @@ -466,7 +465,6 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count, return -EINVAL; } - if (is_str) ret = snprintf(tbuf, sizeof(tbuf), "%s\n", (const char *)(unsigned long)field); else @@ -562,7 +560,6 @@ void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) rem_res_tree(qp->dbg); } - int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f4b95dbd1c7f..eef0a50e2388 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -52,8 +52,10 @@ #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) -#define MLX5E_HW2SW_MTU(hwmtu) ((hwmtu) - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) -#define MLX5E_SW2HW_MTU(swmtu) ((swmtu) + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) +#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) + +#define MLX5E_HW2SW_MTU(priv, hwmtu) ((hwmtu) - ((priv)->hard_mtu)) +#define MLX5E_SW2HW_MTU(priv, swmtu) ((swmtu) + ((priv)->hard_mtu)) #define MLX5E_MAX_NUM_TC 8 @@ -70,6 +72,8 @@ #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 #define MLX5_RX_HEADROOM NET_SKB_PAD +#define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \ (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */ @@ -213,6 +217,7 @@ struct mlx5e_cq_moder { struct mlx5e_params { u8 log_sq_size; u8 rq_wq_type; + u16 rq_headroom; u8 mpwqe_log_stride_sz; u8 mpwqe_log_num_strides; u8 log_rq_size; @@ -443,6 +448,11 @@ struct mlx5e_dma_info { dma_addr_t addr; }; +struct mlx5e_wqe_frag_info { + struct mlx5e_dma_info di; + u32 offset; +}; + struct mlx5e_umr_dma_info { __be64 *mtt; dma_addr_t mtt_addr; @@ -458,13 +468,15 @@ struct mlx5e_mpw_info { struct mlx5e_rx_am_stats { int ppms; /* packets per msec */ + int bpms; /* bytes per msec */ int epms; /* events per msec */ }; struct mlx5e_rx_am_sample { - ktime_t time; - unsigned int pkt_ctr; - u16 event_ctr; + ktime_t time; + u32 pkt_ctr; + u32 byte_ctr; + u16 event_ctr; }; struct mlx5e_rx_am { /* Adaptive Moderation */ @@ -502,7 +514,12 @@ struct mlx5e_rq { struct mlx5_wq_ll wq; union { - struct mlx5e_dma_info *dma_info; + struct { + struct mlx5e_wqe_frag_info *frag_info; + u32 frag_sz; /* max possible skb frag_sz */ + bool page_reuse; + bool xdp_xmit; + } wqe; struct { struct mlx5e_mpw_info *info; void *mtt_no_align; @@ -745,6 +762,7 @@ struct mlx5e_priv { struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; u32 tx_rates[MLX5E_MAX_NUM_SQS]; + int hard_mtu; struct mlx5e_flow_steering fs; struct mlx5e_vxlan_db vxlan; @@ -780,6 +798,7 @@ struct mlx5e_profile { void (*enable)(struct mlx5e_priv *priv); void (*disable)(struct mlx5e_priv *priv); void (*update_stats)(struct mlx5e_priv *priv); + void (*update_carrier)(struct mlx5e_priv *priv); int (*max_nch)(struct mlx5_core_dev *mdev); struct { mlx5e_fp_handle_rx_cqe handle_rx_cqe; @@ -820,7 +839,7 @@ void mlx5e_rx_am(struct mlx5e_rq *rq); void mlx5e_rx_am_work(struct work_struct *work); struct mlx5e_cq_moder mlx5e_am_get_def_profile(u8 rx_cq_period_mode); -void mlx5e_update_stats(struct mlx5e_priv *priv); +void mlx5e_update_stats(struct mlx5e_priv *priv, bool full); int mlx5e_create_flow_steering(struct mlx5e_priv *priv); void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); @@ -847,8 +866,8 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv); void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv); void mlx5e_pps_event_handler(struct mlx5e_priv *priv, struct ptp_clock_event *event); -int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr); -int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr); +int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr); +int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr); int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, @@ -1019,6 +1038,31 @@ int mlx5e_open(struct net_device *netdev); void mlx5e_update_stats_work(struct work_struct *work); u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); +/* ethtool helpers */ +void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, + struct ethtool_drvinfo *drvinfo); +void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, + uint32_t stringset, uint8_t *data); +int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset); +void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, + struct ethtool_stats *stats, u64 *data); +void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param); +int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param); +void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch); +int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch); +int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal); +int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal); +int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, + struct ethtool_ts_info *info); +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash); + /* mlx5e generic netdev management API */ struct net_device* mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index f4017c06ddd2..12d3ced61114 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -178,6 +178,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, struct mlx5_flow_destination dest; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_spec *spec; + enum mlx5e_traffic_types tt; int err = 0; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); @@ -187,24 +188,16 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, } dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - switch (type) { - case ARFS_IPV4_TCP: - dest.tir_num = tir[MLX5E_TT_IPV4_TCP].tirn; - break; - case ARFS_IPV4_UDP: - dest.tir_num = tir[MLX5E_TT_IPV4_UDP].tirn; - break; - case ARFS_IPV6_TCP: - dest.tir_num = tir[MLX5E_TT_IPV6_TCP].tirn; - break; - case ARFS_IPV6_UDP: - dest.tir_num = tir[MLX5E_TT_IPV6_UDP].tirn; - break; - default: + tt = arfs_get_tt(type); + if (tt == -EINVAL) { + netdev_err(priv->netdev, "%s: bad arfs_type: %d\n", + __func__, type); err = -EINVAL; goto out; } + dest.tir_num = tir[tt].tirn; + arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec, &flow_act, &dest, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index e29494464cae..66f432385dbb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -86,9 +86,8 @@ static void mlx5e_timestamp_overflow(struct work_struct *work) schedule_delayed_work(&tstamp->overflow_work, tstamp->overflow_period); } -int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) +int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) { - struct mlx5e_priv *priv = netdev_priv(dev); struct hwtstamp_config config; int err; @@ -130,10 +129,10 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: /* Disable CQE compression */ - netdev_warn(dev, "Disabling cqe compression"); + netdev_warn(priv->netdev, "Disabling cqe compression"); err = mlx5e_modify_rx_cqe_compression_locked(priv, false); if (err) { - netdev_err(dev, "Failed disabling cqe compression err=%d\n", err); + netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); mutex_unlock(&priv->state_lock); return err; } @@ -151,9 +150,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) sizeof(config)) ? -EFAULT : 0; } -int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr) +int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) { - struct mlx5e_priv *priv = netdev_priv(dev); struct hwtstamp_config *cfg = &priv->tstamp.hwtstamp_config; if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 46e56ec4c26f..ece3fb147e3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -145,7 +145,6 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) int inlen; void *in; - inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = kvzalloc(inlen, GFP_KERNEL); if (!in) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index b4514f247402..16b1e96a7050 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -32,10 +32,9 @@ #include "en.h" -static void mlx5e_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *drvinfo) +void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, + struct ethtool_drvinfo *drvinfo) { - struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; strlcpy(drvinfo->driver, DRIVER_NAME, sizeof(drvinfo->driver)); @@ -49,6 +48,14 @@ static void mlx5e_get_drvinfo(struct net_device *dev, sizeof(drvinfo->bus_info)); } +static void mlx5e_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_drvinfo(priv, drvinfo); +} + struct ptys2ethtool_config { __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); @@ -135,6 +142,9 @@ static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) u8 pfc_en_rx; int err; + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx); return err ? 0 : pfc_en_tx | pfc_en_rx; @@ -147,6 +157,9 @@ static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) u32 tx_pause; int err; + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return false; + err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); return err ? false : rx_pause | tx_pause; @@ -160,9 +173,8 @@ static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) ((mlx5e_query_global_pause_combined(priv) + hweight8(mlx5e_query_pfc_combined(priv))) * \ NUM_PPORT_PER_PRIO_PFC_COUNTERS) -static int mlx5e_get_sset_count(struct net_device *dev, int sset) +int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) { - struct mlx5e_priv *priv = netdev_priv(dev); switch (sset) { case ETH_SS_STATS: @@ -186,6 +198,13 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) } } +static int mlx5e_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_get_sset_count(priv, sset); +} + static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) { int i, j, tc, prio, idx = 0; @@ -273,10 +292,9 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) priv->channel_tc2txq[i][tc]); } -static void mlx5e_get_strings(struct net_device *dev, - uint32_t stringset, uint8_t *data) +void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, + uint32_t stringset, uint8_t *data) { - struct mlx5e_priv *priv = netdev_priv(dev); int i; switch (stringset) { @@ -297,10 +315,17 @@ static void mlx5e_get_strings(struct net_device *dev, } } -static void mlx5e_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data) +static void mlx5e_get_strings(struct net_device *dev, + uint32_t stringset, uint8_t *data) { struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_strings(priv, stringset, data); +} + +void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, + struct ethtool_stats *stats, u64 *data) +{ struct mlx5e_channels *channels; struct mlx5_priv *mlx5_priv; int i, j, tc, prio, idx = 0; @@ -311,7 +336,7 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, mutex_lock(&priv->state_lock); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_update_stats(priv); + mlx5e_update_stats(priv, true); channels = &priv->channels; mutex_unlock(&priv->state_lock); @@ -395,6 +420,15 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, sq_stats_desc, j); } +static void mlx5e_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + u64 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_ethtool_stats(priv, stats, data); +} + static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type, int num_wqe) { @@ -439,10 +473,9 @@ static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type, return 1 << (order_base_2(num_wqes)); } -static void mlx5e_get_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) +void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param) { - struct mlx5e_priv *priv = netdev_priv(dev); int rq_wq_type = priv->channels.params.rq_wq_type; param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, @@ -453,10 +486,17 @@ static void mlx5e_get_ringparam(struct net_device *dev, param->tx_pending = 1 << priv->channels.params.log_sq_size; } -static int mlx5e_set_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) +static void mlx5e_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_ringparam(priv, param); +} + +int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param) +{ int rq_wq_type = priv->channels.params.rq_wq_type; struct mlx5e_channels new_channels = {}; u32 rx_pending_wqes; @@ -468,12 +508,12 @@ static int mlx5e_set_ringparam(struct net_device *dev, int err = 0; if (param->rx_jumbo_pending) { - netdev_info(dev, "%s: rx_jumbo_pending not supported\n", + netdev_info(priv->netdev, "%s: rx_jumbo_pending not supported\n", __func__); return -EINVAL; } if (param->rx_mini_pending) { - netdev_info(dev, "%s: rx_mini_pending not supported\n", + netdev_info(priv->netdev, "%s: rx_mini_pending not supported\n", __func__); return -EINVAL; } @@ -486,13 +526,13 @@ static int mlx5e_set_ringparam(struct net_device *dev, param->rx_pending); if (param->rx_pending < min_rq_size) { - netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n", + netdev_info(priv->netdev, "%s: rx_pending (%d) < min (%d)\n", __func__, param->rx_pending, min_rq_size); return -EINVAL; } if (param->rx_pending > max_rq_size) { - netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n", + netdev_info(priv->netdev, "%s: rx_pending (%d) > max (%d)\n", __func__, param->rx_pending, max_rq_size); return -EINVAL; @@ -501,19 +541,19 @@ static int mlx5e_set_ringparam(struct net_device *dev, num_mtts = MLX5E_REQUIRED_MTTS(rx_pending_wqes); if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && !MLX5E_VALID_NUM_MTTS(num_mtts)) { - netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n", + netdev_info(priv->netdev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n", __func__, param->rx_pending); return -EINVAL; } if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { - netdev_info(dev, "%s: tx_pending (%d) < min (%d)\n", + netdev_info(priv->netdev, "%s: tx_pending (%d) < min (%d)\n", __func__, param->tx_pending, 1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); return -EINVAL; } if (param->tx_pending > (1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE)) { - netdev_info(dev, "%s: tx_pending (%d) > max (%d)\n", + netdev_info(priv->netdev, "%s: tx_pending (%d) > max (%d)\n", __func__, param->tx_pending, 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE); return -EINVAL; @@ -549,26 +589,39 @@ unlock: return err; } -static void mlx5e_get_channels(struct net_device *dev, - struct ethtool_channels *ch) +static int mlx5e_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); + return mlx5e_ethtool_set_ringparam(priv, param); +} + +void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch) +{ ch->max_combined = priv->profile->max_nch(priv->mdev); ch->combined_count = priv->channels.params.num_channels; } -static int mlx5e_set_channels(struct net_device *dev, - struct ethtool_channels *ch) +static void mlx5e_get_channels(struct net_device *dev, + struct ethtool_channels *ch) { struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_channels(priv, ch); +} + +int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch) +{ unsigned int count = ch->combined_count; struct mlx5e_channels new_channels = {}; bool arfs_enabled; int err = 0; if (!count) { - netdev_info(dev, "%s: combined_count=0 not supported\n", + netdev_info(priv->netdev, "%s: combined_count=0 not supported\n", __func__); return -EINVAL; } @@ -593,7 +646,7 @@ static int mlx5e_set_channels(struct net_device *dev, if (err) goto out; - arfs_enabled = dev->features & NETIF_F_NTUPLE; + arfs_enabled = priv->netdev->features & NETIF_F_NTUPLE; if (arfs_enabled) mlx5e_arfs_disable(priv); @@ -603,7 +656,7 @@ static int mlx5e_set_channels(struct net_device *dev, if (arfs_enabled) { err = mlx5e_arfs_enable(priv); if (err) - netdev_err(dev, "%s: mlx5e_arfs_enable failed: %d\n", + netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n", __func__, err); } @@ -613,11 +666,17 @@ out: return err; } -static int mlx5e_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *coal) +static int mlx5e_set_channels(struct net_device *dev, + struct ethtool_channels *ch) { - struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_priv *priv = netdev_priv(dev); + return mlx5e_ethtool_set_channels(priv, ch); +} + +int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal) +{ if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -EOPNOTSUPP; @@ -630,6 +689,14 @@ static int mlx5e_get_coalesce(struct net_device *netdev, return 0; } +static int mlx5e_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_coalesce(priv, coal); +} + static void mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { @@ -653,10 +720,9 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc } } -static int mlx5e_set_coalesce(struct net_device *netdev, - struct ethtool_coalesce *coal) +int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; int err = 0; @@ -699,6 +765,14 @@ out: return err; } +static int mlx5e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_coalesce(priv, coal); +} + static void ptys2ethtool_supported_link(unsigned long *supported_modes, u32 eth_proto_cap) { @@ -1298,13 +1372,12 @@ static int mlx5e_set_pauseparam(struct net_device *netdev, return err; } -static int mlx5e_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) +int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, + struct ethtool_ts_info *info) { - struct mlx5e_priv *priv = netdev_priv(dev); int ret; - ret = ethtool_op_get_ts_info(dev, info); + ret = ethtool_op_get_ts_info(priv->netdev, info); if (ret) return ret; @@ -1318,15 +1391,23 @@ static int mlx5e_get_ts_info(struct net_device *dev, SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; - info->tx_types = (BIT(1) << HWTSTAMP_TX_OFF) | - (BIT(1) << HWTSTAMP_TX_ON); + info->tx_types = BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); - info->rx_filters = (BIT(1) << HWTSTAMP_FILTER_NONE) | - (BIT(1) << HWTSTAMP_FILTER_ALL); + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); return 0; } +static int mlx5e_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_get_ts_info(priv, info); +} + static __u32 mlx5e_get_wol_supported(struct mlx5_core_dev *mdev) { __u32 ret = 0; @@ -1714,6 +1795,40 @@ static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) return err; } +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *dev = priv->netdev; + const struct firmware *fw; + int err; + + if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) + return -EOPNOTSUPP; + + err = request_firmware_direct(&fw, flash->data, &dev->dev); + if (err) + return err; + + dev_hold(dev); + rtnl_unlock(); + + err = mlx5_firmware_flash(mdev, fw); + release_firmware(fw); + + rtnl_lock(); + dev_put(dev); + return err; +} + +static int mlx5e_flash_device(struct net_device *dev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1734,6 +1849,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_rxfh = mlx5e_set_rxfh, .get_rxnfc = mlx5e_get_rxnfc, .set_rxnfc = mlx5e_set_rxnfc, + .flash_device = mlx5e_flash_device, .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, .get_pauseparam = mlx5e_get_pauseparam, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 7acc4fba7ece..dfccb5305e9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -170,7 +170,6 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: rule_p = &priv->fs.vlan.untagged_rule; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5afec0f4a658..9f99f624004f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -96,9 +96,12 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, params->log_rq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + params->rq_headroom = params->xdp_prog ? + XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; + params->rq_headroom += NET_IP_ALIGN; /* Extra room needed for build_skb */ - params->lro_wqe_sz -= MLX5_RX_HEADROOM + + params->lro_wqe_sz -= params->rq_headroom + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } @@ -124,7 +127,8 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv) u8 port_state; port_state = mlx5_query_vport_state(mdev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, + 0); if (port_state == VPORT_STATE_UP) { netdev_info(priv->netdev, "Link up\n"); @@ -142,7 +146,8 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_lock(&priv->state_lock); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_update_carrier(priv); + if (priv->profile->update_carrier) + priv->profile->update_carrier(priv); mutex_unlock(&priv->state_lock); } @@ -195,6 +200,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_buff_alloc_err += rq_stats->buff_alloc_err; s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; + s->rx_page_reuse += rq_stats->page_reuse; s->rx_cache_reuse += rq_stats->cache_reuse; s->rx_cache_full += rq_stats->cache_full; s->rx_cache_empty += rq_stats->cache_empty; @@ -243,18 +249,14 @@ static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); } -static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) +static void mlx5e_update_pport_counters(struct mlx5e_priv *priv, bool full) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); int prio; void *out; - u32 *in; - - in = kvzalloc(sz, GFP_KERNEL); - if (!in) - return; MLX5_SET(ppcnt_reg, in, local_port, 1); @@ -262,6 +264,9 @@ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + if (!full) + return; + out = pstats->RFC_2863_counters; MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); @@ -287,52 +292,55 @@ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } - - kvfree(in); } static void mlx5e_update_q_counter(struct mlx5e_priv *priv) { struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; + u32 out[MLX5_ST_SZ_DW(query_q_counter_out)]; + int err; if (!priv->q_counter) return; - mlx5_core_query_out_of_buffer(priv->mdev, priv->q_counter, - &qcnt->rx_out_of_buffer); + err = mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out, sizeof(out)); + if (err) + return; + + qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, out, out_of_buffer); } static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv) { struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(mpcnt_reg)] = {0}; int sz = MLX5_ST_SZ_BYTES(mpcnt_reg); void *out; - u32 *in; if (!MLX5_CAP_MCAM_FEATURE(mdev, pcie_performance_group)) return; - in = kvzalloc(sz, GFP_KERNEL); - if (!in) - return; - out = pcie_stats->pcie_perf_counters; MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); - - kvfree(in); } -void mlx5e_update_stats(struct mlx5e_priv *priv) +void mlx5e_update_stats(struct mlx5e_priv *priv, bool full) { - mlx5e_update_pcie_counters(priv); - mlx5e_update_pport_counters(priv); + if (full) + mlx5e_update_pcie_counters(priv); + mlx5e_update_pport_counters(priv, full); mlx5e_update_vport_counters(priv); mlx5e_update_q_counter(priv); mlx5e_update_sw_counters(priv); } +static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) +{ + mlx5e_update_stats(priv, false); +} + void mlx5e_update_stats_work(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); @@ -543,7 +551,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); u32 byte_count; - u32 frag_sz; int npages; int wq_sz; int err; @@ -575,13 +582,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - if (rq->xdp_prog) { - rq->buff.map_dir = DMA_BIDIRECTIONAL; - rq->rx_headroom = XDP_PACKET_HEADROOM; - } else { - rq->buff.map_dir = DMA_FROM_DEVICE; - rq->rx_headroom = MLX5_RX_HEADROOM; - } + rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + rq->rx_headroom = params->rq_headroom; switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -612,9 +614,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_destroy_umr_mkey; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - rq->dma_info = kzalloc_node(wq_sz * sizeof(*rq->dma_info), - GFP_KERNEL, cpu_to_node(c->cpu)); - if (!rq->dma_info) { + rq->wqe.frag_info = + kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->wqe.frag_info) { err = -ENOMEM; goto err_rq_wq_destroy; } @@ -623,7 +626,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe; if (!rq->handle_rx_cqe) { - kfree(rq->dma_info); + kfree(rq->wqe.frag_info); err = -EINVAL; netdev_err(c->netdev, "RX handler of RQ is not set, err %d\n", err); goto err_rq_wq_destroy; @@ -631,16 +634,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->buff.wqe_sz = params->lro_en ? params->lro_wqe_sz : - MLX5E_SW2HW_MTU(c->netdev->mtu); + MLX5E_SW2HW_MTU(c->priv, c->netdev->mtu); + rq->wqe.page_reuse = !params->xdp_prog && !params->lro_en; byte_count = rq->buff.wqe_sz; /* calc the required page order */ - frag_sz = rq->rx_headroom + - byte_count /* packet data */ + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - frag_sz = SKB_DATA_ALIGN(frag_sz); - - npages = DIV_ROUND_UP(frag_sz, PAGE_SIZE); + rq->wqe.frag_sz = MLX5_SKB_FRAG_SZ(rq->rx_headroom + byte_count); + npages = DIV_ROUND_UP(rq->wqe.frag_sz, PAGE_SIZE); rq->buff.page_order = order_base_2(npages); byte_count |= MLX5_HW_START_PADDING; @@ -685,7 +685,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - kfree(rq->dma_info); + kfree(rq->wqe.frag_info); } for (i = rq->page_cache.head; i != rq->page_cache.tail; @@ -867,6 +867,16 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) mlx5_wq_ll_pop(&rq->wq, wqe_ix_be, &wqe->next.next_wqe_index); } + + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST && rq->wqe.page_reuse) { + /* Clean outstanding pages on handled WQEs that decided to do page-reuse, + * but yet to be re-posted. + */ + int wq_sz = mlx5_wq_ll_get_size(&rq->wq); + + for (wqe_ix = 0; wqe_ix < wq_sz; wqe_ix++) + rq->dealloc_wqe(rq, wqe_ix); + } } static int mlx5e_open_rq(struct mlx5e_channel *c, @@ -2464,7 +2474,7 @@ free_in: static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu) { struct mlx5_core_dev *mdev = priv->mdev; - u16 hw_mtu = MLX5E_SW2HW_MTU(mtu); + u16 hw_mtu = MLX5E_SW2HW_MTU(priv, mtu); int err; err = mlx5_set_port_mtu(mdev, hw_mtu, 1); @@ -2486,7 +2496,7 @@ static void mlx5e_query_mtu(struct mlx5e_priv *priv, u16 *mtu) if (err || !hw_mtu) /* fallback to port oper mtu */ mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1); - *mtu = MLX5E_HW2SW_MTU(hw_mtu); + *mtu = MLX5E_HW2SW_MTU(priv, hw_mtu); } static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) @@ -2595,9 +2605,10 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, { struct net_device *netdev = priv->netdev; int new_num_txqs; - + int carrier_ok; new_num_txqs = new_chs->num * new_chs->params.num_tc; + carrier_ok = netif_carrier_ok(netdev); netif_carrier_off(netdev); if (new_num_txqs < netdev->real_num_tx_queues) @@ -2615,7 +2626,9 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, mlx5e_refresh_tirs(priv, false); mlx5e_activate_priv_channels(priv); - mlx5e_update_carrier(priv); + /* return carrier back if needed */ + if (carrier_ok) + netif_carrier_on(netdev); } int mlx5e_open_locked(struct net_device *netdev) @@ -2631,7 +2644,8 @@ int mlx5e_open_locked(struct net_device *netdev) mlx5e_refresh_tirs(priv, false); mlx5e_activate_priv_channels(priv); - mlx5e_update_carrier(priv); + if (priv->profile->update_carrier) + priv->profile->update_carrier(priv); mlx5e_timestamp_init(priv); if (priv->profile->update_stats) @@ -3067,7 +3081,6 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) */ stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); - } static void mlx5e_set_rx_mode(struct net_device *dev) @@ -3310,11 +3323,13 @@ out: static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { + struct mlx5e_priv *priv = netdev_priv(dev); + switch (cmd) { case SIOCSHWTSTAMP: - return mlx5e_hwstamp_set(dev, ifr); + return mlx5e_hwstamp_set(priv, ifr); case SIOCGHWTSTAMP: - return mlx5e_hwstamp_get(dev, ifr); + return mlx5e_hwstamp_get(priv, ifr); default: return -EOPNOTSUPP; } @@ -3599,11 +3614,19 @@ unlock: return err; } -static bool mlx5e_xdp_attached(struct net_device *dev) +static u32 mlx5e_xdp_query(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); + const struct bpf_prog *xdp_prog; + u32 prog_id = 0; - return !!priv->channels.params.xdp_prog; + mutex_lock(&priv->state_lock); + xdp_prog = priv->channels.params.xdp_prog; + if (xdp_prog) + prog_id = xdp_prog->aux->id; + mutex_unlock(&priv->state_lock); + + return prog_id; } static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) @@ -3612,7 +3635,8 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) case XDP_SETUP_PROG: return mlx5e_xdp_set(dev, xdp->prog); case XDP_QUERY_PROG: - xdp->prog_attached = mlx5e_xdp_attached(dev); + xdp->prog_id = mlx5e_xdp_query(dev); + xdp->prog_attached = !!xdp->prog_id; return 0; default: return -EINVAL; @@ -3718,7 +3742,7 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable)) mlx5_core_warn(mdev, "Self loop back prevention is not supported\n"); if (!MLX5_CAP_GEN(mdev, cq_moderation)) - mlx5_core_warn(mdev, "CQ modiration is not supported\n"); + mlx5_core_warn(mdev, "CQ moderation is not supported\n"); return 0; } @@ -3851,7 +3875,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* set CQE compression */ params->rx_cqe_compress_def = false; if (MLX5_CAP_GEN(mdev, cqe_compression) && - MLX5_CAP_GEN(mdev, vport_group_manager)) + MLX5_CAP_GEN(mdev, vport_group_manager)) params->rx_cqe_compress_def = cqe_compress_heuristic(link_speed, pci_bw); MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); @@ -3860,6 +3884,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, mlx5e_set_rq_params(mdev, params); /* HW LRO */ + /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) params->lro_en = hw_lro_heuristic(link_speed, pci_bw); @@ -3900,6 +3925,7 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->netdev = netdev; priv->profile = profile; priv->ppriv = ppriv; + priv->hard_mtu = MLX5E_ETH_HARD_MTU; mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev)); @@ -4145,7 +4171,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) /* MTU range: 68 - hw-specific max */ netdev->min_mtu = ETH_MIN_MTU; mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); - netdev->max_mtu = MLX5E_HW2SW_MTU(max_mtu); + netdev->max_mtu = MLX5E_HW2SW_MTU(priv, max_mtu); mlx5e_set_dev_port_mtu(priv); mlx5_lag_add(mdev, netdev); @@ -4202,8 +4228,9 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .cleanup_tx = mlx5e_cleanup_nic_tx, .enable = mlx5e_nic_enable, .disable = mlx5e_nic_disable, - .update_stats = mlx5e_update_stats, + .update_stats = mlx5e_update_ndo_stats, .max_nch = mlx5e_get_max_num_channels, + .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe, .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = MLX5E_MAX_NUM_TC, @@ -4244,7 +4271,8 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, return netdev; err_cleanup_nic: - profile->cleanup(priv); + if (profile->cleanup) + profile->cleanup(priv); free_netdev(netdev); return NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 70c2b8d020bd..45e60be9c277 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -796,6 +796,8 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); params->num_tc = 1; params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); } static void mlx5e_build_rep_netdev(struct net_device *netdev) @@ -833,6 +835,9 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev, INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); priv->channels.params.num_channels = profile->max_nch(mdev); + + priv->hard_mtu = MLX5E_ETH_HARD_MTU; + mlx5e_build_rep_params(mdev, &priv->channels.params); mlx5e_build_rep_netdev(netdev); } @@ -916,6 +921,7 @@ static struct mlx5e_profile mlx5e_rep_profile = { .cleanup_tx = mlx5e_cleanup_nic_tx, .update_stats = mlx5e_rep_update_stats, .max_nch = mlx5e_get_rep_max_num_channels, + .update_carrier = NULL, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, .rx_handlers.handle_rx_cqe_mpwqe = NULL /* Not supported */, .max_tc = 1, @@ -1019,7 +1025,6 @@ err_destroy_netdev: mlx5e_destroy_netdev(netdev_priv(netdev)); kfree(rpriv); return err; - } static void diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 66b5fec15313..5f3c138c948d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -40,7 +40,7 @@ #include "en_tc.h" #include "eswitch.h" #include "en_rep.h" -#include "ipoib.h" +#include "ipoib/ipoib.h" static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) { @@ -160,6 +160,11 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, #define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT) +static inline bool mlx5e_page_is_reserved(struct page *page) +{ + return page_is_pfmemalloc(page) || page_to_nid(page) != numa_node_id(); +} + static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { @@ -238,22 +243,54 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, put_page(dma_info->page); } +static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *wi) +{ + return rq->wqe.page_reuse && wi->di.page && + (wi->offset + rq->wqe.frag_sz <= RQ_PAGE_SIZE(rq)) && + !mlx5e_page_is_reserved(wi->di.page); +} + int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { - struct mlx5e_dma_info *di = &rq->dma_info[ix]; + struct mlx5e_wqe_frag_info *wi = &rq->wqe.frag_info[ix]; - if (unlikely(mlx5e_page_alloc_mapped(rq, di))) - return -ENOMEM; + /* check if page exists, hence can be reused */ + if (!wi->di.page) { + if (unlikely(mlx5e_page_alloc_mapped(rq, &wi->di))) + return -ENOMEM; + wi->offset = 0; + } - wqe->data.addr = cpu_to_be64(di->addr + rq->rx_headroom); + wqe->data.addr = cpu_to_be64(wi->di.addr + wi->offset + + rq->rx_headroom); return 0; } +static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *wi) +{ + mlx5e_page_release(rq, &wi->di, true); + wi->di.page = NULL; +} + +static inline void mlx5e_free_rx_wqe_reuse(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *wi) +{ + if (mlx5e_page_reuse(rq, wi)) { + rq->stats.page_reuse++; + return; + } + + mlx5e_free_rx_wqe(rq, wi); +} + void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_dma_info *di = &rq->dma_info[ix]; + struct mlx5e_wqe_frag_info *wi = &rq->wqe.frag_info[ix]; - mlx5e_page_release(rq, di, true); + if (wi->di.page) + mlx5e_free_rx_wqe(rq, wi); } static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) @@ -648,9 +685,8 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, prefetchw(wqe); if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || - MLX5E_SW2HW_MTU(rq->netdev->mtu) < dma_len)) { + MLX5E_SW2HW_MTU(rq->channel->priv, rq->netdev->mtu) < dma_len)) { rq->stats.xdp_drop++; - mlx5e_page_release(rq, di, true); return false; } @@ -661,7 +697,6 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, sq->db.doorbell = false; } rq->stats.xdp_tx_full++; - mlx5e_page_release(rq, di, true); return false; } @@ -686,10 +721,15 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); + /* move page to reference to sq responsibility, + * and mark so it's not put back in page-cache. + */ + rq->wqe.xdp_xmit = true; sq->db.di[pi] = *di; sq->pc++; sq->db.doorbell = true; + rq->stats.xdp_tx++; return true; } @@ -726,35 +766,34 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq, trace_xdp_exception(rq->netdev, prog, act); case XDP_DROP: rq->stats.xdp_drop++; - mlx5e_page_release(rq, di, true); return true; } } static inline struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, - u16 wqe_counter, u32 cqe_bcnt) + struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt) { - struct mlx5e_dma_info *di; + struct mlx5e_dma_info *di = &wi->di; struct sk_buff *skb; void *va, *data; u16 rx_headroom = rq->rx_headroom; bool consumed; + u32 frag_size; - di = &rq->dma_info[wqe_counter]; - va = page_address(di->page); + va = page_address(di->page) + wi->offset; data = va + rx_headroom; + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); dma_sync_single_range_for_cpu(rq->pdev, - di->addr, - rx_headroom, - rq->buff.wqe_sz, + di->addr + wi->offset, + 0, frag_size, DMA_FROM_DEVICE); prefetch(data); + wi->offset += frag_size; if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { rq->stats.wqe_err++; - mlx5e_page_release(rq, di, true); return NULL; } @@ -764,16 +803,14 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, if (consumed) return NULL; /* page/packet was consumed by XDP */ - skb = build_skb(va, RQ_PAGE_SIZE(rq)); + skb = build_skb(va, frag_size); if (unlikely(!skb)) { rq->stats.buff_alloc_err++; - mlx5e_page_release(rq, di, true); return NULL; } - /* queue up for recycling ..*/ + /* queue up for recycling/reuse */ page_ref_inc(di->page); - mlx5e_page_release(rq, di, true); skb_reserve(skb, rx_headroom); skb_put(skb, cqe_bcnt); @@ -783,6 +820,7 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { + struct mlx5e_wqe_frag_info *wi; struct mlx5e_rx_wqe *wqe; __be16 wqe_counter_be; struct sk_buff *skb; @@ -792,15 +830,27 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + wi = &rq->wqe.frag_info[wqe_counter]; cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); - if (!skb) + skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt); + if (!skb) { + /* probably for XDP */ + if (rq->wqe.xdp_xmit) { + wi->di.page = NULL; + rq->wqe.xdp_xmit = false; + /* do not return page to cache, it will be returned on XDP_TX completion */ + goto wq_ll_pop; + } + /* probably an XDP_DROP, save the page-reuse checks */ + mlx5e_free_rx_wqe(rq, wi); goto wq_ll_pop; + } mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); napi_gro_receive(rq->cq.napi, skb); + mlx5e_free_rx_wqe_reuse(rq, wi); wq_ll_pop: mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, &wqe->next.next_wqe_index); @@ -812,6 +862,7 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5e_wqe_frag_info *wi; struct mlx5e_rx_wqe *wqe; struct sk_buff *skb; __be16 wqe_counter_be; @@ -821,11 +872,21 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + wi = &rq->wqe.frag_info[wqe_counter]; cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); - if (!skb) + skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt); + if (!skb) { + if (rq->wqe.xdp_xmit) { + wi->di.page = NULL; + rq->wqe.xdp_xmit = false; + /* do not return page to cache, it will be returned on XDP_TX completion */ + goto wq_ll_pop; + } + /* probably an XDP_DROP, save the page-reuse checks */ + mlx5e_free_rx_wqe(rq, wi); goto wq_ll_pop; + } mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); @@ -834,6 +895,7 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) napi_gro_receive(rq->cq.napi, skb); + mlx5e_free_rx_wqe_reuse(rq, wi); wq_ll_pop: mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, &wqe->next.next_wqe_index); @@ -1038,11 +1100,7 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) #ifdef CONFIG_MLX5_CORE_IPOIB #define MLX5_IB_GRH_DGID_OFFSET 24 -#define MLX5_IB_GRH_BYTES 40 -#define MLX5_IPOIB_ENCAP_LEN 4 #define MLX5_GID_SIZE 16 -#define MLX5_IPOIB_PSEUDO_LEN 20 -#define MLX5_IPOIB_HARD_LEN (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN) static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, @@ -1050,6 +1108,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, struct sk_buff *skb) { struct net_device *netdev = rq->netdev; + struct mlx5e_tstamp *tstamp = rq->tstamp; char *pseudo_header; u8 *dgid; u8 g; @@ -1074,6 +1133,9 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + if (unlikely(mlx5e_rx_hw_stamp(tstamp))) + mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb)); + skb_record_rx_queue(skb, rq->ix); if (likely(netdev->features & NETIF_F_RXHASH)) @@ -1094,6 +1156,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { + struct mlx5e_wqe_frag_info *wi; struct mlx5e_rx_wqe *wqe; __be16 wqe_counter_be; struct sk_buff *skb; @@ -1103,16 +1166,18 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + wi = &rq->wqe.frag_info[wqe_counter]; cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); + skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt); if (!skb) - goto wq_ll_pop; + goto wq_free_wqe; mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); napi_gro_receive(rq->cq.napi, skb); -wq_ll_pop: +wq_free_wqe: + mlx5e_free_rx_wqe_reuse(rq, wi); mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, &wqe->next.next_wqe_index); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c index 02dd3a95ed8f..acf32fe952cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c @@ -183,28 +183,27 @@ static void mlx5e_am_exit_parking(struct mlx5e_rx_am *am) mlx5e_am_step(am); } +#define IS_SIGNIFICANT_DIFF(val, ref) \ + (((100 * abs((val) - (ref))) / (ref)) > 10) /* more than 10% difference */ + static int mlx5e_am_stats_compare(struct mlx5e_rx_am_stats *curr, struct mlx5e_rx_am_stats *prev) { - int diff; - - if (!prev->ppms) - return curr->ppms ? MLX5E_AM_STATS_BETTER : + if (!prev->bpms) + return curr->bpms ? MLX5E_AM_STATS_BETTER : MLX5E_AM_STATS_SAME; - diff = curr->ppms - prev->ppms; - if (((100 * abs(diff)) / prev->ppms) > 10) /* more than 10% diff */ - return (diff > 0) ? MLX5E_AM_STATS_BETTER : - MLX5E_AM_STATS_WORSE; + if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms)) + return (curr->bpms > prev->bpms) ? MLX5E_AM_STATS_BETTER : + MLX5E_AM_STATS_WORSE; - if (!prev->epms) - return curr->epms ? MLX5E_AM_STATS_WORSE : - MLX5E_AM_STATS_SAME; + if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms)) + return (curr->ppms > prev->ppms) ? MLX5E_AM_STATS_BETTER : + MLX5E_AM_STATS_WORSE; - diff = curr->epms - prev->epms; - if (((100 * abs(diff)) / prev->epms) > 10) /* more than 10% diff */ - return (diff < 0) ? MLX5E_AM_STATS_BETTER : - MLX5E_AM_STATS_WORSE; + if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms)) + return (curr->epms < prev->epms) ? MLX5E_AM_STATS_BETTER : + MLX5E_AM_STATS_WORSE; return MLX5E_AM_STATS_SAME; } @@ -266,10 +265,13 @@ static void mlx5e_am_sample(struct mlx5e_rq *rq, { s->time = ktime_get(); s->pkt_ctr = rq->stats.packets; + s->byte_ctr = rq->stats.bytes; s->event_ctr = rq->cq.event_ctr; } #define MLX5E_AM_NEVENTS 64 +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) +#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1)) static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start, struct mlx5e_rx_am_sample *end, @@ -277,13 +279,17 @@ static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start, { /* u32 holds up to 71 minutes, should be enough */ u32 delta_us = ktime_us_delta(end->time, start->time); - unsigned int npkts = end->pkt_ctr - start->pkt_ctr; + u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr); + u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr, + start->byte_ctr); if (!delta_us) return; - curr_stats->ppms = (npkts * USEC_PER_MSEC) / delta_us; - curr_stats->epms = (MLX5E_AM_NEVENTS * USEC_PER_MSEC) / delta_us; + curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); + curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); + curr_stats->epms = DIV_ROUND_UP(MLX5E_AM_NEVENTS * USEC_PER_MSEC, + delta_us); } void mlx5e_rx_am_work(struct work_struct *work) @@ -308,7 +314,8 @@ void mlx5e_rx_am(struct mlx5e_rq *rq) switch (am->state) { case MLX5E_AM_MEASURE_IN_PROGRESS: - nevents = rq->cq.event_ctr - am->start_sample.event_ctr; + nevents = BIT_GAP(BITS_PER_TYPE(u16), rq->cq.event_ctr, + am->start_sample.event_ctr); if (nevents < MLX5E_AM_NEVENTS) break; mlx5e_am_sample(rq, &end_sample); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 5225f2226a67..898759fcf9ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -132,14 +132,14 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv) skb_reserve(skb, NET_IP_ALIGN); /* Reserve for ethernet and IP header */ - ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN); + ethh = skb_push(skb, ETH_HLEN); skb_reset_mac_header(skb); skb_set_network_header(skb, skb->len); - iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)); + iph = skb_put(skb, sizeof(struct iphdr)); skb_set_transport_header(skb, skb->len); - udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); + udph = skb_put(skb, sizeof(struct udphdr)); /* Fill ETH header */ ether_addr_copy(ethh->h_dest, priv->netdev->dev_addr); @@ -167,12 +167,12 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv) ip_send_check(iph); /* Fill test header and data */ - mlxh = (struct mlx5ehdr *)skb_put(skb, sizeof(*mlxh)); + mlxh = skb_put(skb, sizeof(*mlxh)); mlxh->version = 0; mlxh->magic = cpu_to_be64(MLX5E_TEST_MAGIC); strlcpy(mlxh->text, mlx5e_test_text, sizeof(mlxh->text)); datalen -= sizeof(*mlxh); - memset(skb_put(skb, datalen), 0, datalen); + skb_put_zero(skb, datalen); skb->csum = 0; skb->ip_summed = CHECKSUM_PARTIAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 53e4992d6511..e65517eafc58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -79,6 +79,7 @@ struct mlx5e_sw_stats { u64 rx_buff_alloc_err; u64 rx_cqe_compress_blks; u64 rx_cqe_compress_pkts; + u64 rx_page_reuse; u64 rx_cache_reuse; u64 rx_cache_full; u64 rx_cache_empty; @@ -117,6 +118,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_page_reuse) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, @@ -268,7 +270,7 @@ static const struct counter_desc pport_2819_stats_desc[] = { }; static const struct counter_desc pport_phy_statistical_stats_desc[] = { - { "rx_symbol_errors_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) }, + { "rx_pcs_symbol_err_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) }, { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) }, }; @@ -319,6 +321,7 @@ struct mlx5e_rq_stats { u64 buff_alloc_err; u64 cqe_compress_blks; u64 cqe_compress_pkts; + u64 page_reuse; u64 cache_reuse; u64 cache_full; u64 cache_empty; @@ -341,6 +344,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, page_reuse) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, @@ -417,20 +421,13 @@ struct mlx5e_stats { }; static const struct counter_desc mlx5e_pme_status_desc[] = { - { "module_plug", 0 }, { "module_unplug", 8 }, }; static const struct counter_desc mlx5e_pme_error_desc[] = { - { "module_pwr_budget_exd", 0 }, /* power budget exceed */ - { "module_long_range", 8 }, /* long range for non MLNX cable */ - { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */ - { "module_no_eeprom", 24 }, /* no eeprom/retry time out */ - { "module_enforce_part", 32 }, /* enforce part number list */ - { "module_unknown_id", 40 }, /* unknown identifier */ - { "module_high_temp", 48 }, /* high temperature */ + { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */ + { "module_high_temp", 48 }, /* high temperature */ { "module_bad_shorted", 56 }, /* bad or shorted cable/module */ - { "module_unknown_status", 64 }, }; #endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f5afacfbe914..3c536f560dd2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -888,6 +888,34 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *min_inline = MLX5_INLINE_MODE_IP; } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) { + struct flow_dissector_key_ip *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->key); + struct flow_dissector_key_ip *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->mask); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl); + + if (mask->ttl && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_ipv4_ttl)) + return -EOPNOTSUPP; + + if (mask->tos || mask->ttl) + *min_inline = MLX5_INLINE_MODE_IP; + } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { struct flow_dissector_key_ports *key = skb_flow_dissector_target(f->dissector, @@ -931,29 +959,6 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *min_inline = MLX5_INLINE_MODE_TCP_UDP; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) { - struct flow_dissector_key_ip *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IP, - f->key); - struct flow_dissector_key_ip *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IP, - f->mask); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2); - - if (mask->tos) - *min_inline = MLX5_INLINE_MODE_IP; - - if (mask->ttl) /* currently not supported */ - return -EOPNOTSUPP; - } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) { struct flow_dissector_key_tcp *key = skb_flow_dissector_target(f->dissector, @@ -1053,33 +1058,37 @@ struct mlx5_fields { u32 offset; }; +#define OFFLOAD(fw_field, size, field, off) \ + {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, offsetof(struct pedit_headers, field) + (off)} + static struct mlx5_fields fields[] = { - {MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_dest[0])}, - {MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0, 2, offsetof(struct pedit_headers, eth.h_dest[4])}, - {MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_source[0])}, - {MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0, 2, offsetof(struct pedit_headers, eth.h_source[4])}, - {MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE, 2, offsetof(struct pedit_headers, eth.h_proto)}, - - {MLX5_ACTION_IN_FIELD_OUT_IP_DSCP, 1, offsetof(struct pedit_headers, ip4.tos)}, - {MLX5_ACTION_IN_FIELD_OUT_IP_TTL, 1, offsetof(struct pedit_headers, ip4.ttl)}, - {MLX5_ACTION_IN_FIELD_OUT_SIPV4, 4, offsetof(struct pedit_headers, ip4.saddr)}, - {MLX5_ACTION_IN_FIELD_OUT_DIPV4, 4, offsetof(struct pedit_headers, ip4.daddr)}, - - {MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[0])}, - {MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[1])}, - {MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[2])}, - {MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[3])}, - {MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[0])}, - {MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[1])}, - {MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[2])}, - {MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[3])}, - - {MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT, 2, offsetof(struct pedit_headers, tcp.source)}, - {MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT, 2, offsetof(struct pedit_headers, tcp.dest)}, - {MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS, 1, offsetof(struct pedit_headers, tcp.ack_seq) + 5}, - - {MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT, 2, offsetof(struct pedit_headers, udp.source)}, - {MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT, 2, offsetof(struct pedit_headers, udp.dest)}, + OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0), + OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0), + OFFLOAD(DMAC_15_0, 2, eth.h_dest[4], 0), + OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0), + OFFLOAD(SMAC_15_0, 2, eth.h_source[4], 0), + OFFLOAD(ETHERTYPE, 2, eth.h_proto, 0), + + OFFLOAD(IP_TTL, 1, ip4.ttl, 0), + OFFLOAD(SIPV4, 4, ip4.saddr, 0), + OFFLOAD(DIPV4, 4, ip4.daddr, 0), + + OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0), + OFFLOAD(SIPV6_95_64, 4, ip6.saddr.s6_addr32[1], 0), + OFFLOAD(SIPV6_63_32, 4, ip6.saddr.s6_addr32[2], 0), + OFFLOAD(SIPV6_31_0, 4, ip6.saddr.s6_addr32[3], 0), + OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0), + OFFLOAD(DIPV6_95_64, 4, ip6.daddr.s6_addr32[1], 0), + OFFLOAD(DIPV6_63_32, 4, ip6.daddr.s6_addr32[2], 0), + OFFLOAD(DIPV6_31_0, 4, ip6.daddr.s6_addr32[3], 0), + OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0), + + OFFLOAD(TCP_SPORT, 2, tcp.source, 0), + OFFLOAD(TCP_DPORT, 2, tcp.dest, 0), + OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5), + + OFFLOAD(UDP_SPORT, 2, udp.source, 0), + OFFLOAD(UDP_DPORT, 2, udp.dest, 0), }; /* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index ab3bb026ff9e..0433d69429f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -33,7 +33,7 @@ #include <linux/tcp.h> #include <linux/if_vlan.h> #include "en.h" -#include "ipoib.h" +#include "ipoib/ipoib.h" #define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ @@ -245,7 +245,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int fsz = skb_frag_size(frag); dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, - DMA_TO_DEVICE); + DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) return -ENOMEM; @@ -557,11 +557,16 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, if (skb_is_gso(skb)) { opcode = MLX5_OPCODE_LSO; ihs = mlx5e_txwqe_build_eseg_gso(sq, skb, eseg, &num_bytes); + sq->stats.packets += skb_shinfo(skb)->gso_segs; } else { ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); + sq->stats.packets++; } + sq->stats.bytes += num_bytes; + sq->stats.xmit_more += skb->xmit_more; + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; if (ihs) { memcpy(eseg->inline_hdr.start, skb_data, ihs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 0ed8e90ba54f..af51a5d2b912 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -157,6 +157,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_PAGE_FAULT"; case MLX5_EVENT_TYPE_PPS_EVENT: return "MLX5_EVENT_TYPE_PPS_EVENT"; + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; case MLX5_EVENT_TYPE_FPGA_ERROR: return "MLX5_EVENT_TYPE_FPGA_ERROR"; default: @@ -189,7 +191,7 @@ static void eq_update_ci(struct mlx5_eq *eq, int arm) { __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); - __raw_writel((__force u32) cpu_to_be32(val), addr); + __raw_writel((__force u32)cpu_to_be32(val), addr); /* We still want ordering, just not swabbing, so add a barrier */ mb(); } @@ -675,7 +677,6 @@ int mlx5_eq_init(struct mlx5_core_dev *dev) return err; } - void mlx5_eq_cleanup(struct mlx5_core_dev *dev) { mlx5_eq_debugfs_cleanup(dev); @@ -687,7 +688,6 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; int err; - if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && MLX5_CAP_GEN(dev, vport_group_manager) && mlx5_core_is_pf(dev)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 37927156f258..89bfda419efe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1217,7 +1217,6 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", vport->vport); return -EPERM; - } esw_vport_cleanup_ingress_rules(esw, vport); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3795943ef2d1..95b64025ce36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -691,7 +691,7 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, - &flow_act, &dest, 1); + &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); goto out; @@ -903,21 +903,34 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) return 0; } -int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) +static int mlx5_devlink_eswitch_check(struct devlink *devlink) { - struct mlx5_core_dev *dev; - u16 cur_mlx5_mode, mlx5_mode = 0; + struct mlx5_core_dev *dev = devlink_priv(devlink); - dev = devlink_priv(devlink); + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return -EOPNOTSUPP; if (!MLX5_CAP_GEN(dev, vport_group_manager)) return -EOPNOTSUPP; - cur_mlx5_mode = dev->priv.eswitch->mode; - - if (cur_mlx5_mode == SRIOV_NONE) + if (dev->priv.eswitch->mode == SRIOV_NONE) return -EOPNOTSUPP; + return 0; +} + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u16 cur_mlx5_mode, mlx5_mode = 0; + int err; + + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; + + cur_mlx5_mode = dev->priv.eswitch->mode; + if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; @@ -934,15 +947,12 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) { - struct mlx5_core_dev *dev; - - dev = devlink_priv(devlink); - - if (!MLX5_CAP_GEN(dev, vport_group_manager)) - return -EOPNOTSUPP; + struct mlx5_core_dev *dev = devlink_priv(devlink); + int err; - if (dev->priv.eswitch->mode == SRIOV_NONE) - return -EOPNOTSUPP; + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; return esw_mode_to_devlink(dev->priv.eswitch->mode, mode); } @@ -951,15 +961,12 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; - int num_vports = esw->enabled_vports; int err, vport; u8 mlx5_mode; - if (!MLX5_CAP_GEN(dev, vport_group_manager)) - return -EOPNOTSUPP; - - if (esw->mode == SRIOV_NONE) - return -EOPNOTSUPP; + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: @@ -982,7 +989,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) if (err) goto out; - for (vport = 1; vport < num_vports; vport++) { + for (vport = 1; vport < esw->enabled_vports; vport++) { err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode); if (err) { esw_warn(dev, "Failed to set min inline on vport %d\n", @@ -1007,12 +1014,11 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; + int err; - if (!MLX5_CAP_GEN(dev, vport_group_manager)) - return -EOPNOTSUPP; - - if (esw->mode == SRIOV_NONE) - return -EOPNOTSUPP; + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); } @@ -1059,11 +1065,9 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap) struct mlx5_eswitch *esw = dev->priv.eswitch; int err; - if (!MLX5_CAP_GEN(dev, vport_group_manager)) - return -EOPNOTSUPP; - - if (esw->mode == SRIOV_NONE) - return -EOPNOTSUPP; + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) || @@ -1093,7 +1097,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap) if (err) { esw_warn(esw->dev, "Failed re-creating fast FDB table, err %d\n", err); esw->offloads.encap = !encap; - (void) esw_create_offloads_fast_fdb_table(esw); + (void)esw_create_offloads_fast_fdb_table(esw); } return err; } @@ -1102,12 +1106,11 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; + int err; - if (!MLX5_CAP_GEN(dev, vport_group_manager)) - return -EOPNOTSUPP; - - if (esw->mode == SRIOV_NONE) - return -EOPNOTSUPP; + err = mlx5_devlink_eswitch_check(devlink); + if (err) + return err; *encap = esw->offloads.encap; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 6c636c21d24f..e8690fe46bf2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -104,6 +104,7 @@ struct node_caps { size_t arr_sz; long *caps; }; + static struct init_tree_node { enum fs_node_type type; struct init_tree_node *children; @@ -860,7 +861,7 @@ struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace ft_attr.level = level; ft_attr.prio = prio; - return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, 0); + return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, vport); } struct mlx5_flow_table* @@ -1858,7 +1859,6 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering) static int init_root_ns(struct mlx5_flow_steering *steering) { - steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX); if (!steering->root_ns) goto cleanup; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 1bc14d0fded8..fa33d59ab485 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -34,6 +34,7 @@ #include <linux/mlx5/cmd.h> #include <linux/module.h> #include "mlx5_core.h" +#include "../../mlxfw/mlxfw.h" static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, int outlen) @@ -195,3 +196,298 @@ int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev) MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } + +int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0}; + int force_state; + int ret; + + if (!MLX5_CAP_GEN(dev, force_teardown)) { + mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); + MLX5_SET(teardown_hca_in, in, profile, MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE); + + ret = mlx5_cmd_exec_polling(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + force_state = MLX5_GET(teardown_hca_out, out, force_state); + if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) { + mlx5_core_err(dev, "teardown with force mode failed\n"); + return -EIO; + } + + return 0; +} + +enum mlxsw_reg_mcc_instruction { + MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE = 0x01, + MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE = 0x02, + MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT = 0x03, + MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT = 0x04, + MLX5_REG_MCC_INSTRUCTION_ACTIVATE = 0x06, + MLX5_REG_MCC_INSTRUCTION_CANCEL = 0x08, +}; + +static int mlx5_reg_mcc_set(struct mlx5_core_dev *dev, + enum mlxsw_reg_mcc_instruction instr, + u16 component_index, u32 update_handle, + u32 component_size) +{ + u32 out[MLX5_ST_SZ_DW(mcc_reg)]; + u32 in[MLX5_ST_SZ_DW(mcc_reg)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(mcc_reg, in, instruction, instr); + MLX5_SET(mcc_reg, in, component_index, component_index); + MLX5_SET(mcc_reg, in, update_handle, update_handle); + MLX5_SET(mcc_reg, in, component_size, component_size); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCC, 0, 1); +} + +static int mlx5_reg_mcc_query(struct mlx5_core_dev *dev, + u32 *update_handle, u8 *error_code, + u8 *control_state) +{ + u32 out[MLX5_ST_SZ_DW(mcc_reg)]; + u32 in[MLX5_ST_SZ_DW(mcc_reg)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + MLX5_SET(mcc_reg, in, update_handle, *update_handle); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCC, 0, 0); + if (err) + goto out; + + *update_handle = MLX5_GET(mcc_reg, out, update_handle); + *error_code = MLX5_GET(mcc_reg, out, error_code); + *control_state = MLX5_GET(mcc_reg, out, control_state); + +out: + return err; +} + +static int mlx5_reg_mcda_set(struct mlx5_core_dev *dev, + u32 update_handle, + u32 offset, u16 size, + u8 *data) +{ + int err, in_size = MLX5_ST_SZ_BYTES(mcda_reg) + size; + u32 out[MLX5_ST_SZ_DW(mcda_reg)]; + int i, j, dw_size = size >> 2; + __be32 data_element; + u32 *in; + + in = kzalloc(in_size, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(mcda_reg, in, update_handle, update_handle); + MLX5_SET(mcda_reg, in, offset, offset); + MLX5_SET(mcda_reg, in, size, size); + + for (i = 0; i < dw_size; i++) { + j = i * 4; + data_element = htonl(*(u32 *)&data[j]); + memcpy(MLX5_ADDR_OF(mcda_reg, in, data) + j, &data_element, 4); + } + + err = mlx5_core_access_reg(dev, in, in_size, out, + sizeof(out), MLX5_REG_MCDA, 0, 1); + kfree(in); + return err; +} + +static int mlx5_reg_mcqi_query(struct mlx5_core_dev *dev, + u16 component_index, + u32 *max_component_size, + u8 *log_mcda_word_size, + u16 *mcda_max_write_size) +{ + u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_ST_SZ_DW(mcqi_cap)]; + int offset = MLX5_ST_SZ_DW(mcqi_reg); + u32 in[MLX5_ST_SZ_DW(mcqi_reg)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(mcqi_reg, in, component_index, component_index); + MLX5_SET(mcqi_reg, in, data_size, MLX5_ST_SZ_BYTES(mcqi_cap)); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCQI, 0, 0); + if (err) + goto out; + + *max_component_size = MLX5_GET(mcqi_cap, out + offset, max_component_size); + *log_mcda_word_size = MLX5_GET(mcqi_cap, out + offset, log_mcda_word_size); + *mcda_max_write_size = MLX5_GET(mcqi_cap, out + offset, mcda_max_write_size); + +out: + return err; +} + +struct mlx5_mlxfw_dev { + struct mlxfw_dev mlxfw_dev; + struct mlx5_core_dev *mlx5_core_dev; +}; + +static int mlx5_component_query(struct mlxfw_dev *mlxfw_dev, + u16 component_index, u32 *p_max_size, + u8 *p_align_bits, u16 *p_max_write_size) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcqi_query(dev, component_index, p_max_size, + p_align_bits, p_max_write_size); +} + +static int mlx5_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + u8 control_state, error_code; + int err; + + *fwhandle = 0; + err = mlx5_reg_mcc_query(dev, fwhandle, &error_code, &control_state); + if (err) + return err; + + if (control_state != MLXFW_FSM_STATE_IDLE) + return -EBUSY; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE, + 0, *fwhandle, 0); +} + +static int mlx5_fsm_component_update(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index, u32 component_size) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT, + component_index, fwhandle, component_size); +} + +static int mlx5_fsm_block_download(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u8 *data, u16 size, u32 offset) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcda_set(dev, fwhandle, offset, size, data); +} + +static int mlx5_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT, + component_index, fwhandle, 0); +} + +static int mlx5_fsm_activate(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_ACTIVATE, 0, + fwhandle, 0); +} + +static int mlx5_fsm_query_state(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + enum mlxfw_fsm_state *fsm_state, + enum mlxfw_fsm_state_err *fsm_state_err) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + u8 control_state, error_code; + int err; + + err = mlx5_reg_mcc_query(dev, &fwhandle, &error_code, &control_state); + if (err) + return err; + + *fsm_state = control_state; + *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code, + MLXFW_FSM_STATE_ERR_MAX); + return 0; +} + +static void mlx5_fsm_cancel(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_CANCEL, 0, fwhandle, 0); +} + +static void mlx5_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, 0, + fwhandle, 0); +} + +static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = { + .component_query = mlx5_component_query, + .fsm_lock = mlx5_fsm_lock, + .fsm_component_update = mlx5_fsm_component_update, + .fsm_block_download = mlx5_fsm_block_download, + .fsm_component_verify = mlx5_fsm_component_verify, + .fsm_activate = mlx5_fsm_activate, + .fsm_query_state = mlx5_fsm_query_state, + .fsm_cancel = mlx5_fsm_cancel, + .fsm_release = mlx5_fsm_release +}; + +int mlx5_firmware_flash(struct mlx5_core_dev *dev, + const struct firmware *firmware) +{ + struct mlx5_mlxfw_dev mlx5_mlxfw_dev = { + .mlxfw_dev = { + .ops = &mlx5_mlxfw_dev_ops, + .psid = dev->board_id, + .psid_size = strlen(dev->board_id), + }, + .mlx5_core_dev = dev + }; + + if (!MLX5_CAP_GEN(dev, mcam_reg) || + !MLX5_CAP_MCAM_REG(dev, mcqi) || + !MLX5_CAP_MCAM_REG(dev, mcc) || + !MLX5_CAP_MCAM_REG(dev, mcda)) { + pr_info("%s flashing isn't supported by the running FW\n", __func__); + return -EOPNOTSUPP; + } + + return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, firmware); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 80b23333de7a..0648a659b21d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -111,14 +111,14 @@ static int in_fatal(struct mlx5_core_dev *dev) return 0; } -void mlx5_enter_error_state(struct mlx5_core_dev *dev) +void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) { mutex_lock(&dev->intf_state_mutex); if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) goto unlock; mlx5_core_err(dev, "start\n"); - if (pci_channel_offline(dev->pdev) || in_fatal(dev)) { + if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; trigger_cmd_completions(dev); } @@ -290,10 +290,8 @@ static void poll_health(unsigned long data) struct mlx5_core_health *health = &dev->priv.health; u32 count; - if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - mod_timer(&health->timer, get_next_poll_jiffies()); - return; - } + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto out; count = ioread32be(health->health_counter); if (count == health->prev) @@ -305,8 +303,6 @@ static void poll_health(unsigned long data) if (health->miss_counter == MAX_MISSES) { dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n"); print_health_info(dev); - } else { - mod_timer(&health->timer, get_next_poll_jiffies()); } if (in_fatal(dev) && !health->sick) { @@ -314,6 +310,9 @@ static void poll_health(unsigned long data) print_health_info(dev); mlx5_trigger_health_work(dev); } + +out: + mod_timer(&health->timer, get_next_poll_jiffies()); } void mlx5_start_health_poll(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c new file mode 100644 index 000000000000..eb04e97d8765 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" +#include "ipoib.h" + +static void mlx5i_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_drvinfo(priv, drvinfo); +} + +static void mlx5i_get_strings(struct net_device *dev, + uint32_t stringset, uint8_t *data) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_strings(priv, stringset, data); +} + +static int mlx5i_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_get_sset_count(priv, sset); +} + +static void mlx5i_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + u64 *data) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_ethtool_stats(priv, stats, data); +} + +static int mlx5i_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_set_ringparam(priv, param); +} + +static void mlx5i_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_ringparam(priv, param); +} + +static int mlx5i_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_set_channels(priv, ch); +} + +static void mlx5i_get_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_channels(priv, ch); +} + +static int mlx5i_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_set_coalesce(priv, coal); +} + +static int mlx5i_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_get_coalesce(priv, coal); +} + +static int mlx5i_get_ts_info(struct net_device *netdev, + struct ethtool_ts_info *info) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_get_ts_info(priv, info); +} + +static int mlx5i_flash_device(struct net_device *netdev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + +const struct ethtool_ops mlx5i_ethtool_ops = { + .get_drvinfo = mlx5i_get_drvinfo, + .get_strings = mlx5i_get_strings, + .get_sset_count = mlx5i_get_sset_count, + .get_ethtool_stats = mlx5i_get_ethtool_stats, + .get_ringparam = mlx5i_get_ringparam, + .set_ringparam = mlx5i_set_ringparam, + .flash_device = mlx5i_flash_device, + .get_channels = mlx5i_get_channels, + .set_channels = mlx5i_set_channels, + .get_coalesce = mlx5i_get_coalesce, + .set_coalesce = mlx5i_set_coalesce, + .get_ts_info = mlx5i_get_ts_info, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 22ca59145e6c..1ee5bce85901 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -36,20 +36,38 @@ #include "ipoib.h" #define IB_DEFAULT_Q_KEY 0xb1b +#define MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE 9 static int mlx5i_open(struct net_device *netdev); static int mlx5i_close(struct net_device *netdev); static int mlx5i_dev_init(struct net_device *dev); static void mlx5i_dev_cleanup(struct net_device *dev); +static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu); +static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); static const struct net_device_ops mlx5i_netdev_ops = { .ndo_open = mlx5i_open, .ndo_stop = mlx5i_close, .ndo_init = mlx5i_dev_init, .ndo_uninit = mlx5i_dev_cleanup, + .ndo_change_mtu = mlx5i_change_mtu, + .ndo_do_ioctl = mlx5i_ioctl, }; /* IPoIB mlx5 netdev profile */ +static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ + mlx5e_set_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST); + + /* RQ size in ipoib by default is 512 */ + params->log_rq_size = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : + MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE; + + params->lro_en = false; +} /* Called directly after IPoIB netdevice was created to initialize SW structs */ static void mlx5i_init(struct mlx5_core_dev *mdev, @@ -59,19 +77,18 @@ static void mlx5i_init(struct mlx5_core_dev *mdev, { struct mlx5e_priv *priv = mlx5i_epriv(netdev); + /* priv init */ priv->mdev = mdev; priv->netdev = netdev; priv->profile = profile; priv->ppriv = ppriv; + priv->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; + mutex_init(&priv->state_lock); mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev)); + mlx5i_build_nic_params(mdev, &priv->channels.params); - /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ - mlx5e_set_rq_type_params(mdev, &priv->channels.params, MLX5_WQ_TYPE_LINKED_LIST); - priv->channels.params.lro_en = false; - - mutex_init(&priv->state_lock); - + /* netdev init */ netdev->hw_features |= NETIF_F_SG; netdev->hw_features |= NETIF_F_IP_CSUM; netdev->hw_features |= NETIF_F_IPV6_CSUM; @@ -82,6 +99,7 @@ static void mlx5i_init(struct mlx5_core_dev *mdev, netdev->hw_features |= NETIF_F_RXHASH; netdev->netdev_ops = &mlx5i_netdev_ops; + netdev->ethtool_ops = &mlx5i_ethtool_ops; } /* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ @@ -290,6 +308,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .disable = NULL, /* mlx5i_disable */ .update_stats = NULL, /* mlx5i_update_stats */ .max_nch = mlx5e_get_max_num_channels, + .update_carrier = NULL, /* no HW update in IB link */ .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ .max_tc = MLX5I_MAX_NUM_TC, @@ -297,6 +316,35 @@ static const struct mlx5e_profile mlx5i_nic_profile = { /* mlx5i netdev NDos */ +static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5e_channels new_channels = {}; + int curr_mtu; + int err = 0; + + mutex_lock(&priv->state_lock); + + curr_mtu = netdev->mtu; + netdev->mtu = new_mtu; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto out; + + new_channels.params = priv->channels.params; + err = mlx5e_open_channels(priv, &new_channels); + if (err) { + netdev->mtu = curr_mtu; + goto out; + } + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + +out: + mutex_unlock(&priv->state_lock); + return err; +} + static int mlx5i_dev_init(struct net_device *dev) { struct mlx5e_priv *priv = mlx5i_epriv(dev); @@ -310,6 +358,20 @@ static int mlx5i_dev_init(struct net_device *dev) return 0; } +static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + switch (cmd) { + case SIOCSHWTSTAMP: + return mlx5e_hwstamp_set(priv, ifr); + case SIOCGHWTSTAMP: + return mlx5e_hwstamp_get(priv, ifr); + default: + return -EOPNOTSUPP; + } +} + static void mlx5i_dev_cleanup(struct net_device *dev) { struct mlx5e_priv *priv = mlx5i_epriv(dev); @@ -336,6 +398,8 @@ static int mlx5i_open(struct net_device *netdev) mlx5e_refresh_tirs(priv, false); mlx5e_activate_priv_channels(priv); + mlx5e_timestamp_init(priv); + mutex_unlock(&priv->state_lock); return 0; @@ -359,6 +423,7 @@ static int mlx5i_close(struct net_device *netdev) clear_bit(MLX5E_STATE_OPENED, &priv->state); + mlx5e_timestamp_cleanup(priv); netif_carrier_off(priv->netdev); mlx5e_deactivate_priv_channels(priv); mlx5e_close_channels(&priv->channels); @@ -510,4 +575,3 @@ void mlx5_rdma_netdev_free(struct net_device *netdev) mlx5e_destroy_mdev_resources(priv->mdev); } EXPORT_SYMBOL(mlx5_rdma_netdev_free); - diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 213191a78464..a0f405f520f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -38,6 +38,13 @@ #define MLX5I_MAX_NUM_TC 1 +extern const struct ethtool_ops mlx5i_ethtool_ops; + +#define MLX5_IB_GRH_BYTES 40 +#define MLX5_IPOIB_ENCAP_LEN 4 +#define MLX5_IPOIB_PSEUDO_LEN 20 +#define MLX5_IPOIB_HARD_LEN (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN) + /* ipoib rdma netdev's private data structure */ struct mlx5i_priv { struct rdma_netdev rn; /* keep this first */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index b5d5519542e8..a3a836bdcfd2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -61,6 +61,11 @@ struct mlx5_lag { struct lag_tracker tracker; struct delayed_work bond_work; struct notifier_block nb; + + /* Admin state. Allow lag only if allowed is true + * even if network conditions for lag were met + */ + bool allowed; }; /* General purpose, use for short periods of time. @@ -214,6 +219,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) struct lag_tracker tracker; u8 v2p_port1, v2p_port2; int i, err; + bool do_bond; if (!dev0 || !dev1) return; @@ -222,13 +228,9 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) tracker = ldev->tracker; mutex_unlock(&lag_mutex); - if (tracker.is_bonded && !mlx5_lag_is_bonded(ldev)) { - if (mlx5_sriov_is_enabled(dev0) || - mlx5_sriov_is_enabled(dev1)) { - mlx5_core_warn(dev0, "LAG is not supported with SRIOV"); - return; - } + do_bond = tracker.is_bonded && ldev->allowed; + if (do_bond && !mlx5_lag_is_bonded(ldev)) { for (i = 0; i < MLX5_MAX_PORTS; i++) mlx5_remove_dev_by_protocol(ldev->pf[i].dev, MLX5_INTERFACE_PROTOCOL_IB); @@ -237,7 +239,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); mlx5_nic_vport_enable_roce(dev1); - } else if (tracker.is_bonded && mlx5_lag_is_bonded(ldev)) { + } else if (do_bond && mlx5_lag_is_bonded(ldev)) { mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1, &v2p_port2); @@ -252,7 +254,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) "Failed to modify LAG (%d)\n", err); } - } else if (!tracker.is_bonded && mlx5_lag_is_bonded(ldev)) { + } else if (!do_bond && mlx5_lag_is_bonded(ldev)) { mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); mlx5_nic_vport_disable_roce(dev1); @@ -411,6 +413,15 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, return NOTIFY_DONE; } +static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) +{ + if ((ldev->pf[0].dev && mlx5_sriov_is_enabled(ldev->pf[0].dev)) || + (ldev->pf[1].dev && mlx5_sriov_is_enabled(ldev->pf[1].dev))) + return false; + else + return true; +} + static struct mlx5_lag *mlx5_lag_dev_alloc(void) { struct mlx5_lag *ldev; @@ -420,6 +431,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(void) return NULL; INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); + ldev->allowed = mlx5_lag_check_prereq(ldev); return ldev; } @@ -444,7 +456,9 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, ldev->tracker.netdev_state[fn].link_up = 0; ldev->tracker.netdev_state[fn].tx_enabled = 0; + ldev->allowed = mlx5_lag_check_prereq(ldev); dev->priv.lag = ldev; + mutex_unlock(&lag_mutex); } @@ -464,10 +478,10 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, memset(&ldev->pf[i], 0, sizeof(*ldev->pf)); dev->priv.lag = NULL; + ldev->allowed = mlx5_lag_check_prereq(ldev); mutex_unlock(&lag_mutex); } - /* Must be called with intf_mutex held */ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) { @@ -543,6 +557,44 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_is_active); +static int mlx5_lag_set_state(struct mlx5_core_dev *dev, bool allow) +{ + struct mlx5_lag *ldev; + int ret = 0; + bool lag_active; + + mlx5_dev_list_lock(); + + ldev = mlx5_lag_dev_get(dev); + if (!ldev) { + ret = -ENODEV; + goto unlock; + } + lag_active = mlx5_lag_is_bonded(ldev); + if (!mlx5_lag_check_prereq(ldev) && allow) { + ret = -EINVAL; + goto unlock; + } + if (ldev->allowed == allow) + goto unlock; + ldev->allowed = allow; + if ((lag_active && !allow) || allow) + mlx5_do_bond(ldev); +unlock: + mlx5_dev_list_unlock(); + return ret; +} + +int mlx5_lag_forbid(struct mlx5_core_dev *dev) +{ + return mlx5_lag_set_state(dev, false); +} + +int mlx5_lag_allow(struct mlx5_core_dev *dev) +{ + return mlx5_lag_set_state(dev, true); +} + struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) { struct net_device *ndev = NULL; @@ -586,4 +638,3 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) /* If bonded, we do not add an IB device for PF1. */ return false; } - diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 9274d93d3183..c7f75e12c13b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -176,8 +176,9 @@ static struct mlx5_profile profile[] = { }, }; -#define FW_INIT_TIMEOUT_MILI 2000 -#define FW_INIT_WAIT_MS 2 +#define FW_INIT_TIMEOUT_MILI 2000 +#define FW_INIT_WAIT_MS 2 +#define FW_PRE_INIT_TIMEOUT_MILI 10000 static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili) { @@ -356,12 +357,11 @@ static void mlx5_disable_msix(struct mlx5_core_dev *dev) kfree(priv->msix_arr); } -struct mlx5_reg_host_endianess { +struct mlx5_reg_host_endianness { u8 he; u8 rsvd[15]; }; - #define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos)) enum { @@ -475,7 +475,7 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) req_endianness = MLX5_CAP_ATOMIC(dev, - supported_atomic_req_8B_endianess_mode_1); + supported_atomic_req_8B_endianness_mode_1); if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS) return 0; @@ -487,7 +487,7 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); /* Set requestor to host endianness */ - MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode, + MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode, MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS); err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC); @@ -538,8 +538,10 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) /* disable cmdif checksum */ MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0); - /* If the HCA supports 4K UARs use it */ - if (MLX5_CAP_GEN_MAX(dev, uar_4k)) + /* Enable 4K UAR only when HCA supports it and page size is bigger + * than 4K. + */ + if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096) MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1); MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12); @@ -560,8 +562,8 @@ query_ex: static int set_hca_ctrl(struct mlx5_core_dev *dev) { - struct mlx5_reg_host_endianess he_in; - struct mlx5_reg_host_endianess he_out; + struct mlx5_reg_host_endianness he_in; + struct mlx5_reg_host_endianness he_out; int err; if (!mlx5_core_is_pf(dev)) @@ -1012,6 +1014,15 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, */ dev->state = MLX5_DEVICE_STATE_UP; + /* wait for firmware to accept initialization segments configurations + */ + err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI); + if (err) { + dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n", + FW_PRE_INIT_TIMEOUT_MILI); + goto out; + } + err = mlx5_cmd_init(dev); if (err) { dev_err(&pdev->dev, "Failed initializing command interface, aborting\n"); @@ -1417,7 +1428,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, dev_info(&pdev->dev, "%s was called\n", __func__); - mlx5_enter_error_state(dev); + mlx5_enter_error_state(dev, false); mlx5_unload_one(dev, priv, false); /* In case of kernel call drain the health wq */ if (state) { @@ -1504,24 +1515,52 @@ static const struct pci_error_handlers mlx5_err_handler = { .resume = mlx5_pci_resume }; +static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) +{ + int ret; + + if (!MLX5_CAP_GEN(dev, force_teardown)) { + mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n"); + return -EOPNOTSUPP; + } + + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5_core_dbg(dev, "Device in internal error state, giving up\n"); + return -EAGAIN; + } + + ret = mlx5_cmd_force_teardown_hca(dev); + if (ret) { + mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret); + return ret; + } + + mlx5_enter_error_state(dev, true); + + return 0; +} + static void shutdown(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_priv *priv = &dev->priv; + int err; dev_info(&pdev->dev, "Shutdown was called\n"); /* Notify mlx5 clients that the kernel is being shut down */ set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state); - mlx5_unload_one(dev, priv, false); + err = mlx5_try_fast_unload(dev); + if (err) + mlx5_unload_one(dev, priv, false); mlx5_pci_disable_device(dev); } static const struct pci_device_id mlx5_core_pci_table[] = { - { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ + { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTIB) }, { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */ - { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */ + { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4) }, { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ - { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ + { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) }, { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */ { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index cf69b42278df..6a3d6bef7dd4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -37,6 +37,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/if_link.h> +#include <linux/firmware.h> #define DRIVER_NAME "mlx5_core" #define DRIVER_VERSION "5.0-0" @@ -83,12 +84,13 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); +int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_core_page_fault(struct mlx5_core_dev *dev, struct mlx5_pagefault *pfault); void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); -void mlx5_enter_error_state(struct mlx5_core_dev *dev); +void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); int mlx5_sriov_init(struct mlx5_core_dev *dev); @@ -152,6 +154,8 @@ int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); +int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw); + void mlx5e_init(void); void mlx5e_cleanup(void); @@ -167,4 +171,7 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) MLX5_CAP_GEN(dev, lag_master); } +int mlx5_lag_allow(struct mlx5_core_dev *dev); +int mlx5_lag_forbid(struct mlx5_core_dev *dev); + #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index efcded7ca27a..e36d3e3675f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -403,7 +403,6 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, for (i = 0; i < num_claimed; i++) free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i])); - if (nclaimed) *nclaimed = num_claimed; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 573a6b27fed8..340f281c9801 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -30,7 +30,6 @@ * SOFTWARE. */ - #include <linux/gfp.h> #include <linux/export.h> #include <linux/mlx5/cmd.h> @@ -519,23 +518,3 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter); - -int mlx5_core_query_out_of_buffer(struct mlx5_core_dev *dev, u16 counter_id, - u32 *out_of_buffer) -{ - int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); - void *out; - int err; - - out = kvzalloc(outlen, GFP_KERNEL); - if (!out) - return -ENOMEM; - - err = mlx5_core_query_q_counter(dev, counter_id, 0, out, outlen); - if (!err) - *out_of_buffer = MLX5_GET(query_q_counter_out, out, - out_of_buffer); - - kfree(out); - return err; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index e08627785590..bcdf7779c48d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -175,15 +175,20 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!mlx5_core_is_pf(dev)) return -EPERM; - if (num_vfs && mlx5_lag_is_active(dev)) { - mlx5_core_warn(dev, "can't turn sriov on while LAG is active"); - return -EINVAL; + if (num_vfs) { + int ret; + + ret = mlx5_lag_forbid(dev); + if (ret && (ret != -ENODEV)) + return ret; } - if (num_vfs) + if (num_vfs) { err = mlx5_sriov_enable(pdev, num_vfs); - else + } else { mlx5_sriov_disable(pdev); + mlx5_lag_allow(dev); + } return err ? err : num_vfs; } diff --git a/drivers/net/ethernet/mellanox/mlxfw/Kconfig b/drivers/net/ethernet/mellanox/mlxfw/Kconfig index 2b21af8a2b1d..186ebe783f97 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxfw/Kconfig @@ -3,5 +3,11 @@ # config MLXFW - tristate "mlxfw" if COMPILE_TEST + tristate "Mellanox Technologies firmware flash module" + ---help--- + This driver supports Mellanox Technologies Firmware + flashing common logic. + + To compile this driver as a module, choose M here: the + module will be called mlxfw. select XZ_DEC diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h index beea4ba83495..9ca85383aa35 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h @@ -96,7 +96,16 @@ struct mlxfw_dev { u16 psid_size; }; +#if IS_ENABLED(CONFIG_MLXFW) int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, const struct firmware *firmware); +#else +static inline +int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, + const struct firmware *firmware) +{ + return -EOPNOTSUPP; +} +#endif #endif diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c index 628150d28061..993cb5ba934e 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c @@ -594,6 +594,7 @@ mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file, if (memcmp(comp_data->buff, mlxfw_mfa2_comp_magic, mlxfw_mfa2_comp_magic_len) != 0) { pr_err("Component has wrong magic\n"); + err = -EINVAL; goto err_out; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 157b9b6f8485..1bd34d9a7b9e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5491,6 +5491,81 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); } +/* MCIA - Management Cable Info Access + * ----------------------------------- + * MCIA register is used to access the SFP+ and QSFP connector's EPROM. + */ + +#define MLXSW_REG_MCIA_ID 0x9014 +#define MLXSW_REG_MCIA_LEN 0x40 + +MLXSW_REG_DEFINE(mcia, MLXSW_REG_MCIA_ID, MLXSW_REG_MCIA_LEN); + +/* reg_mcia_l + * Lock bit. Setting this bit will lock the access to the specific + * cable. Used for updating a full page in a cable EPROM. Any access + * other then subsequence writes will fail while the port is locked. + * Access: RW + */ +MLXSW_ITEM32(reg, mcia, l, 0x00, 31, 1); + +/* reg_mcia_module + * Module number. + * Access: Index + */ +MLXSW_ITEM32(reg, mcia, module, 0x00, 16, 8); + +/* reg_mcia_status + * Module status. + * Access: RO + */ +MLXSW_ITEM32(reg, mcia, status, 0x00, 0, 8); + +/* reg_mcia_i2c_device_address + * I2C device address. + * Access: RW + */ +MLXSW_ITEM32(reg, mcia, i2c_device_address, 0x04, 24, 8); + +/* reg_mcia_page_number + * Page number. + * Access: RW + */ +MLXSW_ITEM32(reg, mcia, page_number, 0x04, 16, 8); + +/* reg_mcia_device_address + * Device address. + * Access: RW + */ +MLXSW_ITEM32(reg, mcia, device_address, 0x04, 0, 16); + +/* reg_mcia_size + * Number of bytes to read/write (up to 48 bytes). + * Access: RW + */ +MLXSW_ITEM32(reg, mcia, size, 0x08, 0, 16); + +#define MLXSW_SP_REG_MCIA_EEPROM_SIZE 48 + +/* reg_mcia_eeprom + * Bytes to read/write. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_SP_REG_MCIA_EEPROM_SIZE); + +static inline void mlxsw_reg_mcia_pack(char *payload, u8 module, u8 lock, + u8 page_number, u16 device_addr, + u8 size, u8 i2c_device_addr) +{ + MLXSW_REG_ZERO(mcia, payload); + mlxsw_reg_mcia_module_set(payload, module); + mlxsw_reg_mcia_l_set(payload, lock); + mlxsw_reg_mcia_page_number_set(payload, page_number); + mlxsw_reg_mcia_device_address_set(payload, device_addr); + mlxsw_reg_mcia_size_set(payload, size); + mlxsw_reg_mcia_i2c_device_address_set(payload, i2c_device_addr); +} + /* MPAT - Monitoring Port Analyzer Table * ------------------------------------- * MPAT Register is used to query and configure the Switch PortAnalyzer Table. @@ -6433,6 +6508,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mfsl), MLXSW_REG(mtcap), MLXSW_REG(mtmp), + MLXSW_REG(mcia), MLXSW_REG(mpat), MLXSW_REG(mpar), MLXSW_REG(mlcr), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 0e51c3693243..60bf8f27cc00 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2519,6 +2519,135 @@ out: return err; } +#define MLXSW_SP_QSFP_I2C_ADDR 0x50 + +static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port, + u16 offset, u16 size, void *data, + unsigned int *p_read_size) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char eeprom_tmp[MLXSW_SP_REG_MCIA_EEPROM_SIZE]; + char mcia_pl[MLXSW_REG_MCIA_LEN]; + int status; + int err; + + size = min_t(u16, size, MLXSW_SP_REG_MCIA_EEPROM_SIZE); + mlxsw_reg_mcia_pack(mcia_pl, mlxsw_sp_port->mapping.module, + 0, 0, offset, size, MLXSW_SP_QSFP_I2C_ADDR); + + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcia), mcia_pl); + if (err) + return err; + + status = mlxsw_reg_mcia_status_get(mcia_pl); + if (status) + return -EIO; + + mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp); + memcpy(data, eeprom_tmp, size); + *p_read_size = size; + + return 0; +} + +enum mlxsw_sp_eeprom_module_info_rev_id { + MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_UNSPC = 0x00, + MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8436 = 0x01, + MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636 = 0x03, +}; + +enum mlxsw_sp_eeprom_module_info_id { + MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP = 0x03, + MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP = 0x0C, + MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS = 0x0D, + MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28 = 0x11, +}; + +enum mlxsw_sp_eeprom_module_info { + MLXSW_SP_EEPROM_MODULE_INFO_ID, + MLXSW_SP_EEPROM_MODULE_INFO_REV_ID, + MLXSW_SP_EEPROM_MODULE_INFO_SIZE, +}; + +static int mlxsw_sp_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); + u8 module_info[MLXSW_SP_EEPROM_MODULE_INFO_SIZE]; + u8 module_rev_id, module_id; + unsigned int read_size; + int err; + + err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, 0, + MLXSW_SP_EEPROM_MODULE_INFO_SIZE, + module_info, &read_size); + if (err) + return err; + + if (read_size < MLXSW_SP_EEPROM_MODULE_INFO_SIZE) + return -EIO; + + module_rev_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_REV_ID]; + module_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_ID]; + + switch (module_id) { + case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS: + case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28: + if (module_id == MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28 || + module_rev_id >= MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636) { + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + } + break; + case MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP: + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int mlxsw_sp_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); + int offset = ee->offset; + unsigned int read_size; + int i = 0; + int err; + + if (!ee->len) + return -EINVAL; + + memset(data, 0, ee->len); + + while (i < ee->len) { + err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, offset, + ee->len - i, data + i, + &read_size); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Eeprom query failed\n"); + return err; + } + + i += read_size; + offset += read_size; + } + + return 0; +} + static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_drvinfo = mlxsw_sp_port_get_drvinfo, .get_link = ethtool_op_get_link, @@ -2531,6 +2660,8 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_link_ksettings = mlxsw_sp_port_get_link_ksettings, .set_link_ksettings = mlxsw_sp_port_set_link_ksettings, .flash_device = mlxsw_sp_flash_device, + .get_module_info = mlxsw_sp_get_module_info, + .get_module_eeprom = mlxsw_sp_get_module_eeprom, }; static int |