diff options
-rw-r--r-- | drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 17 | ||||
-rw-r--r-- | drivers/net/dsa/microchip/ksz8.c | 3 | ||||
-rw-r--r-- | drivers/net/dsa/microchip/ksz8_reg.h | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/igb/igb_xsk.c | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 7 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 26 | ||||
-rw-r--r-- | drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 | ||||
-rw-r--r-- | drivers/net/phy/micrel.c | 2 | ||||
-rw-r--r-- | drivers/net/usb/usbnet.c | 11 | ||||
-rw-r--r-- | drivers/net/vrf.c | 2 | ||||
-rw-r--r-- | include/linux/usb/usbnet.h | 1 | ||||
-rw-r--r-- | net/core/neighbour.c | 88 | ||||
-rw-r--r-- | net/ipv6/ip6_fib.c | 24 | ||||
-rw-r--r-- | net/ipv6/route.c | 71 | ||||
-rwxr-xr-x | tools/testing/selftests/drivers/net/hw/tso.py | 99 |
18 files changed, 247 insertions, 121 deletions
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 4d85b29a17b7..ebefc274b50a 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -49,7 +49,7 @@ struct __packed pcan_ufd_fw_info { __le32 ser_no; /* S/N */ __le32 flags; /* special functions */ - /* extended data when type == PCAN_USBFD_TYPE_EXT */ + /* extended data when type >= PCAN_USBFD_TYPE_EXT */ u8 cmd_out_ep; /* ep for cmd */ u8 cmd_in_ep; /* ep for replies */ u8 data_out_ep[2]; /* ep for CANx TX */ @@ -982,10 +982,11 @@ static int pcan_usb_fd_init(struct peak_usb_device *dev) dev->can.ctrlmode |= CAN_CTRLMODE_FD_NON_ISO; } - /* if vendor rsp is of type 2, then it contains EP numbers to - * use for cmds pipes. If not, then default EP should be used. + /* if vendor rsp type is greater than or equal to 2, then it + * contains EP numbers to use for cmds pipes. If not, then + * default EP should be used. */ - if (fw_info->type != cpu_to_le16(PCAN_USBFD_TYPE_EXT)) { + if (le16_to_cpu(fw_info->type) < PCAN_USBFD_TYPE_EXT) { fw_info->cmd_out_ep = PCAN_USBPRO_EP_CMDOUT; fw_info->cmd_in_ep = PCAN_USBPRO_EP_CMDIN; } @@ -1018,11 +1019,11 @@ static int pcan_usb_fd_init(struct peak_usb_device *dev) dev->can_channel_id = le32_to_cpu(pdev->usb_if->fw_info.dev_id[dev->ctrl_idx]); - /* if vendor rsp is of type 2, then it contains EP numbers to - * use for data pipes. If not, then statically defined EP are used - * (see peak_usb_create_dev()). + /* if vendor rsp type is greater than or equal to 2, then it contains EP + * numbers to use for data pipes. If not, then statically defined EP are + * used (see peak_usb_create_dev()). */ - if (fw_info->type == cpu_to_le16(PCAN_USBFD_TYPE_EXT)) { + if (le16_to_cpu(fw_info->type) >= PCAN_USBFD_TYPE_EXT) { dev->ep_msg_in = fw_info->data_in_ep; dev->ep_msg_out = fw_info->data_out_ep[dev->ctrl_idx]; } diff --git a/drivers/net/dsa/microchip/ksz8.c b/drivers/net/dsa/microchip/ksz8.c index c400e1c0369e..76e490070e9c 100644 --- a/drivers/net/dsa/microchip/ksz8.c +++ b/drivers/net/dsa/microchip/ksz8.c @@ -384,6 +384,9 @@ static void ksz8863_r_mib_pkt(struct ksz_device *dev, int port, u16 addr, addr -= dev->info->reg_mib_cnt; ctrl_addr = addr ? KSZ8863_MIB_PACKET_DROPPED_TX_0 : KSZ8863_MIB_PACKET_DROPPED_RX_0; + if (ksz_is_8895_family(dev) && + ctrl_addr == KSZ8863_MIB_PACKET_DROPPED_RX_0) + ctrl_addr = KSZ8895_MIB_PACKET_DROPPED_RX_0; ctrl_addr += port; ctrl_addr |= IND_ACC_TABLE(TABLE_MIB | TABLE_READ); diff --git a/drivers/net/dsa/microchip/ksz8_reg.h b/drivers/net/dsa/microchip/ksz8_reg.h index 491aa1e50175..332408567b47 100644 --- a/drivers/net/dsa/microchip/ksz8_reg.h +++ b/drivers/net/dsa/microchip/ksz8_reg.h @@ -833,7 +833,9 @@ #define KSZ8795_MIB_TOTAL_TX_1 0x105 #define KSZ8863_MIB_PACKET_DROPPED_TX_0 0x100 -#define KSZ8863_MIB_PACKET_DROPPED_RX_0 0x105 +#define KSZ8863_MIB_PACKET_DROPPED_RX_0 0x103 + +#define KSZ8895_MIB_PACKET_DROPPED_RX_0 0x105 #define MIB_PACKET_DROPPED 0x0000FFFF diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c b/drivers/net/ethernet/intel/igb/igb_xsk.c index 5cf67ba29269..30ce5fbb5b77 100644 --- a/drivers/net/ethernet/intel/igb/igb_xsk.c +++ b/drivers/net/ethernet/intel/igb/igb_xsk.c @@ -482,7 +482,7 @@ bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool) if (!nb_pkts) return true; - while (nb_pkts-- > 0) { + for (; i < nb_pkts; i++) { dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr); xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, descs[i].len); @@ -512,7 +512,6 @@ bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool) total_bytes += descs[i].len; - i++; tx_ring->next_to_use++; tx_buffer_info->next_to_watch = tx_desc; if (tx_ring->next_to_use == tx_ring->count) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 60d24d8a2242..0dd3bc0f4caa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -726,6 +726,7 @@ struct mlx5e_rq { struct xsk_buff_pool *xsk_pool; struct work_struct recover_work; + struct work_struct rx_timeout_work; /* control */ struct mlx5_wq_ctrl wq_ctrl; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 8e25f4ef5ccc..5ae787656a7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -331,6 +331,9 @@ static int port_set_buffer(struct mlx5e_priv *priv, if (err) goto out; + /* RO bits should be set to 0 on write */ + MLX5_SET(pbmc_reg, in, port_buffer_size, 0); + err = mlx5e_port_set_pbmc(mdev, in); out: kfree(in); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index e75759533ae0..16c44d628eda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -170,16 +170,23 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) static int mlx5e_rx_reporter_timeout_recover(void *ctx) { struct mlx5_eq_comp *eq; + struct mlx5e_priv *priv; struct mlx5e_rq *rq; int err; rq = ctx; + priv = rq->priv; + + mutex_lock(&priv->state_lock); + eq = rq->cq.mcq.eq; err = mlx5e_health_channel_eq_recover(rq->netdev, eq, rq->cq.ch_stats); if (err && rq->icosq) clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state); + mutex_unlock(&priv->state_lock); + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 727fa7c18523..6056106edcc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -327,6 +327,10 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, if (unlikely(!sa_entry)) { rcu_read_unlock(); atomic64_inc(&ipsec->sw_stats.ipsec_rx_drop_sadb_miss); + /* Clear secpath to prevent invalid dereference + * in downstream XFRM policy checks. + */ + secpath_reset(skb); return; } xfrm_state_hold(sa_entry->x); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 33bdb7f1e03f..21bb88c5d3dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -676,6 +676,27 @@ static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_rq_cqe_err(rq); } +static void mlx5e_rq_timeout_work(struct work_struct *timeout_work) +{ + struct mlx5e_rq *rq = container_of(timeout_work, + struct mlx5e_rq, + rx_timeout_work); + + /* Acquire netdev instance lock to synchronize with channel close and + * reopen flows. Either successfully obtain the lock, or detect that + * channels are closing for another reason, making this work no longer + * necessary. + */ + while (!netdev_trylock(rq->netdev)) { + if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state)) + return; + msleep(20); + } + + mlx5e_reporter_rx_timeout(rq); + netdev_unlock(rq->netdev); +} + static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq) { rq->wqe_overflow.page = alloc_page(GFP_KERNEL); @@ -876,6 +897,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, rqp->wq.db_numa_node = node; INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work); + INIT_WORK(&rq->rx_timeout_work, mlx5e_rq_timeout_work); if (params->xdp_prog) bpf_prog_inc(params->xdp_prog); @@ -1261,7 +1283,8 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n", rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes); - mlx5e_reporter_rx_timeout(rq); + queue_work(rq->priv->wq, &rq->rx_timeout_work); + return -ETIMEDOUT; } @@ -1432,6 +1455,7 @@ void mlx5e_close_rq(struct mlx5e_rq *rq) if (rq->dim) cancel_work_sync(&rq->dim->work); cancel_work_sync(&rq->recover_work); + cancel_work_sync(&rq->rx_timeout_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); mlx5e_free_rq(rq); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f350a6662880..f1abf4242cd2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2596,7 +2596,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) budget = min(budget, stmmac_tx_avail(priv, queue)); - while (budget-- > 0) { + for (; budget > 0; budget--) { struct stmmac_metadata_request meta_req; struct xsk_tx_metadata *meta = NULL; dma_addr_t dma_addr; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index f678c1bdacdf..605b0315b4cb 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -477,6 +477,8 @@ static const struct kszphy_type ksz8051_type = { static const struct kszphy_type ksz8081_type = { .led_mode_reg = MII_KSZPHY_CTRL_2, + .cable_diag_reg = KSZ8081_LMD, + .pair_mask = KSZPHY_WIRE_PAIR_MASK, .has_broadcast_disable = true, .has_nand_tree_disable = true, .has_rmii_ref_clk_sel = true, diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index a8d50dd93d12..a38ffbf4b3f0 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1129,6 +1129,9 @@ static void __handle_link_change(struct usbnet *dev) * tx queue is stopped by netcore after link becomes off */ } else { + if (test_and_clear_bit(EVENT_LINK_CARRIER_ON, &dev->flags)) + netif_carrier_on(dev->net); + /* submitting URBs for reading packets */ queue_work(system_bh_wq, &dev->bh_work); } @@ -2015,10 +2018,12 @@ EXPORT_SYMBOL(usbnet_manage_power); void usbnet_link_change(struct usbnet *dev, bool link, bool need_reset) { /* update link after link is reseted */ - if (link && !need_reset) - netif_carrier_on(dev->net); - else + if (link && !need_reset) { + set_bit(EVENT_LINK_CARRIER_ON, &dev->flags); + } else { + clear_bit(EVENT_LINK_CARRIER_ON, &dev->flags); netif_carrier_off(dev->net); + } if (need_reset && link) usbnet_defer_kevent(dev, EVENT_LINK_RESET); diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 9a4beea6ee0c..3ccd649913b5 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1302,6 +1302,8 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, struct net *net = dev_net(vrf_dev); struct rt6_info *rt6; + skb_dst_drop(skb); + rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb, RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE); if (unlikely(!rt6)) diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 208682f77179..a2d54122823d 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -76,6 +76,7 @@ struct usbnet { # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 # define EVENT_NO_IP_ALIGN 13 +# define EVENT_LINK_CARRIER_ON 14 /* This one is special, as it indicates that the device is going away * there are cyclic dependencies between tasklet, timer and bh * that must be broken diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 4316ca3d9872..bddfa389effa 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -379,6 +379,43 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net, } } +static void neigh_flush_one(struct neighbour *n) +{ + hlist_del_rcu(&n->hash); + hlist_del_rcu(&n->dev_list); + + write_lock(&n->lock); + + neigh_del_timer(n); + neigh_mark_dead(n); + + if (refcount_read(&n->refcnt) != 1) { + /* The most unpleasant situation. + * We must destroy neighbour entry, + * but someone still uses it. + * + * The destroy will be delayed until + * the last user releases us, but + * we must kill timers etc. and move + * it to safe state. + */ + __skb_queue_purge(&n->arp_queue); + n->arp_queue_len_bytes = 0; + WRITE_ONCE(n->output, neigh_blackhole); + + if (n->nud_state & NUD_VALID) + n->nud_state = NUD_NOARP; + else + n->nud_state = NUD_NONE; + + neigh_dbg(2, "neigh %p is stray\n", n); + } + + write_unlock(&n->lock); + + neigh_cleanup_and_release(n); +} + static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, bool skip_perm) { @@ -394,32 +431,24 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, n->flags & NTF_EXT_VALIDATED)) continue; - hlist_del_rcu(&n->hash); - hlist_del_rcu(&n->dev_list); - write_lock(&n->lock); - neigh_del_timer(n); - neigh_mark_dead(n); - if (refcount_read(&n->refcnt) != 1) { - /* The most unpleasant situation. - * We must destroy neighbour entry, - * but someone still uses it. - * - * The destroy will be delayed until - * the last user releases us, but - * we must kill timers etc. and move - * it to safe state. - */ - __skb_queue_purge(&n->arp_queue); - n->arp_queue_len_bytes = 0; - WRITE_ONCE(n->output, neigh_blackhole); - if (n->nud_state & NUD_VALID) - n->nud_state = NUD_NOARP; - else - n->nud_state = NUD_NONE; - neigh_dbg(2, "neigh %p is stray\n", n); - } - write_unlock(&n->lock); - neigh_cleanup_and_release(n); + neigh_flush_one(n); + } +} + +static void neigh_flush_table(struct neigh_table *tbl) +{ + struct neigh_hash_table *nht; + int i; + + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); + + for (i = 0; i < (1 << nht->hash_shift); i++) { + struct hlist_node *tmp; + struct neighbour *n; + + neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) + neigh_flush_one(n); } } @@ -435,7 +464,12 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, bool skip_perm) { write_lock_bh(&tbl->lock); - neigh_flush_dev(tbl, dev, skip_perm); + if (likely(dev)) { + neigh_flush_dev(tbl, dev, skip_perm); + } else { + DEBUG_NET_WARN_ON_ONCE(skip_perm); + neigh_flush_table(tbl); + } write_unlock_bh(&tbl->lock); pneigh_ifdown(tbl, dev, skip_perm); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 7272d7e0fc36..02c16909f618 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -445,15 +445,17 @@ struct fib6_dump_arg { static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg) { enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE; + unsigned int nsiblings; int err; if (!rt || rt == arg->net->ipv6.fib6_null_entry) return 0; - if (rt->fib6_nsiblings) + nsiblings = READ_ONCE(rt->fib6_nsiblings); + if (nsiblings) err = call_fib6_multipath_entry_notifier(arg->nb, fib_event, rt, - rt->fib6_nsiblings, + nsiblings, arg->extack); else err = call_fib6_entry_notifier(arg->nb, fib_event, rt, @@ -1126,7 +1128,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, if (rt6_duplicate_nexthop(iter, rt)) { if (rt->fib6_nsiblings) - rt->fib6_nsiblings = 0; + WRITE_ONCE(rt->fib6_nsiblings, 0); if (!(iter->fib6_flags & RTF_EXPIRES)) return -EEXIST; if (!(rt->fib6_flags & RTF_EXPIRES)) { @@ -1155,7 +1157,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, */ if (rt_can_ecmp && rt6_qualify_for_ecmp(iter)) - rt->fib6_nsiblings++; + WRITE_ONCE(rt->fib6_nsiblings, + rt->fib6_nsiblings + 1); } if (iter->fib6_metric > rt->fib6_metric) @@ -1205,7 +1208,8 @@ next_iter: fib6_nsiblings = 0; list_for_each_entry_safe(sibling, temp_sibling, &rt->fib6_siblings, fib6_siblings) { - sibling->fib6_nsiblings++; + WRITE_ONCE(sibling->fib6_nsiblings, + sibling->fib6_nsiblings + 1); BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings); fib6_nsiblings++; } @@ -1252,8 +1256,9 @@ add: list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) - sibling->fib6_nsiblings--; - rt->fib6_nsiblings = 0; + WRITE_ONCE(sibling->fib6_nsiblings, + sibling->fib6_nsiblings - 1); + WRITE_ONCE(rt->fib6_nsiblings, 0); list_del_rcu(&rt->fib6_siblings); rcu_read_lock(); rt6_multipath_rebalance(next_sibling); @@ -2002,8 +2007,9 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, notify_del = true; list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) - sibling->fib6_nsiblings--; - rt->fib6_nsiblings = 0; + WRITE_ONCE(sibling->fib6_nsiblings, + sibling->fib6_nsiblings - 1); + WRITE_ONCE(rt->fib6_nsiblings, 0); list_del_rcu(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3fbe0885c21c..17c5b54ecad8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5347,7 +5347,8 @@ static void ip6_route_mpath_notify(struct fib6_info *rt, */ rcu_read_lock(); - if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) { + if ((nlflags & NLM_F_APPEND) && rt_last && + READ_ONCE(rt_last->fib6_nsiblings)) { rt = list_first_or_null_rcu(&rt_last->fib6_siblings, struct fib6_info, fib6_siblings); @@ -5671,32 +5672,34 @@ static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg) static size_t rt6_nlmsg_size(struct fib6_info *f6i) { + struct fib6_info *sibling; + struct fib6_nh *nh; int nexthop_len; if (f6i->nh) { nexthop_len = nla_total_size(4); /* RTA_NH_ID */ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size, &nexthop_len); - } else { - struct fib6_nh *nh = f6i->fib6_nh; - struct fib6_info *sibling; - - nexthop_len = 0; - if (f6i->fib6_nsiblings) { - rt6_nh_nlmsg_size(nh, &nexthop_len); - - rcu_read_lock(); + goto common; + } - list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, - fib6_siblings) { - rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len); - } + rcu_read_lock(); +retry: + nh = f6i->fib6_nh; + nexthop_len = 0; + if (READ_ONCE(f6i->fib6_nsiblings)) { + rt6_nh_nlmsg_size(nh, &nexthop_len); - rcu_read_unlock(); + list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, + fib6_siblings) { + rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len); + if (!READ_ONCE(f6i->fib6_nsiblings)) + goto retry; } - nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); } - + rcu_read_unlock(); + nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); +common: return NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(16) /* RTA_SRC */ + nla_total_size(16) /* RTA_DST */ @@ -5857,7 +5860,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) goto nla_put_failure; - } else if (rt->fib6_nsiblings) { + } else if (READ_ONCE(rt->fib6_nsiblings)) { struct fib6_info *sibling; struct nlattr *mp; @@ -5959,16 +5962,21 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i, if (f6i->fib6_nh->fib_nh_dev == dev) return true; - if (f6i->fib6_nsiblings) { - struct fib6_info *sibling, *next_sibling; + if (READ_ONCE(f6i->fib6_nsiblings)) { + const struct fib6_info *sibling; - list_for_each_entry_safe(sibling, next_sibling, - &f6i->fib6_siblings, fib6_siblings) { - if (sibling->fib6_nh->fib_nh_dev == dev) + rcu_read_lock(); + list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, + fib6_siblings) { + if (sibling->fib6_nh->fib_nh_dev == dev) { + rcu_read_unlock(); return true; + } + if (!READ_ONCE(f6i->fib6_nsiblings)) + break; } + rcu_read_unlock(); } - return false; } @@ -6324,8 +6332,9 @@ errout: void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int nlm_flags) { - struct sk_buff *skb; struct net *net = info->nl_net; + struct sk_buff *skb; + size_t sz; u32 seq; int err; @@ -6333,17 +6342,21 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, seq = info->nlh ? info->nlh->nlmsg_seq : 0; rcu_read_lock(); - - skb = nlmsg_new(rt6_nlmsg_size(rt), GFP_ATOMIC); + sz = rt6_nlmsg_size(rt); +retry: + skb = nlmsg_new(sz, GFP_ATOMIC); if (!skb) goto errout; err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, event, info->portid, seq, nlm_flags); if (err < 0) { - /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ - WARN_ON(err == -EMSGSIZE); kfree_skb(skb); + /* -EMSGSIZE implies needed space grew under us. */ + if (err == -EMSGSIZE) { + sz = max(rt6_nlmsg_size(rt), sz << 1); + goto retry; + } goto errout; } diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index 3500d8f1bac4..c13dd5efa27a 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -102,7 +102,7 @@ def build_tunnel(cfg, outer_ipver, tun_info): remote_addr = cfg.remote_addr_v[outer_ipver] tun_type = tun_info[0] - tun_arg = tun_info[2] + tun_arg = tun_info[1] ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}") defer(ip, f"link del {tun_type}-ksft") ip(f"link set dev {tun_type}-ksft up") @@ -119,15 +119,30 @@ def build_tunnel(cfg, outer_ipver, tun_info): return remote_v4, remote_v6 +def restore_wanted_features(cfg): + features_cmd = "" + for feature in cfg.hw_features: + setting = "on" if feature in cfg.wanted_features else "off" + features_cmd += f" {feature} {setting}" + try: + ethtool(f"-K {cfg.ifname} {features_cmd}") + except Exception as e: + ksft_pr(f"WARNING: failure restoring wanted features: {e}") + + def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): """Construct specific tests from the common template.""" def f(cfg): cfg.require_ipver(outer_ipver) + defer(restore_wanted_features, cfg) if not cfg.have_stat_super_count and \ not cfg.have_stat_wire_count: raise KsftSkipEx(f"Device does not support LSO queue stats") + if feature not in cfg.hw_features: + raise KsftSkipEx(f"Device does not support {feature}") + ipver = outer_ipver if tun: remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun) @@ -136,36 +151,21 @@ def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): remote_v4 = cfg.remote_addr_v["4"] remote_v6 = cfg.remote_addr_v["6"] - tun_partial = tun and tun[1] - # Tunnel which can silently fall back to gso-partial - has_gso_partial = tun and 'tx-gso-partial' in cfg.features - - # For TSO4 via partial we need mangleid - if ipver == "4" and feature in cfg.partial_features: - ksft_pr("Testing with mangleid enabled") - if 'tx-tcp-mangleid-segmentation' not in cfg.features: - ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on") - defer(ethtool, f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off") - # First test without the feature enabled. ethtool(f"-K {cfg.ifname} {feature} off") - if has_gso_partial: - ethtool(f"-K {cfg.ifname} tx-gso-partial off") run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False) - # Now test with the feature enabled. - # For compatible tunnels only - just GSO partial, not specific feature. - if has_gso_partial: + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off") + if feature in cfg.partial_features: ethtool(f"-K {cfg.ifname} tx-gso-partial on") - run_one_stream(cfg, ipver, remote_v4, remote_v6, - should_lso=tun_partial) + if ipver == "4": + ksft_pr("Testing with mangleid enabled") + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on") # Full feature enabled. - if feature in cfg.features: - ethtool(f"-K {cfg.ifname} {feature} on") - run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True) - else: - raise KsftXfailEx(f"Device does not support {feature}") + ethtool(f"-K {cfg.ifname} {feature} on") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True) f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver return f @@ -176,23 +176,39 @@ def query_nic_features(cfg) -> None: cfg.have_stat_super_count = False cfg.have_stat_wire_count = False - cfg.features = set() features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) - for f in features["active"]["bits"]["bit"]: - cfg.features.add(f["name"]) + + cfg.wanted_features = set() + for f in features["wanted"]["bits"]["bit"]: + cfg.wanted_features.add(f["name"]) + + cfg.hw_features = set() + hw_all_features_cmd = "" + for f in features["hw"]["bits"]["bit"]: + if f.get("value", False): + feature = f["name"] + cfg.hw_features.add(feature) + hw_all_features_cmd += f" {feature} on" + try: + ethtool(f"-K {cfg.ifname} {hw_all_features_cmd}") + except Exception as e: + ksft_pr(f"WARNING: failure enabling all hw features: {e}") + ksft_pr("partial gso feature detection may be impacted") # Check which features are supported via GSO partial cfg.partial_features = set() - if 'tx-gso-partial' in cfg.features: + if 'tx-gso-partial' in cfg.hw_features: ethtool(f"-K {cfg.ifname} tx-gso-partial off") no_partial = set() features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) for f in features["active"]["bits"]["bit"]: no_partial.add(f["name"]) - cfg.partial_features = cfg.features - no_partial + cfg.partial_features = cfg.hw_features - no_partial ethtool(f"-K {cfg.ifname} tx-gso-partial on") + restore_wanted_features(cfg) + stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True) if stats: if 'tx-hw-gso-packets' in stats[0]: @@ -211,13 +227,14 @@ def main() -> None: query_nic_features(cfg) test_info = ( - # name, v4/v6 ethtool_feature tun:(type, partial, args) - ("", "4", "tx-tcp-segmentation", None), - ("", "6", "tx-tcp6-segmentation", None), - ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")), - ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")), - ("gre", "4", "tx-gre-segmentation", ("gre", False, "")), - ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")), + # name, v4/v6 ethtool_feature tun:(type, args, inner ip versions) + ("", "4", "tx-tcp-segmentation", None), + ("", "6", "tx-tcp6-segmentation", None), + ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum", ("4", "6"))), + ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx", ("4", "6"))), + ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum", ("4", "6"))), + ("gre", "4", "tx-gre-segmentation", ("gre", "", ("4", "6"))), + ("gre", "6", "tx-gre-segmentation", ("ip6gre","", ("4", "6"))), ) cases = [] @@ -227,11 +244,13 @@ def main() -> None: if info[1] and outer_ipver != info[1]: continue - cases.append(test_builder(info[0], cfg, outer_ipver, info[2], - tun=info[3], inner_ipver="4")) if info[3]: - cases.append(test_builder(info[0], cfg, outer_ipver, info[2], - tun=info[3], inner_ipver="6")) + cases += [ + test_builder(info[0], cfg, outer_ipver, info[2], info[3], inner_ipver) + for inner_ipver in info[3][2] + ] + else: + cases.append(test_builder(info[0], cfg, outer_ipver, info[2], None, outer_ipver)) ksft_run(cases=cases, args=(cfg, )) ksft_exit() |