From 6b8b42585886c59a008015083282aae434349094 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 22 Dec 2021 06:54:53 +0000 Subject: net/mlx5: DR, Fix NULL vs IS_ERR checking in dr_domain_init_resources The mlx5_get_uars_page() function returns error pointers. Using IS_ERR() to check the return value to fix this. Fixes: 4ec9e7b02697 ("net/mlx5: DR, Expose steering domain functionality") Signed-off-by: Miaoqian Lin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 8cbd36c82b3b..f6e6d9209766 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include +#include #include "dr_types.h" #define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \ @@ -72,9 +73,9 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn) } dmn->uar = mlx5_get_uars_page(dmn->mdev); - if (!dmn->uar) { + if (IS_ERR(dmn->uar)) { mlx5dr_err(dmn, "Couldn't allocate UAR\n"); - ret = -ENOMEM; + ret = PTR_ERR(dmn->uar); goto clean_pd; } -- cgit From 624bf42c2e3930acca9fcfc340b2fa38e712da84 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 12 Dec 2021 16:19:58 +0200 Subject: net/mlx5: DR, Fix querying eswitch manager vport for ECPF On BlueField the E-Switch manager is the ECPF (vport 0xFFFE), but when querying capabilities of ECPF eswitch manager, need to query vport 0 with other_vport = 0. Fixes: 9091b821aaa4 ("net/mlx5: DR, Handle eswitch manager and uplink vports separately") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index f6e6d9209766..c54cc45f63dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -164,9 +164,7 @@ static int dr_domain_query_vport(struct mlx5dr_domain *dmn, static int dr_domain_query_esw_mngr(struct mlx5dr_domain *dmn) { - return dr_domain_query_vport(dmn, - dmn->info.caps.is_ecpf ? MLX5_VPORT_ECPF : 0, - false, + return dr_domain_query_vport(dmn, 0, false, &dmn->info.caps.vports.esw_manager_caps); } -- cgit From 26a7993c93a74a3fee83a37b46e00e69e49e57c2 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 26 Oct 2021 08:25:19 +0300 Subject: net/mlx5: Use first online CPU instead of hard coded CPU Hard coded CPU (0 in our case) might be offline. Hence, use the first online CPU instead. 
Fixes: f891b7cdbdcd ("net/mlx5: Enable single IRQ for PCI Function") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 830444f927d4..0e84c005d160 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -398,7 +398,7 @@ irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, cpumask_copy(irq->mask, affinity); if (!irq_pool_is_sf_pool(pool) && !pool->xa_num_irqs.max && cpumask_empty(irq->mask)) - cpumask_set_cpu(0, irq->mask); + cpumask_set_cpu(cpumask_first(cpu_online_mask), irq->mask); irq_set_affinity_hint(irq->irqn, irq->mask); unlock: mutex_unlock(&pool->lock); -- cgit From aa968f922039706f6d13e8870b49e424d0a8d9ad Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 24 Nov 2021 23:10:57 +0200 Subject: net/mlx5: Fix error print in case of IRQ request failed In case IRQ layer failed to find or to request irq, the driver is printing the first cpu of the provided affinity as part of the error print. Empty affinity is a valid input for the IRQ layer, and it is an error to call cpumask_first() on empty affinity. Remove the first cpu print from the error message. Fixes: c36326d38d93 ("net/mlx5: Round-Robin EQs over IRQs") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 0e84c005d160..bcee30f5de0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -356,8 +356,8 @@ static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool, new_irq = irq_pool_create_irq(pool, affinity); if (IS_ERR(new_irq)) { if (!least_loaded_irq) { - mlx5_core_err(pool->dev, "Didn't find IRQ for cpu = %u\n", - cpumask_first(affinity)); + mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n", + PTR_ERR(new_irq)); mutex_unlock(&pool->lock); return new_irq; } -- cgit From 33de865f7bce3968676e43b0182af0a2dd359dae Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 23 Nov 2021 20:08:13 +0200 Subject: net/mlx5: Fix SF health recovery flow SF do not directly control the PCI device. During recovery flow SF should not be allowed to do pci disable or pci reset, its PF will do it. 
It fixes the following kernel trace: mlx5_core.sf mlx5_core.sf.25: mlx5_health_try_recover:387:(pid 40948): starting health recovery flow mlx5_core 0000:03:00.0: mlx5_pci_slot_reset was called mlx5_core 0000:03:00.0: wait vital counter value 0xab175 after 1 iterations mlx5_core.sf mlx5_core.sf.25: firmware version: 24.32.532 mlx5_core.sf mlx5_core.sf.23: mlx5_health_try_recover:387:(pid 40946): starting health recovery flow mlx5_core 0000:03:00.0: mlx5_pci_slot_reset was called mlx5_core 0000:03:00.0: wait vital counter value 0xab193 after 1 iterations mlx5_core.sf mlx5_core.sf.23: firmware version: 24.32.532 mlx5_core.sf mlx5_core.sf.25: mlx5_cmd_check:813:(pid 40948): ENABLE_HCA(0x104) op_mod(0x0) failed, status bad resource state(0x9), syndrome (0x658908) mlx5_core.sf mlx5_core.sf.25: mlx5_function_setup:1292:(pid 40948): enable hca failed mlx5_core.sf mlx5_core.sf.25: mlx5_health_try_recover:389:(pid 40948): health recovery failed Fixes: 1958fc2f0712 ("net/mlx5: SF, Add auxiliary device driver") Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 7df9c7f8d9c8..65083496f913 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1809,12 +1809,13 @@ void mlx5_disable_device(struct mlx5_core_dev *dev) int mlx5_recover_device(struct mlx5_core_dev *dev) { - int ret = -EIO; + if (!mlx5_core_is_sf(dev)) { + mlx5_pci_disable_device(dev); + if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED) + return -EIO; + } - mlx5_pci_disable_device(dev); - if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED) - ret = mlx5_load_one(dev); - return ret; + return mlx5_load_one(dev); } static struct pci_driver mlx5_core_driver = { -- cgit From d671e109bd8548d067b27e39e183a484430bf102 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Tue, 14 Dec 2021 03:52:53 +0200 Subject: net/mlx5: Fix tc max supported prio for nic mode Only prio 1 is supported if firmware doesn't support ignore flow level for nic mode. The offending commit removed the check wrongly. Add it back. 
Fixes: 9a99c8f1253a ("net/mlx5e: E-Switch, Offload all chain 0 priorities when modify header and forward action is not supported") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index 97e5845b4cfd..d5e47630e284 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -121,6 +121,9 @@ u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains) u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) { + if (!mlx5_chains_prios_supported(chains)) + return 1; + if (mlx5_chains_ignore_flow_level_supported(chains)) return UINT_MAX; -- cgit From 918fc3855a6507a200e9cf22c20be852c0982687 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Tue, 30 Nov 2021 16:05:44 +0200 Subject: net/mlx5e: Wrap the tx reporter dump callback to extract the sq Function mlx5e_tx_reporter_dump_sq() casts its void * argument to struct mlx5e_txqsq *, but in TX-timeout-recovery flow the argument is actually of type struct mlx5e_tx_timeout_ctx *. mlx5_core 0000:08:00.1 enp8s0f1: TX timeout detected mlx5_core 0000:08:00.1 enp8s0f1: TX timeout on queue: 1, SQ: 0x11ec, CQ: 0x146d, SQ Cons: 0x0 SQ Prod: 0x1, usecs since last trans: 21565000 BUG: stack guard page was hit at 0000000093f1a2de (stack is 00000000b66ea0dc..000000004d932dae) kernel stack overflow (page fault): 0000 [#1] SMP NOPTI CPU: 5 PID: 95 Comm: kworker/u20:1 Tainted: G W OE 5.13.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5e mlx5e_tx_timeout_work [mlx5_core] RIP: 0010:mlx5e_tx_reporter_dump_sq+0xd3/0x180 [mlx5_core] Call Trace: mlx5e_tx_reporter_dump+0x43/0x1c0 [mlx5_core] devlink_health_do_dump.part.91+0x71/0xd0 devlink_health_report+0x157/0x1b0 mlx5e_reporter_tx_timeout+0xb9/0xf0 [mlx5_core] ? mlx5e_tx_reporter_err_cqe_recover+0x1d0/0x1d0 [mlx5_core] ? mlx5e_health_queue_dump+0xd0/0xd0 [mlx5_core] ? update_load_avg+0x19b/0x550 ? set_next_entity+0x72/0x80 ? pick_next_task_fair+0x227/0x340 ? finish_task_switch+0xa2/0x280 mlx5e_tx_timeout_work+0x83/0xb0 [mlx5_core] process_one_work+0x1de/0x3a0 worker_thread+0x2d/0x3c0 ? process_one_work+0x3a0/0x3a0 kthread+0x115/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x1f/0x30 --[ end trace 51ccabea504edaff ]--- RIP: 0010:mlx5e_tx_reporter_dump_sq+0xd3/0x180 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled end Kernel panic - not syncing: Fatal exception To fix this bug add a wrapper for mlx5e_tx_reporter_dump_sq() which extracts the sq from struct mlx5e_tx_timeout_ctx and set it as the TX-timeout-recovery flow dump callback. 
Fixes: 5f29458b77d5 ("net/mlx5e: Support dump callback in TX reporter") Signed-off-by: Aya Levin Signed-off-by: Amir Tzin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 4f4bc8726ec4..614cd9477600 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -466,6 +466,14 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } +static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_tx_timeout_ctx *to_ctx = ctx; + + return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq); +} + static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { @@ -561,7 +569,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) to_ctx.sq = sq; err_ctx.ctx = &to_ctx; err_ctx.recover = mlx5e_tx_reporter_timeout_recover; - err_ctx.dump = mlx5e_tx_reporter_dump_sq; + err_ctx.dump = mlx5e_tx_reporter_timeout_dump; snprintf(err_str, sizeof(err_str), "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, -- cgit From a0cb909644c36230a3c48904d14b91732de79fc0 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 13 Dec 2021 11:05:11 +0200 Subject: net/mlx5e: Fix skb memory leak when TC classifier action offloads are disabled When TC classifier action offloads are disabled (CONFIG_MLX5_CLS_ACT in Kconfig), the mlx5e_rep_tc_receive() function which is responsible for passing the skb to the stack (or freeing it) is defined as a nop, and results in leaking the skb memory. Replace the nop with a call to napi_gro_receive() to resolve the leak. Fixes: 28e7606fa8f1 ("net/mlx5e: Refactor rx handler of represetor device") Signed-off-by: Gal Pressman Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h index d6c7c81690eb..7c9dd3a75f8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h @@ -66,7 +66,7 @@ mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, static inline void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, - struct sk_buff *skb) {} + struct sk_buff *skb) { napi_gro_receive(rq->cq.napi, skb); } #endif /* CONFIG_MLX5_CLS_ACT */ -- cgit From 17958d7cd731b977ae7d4af38d891c3a1235b5f1 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 12 Oct 2021 19:40:09 +0300 Subject: net/mlx5e: Fix interoperability between XSK and ICOSQ recovery flow Both regular RQ and XSKRQ use the same ICOSQ for UMRs. When doing recovery for the ICOSQ, don't forget to deactivate XSKRQ. XSK can be opened and closed while channels are active, so a new mutex prevents the ICOSQ recovery from running at the same time. The ICOSQ recovery deactivates and reactivates XSKRQ, so any parallel change in XSK state would break consistency. 
As the regular RQ is running, it's not enough to just flush the recovery work, because it can be rescheduled. Fixes: be5323c8379f ("net/mlx5e: Report and recover from CQE error on ICOSQ") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 ++ .../net/ethernet/mellanox/mlx5/core/en/health.h | 2 ++ .../ethernet/mellanox/mlx5/core/en/reporter_rx.c | 35 +++++++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 16 +++++++++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++-- 5 files changed, 58 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f0ac6b0d9653..f42067adc79d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -783,6 +783,8 @@ struct mlx5e_channel { DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; int cpu; + /* Sync between icosq recovery and XSK enable/disable. */ + struct mutex icosq_recovery_lock; }; struct mlx5e_ptp; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index d5b7110a4265..0107e4e73bb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -30,6 +30,8 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv); void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq); void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq); void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c); +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c); #define MLX5E_REPORTER_PER_Q_MAX_LEN 256 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 74086eb556ae..2684e9da9f41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -62,6 +62,7 @@ static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq) static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) { + struct mlx5e_rq *xskrq = NULL; struct mlx5_core_dev *mdev; struct mlx5e_icosq *icosq; struct net_device *dev; @@ -70,7 +71,13 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) int err; icosq = ctx; + + mutex_lock(&icosq->channel->icosq_recovery_lock); + + /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. 
*/ rq = &icosq->channel->rq; + if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state)) + xskrq = &icosq->channel->xskrq; mdev = icosq->channel->mdev; dev = icosq->channel->netdev; err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state); @@ -84,6 +91,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) goto out; mlx5e_deactivate_rq(rq); + if (xskrq) + mlx5e_deactivate_rq(xskrq); + err = mlx5e_wait_for_icosq_flush(icosq); if (err) goto out; @@ -97,15 +107,28 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) goto out; mlx5e_reset_icosq_cc_pc(icosq); + mlx5e_free_rx_in_progress_descs(rq); + if (xskrq) + mlx5e_free_rx_in_progress_descs(xskrq); + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); mlx5e_activate_icosq(icosq); - mlx5e_activate_rq(rq); + mlx5e_activate_rq(rq); rq->stats->recover++; + + if (xskrq) { + mlx5e_activate_rq(xskrq); + xskrq->stats->recover++; + } + + mutex_unlock(&icosq->channel->icosq_recovery_lock); + return 0; out: clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + mutex_unlock(&icosq->channel->icosq_recovery_lock); return err; } @@ -706,6 +729,16 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); } +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c) +{ + mutex_lock(&c->icosq_recovery_lock); +} + +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c) +{ + mutex_unlock(&c->icosq_recovery_lock); +} + static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { .name = "rx", .recover = mlx5e_rx_reporter_recover, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 538bc2419bd8..8526a5fbbf0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -4,6 +4,7 @@ #include "setup.h" #include "en/params.h" #include "en/txrx.h" +#include "en/health.h" /* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may * change unexpectedly, and mlx5e has a minimum valid stride size for striding @@ -170,7 +171,13 @@ void mlx5e_close_xsk(struct mlx5e_channel *c) void mlx5e_activate_xsk(struct mlx5e_channel *c) { + /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid + * activating XSKRQ in the middle of recovery. + */ + mlx5e_reporter_icosq_suspend_recovery(c); set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + /* TX queue is created active. */ spin_lock_bh(&c->async_icosq_lock); @@ -180,6 +187,13 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c) void mlx5e_deactivate_xsk(struct mlx5e_channel *c) { - mlx5e_deactivate_rq(&c->xskrq); + /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the + * middle of recovery. Suspend the recovery to avoid it. + */ + mlx5e_reporter_icosq_suspend_recovery(c); + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ + /* TX queue is disabled on close. 
*/ } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 65571593ec5c..a572fc9690ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1087,8 +1087,6 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq) void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); - if (rq->icosq) - cancel_work_sync(&rq->icosq->recover_work); cancel_work_sync(&rq->recover_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); @@ -2088,6 +2086,8 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, if (err) goto err_close_xdpsq_cq; + mutex_init(&c->icosq_recovery_lock); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); if (err) goto err_close_async_icosq; @@ -2156,9 +2156,12 @@ static void mlx5e_close_queues(struct mlx5e_channel *c) mlx5e_close_xdpsq(&c->xdpsq); if (c->xdp) mlx5e_close_xdpsq(&c->rq_xdpsq); + /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */ + cancel_work_sync(&c->icosq.recover_work); mlx5e_close_rq(&c->rq); mlx5e_close_sqs(c); mlx5e_close_icosq(&c->icosq); + mutex_destroy(&c->icosq_recovery_lock); mlx5e_close_icosq(&c->async_icosq); if (c->xdp) mlx5e_close_cq(&c->rq_xdpsq.cq); -- cgit From 19c4aba2d4e23997061fb11aed8a3e41334bfa14 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 22 Jul 2020 16:32:44 +0300 Subject: net/mlx5e: Fix ICOSQ recovery flow for XSK There are two ICOSQs per channel: one is needed for RX, and the other for async operations (XSK TX, kTLS offload). Currently, the recovery flow for both is the same, and async ICOSQ is mistakenly treated like the regular ICOSQ. This patch prevents running the regular ICOSQ recovery on async ICOSQ. The purpose of async ICOSQ is to handle XSK wakeup requests and post kTLS offload RX parameters, it has nothing to do with RQ and XSKRQ UMRs, so the regular recovery sequence is not applicable here. 
Fixes: be5323c8379f ("net/mlx5e: Report and recover from CQE error on ICOSQ") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 30 +++++++++++++++++------ 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f42067adc79d..b47a0d3ef22f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1016,9 +1016,6 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param); void mlx5e_destroy_rq(struct mlx5e_rq *rq); struct mlx5e_sq_param; -int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct mlx5e_icosq *sq); -void mlx5e_close_icosq(struct mlx5e_icosq *sq); int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, struct mlx5e_xdpsq *sq, bool is_redirect); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a572fc9690ed..3b0f3a831216 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1214,9 +1214,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_icosq_cqe_err(sq); } +static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, + recover_work); + + /* Not implemented yet. */ + + netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n"); +} + static int mlx5e_alloc_icosq(struct mlx5e_channel *c, struct mlx5e_sq_param *param, - struct mlx5e_icosq *sq) + struct mlx5e_icosq *sq, + work_func_t recover_work_func) { void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); struct mlx5_core_dev *mdev = c->mdev; @@ -1237,7 +1248,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work); + INIT_WORK(&sq->recover_work, recover_work_func); return 0; @@ -1573,13 +1584,14 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_tx_err_cqe(sq); } -int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct mlx5e_icosq *sq) +static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq, + work_func_t recover_work_func) { struct mlx5e_create_sq_param csp = {}; int err; - err = mlx5e_alloc_icosq(c, param, sq); + err = mlx5e_alloc_icosq(c, param, sq, recover_work_func); if (err) return err; @@ -1618,7 +1630,7 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) synchronize_net(); /* Sync with NAPI. 
*/ } -void mlx5e_close_icosq(struct mlx5e_icosq *sq) +static void mlx5e_close_icosq(struct mlx5e_icosq *sq) { struct mlx5e_channel *c = sq->channel; @@ -2082,13 +2094,15 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, spin_lock_init(&c->async_icosq_lock); - err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq); + err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq, + mlx5e_async_icosq_err_cqe_work); if (err) goto err_close_xdpsq_cq; mutex_init(&c->icosq_recovery_lock); - err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq, + mlx5e_icosq_err_cqe_work); if (err) goto err_close_async_icosq; -- cgit From 2820110d945923ab2f4901753e4ccbb2a506fa8e Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 2 Dec 2021 11:18:02 +0800 Subject: net/mlx5e: Delete forward rule for ct or sample action When there is ct or sample action, the ct or sample rule will be deleted and return. But if there is an extra mirror action, the forward rule can't be deleted because of the return. Fix it by removing the return. Fixes: 69e2916ebce4 ("net/mlx5: CT: Add support for mirroring") Fixes: f94d6389f6a8 ("net/mlx5e: TC, Add support to offload sample action") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3d45f4ae80c0..f633448c3cc7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1196,21 +1196,16 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) goto offload_rule_0; - if (flow_flag_test(flow, CT)) { - mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); - return; - } - - if (flow_flag_test(flow, SAMPLE)) { - mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); - return; - } - if (attr->esw_attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); + if (flow_flag_test(flow, CT)) + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); + else if (flow_flag_test(flow, SAMPLE)) + mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); + else offload_rule_0: - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); } struct mlx5_flow_handle * -- cgit From 4390c6edc0fb390e699d0f886f45575dfeafeb4b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 6 Nov 2021 18:08:11 +0100 Subject: net/mlx5: Fix some error handling paths in 'mlx5e_tc_add_fdb_flow()' All the error handling paths of 'mlx5e_tc_add_fdb_flow()' end to 'err_out' where 'flow_flag_set(flow, FAILED);' is called. All but the new error handling paths added by the commits given in the Fixes tag below. Fix these error handling paths and branch to 'err_out'. 
Fixes: 166f431ec6be ("net/mlx5e: Add indirect tc offload of ovs internal port") Fixes: b16eb3c81fe2 ("net/mlx5: Support internal port as decap route device") Signed-off-by: Christophe JAILLET Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed (cherry picked from commit 31108d142f3632970f6f3e0224bd1c6781c9f87d) --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f633448c3cc7..a60c7680fd2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1440,7 +1440,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, metadata); if (err) - return err; + goto err_out; } } @@ -1456,13 +1456,15 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (attr->chain) { NL_SET_ERR_MSG_MOD(extack, "Internal port rule is only supported on chain 0"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; } if (attr->dest_chain) { NL_SET_ERR_MSG_MOD(extack, "Internal port rule offload doesn't support goto action"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; } int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), @@ -1470,8 +1472,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, flow_flag_test(flow, EGRESS) ? MLX5E_TC_INT_PORT_EGRESS : MLX5E_TC_INT_PORT_INGRESS); - if (IS_ERR(int_port)) - return PTR_ERR(int_port); + if (IS_ERR(int_port)) { + err = PTR_ERR(int_port); + goto err_out; + } esw_attr->int_port = int_port; } -- cgit From 736ef37fd9a44f5966e25319d08ff7ea99ac79e8 Mon Sep 17 00:00:00 2001 From: Coco Li Date: Thu, 23 Dec 2021 22:24:40 +0000 Subject: udp: using datalen to cap ipv6 udp max gso segments The max number of UDP gso segments is intended to cap to UDP_MAX_SEGMENTS, this is checked in udp_send_skb(). skb->len contains network and transport header len here, we should use only data len instead. This is the ipv6 counterpart to the below referenced commit, which missed the ipv6 change Fixes: 158390e45612 ("udp: using datalen to cap max gso segments") Signed-off-by: Coco Li Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211223222441.2975883-1-lixiaoyan@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a2caca6ccf11..8cde9efd7919 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1204,7 +1204,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } -- cgit From 5471d5226c3b39b3d2f7011c082d5715795bd65c Mon Sep 17 00:00:00 2001 From: Coco Li Date: Thu, 23 Dec 2021 22:24:41 +0000 Subject: selftests: Calculate udpgso segment count without header adjustment The below referenced commit correctly updated the computation of number of segments (gso_size) by using only the gso payload size and removing the header lengths. With this change the regression test started failing. Update the tests to match this new behavior. Both IPv4 and IPv6 tests are updated, as a separate patch in this series will update udp_v6_send_skb to match this change in udp_send_skb. 
Fixes: 158390e45612 ("udp: using datalen to cap max gso segments") Signed-off-by: Coco Li Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211223222441.2975883-2-lixiaoyan@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgso.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index c66da6ffd6d8..7badaf215de2 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -156,13 +156,13 @@ struct testcase testcases_v4[] = { }, { /* send max number of min sized segments */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .tlen = UDP_MAX_SEGMENTS, .gso_len = 1, - .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .r_num_mss = UDP_MAX_SEGMENTS, }, { /* send max number + 1 of min sized segments: fail */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1, + .tlen = UDP_MAX_SEGMENTS + 1, .gso_len = 1, .tfail = true, }, @@ -259,13 +259,13 @@ struct testcase testcases_v6[] = { }, { /* send max number of min sized segments */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .tlen = UDP_MAX_SEGMENTS, .gso_len = 1, - .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .r_num_mss = UDP_MAX_SEGMENTS, }, { /* send max number + 1 of min sized segments: fail */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1, + .tlen = UDP_MAX_SEGMENTS + 1, .gso_len = 1, .tfail = true, }, -- cgit From b45396afa4177f2b1ddfeff7185da733fade1dc3 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 24 Dec 2021 02:14:59 +0000 Subject: net: phy: fixed_phy: Fix NULL vs IS_ERR() checking in __fixed_phy_register The fixed_phy_get_gpiod function() returns NULL, it doesn't return error pointers, using NULL checking to fix this.i Fixes: 5468e82f7034 ("net: phy: fixed-phy: Drop GPIO from fixed_phy_add()") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20211224021500.10362-1-linmq006@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/fixed_phy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index c65fb5f5d2dc..a0c256bd5441 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -239,8 +239,8 @@ static struct phy_device *__fixed_phy_register(unsigned int irq, /* Check if we have a GPIO associated with this fixed phy */ if (!gpiod) { gpiod = fixed_phy_get_gpiod(np); - if (IS_ERR(gpiod)) - return ERR_CAST(gpiod); + if (!gpiod) + return ERR_PTR(-EINVAL); } /* Get the next available PHY address, up to PHY_MAX_ADDR */ -- cgit From 5ec7d18d1813a5bead0b495045606c93873aecbb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 23 Dec 2021 13:04:30 -0500 Subject: sctp: use call_rcu to free endpoint This patch is to delay the endpoint free by calling call_rcu() to fix another use-after-free issue in sctp_sock_dump(): BUG: KASAN: use-after-free in __lock_acquire+0x36d9/0x4c20 Call Trace: __lock_acquire+0x36d9/0x4c20 kernel/locking/lockdep.c:3218 lock_acquire+0x1ed/0x520 kernel/locking/lockdep.c:3844 __raw_spin_lock_bh include/linux/spinlock_api_smp.h:135 [inline] _raw_spin_lock_bh+0x31/0x40 kernel/locking/spinlock.c:168 spin_lock_bh include/linux/spinlock.h:334 [inline] __lock_sock+0x203/0x350 net/core/sock.c:2253 lock_sock_nested+0xfe/0x120 net/core/sock.c:2774 lock_sock include/net/sock.h:1492 [inline] sctp_sock_dump+0x122/0xb20 net/sctp/diag.c:324 sctp_for_each_transport+0x2b5/0x370 net/sctp/socket.c:5091 sctp_diag_dump+0x3ac/0x660 net/sctp/diag.c:527 
__inet_diag_dump+0xa8/0x140 net/ipv4/inet_diag.c:1049 inet_diag_dump+0x9b/0x110 net/ipv4/inet_diag.c:1065 netlink_dump+0x606/0x1080 net/netlink/af_netlink.c:2244 __netlink_dump_start+0x59a/0x7c0 net/netlink/af_netlink.c:2352 netlink_dump_start include/linux/netlink.h:216 [inline] inet_diag_handler_cmd+0x2ce/0x3f0 net/ipv4/inet_diag.c:1170 __sock_diag_cmd net/core/sock_diag.c:232 [inline] sock_diag_rcv_msg+0x31d/0x410 net/core/sock_diag.c:263 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2477 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:274 This issue occurs when asoc is peeled off and the old sk is freed after getting it by asoc->base.sk and before calling lock_sock(sk). To prevent the sk free, as a holder of the sk, ep should be alive when calling lock_sock(). This patch uses call_rcu() and moves sock_put and ep free into sctp_endpoint_destroy_rcu(), so that it's safe to try to hold the ep under rcu_read_lock in sctp_transport_traverse_process(). If sctp_endpoint_hold() returns true, it means this ep is still alive and we have held it and can continue to dump it; If it returns false, it means this ep is dead and can be freed after rcu_read_unlock, and we should skip it. In sctp_sock_dump(), after locking the sk, if this ep is different from tsp->asoc->ep, it means during this dumping, this asoc was peeled off before calling lock_sock(), and the sk should be skipped; If this ep is the same with tsp->asoc->ep, it means no peeloff happens on this asoc, and due to lock_sock, no peeloff will happen either until release_sock. Note that delaying endpoint free won't delay the port release, as the port release happens in sctp_endpoint_destroy() before calling call_rcu(). Also, freeing endpoint by call_rcu() makes it safe to access the sk by asoc->base.sk in sctp_assocs_seq_show() and sctp_rcv(). Thanks Jones to bring this issue up. v1->v2: - improve the changelog. - add kfree(ep) into sctp_endpoint_destroy_rcu(), as Jakub noticed. Reported-by: syzbot+9276d76e83e3bcde6c99@syzkaller.appspotmail.com Reported-by: Lee Jones Fixes: d25adbeb0cdb ("sctp: fix an use-after-free issue in sctp_sock_dump") Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/net/sctp/sctp.h | 6 +++--- include/net/sctp/structs.h | 3 ++- net/sctp/diag.c | 12 ++++++------ net/sctp/endpointola.c | 23 +++++++++++++++-------- net/sctp/socket.c | 23 +++++++++++++++-------- 5 files changed, 41 insertions(+), 26 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 189fdb9db162..d314a180ab93 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -105,6 +105,7 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, @@ -115,9 +116,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p); +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 899c29c326ba..8dabd8800006 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1355,6 +1355,7 @@ struct sctp_endpoint { reconf_enable:1; __u8 strreset_enable; + struct rcu_head rcu; }; /* Recover the outter endpoint structure. 
*/ @@ -1370,7 +1371,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); void sctp_endpoint_free(struct sctp_endpoint *); void sctp_endpoint_put(struct sctp_endpoint *); -void sctp_endpoint_hold(struct sctp_endpoint *); +int sctp_endpoint_hold(struct sctp_endpoint *ep); void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *); struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 760b367644c1..a7d623171501 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -290,9 +290,8 @@ out: return err; } -static int sctp_sock_dump(struct sctp_transport *tsp, void *p) +static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; @@ -302,6 +301,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p) int err = 0; lock_sock(sk); + if (ep != tsp->asoc->ep) + goto release; list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; @@ -344,9 +345,8 @@ release: return err; } -static int sctp_sock_filter(struct sctp_transport *tsp, void *p) +static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; const struct inet_diag_req_v2 *r = commp->r; @@ -505,8 +505,8 @@ skip: if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; - sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump, - net, &pos, &commp); + sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump, + net, &pos, &commp); cb->args[2] = pos; done: diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 48c9c2c7602f..efffde7f2328 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -184,6 +184,18 @@ void sctp_endpoint_free(struct sctp_endpoint *ep) } /* Final destructor for endpoint. */ +static void sctp_endpoint_destroy_rcu(struct rcu_head *head) +{ + struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu); + struct sock *sk = ep->base.sk; + + sctp_sk(sk)->ep = NULL; + sock_put(sk); + + kfree(ep); + SCTP_DBG_OBJCNT_DEC(ep); +} + static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { struct sock *sk; @@ -213,18 +225,13 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) if (sctp_sk(sk)->bind_hash) sctp_put_port(sk); - sctp_sk(sk)->ep = NULL; - /* Give up our hold on the sock */ - sock_put(sk); - - kfree(ep); - SCTP_DBG_OBJCNT_DEC(ep); + call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu); } /* Hold a reference to an endpoint. 
*/ -void sctp_endpoint_hold(struct sctp_endpoint *ep) +int sctp_endpoint_hold(struct sctp_endpoint *ep) { - refcount_inc(&ep->base.refcnt); + return refcount_inc_not_zero(&ep->base.refcnt); } /* Release a reference to an endpoint and clean up if there are diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 33391254fa82..ad5028a07b18 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5338,11 +5338,12 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), } EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p) { +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p) +{ struct rhashtable_iter hti; struct sctp_transport *tsp; + struct sctp_endpoint *ep; int ret; again: @@ -5351,26 +5352,32 @@ again: tsp = sctp_transport_get_idx(net, &hti, *pos + 1); for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { - ret = cb(tsp, p); - if (ret) - break; + ep = tsp->asoc->ep; + if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */ + ret = cb(ep, tsp, p); + if (ret) + break; + sctp_endpoint_put(ep); + } (*pos)++; sctp_transport_put(tsp); } sctp_transport_walk_stop(&hti); if (ret) { - if (cb_done && !cb_done(tsp, p)) { + if (cb_done && !cb_done(ep, tsp, p)) { (*pos)++; + sctp_endpoint_put(ep); sctp_transport_put(tsp); goto again; } + sctp_endpoint_put(ep); sctp_transport_put(tsp); } return ret; } -EXPORT_SYMBOL_GPL(sctp_for_each_transport); +EXPORT_SYMBOL_GPL(sctp_transport_traverse_process); /* 7.2.1 Association Status (SCTP_STATUS) -- cgit From e6007b85dfa284c4726c249e3c2fc4181ca8e179 Mon Sep 17 00:00:00 2001 From: Ma Xinjian Date: Fri, 24 Dec 2021 17:59:28 +0800 Subject: selftests: mptcp: Remove the deprecated config NFT_COUNTER NFT_COUNTER was removed since 390ad4295aa ("netfilter: nf_tables: make counter support built-in") LKP/0Day will check if all configs listing under selftests are able to be enabled properly. For the missing configs, it will report something like: LKP WARN miss config CONFIG_NFT_COUNTER= of net/mptcp/config - it's not reasonable to keep the deprecated configs. - configs under kselftests are recommended by corresponding tests. So if some configs are missing, it will impact the testing results Reported-by: kernel test robot Signed-off-by: Ma Xinjian Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/config | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 0faaccd21447..2b82628decb1 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -9,7 +9,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NETFILTER_NETLINK=m CONFIG_NF_TABLES=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XTABLES=m CONFIG_NETFILTER_XT_MATCH_BPF=m -- cgit From c1833c3964d5bd8c163bd4e01736a38bc473cb8a Mon Sep 17 00:00:00 2001 From: William Zhao Date: Thu, 23 Dec 2021 12:33:16 -0500 Subject: ip6_vti: initialize __ip6_tnl_parm struct in vti6_siocdevprivate The "__ip6_tnl_parm" struct was left uninitialized causing an invalid load of random data when the "__ip6_tnl_parm" struct was used elsewhere. As an example, in the function "ip6_tnl_xmit_ctl()", it tries to access the "collect_md" member. 
With "__ip6_tnl_parm" being uninitialized and containing random data, the UBSAN detected that "collect_md" held a non-boolean value. The UBSAN issue is as follows: =============================================================== UBSAN: invalid-load in net/ipv6/ip6_tunnel.c:1025:14 load of value 30 is not a valid value for type '_Bool' CPU: 1 PID: 228 Comm: kworker/1:3 Not tainted 5.16.0-rc4+ #8 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work Call Trace: dump_stack_lvl+0x44/0x57 ubsan_epilogue+0x5/0x40 __ubsan_handle_load_invalid_value+0x66/0x70 ? __cpuhp_setup_state+0x1d3/0x210 ip6_tnl_xmit_ctl.cold.52+0x2c/0x6f [ip6_tunnel] vti6_tnl_xmit+0x79c/0x1e96 [ip6_vti] ? lock_is_held_type+0xd9/0x130 ? vti6_rcv+0x100/0x100 [ip6_vti] ? lock_is_held_type+0xd9/0x130 ? rcu_read_lock_bh_held+0xc0/0xc0 ? lock_acquired+0x262/0xb10 dev_hard_start_xmit+0x1e6/0x820 __dev_queue_xmit+0x2079/0x3340 ? mark_lock.part.52+0xf7/0x1050 ? netdev_core_pick_tx+0x290/0x290 ? kvm_clock_read+0x14/0x30 ? kvm_sched_clock_read+0x5/0x10 ? sched_clock_cpu+0x15/0x200 ? find_held_lock+0x3a/0x1c0 ? lock_release+0x42f/0xc90 ? lock_downgrade+0x6b0/0x6b0 ? mark_held_locks+0xb7/0x120 ? neigh_connected_output+0x31f/0x470 ? lockdep_hardirqs_on+0x79/0x100 ? neigh_connected_output+0x31f/0x470 ? ip6_finish_output2+0x9b0/0x1d90 ? rcu_read_lock_bh_held+0x62/0xc0 ? ip6_finish_output2+0x9b0/0x1d90 ip6_finish_output2+0x9b0/0x1d90 ? ip6_append_data+0x330/0x330 ? ip6_mtu+0x166/0x370 ? __ip6_finish_output+0x1ad/0xfb0 ? nf_hook_slow+0xa6/0x170 ip6_output+0x1fb/0x710 ? nf_hook.constprop.32+0x317/0x430 ? ip6_finish_output+0x180/0x180 ? __ip6_finish_output+0xfb0/0xfb0 ? lock_is_held_type+0xd9/0x130 ndisc_send_skb+0xb33/0x1590 ? __sk_mem_raise_allocated+0x11cf/0x1560 ? dst_output+0x4a0/0x4a0 ? ndisc_send_rs+0x432/0x610 addrconf_dad_completed+0x30c/0xbb0 ? addrconf_rs_timer+0x650/0x650 ? addrconf_dad_work+0x73c/0x10e0 addrconf_dad_work+0x73c/0x10e0 ? addrconf_dad_completed+0xbb0/0xbb0 ? rcu_read_lock_sched_held+0xaf/0xe0 ? rcu_read_lock_bh_held+0xc0/0xc0 process_one_work+0x97b/0x1740 ? pwq_dec_nr_in_flight+0x270/0x270 worker_thread+0x87/0xbf0 ? process_one_work+0x1740/0x1740 kthread+0x3ac/0x490 ? set_kthread_struct+0x100/0x100 ret_from_fork+0x22/0x30 =============================================================== The solution is to initialize "__ip6_tnl_parm" struct to zeros in the "vti6_siocdevprivate()" function. Signed-off-by: William Zhao Signed-off-by: David S. Miller --- net/ipv6/ip6_vti.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 527e9ead7449..5e9474bc54fc 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -808,6 +808,8 @@ vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); + memset(&p1, 0, sizeof(p1)); + switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { -- cgit From 6d7373dabfd3933ee30c40fc8c09d2a788f6ece1 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 27 Dec 2021 14:35:30 +0100 Subject: net/smc: fix using of uninitialized completions In smc_wr_tx_send_wait() the completion on index specified by pend->idx is initialized and after smc_wr_tx_send() was called the wait for completion starts. pend->idx is used to get the correct index for the wait, but the pend structure could already be cleared in smc_wr_tx_process_cqe(). Introduce pnd_idx to hold and use a local copy of the correct index. 
Fixes: 09c61d24f96d ("net/smc: wait for departure of an IB message") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_wr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 600ab5889227..79a7431f534e 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -358,18 +358,20 @@ int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, unsigned long timeout) { struct smc_wr_tx_pend *pend; + u32 pnd_idx; int rc; pend = container_of(priv, struct smc_wr_tx_pend, priv); pend->compl_requested = 1; - init_completion(&link->wr_tx_compl[pend->idx]); + pnd_idx = pend->idx; + init_completion(&link->wr_tx_compl[pnd_idx]); rc = smc_wr_tx_send(link, priv); if (rc) return rc; /* wait for completion by smc_wr_tx_process_cqe() */ rc = wait_for_completion_interruptible_timeout( - &link->wr_tx_compl[pend->idx], timeout); + &link->wr_tx_compl[pnd_idx], timeout); if (rc <= 0) rc = -ENODATA; if (rc > 0) -- cgit From 6c25449e1a32c594d743df8e8258e8ef870b6a77 Mon Sep 17 00:00:00 2001 From: yangxingwu Date: Mon, 27 Dec 2021 16:29:51 +0800 Subject: net: udp: fix alignment problem in udp4_seq_show() $ cat /pro/net/udp before: sl local_address rem_address st tx_queue rx_queue tr tm->when 26050: 0100007F:0035 00000000:0000 07 00000000:00000000 00:00000000 26320: 0100007F:0143 00000000:0000 07 00000000:00000000 00:00000000 27135: 00000000:8472 00000000:0000 07 00000000:00000000 00:00000000 after: sl local_address rem_address st tx_queue rx_queue tr tm->when 26050: 0100007F:0035 00000000:0000 07 00000000:00000000 00:00000000 26320: 0100007F:0143 00000000:0000 07 00000000:00000000 00:00000000 27135: 00000000:8472 00000000:0000 07 00000000:00000000 00:00000000 Signed-off-by: yangxingwu Signed-off-by: David S. Miller --- net/ipv4/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 15c6b450b8db..0cd6b857e7ec 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -3075,7 +3075,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) { seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) - seq_puts(seq, " sl local_address rem_address st tx_queue " + seq_puts(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode ref pointer drops"); else { -- cgit From 5f50153288452e10b6edd69ec9112c49442b054a Mon Sep 17 00:00:00 2001 From: Zekun Shen Date: Sun, 26 Dec 2021 21:32:45 -0500 Subject: atlantic: Fix buff_ring OOB in aq_ring_rx_clean The function obtain the next buffer without boundary check. We should return with I/O error code. The bug is found by fuzzing and the crash report is attached. It is an OOB bug although reported as use-after-free. [ 4.804724] BUG: KASAN: use-after-free in aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.805661] Read of size 4 at addr ffff888034fe93a8 by task ksoftirqd/0/9 [ 4.806505] [ 4.806703] CPU: 0 PID: 9 Comm: ksoftirqd/0 Tainted: G W 5.6.0 #34 [ 4.809030] Call Trace: [ 4.809343] dump_stack+0x76/0xa0 [ 4.809755] print_address_description.constprop.0+0x16/0x200 [ 4.810455] ? aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.811234] ? aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.813183] __kasan_report.cold+0x37/0x7c [ 4.813715] ? aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.814393] kasan_report+0xe/0x20 [ 4.814837] aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.815499] ? hw_atl_b0_hw_ring_rx_receive+0x9a5/0xb90 [atlantic] [ 4.816290] aq_vec_poll+0x179/0x5d0 [atlantic] [ 4.816870] ? 
_GLOBAL__sub_I_65535_1_aq_pci_func_init+0x20/0x20 [atlantic] [ 4.817746] ? __next_timer_interrupt+0xba/0xf0 [ 4.818322] net_rx_action+0x363/0xbd0 [ 4.818803] ? call_timer_fn+0x240/0x240 [ 4.819302] ? __switch_to_asm+0x40/0x70 [ 4.819809] ? napi_busy_loop+0x520/0x520 [ 4.820324] __do_softirq+0x18c/0x634 [ 4.820797] ? takeover_tasklets+0x5f0/0x5f0 [ 4.821343] run_ksoftirqd+0x15/0x20 [ 4.821804] smpboot_thread_fn+0x2f1/0x6b0 [ 4.822331] ? smpboot_unregister_percpu_thread+0x160/0x160 [ 4.823041] ? __kthread_parkme+0x80/0x100 [ 4.823571] ? smpboot_unregister_percpu_thread+0x160/0x160 [ 4.824301] kthread+0x2b5/0x3b0 [ 4.824723] ? kthread_create_on_node+0xd0/0xd0 [ 4.825304] ret_from_fork+0x35/0x40 Signed-off-by: Zekun Shen Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 81b3756417ec..77e76c9efd32 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -366,6 +366,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, if (!buff->is_eop) { buff_ = buff; do { + if (buff_->next >= self->size) { + err = -EIO; + goto err_exit; + } next_ = buff_->next, buff_ = &self->buff_ring[next_]; is_rsc_completed = @@ -389,6 +393,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, (buff->is_lro && buff->is_cso_err)) { buff_ = buff; do { + if (buff_->next >= self->size) { + err = -EIO; + goto err_exit; + } next_ = buff_->next, buff_ = &self->buff_ring[next_]; -- cgit From ca506fca461b260ab32952b610c3d4aadc6c11fd Mon Sep 17 00:00:00 2001 From: Matthias-Christian Ott Date: Sun, 26 Dec 2021 23:12:08 +0100 Subject: net: usb: pegasus: Do not drop long Ethernet frames The D-Link DSB-650TX (2001:4002) is unable to receive Ethernet frames that are longer than 1518 octets, for example, Ethernet frames that contain 802.1Q VLAN tags. The frames are sent to the pegasus driver via USB but the driver discards them because they have the Long_pkt field set to 1 in the received status report. The function read_bulk_callback of the pegasus driver treats such received "packets" (in the terminology of the hardware) as errors but the field simply does just indicate that the Ethernet frame (MAC destination to FCS) is longer than 1518 octets. It seems that in the 1990s there was a distinction between "giant" (> 1518) and "runt" (< 64) frames and the hardware includes flags to indicate this distinction. It seems that the purpose of the distinction "giant" frames was to not allow infinitely long frames due to transmission errors and to allow hardware to have an upper limit of the frame size. However, the hardware already has such limit with its 2048 octet receive buffer and, therefore, Long_pkt is merely a convention and should not be treated as a receive error. Actually, the hardware is even able to receive Ethernet frames with 2048 octets which exceeds the claimed limit frame size limit of the driver of 1536 octets (PEGASUS_MTU). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Matthias-Christian Ott Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/usb/pegasus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index c4cd40b090fd..feb247e355f7 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -493,11 +493,11 @@ static void read_bulk_callback(struct urb *urb) goto goon; rx_status = buf[count - 2]; - if (rx_status & 0x1e) { + if (rx_status & 0x1c) { netif_dbg(pegasus, rx_err, net, "RX packet error %x\n", rx_status); net->stats.rx_errors++; - if (rx_status & 0x06) /* long or runt */ + if (rx_status & 0x04) /* runt */ net->stats.rx_length_errors++; if (rx_status & 0x08) net->stats.rx_crc_errors++; -- cgit From 7175f02c4e5f5a9430113ab9ca0fd0ce98b28a51 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sun, 26 Dec 2021 16:01:27 +0300 Subject: uapi: fix linux/nfc.h userspace compilation errors Replace sa_family_t with __kernel_sa_family_t to fix the following linux/nfc.h userspace compilation errors: /usr/include/linux/nfc.h:266:2: error: unknown type name 'sa_family_t' sa_family_t sa_family; /usr/include/linux/nfc.h:274:2: error: unknown type name 'sa_family_t' sa_family_t sa_family; Fixes: 23b7869c0fd0 ("NFC: add the NFC socket raw protocol") Fixes: d646960f7986 ("NFC: Initial LLCP support") Cc: Signed-off-by: Dmitry V. Levin Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/uapi/linux/nfc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index f6e3c8c9c744..aadad43d943a 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -263,7 +263,7 @@ enum nfc_sdp_attr { #define NFC_SE_ENABLED 0x1 struct sockaddr_nfc { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; @@ -271,7 +271,7 @@ struct sockaddr_nfc { #define NFC_LLCP_MAX_SERVICE_NAME 63 struct sockaddr_nfc_llcp { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; -- cgit From 79b69a83705e621b258ac6d8ae6d3bfdb4b930aa Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 26 Dec 2021 13:03:47 +0100 Subject: nfc: uapi: use kernel size_t to fix user-space builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix user-space builds if it includes /usr/include/linux/nfc.h before some of other headers: /usr/include/linux/nfc.h:281:9: error: unknown type name ‘size_t’ 281 | size_t service_name_len; | ^~~~~~ Fixes: d646960f7986 ("NFC: Initial LLCP support") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/uapi/linux/nfc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index aadad43d943a..4fa4e979e948 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -278,7 +278,7 @@ struct sockaddr_nfc_llcp { __u8 dsap; /* Destination SAP, if known */ __u8 ssap; /* Source SAP to be bound to */ char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */; - size_t service_name_len; + __kernel_size_t service_name_len; }; /* NFC socket protocols */ -- cgit From 8b5fdfc57cc2471179d1c51081424ded833c16c8 Mon Sep 17 00:00:00 2001 From: wolfgang huang Date: Tue, 28 Dec 2021 16:01:20 +0800 Subject: mISDN: change function names to avoid conflicts As we build for mips, we meet following error. l1_init error with multiple definition. 
Some architecture devices usually marked with l1, l2, lxx as the start-up phase. so we change the mISDN function names, align with Isdnl2_xxx. mips-linux-gnu-ld: drivers/isdn/mISDN/layer1.o: in function `l1_init': (.text+0x890): multiple definition of `l1_init'; \ arch/mips/kernel/bmips_5xxx_init.o:(.text+0xf0): first defined here make[1]: *** [home/mips/kernel-build/linux/Makefile:1161: vmlinux] Error 1 Signed-off-by: wolfgang huang Reported-by: k2ci Signed-off-by: David S. Miller --- drivers/isdn/mISDN/core.c | 6 +++--- drivers/isdn/mISDN/core.h | 4 ++-- drivers/isdn/mISDN/layer1.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c index 55891e420446..a41b4b264594 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c @@ -381,7 +381,7 @@ mISDNInit(void) err = mISDN_inittimer(&debug); if (err) goto error2; - err = l1_init(&debug); + err = Isdnl1_Init(&debug); if (err) goto error3; err = Isdnl2_Init(&debug); @@ -395,7 +395,7 @@ mISDNInit(void) error5: Isdnl2_cleanup(); error4: - l1_cleanup(); + Isdnl1_cleanup(); error3: mISDN_timer_cleanup(); error2: @@ -408,7 +408,7 @@ static void mISDN_cleanup(void) { misdn_sock_cleanup(); Isdnl2_cleanup(); - l1_cleanup(); + Isdnl1_cleanup(); mISDN_timer_cleanup(); class_unregister(&mISDN_class); diff --git a/drivers/isdn/mISDN/core.h b/drivers/isdn/mISDN/core.h index 23b44d303327..42599f49c189 100644 --- a/drivers/isdn/mISDN/core.h +++ b/drivers/isdn/mISDN/core.h @@ -60,8 +60,8 @@ struct Bprotocol *get_Bprotocol4id(u_int); extern int mISDN_inittimer(u_int *); extern void mISDN_timer_cleanup(void); -extern int l1_init(u_int *); -extern void l1_cleanup(void); +extern int Isdnl1_Init(u_int *); +extern void Isdnl1_cleanup(void); extern int Isdnl2_Init(u_int *); extern void Isdnl2_cleanup(void); diff --git a/drivers/isdn/mISDN/layer1.c b/drivers/isdn/mISDN/layer1.c index 98a3bc6c1700..7b31c25a550e 100644 --- a/drivers/isdn/mISDN/layer1.c +++ b/drivers/isdn/mISDN/layer1.c @@ -398,7 +398,7 @@ create_l1(struct dchannel *dch, dchannel_l1callback *dcb) { EXPORT_SYMBOL(create_l1); int -l1_init(u_int *deb) +Isdnl1_Init(u_int *deb) { debug = deb; l1fsm_s.state_count = L1S_STATE_COUNT; @@ -409,7 +409,7 @@ l1_init(u_int *deb) } void -l1_cleanup(void) +Isdnl1_cleanup(void) { mISDN_FsmFree(&l1fsm_s); } -- cgit From 1cd5384c88af5b59bf9f3b6c1a151bc14b88c2cd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Dec 2021 18:51:44 +0100 Subject: net: ag71xx: Fix a potential double free in error handling paths 'ndev' is a managed resource allocated with devm_alloc_etherdev(), so there is no need to call free_netdev() explicitly or there will be a double free(). Simplify all error handling paths accordingly. Fixes: d51b6ce441d3 ("net: ethernet: add ag71xx driver") Signed-off-by: Christophe JAILLET Signed-off-by: David S. 
Miller --- drivers/net/ethernet/atheros/ag71xx.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 88d2ab748399..4579ddf9c427 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1913,15 +1913,12 @@ static int ag71xx_probe(struct platform_device *pdev) ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac"); if (IS_ERR(ag->mac_reset)) { netif_err(ag, probe, ndev, "missing mac reset\n"); - err = PTR_ERR(ag->mac_reset); - goto err_free; + return PTR_ERR(ag->mac_reset); } ag->mac_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); - if (!ag->mac_base) { - err = -ENOMEM; - goto err_free; - } + if (!ag->mac_base) + return -ENOMEM; ndev->irq = platform_get_irq(pdev, 0); err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt, @@ -1929,7 +1926,7 @@ static int ag71xx_probe(struct platform_device *pdev) if (err) { netif_err(ag, probe, ndev, "unable to request IRQ %d\n", ndev->irq); - goto err_free; + return err; } ndev->netdev_ops = &ag71xx_netdev_ops; @@ -1957,10 +1954,8 @@ static int ag71xx_probe(struct platform_device *pdev) ag->stop_desc = dmam_alloc_coherent(&pdev->dev, sizeof(struct ag71xx_desc), &ag->stop_desc_dma, GFP_KERNEL); - if (!ag->stop_desc) { - err = -ENOMEM; - goto err_free; - } + if (!ag->stop_desc) + return -ENOMEM; ag->stop_desc->data = 0; ag->stop_desc->ctrl = 0; @@ -1975,7 +1970,7 @@ static int ag71xx_probe(struct platform_device *pdev) err = of_get_phy_mode(np, &ag->phy_if_mode); if (err) { netif_err(ag, probe, ndev, "missing phy-mode property in DT\n"); - goto err_free; + return err; } netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT); @@ -1983,7 +1978,7 @@ static int ag71xx_probe(struct platform_device *pdev) err = clk_prepare_enable(ag->clk_eth); if (err) { netif_err(ag, probe, ndev, "Failed to enable eth clk.\n"); - goto err_free; + return err; } ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0); @@ -2019,8 +2014,6 @@ err_mdio_remove: ag71xx_mdio_remove(ag); err_put_clk: clk_disable_unprepare(ag->clk_eth); -err_free: - free_netdev(ndev); return err; } -- cgit From 5be60a945329d82f06fc755a43eeefbfc5f77d72 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Mon, 27 Dec 2021 17:22:03 +0100 Subject: net: lantiq_xrx200: fix statistics of received bytes Received frames have FCS truncated. There is no need to subtract FCS length from the statistics. Fixes: fe1a56420cf2 ("net: lantiq: Add Lantiq / Intel VRX200 Ethernet driver") Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/lantiq_xrx200.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 96bd6f2b21ed..80bfaf2fec92 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -224,7 +224,7 @@ static int xrx200_hw_receive(struct xrx200_chan *ch) skb->protocol = eth_type_trans(skb, net_dev); netif_receive_skb(skb); net_dev->stats.rx_packets++; - net_dev->stats.rx_bytes += len - ETH_FCS_LEN; + net_dev->stats.rx_bytes += len; return 0; } -- cgit From 1b9dadba502234eea7244879b8d5d126bfaf9f0c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 28 Dec 2021 12:48:11 +0000 Subject: NFC: st21nfca: Fix memory leak in device probe and remove 'phy->pending_skb' is alloced when device probe, but forgot to free in the error handling path and remove path, this cause memory leak as follows: unreferenced object 0xffff88800bc06800 (size 512): comm "8", pid 11775, jiffies 4295159829 (age 9.032s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000d66c09ce>] __kmalloc_node_track_caller+0x1ed/0x450 [<00000000c93382b3>] kmalloc_reserve+0x37/0xd0 [<000000005fea522c>] __alloc_skb+0x124/0x380 [<0000000019f29f9a>] st21nfca_hci_i2c_probe+0x170/0x8f2 Fix it by freeing 'pending_skb' in error and remove. Fixes: 68957303f44a ("NFC: ST21NFCA: Add driver for STMicroelectronics ST21NFCA NFC Chip") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/nfc/st21nfca/i2c.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c index f126ce96a7df..35b32fb90906 100644 --- a/drivers/nfc/st21nfca/i2c.c +++ b/drivers/nfc/st21nfca/i2c.c @@ -524,7 +524,8 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(phy->gpiod_ena)) { nfc_err(dev, "Unable to get ENABLE GPIO\n"); - return PTR_ERR(phy->gpiod_ena); + r = PTR_ERR(phy->gpiod_ena); + goto out_free; } phy->se_status.is_ese_present = @@ -535,7 +536,7 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, r = st21nfca_hci_platform_init(phy); if (r < 0) { nfc_err(&client->dev, "Unable to reboot st21nfca\n"); - return r; + goto out_free; } r = devm_request_threaded_irq(&client->dev, client->irq, NULL, @@ -544,15 +545,23 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, ST21NFCA_HCI_DRIVER_NAME, phy); if (r < 0) { nfc_err(&client->dev, "Unable to register IRQ handler\n"); - return r; + goto out_free; } - return st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, - ST21NFCA_FRAME_HEADROOM, - ST21NFCA_FRAME_TAILROOM, - ST21NFCA_HCI_LLC_MAX_PAYLOAD, - &phy->hdev, - &phy->se_status); + r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, + ST21NFCA_FRAME_HEADROOM, + ST21NFCA_FRAME_TAILROOM, + ST21NFCA_HCI_LLC_MAX_PAYLOAD, + &phy->hdev, + &phy->se_status); + if (r) + goto out_free; + + return 0; + +out_free: + kfree_skb(phy->pending_skb); + return r; } static int st21nfca_hci_i2c_remove(struct i2c_client *client) @@ -563,6 +572,8 @@ static int st21nfca_hci_i2c_remove(struct i2c_client *client) if (phy->powered) st21nfca_hci_i2c_disable(phy); + if (phy->pending_skb) + kfree_skb(phy->pending_skb); return 0; } -- cgit From 90cee52f2e780345d3629e278291aea5ac74f40f Mon Sep 17 00:00:00 
2001 From: Dust Li Date: Tue, 28 Dec 2021 17:03:24 +0800 Subject: net/smc: don't send CDC/LLC message if link not ready We found smc_llc_send_link_delete_all() sometimes wait for 2s timeout when testing with RDMA link up/down. It is possible when a smc_link is in ACTIVATING state, the underlaying QP is still in RESET or RTR state, which cannot send any messages out. smc_llc_send_link_delete_all() use smc_link_usable() to checks whether the link is usable, if the QP is still in RESET or RTR state, but the smc_link is in ACTIVATING, this LLC message will always fail without any CQE entering the CQ, and we will always wait 2s before timeout. Since we cannot send any messages through the QP before the QP enter RTS. I add a wrapper smc_link_sendable() which checks the state of QP along with the link state. And replace smc_link_usable() with smc_link_sendable() in all LLC & CDC message sending routine. Fixes: 5f08318f617b ("smc: connection data control (CDC)") Signed-off-by: Dust Li Signed-off-by: David S. Miller --- net/smc/smc_core.c | 2 +- net/smc/smc_core.h | 6 ++++++ net/smc/smc_llc.c | 2 +- net/smc/smc_wr.c | 4 ++-- net/smc/smc_wr.h | 2 +- 5 files changed, 11 insertions(+), 5 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 387d28b2f8dd..55ca175e8d57 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -647,7 +647,7 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *lnk = &lgr->lnk[i]; - if (smc_link_usable(lnk)) + if (smc_link_sendable(lnk)) lnk->state = SMC_LNK_INACTIVE; } wake_up_all(&lgr->llc_msg_waiter); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 59cef3b830d8..d63b08274197 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -415,6 +415,12 @@ static inline bool smc_link_usable(struct smc_link *lnk) return true; } +static inline bool smc_link_sendable(struct smc_link *lnk) +{ + return smc_link_usable(lnk) && + lnk->qp_attr.cur_qp_state == IB_QPS_RTS; +} + static inline bool smc_link_active(struct smc_link *lnk) { return lnk->state == SMC_LNK_ACTIVE; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index b102680296b8..3e9fd8a3124c 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -1630,7 +1630,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn) delllc.reason = htonl(rsn); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&lgr->lnk[i])) + if (!smc_link_sendable(&lgr->lnk[i])) continue; if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc)) break; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 79a7431f534e..df1dc225cbab 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -188,7 +188,7 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context) static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) { *idx = link->wr_tx_cnt; - if (!smc_link_usable(link)) + if (!smc_link_sendable(link)) return -ENOLINK; for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) { if (!test_and_set_bit(*idx, link->wr_tx_mask)) @@ -231,7 +231,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, } else { rc = wait_event_interruptible_timeout( link->wr_tx_wait, - !smc_link_usable(link) || + !smc_link_sendable(link) || lgr->terminating || (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index f353311e6f84..48ed9b08ac7a 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -62,7 
+62,7 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val) static inline bool smc_wr_tx_link_hold(struct smc_link *link) { - if (!smc_link_usable(link)) + if (!smc_link_sendable(link)) return false; atomic_inc(&link->wr_tx_refcnt); return true; -- cgit From 349d43127dac00c15231e8ffbcaabd70f7b0e544 Mon Sep 17 00:00:00 2001 From: Dust Li Date: Tue, 28 Dec 2021 17:03:25 +0800 Subject: net/smc: fix kernel panic caused by race of smc_sock A crash occurs when smc_cdc_tx_handler() tries to access smc_sock but smc_release() has already freed it. [ 4570.695099] BUG: unable to handle page fault for address: 000000002eae9e88 [ 4570.696048] #PF: supervisor write access in kernel mode [ 4570.696728] #PF: error_code(0x0002) - not-present page [ 4570.697401] PGD 0 P4D 0 [ 4570.697716] Oops: 0002 [#1] PREEMPT SMP NOPTI [ 4570.698228] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.16.0-rc4+ #111 [ 4570.699013] Hardware name: Alibaba Cloud Alibaba Cloud ECS, BIOS 8c24b4c 04/0 [ 4570.699933] RIP: 0010:_raw_spin_lock+0x1a/0x30 <...> [ 4570.711446] Call Trace: [ 4570.711746] [ 4570.711992] smc_cdc_tx_handler+0x41/0xc0 [ 4570.712470] smc_wr_tx_tasklet_fn+0x213/0x560 [ 4570.712981] ? smc_cdc_tx_dismisser+0x10/0x10 [ 4570.713489] tasklet_action_common.isra.17+0x66/0x140 [ 4570.714083] __do_softirq+0x123/0x2f4 [ 4570.714521] irq_exit_rcu+0xc4/0xf0 [ 4570.714934] common_interrupt+0xba/0xe0 Though smc_cdc_tx_handler() checked the existence of smc connection, smc_release() may have already dismissed and released the smc socket before smc_cdc_tx_handler() further visits it. smc_cdc_tx_handler() |smc_release() if (!conn) | | |smc_cdc_tx_dismiss_slots() | smc_cdc_tx_dismisser() | |sock_put(&smc->sk) <- last sock_put, | smc_sock freed bh_lock_sock(&smc->sk) (panic) | To make sure we won't receive any CDC messages after we free the smc_sock, add a refcount on the smc_connection for inflight CDC message(posted to the QP but haven't received related CQE), and don't release the smc_connection until all the inflight CDC messages haven been done, for both success or failed ones. Using refcount on CDC messages brings another problem: when the link is going to be destroyed, smcr_link_clear() will reset the QP, which then remove all the pending CQEs related to the QP in the CQ. To make sure all the CQEs will always come back so the refcount on the smc_connection can always reach 0, smc_ib_modify_qp_reset() was replaced by smc_ib_modify_qp_error(). And remove the timeout in smc_wr_tx_wait_no_pending_sends() since we need to wait for all pending WQEs done, or we may encounter use-after- free when handling CQEs. For IB device removal routine, we need to wait for all the QPs on that device been destroyed before we can destroy CQs on the device, or the refcount on smc_connection won't reach 0 and smc_sock cannot be released. Fixes: 5f08318f617b ("smc: connection data control (CDC)") Reported-by: Wen Gu Signed-off-by: Dust Li Signed-off-by: David S. 
Miller --- net/smc/smc.h | 5 +++++ net/smc/smc_cdc.c | 52 ++++++++++++++++++++++++---------------------------- net/smc/smc_cdc.h | 2 +- net/smc/smc_core.c | 25 ++++++++++++++++++++----- net/smc/smc_ib.c | 4 ++-- net/smc/smc_ib.h | 1 + net/smc/smc_wr.c | 41 +++-------------------------------------- net/smc/smc_wr.h | 3 +-- 8 files changed, 57 insertions(+), 76 deletions(-) diff --git a/net/smc/smc.h b/net/smc/smc.h index f4286ca1f228..1a4fc1c6c4ab 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -180,6 +180,11 @@ struct smc_connection { u16 tx_cdc_seq; /* sequence # for CDC send */ u16 tx_cdc_seq_fin; /* sequence # - tx completed */ spinlock_t send_lock; /* protect wr_sends */ + atomic_t cdc_pend_tx_wr; /* number of pending tx CDC wqe + * - inc when post wqe, + * - dec on polled tx cqe + */ + wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ u32 tx_off; /* base offset in peer rmb */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 99acd337ba90..84c8a4374fdd 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -31,10 +31,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, struct smc_sock *smc; int diff; - if (!conn) - /* already dismissed */ - return; - smc = container_of(conn, struct smc_sock, conn); bh_lock_sock(&smc->sk); if (!wc_status) { @@ -51,6 +47,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, conn); conn->tx_cdc_seq_fin = cdcpend->ctrl_seq; } + + if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) && + unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq))) + wake_up(&conn->cdc_pend_tx_wq); + WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0); + smc_tx_sndbuf_nonfull(smc); bh_unlock_sock(&smc->sk); } @@ -107,6 +109,10 @@ int smc_cdc_msg_send(struct smc_connection *conn, conn->tx_cdc_seq++; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed); + + atomic_inc(&conn->cdc_pend_tx_wr); + smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); if (!rc) { smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); @@ -114,6 +120,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, } else { conn->tx_cdc_seq--; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; + atomic_dec(&conn->cdc_pend_tx_wr); } return rc; @@ -136,7 +143,18 @@ int smcr_cdc_msg_send_validation(struct smc_connection *conn, peer->token = htonl(local->token); peer->prod_flags.failover_validation = 1; + /* We need to set pend->conn here to make sure smc_cdc_tx_handler() + * can handle properly + */ + smc_cdc_add_pending_send(conn, pend); + + atomic_inc(&conn->cdc_pend_tx_wr); + smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); + if (unlikely(rc)) + atomic_dec(&conn->cdc_pend_tx_wr); + return rc; } @@ -193,31 +211,9 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) return rc; } -static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend, - unsigned long data) +void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn) { - struct smc_connection *conn = (struct smc_connection *)data; - struct smc_cdc_tx_pend *cdc_pend = - (struct smc_cdc_tx_pend *)tx_pend; - - return cdc_pend->conn == conn; -} - -static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend) -{ - struct smc_cdc_tx_pend *cdc_pend = - (struct smc_cdc_tx_pend *)tx_pend; - - cdc_pend->conn = NULL; -} - 
-void smc_cdc_tx_dismiss_slots(struct smc_connection *conn) -{ - struct smc_link *link = conn->lnk; - - smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE, - smc_cdc_tx_filter, smc_cdc_tx_dismisser, - (unsigned long)conn); + wait_event(conn->cdc_pend_tx_wq, !atomic_read(&conn->cdc_pend_tx_wr)); } /* Send a SMC-D CDC header. diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 0a0a89abd38b..696cc11f2303 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -291,7 +291,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend); -void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); +void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 55ca175e8d57..a6849362f4dd 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1127,7 +1127,7 @@ void smc_conn_free(struct smc_connection *conn) smc_ism_unset_conn(conn); tasklet_kill(&conn->rx_tsklet); } else { - smc_cdc_tx_dismiss_slots(conn); + smc_cdc_wait_pend_tx_wr(conn); if (current_work() != &conn->abort_work) cancel_work_sync(&conn->abort_work); } @@ -1204,7 +1204,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log) smc_llc_link_clear(lnk, log); smcr_buf_unmap_lgr(lnk); smcr_rtoken_clear_link(lnk); - smc_ib_modify_qp_reset(lnk); + smc_ib_modify_qp_error(lnk); smc_wr_free_link(lnk); smc_ib_destroy_queue_pair(lnk); smc_ib_dealloc_protection_domain(lnk); @@ -1336,7 +1336,7 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft) else tasklet_unlock_wait(&conn->rx_tsklet); } else { - smc_cdc_tx_dismiss_slots(conn); + smc_cdc_wait_pend_tx_wr(conn); } smc_lgr_unregister_conn(conn); smc_close_active_abort(smc); @@ -1459,11 +1459,16 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd) /* Called when an SMCR device is removed or the smc module is unloaded. * If smcibdev is given, all SMCR link groups using this device are terminated. * If smcibdev is NULL, all SMCR link groups are terminated. + * + * We must wait here for QPs been destroyed before we destroy the CQs, + * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus + * smc_sock cannot be released. 
*/ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) { struct smc_link_group *lgr, *lg; LIST_HEAD(lgr_free_list); + LIST_HEAD(lgr_linkdown_list); int i; spin_lock_bh(&smc_lgr_list.lock); @@ -1475,7 +1480,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (lgr->lnk[i].smcibdev == smcibdev) - smcr_link_down_cond_sched(&lgr->lnk[i]); + list_move_tail(&lgr->list, &lgr_linkdown_list); } } } @@ -1487,6 +1492,16 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) __smc_lgr_terminate(lgr, false); } + list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) { + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].smcibdev == smcibdev) { + mutex_lock(&lgr->llc_conf_mutex); + smcr_link_down_cond(&lgr->lnk[i]); + mutex_unlock(&lgr->llc_conf_mutex); + } + } + } + if (smcibdev) { if (atomic_read(&smcibdev->lnk_cnt)) wait_event(smcibdev->lnks_deleted, @@ -1586,7 +1601,6 @@ static void smcr_link_down(struct smc_link *lnk) if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list)) return; - smc_ib_modify_qp_reset(lnk); to_lnk = smc_switch_conns(lgr, lnk, true); if (!to_lnk) { /* no backup link available */ smcr_link_clear(lnk, true); @@ -1824,6 +1838,7 @@ create: conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; conn->urg_state = SMC_URG_READ; + init_waitqueue_head(&conn->cdc_pend_tx_wq); INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work); if (ini->is_smcd) { conn->rx_off = sizeof(struct smcd_cdc_msg); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index d93055ec17ae..fe5d5399c4e8 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -109,12 +109,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk) IB_QP_MAX_QP_RD_ATOMIC); } -int smc_ib_modify_qp_reset(struct smc_link *lnk) +int smc_ib_modify_qp_error(struct smc_link *lnk) { struct ib_qp_attr qp_attr; memset(&qp_attr, 0, sizeof(qp_attr)); - qp_attr.qp_state = IB_QPS_RESET; + qp_attr.qp_state = IB_QPS_ERR; return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE); } diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 07585937370e..bfa1c6bf6313 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -90,6 +90,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk); int smc_ib_ready_link(struct smc_link *lnk); int smc_ib_modify_qp_rts(struct smc_link *lnk); int smc_ib_modify_qp_reset(struct smc_link *lnk); +int smc_ib_modify_qp_error(struct smc_link *lnk); long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev); int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, struct smc_buf_desc *buf_slot, u8 link_idx); diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index df1dc225cbab..c6cfdea8b71b 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -62,13 +62,9 @@ static inline bool smc_wr_is_tx_pend(struct smc_link *link) } /* wait till all pending tx work requests on the given link are completed */ -int smc_wr_tx_wait_no_pending_sends(struct smc_link *link) +void smc_wr_tx_wait_no_pending_sends(struct smc_link *link) { - if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link), - SMC_WR_TX_WAIT_PENDING_TIME)) - return 0; - else /* timeout */ - return -EPIPE; + wait_event(link->wr_tx_wait, !smc_wr_is_tx_pend(link)); } static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id) @@ -87,7 +83,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) struct smc_wr_tx_pend pnd_snd; struct smc_link 
*link; u32 pnd_snd_idx; - int i; link = wc->qp->qp_context; @@ -128,14 +123,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) } if (wc->status) { - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - /* clear full struct smc_wr_tx_pend including .priv */ - memset(&link->wr_tx_pends[i], 0, - sizeof(link->wr_tx_pends[i])); - memset(&link->wr_tx_bufs[i], 0, - sizeof(link->wr_tx_bufs[i])); - clear_bit(i, link->wr_tx_mask); - } if (link->lgr->smc_version == SMC_V2) { memset(link->wr_tx_v2_pend, 0, sizeof(*link->wr_tx_v2_pend)); @@ -421,25 +408,6 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) return rc; } -void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type, - smc_wr_tx_filter filter, - smc_wr_tx_dismisser dismisser, - unsigned long data) -{ - struct smc_wr_tx_pend_priv *tx_pend; - struct smc_wr_rx_hdr *wr_tx; - int i; - - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i]; - if (wr_tx->type != wr_tx_hdr_type) - continue; - tx_pend = &link->wr_tx_pends[i].priv; - if (filter(tx_pend, data)) - dismisser(tx_pend); - } -} - /****************************** receive queue ********************************/ int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler) @@ -675,10 +643,7 @@ void smc_wr_free_link(struct smc_link *lnk) smc_wr_wakeup_reg_wait(lnk); smc_wr_wakeup_tx_wait(lnk); - if (smc_wr_tx_wait_no_pending_sends(lnk)) - memset(lnk->wr_tx_mask, 0, - BITS_TO_LONGS(SMC_WR_BUF_CNT) * - sizeof(*lnk->wr_tx_mask)); + smc_wr_tx_wait_no_pending_sends(lnk); wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt))); wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt))); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 48ed9b08ac7a..47512ccce5ef 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -22,7 +22,6 @@ #define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */ #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ) -#define SMC_WR_TX_WAIT_PENDING_TIME (5 * HZ) #define SMC_WR_TX_SIZE 44 /* actual size of wr_send data (<=SMC_WR_BUF_SIZE) */ @@ -130,7 +129,7 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, unsigned long data); -int smc_wr_tx_wait_no_pending_sends(struct smc_link *link); +void smc_wr_tx_wait_no_pending_sends(struct smc_link *link); int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler); int smc_wr_rx_post_init(struct smc_link *link); -- cgit From 1e81dcc1ab7de7a789e60042ce82d5a612632599 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Mon, 13 Dec 2021 16:39:49 -0800 Subject: igc: Do not enable crosstimestamping for i225-V models It was reported that when PCIe PTM is enabled, some lockups could be observed with some integrated i225-V models. While the issue is investigated, we can disable crosstimestamp for those models and see no loss of functionality, because those models don't have any support for time synchronization. 
Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()") Link: https://lore.kernel.org/all/924175a188159f4e03bd69908a91e606b574139b.camel@gmx.de/ Reported-by: Stefan Dietrich Signed-off-by: Vinicius Costa Gomes Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ptp.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 30568e3544cd..4f9245aa79a1 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -768,7 +768,20 @@ int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr) */ static bool igc_is_crosststamp_supported(struct igc_adapter *adapter) { - return IS_ENABLED(CONFIG_X86_TSC) ? pcie_ptm_enabled(adapter->pdev) : false; + if (!IS_ENABLED(CONFIG_X86_TSC)) + return false; + + /* FIXME: it was noticed that enabling support for PCIe PTM in + * some i225-V models could cause lockups when bringing the + * interface up/down. There should be no downsides to + * disabling crosstimestamping support for i225-V, as it + * doesn't have any PTP support. That way we gain some time + * while root causing the issue. + */ + if (adapter->pdev->device == IGC_DEV_ID_I225_V) + return false; + + return pcie_ptm_enabled(adapter->pdev); } static struct system_counterval_t igc_device_tstamp_to_system(u64 tstamp) -- cgit From f85846bbf43de38fb2c89fe7d2a085608c4eb25a Mon Sep 17 00:00:00 2001 From: James McLaughlin Date: Fri, 17 Dec 2021 16:49:33 -0700 Subject: igc: Fix TX timestamp support for non-MSI-X platforms Time synchronization was not properly enabled on non-MSI-X platforms. Fixes: 2c344ae24501 ("igc: Add support for TX timestamping") Signed-off-by: James McLaughlin Reviewed-by: Vinicius Costa Gomes Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 8e448288ee26..d28a80a00953 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5467,6 +5467,9 @@ static irqreturn_t igc_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & IGC_ICR_TS) + igc_tsync_interrupt(adapter); + napi_schedule(&q_vector->napi); return IRQ_HANDLED; @@ -5510,6 +5513,9 @@ static irqreturn_t igc_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & IGC_ICR_TS) + igc_tsync_interrupt(adapter); + napi_schedule(&q_vector->napi); return IRQ_HANDLED; -- cgit From 140c7bc7d1195750342ea0e6ab76179499ae7cd7 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Dec 2021 15:06:17 +0100 Subject: ionic: Initialize the 'lif->dbid_inuse' bitmap When allocated, this bitmap is not initialized. Only the first bit is set a few lines below. Use bitmap_zalloc() to make sure that it is cleared before being used. 
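To make the ionic change above concrete, here is a minimal sketch of the allocation pattern it switches to; the structure and field names are invented for illustration and are not the driver's. bitmap_alloc() hands back uninitialized memory, so bits that were never written can read back as set, while bitmap_zalloc() starts from an all-clear map before the first id is reserved.

    #include <linux/bitmap.h>

    /* 'struct foo' and its field are made up for this sketch only */
    struct foo {
            unsigned long *dbid_inuse;
    };

    static int foo_init_dbid_map(struct foo *f, unsigned int nbits)
    {
            f->dbid_inuse = bitmap_zalloc(nbits, GFP_KERNEL);   /* all bits clear */
            if (!f->dbid_inuse)
                    return -ENOMEM;
            set_bit(0, f->dbid_inuse);      /* reserve doorbell id 0 only */
            return 0;
    }
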
Fixes: 6461b446f2a0 ("ionic: Add interrupts and doorbells") Signed-off-by: Christophe JAILLET Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/6a478eae0b5e6c63774e1f0ddb1a3f8c38fa8ade.1640527506.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 63f8a8163b5f..2ff7be17e5af 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3135,7 +3135,7 @@ int ionic_lif_init(struct ionic_lif *lif) return -EINVAL; } - lif->dbid_inuse = bitmap_alloc(lif->dbid_count, GFP_KERNEL); + lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL); if (!lif->dbid_inuse) { dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n"); return -ENOMEM; -- cgit From 077cdda764c7f147e03e6065ba0cd1dbc1bf00d1 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 22 Dec 2021 09:20:58 +0200 Subject: net/mlx5e: TC, Fix memory leak with rules with internal port Fix a memory leak with decap rule with internal port as destination device. The driver allocates a modify hdr action but doesn't set the flow attr modify hdr action which results in skipping releasing the modify hdr action when releasing the flow. backtrace: [<000000005f8c651c>] krealloc+0x83/0xd0 [<000000009f59b143>] alloc_mod_hdr_actions+0x156/0x310 [mlx5_core] [<000000002257f342>] mlx5e_tc_match_to_reg_set_and_get_id+0x12a/0x360 [mlx5_core] [<00000000b44ea75a>] mlx5e_tc_add_fdb_flow+0x962/0x1470 [mlx5_core] [<0000000003e384a0>] __mlx5e_add_fdb_flow+0x54c/0xb90 [mlx5_core] [<00000000ed8b22b6>] mlx5e_configure_flower+0xe45/0x4af0 [mlx5_core] [<00000000024f4ab5>] mlx5e_rep_indr_offload.isra.0+0xfe/0x1b0 [mlx5_core] [<000000006c3bb494>] mlx5e_rep_indr_setup_tc_cb+0x90/0x130 [mlx5_core] [<00000000d3dac2ea>] tc_setup_cb_add+0x1d2/0x420 Fixes: b16eb3c81fe2 ("net/mlx5: Support internal port as decap route device") Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index a60c7680fd2b..5e454a14428f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1441,6 +1441,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, metadata); if (err) goto err_out; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } } -- cgit From 992d8a4e38f0527f24e273ce3a9cd6dea1a6a436 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 29 Nov 2021 11:08:41 +0200 Subject: net/mlx5e: Fix wrong features assignment in case of error In case of an error in mlx5e_set_features(), 'netdev->features' must be updated with the correct state of the device to indicate which features were updated successfully. To do that we maintain a copy of 'netdev->features' and update it after successful feature changes, so we can assign it to back to 'netdev->features' if needed. However, since not all netdev features are handled by the driver (e.g. GRO/TSO/etc), some features may not be updated correctly in case of an error updating another feature. 
For example, while requesting to disable TSO (feature which is not handled by the driver) and enable HW-GRO, if an error occurs during HW-GRO enable, 'oper_features' will be assigned with 'netdev->features' and HW-GRO turned off. TSO will remain enabled in such case, which is a bug. To solve that, instead of using 'netdev->features' as the baseline of 'oper_features' and changing it on set feature success, use 'features' instead and update it in case of errors. Fixes: 75b81ce719b7 ("net/mlx5e: Don't override netdev features field unless in error flow") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3b0f3a831216..41379844eee1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3741,12 +3741,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable) static int mlx5e_handle_feature(struct net_device *netdev, netdev_features_t *features, - netdev_features_t wanted_features, netdev_features_t feature, mlx5e_feature_handler feature_handler) { - netdev_features_t changes = wanted_features ^ netdev->features; - bool enable = !!(wanted_features & feature); + netdev_features_t changes = *features ^ netdev->features; + bool enable = !!(*features & feature); int err; if (!(changes & feature)) @@ -3754,22 +3753,22 @@ static int mlx5e_handle_feature(struct net_device *netdev, err = feature_handler(netdev, enable); if (err) { + MLX5E_SET_FEATURE(features, feature, !enable); netdev_err(netdev, "%s feature %pNF failed, err %d\n", enable ? "Enable" : "Disable", &feature, err); return err; } - MLX5E_SET_FEATURE(features, feature, enable); return 0; } int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) { - netdev_features_t oper_features = netdev->features; + netdev_features_t oper_features = features; int err = 0; #define MLX5E_HANDLE_FEATURE(feature, handler) \ - mlx5e_handle_feature(netdev, &oper_features, features, feature, handler) + mlx5e_handle_feature(netdev, &oper_features, feature, handler) err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro); -- cgit From 5bec7ca2be6955ca1aa0d7bae2b981de9b1c9844 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Mon, 20 Dec 2021 15:52:50 +0000 Subject: xsk: Initialise xskb free_list_node This commit initialises the xskb's free_list_node when the xskb is allocated. This prevents a potential false negative returned from a call to list_empty for that node, such as the one introduced in commit 199d983bc015 ("xsk: Fix crash on double free in buffer pool") In my environment this issue caused packets to not be received by the xdpsock application if the traffic was running prior to application launch. This happened when the first batch of packets failed the xskmap lookup and XDP_PASS was returned from the bpf program. This action is handled in the i40e zc driver (and others) by allocating an skbuff, freeing the xdp_buff and adding the associated xskb to the xsk_buff_pool's free_list if it hadn't been added already. Without this fix, the xskb is not added to the free_list because the check to determine if it was added already returns an invalid positive result. Later, this caused allocation errors in the driver and the failure to receive packets. 
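A short sketch of the list_empty() behaviour described above; the structure below is a stand-in, not the real xdp_buff_xsk. A zeroed list_head has next == NULL rather than pointing to itself, so list_empty() reports the node as non-empty and an "already queued?" check takes the wrong branch until INIT_LIST_HEAD() has run.

    #include <linux/list.h>

    struct demo_buf {                       /* illustrative, not xdp_buff_xsk */
            struct list_head free_list_node;
    };

    /* With 'b' freshly zeroed (e.g. from kvcalloc()), free_list_node.next is
     * NULL, so list_empty(&b->free_list_node) returns false and a guard such
     * as "if (!list_empty(...)) return;" skips the buffer as if it were
     * already on the free list. After INIT_LIST_HEAD() the node points at
     * itself and list_empty() correctly returns true until it is linked in.
     */
    static void demo_init(struct demo_buf *b)
    {
            INIT_LIST_HEAD(&b->free_list_node);
    }
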
Fixes: 199d983bc015 ("xsk: Fix crash on double free in buffer pool") Fixes: 2b43470add8c ("xsk: Introduce AF_XDP buffer allocation API") Signed-off-by: Ciara Loftus Acked-by: Magnus Karlsson Link: https://lore.kernel.org/r/20211220155250.2746-1-ciara.loftus@intel.com Signed-off-by: Jakub Kicinski --- net/xdp/xsk_buff_pool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index bc4ad48ea4f0..fd39bb660ebc 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -83,6 +83,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, xskb = &pool->heads[i]; xskb->pool = pool; xskb->xdp.frame_sz = umem->chunk_size - umem->headroom; + INIT_LIST_HEAD(&xskb->free_list_node); if (pool->unaligned) pool->free_heads[i] = xskb; else -- cgit From fb7bc9204095090731430c8921f9e629740c110a Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 29 Dec 2021 15:09:47 -0500 Subject: ipv6: raw: check passed optlen before reading Add a check that the user-provided option is at least as long as the number of bytes we intend to read. Before this patch we would blindly read sizeof(int) bytes even in cases where the user passed optlen Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211229200947.2862255-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/raw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 60f1e4f5be5a..c51d5ce3711c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1020,6 +1020,9 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, struct raw6_sock *rp = raw6_sk(sk); int val; + if (optlen < sizeof(val)) + return -EINVAL; + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; -- cgit From 99b40610956a8a8755653a67392e2a8b772453be Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 27 Dec 2021 19:21:15 +0200 Subject: net: bridge: mcast: add and enforce query interval minimum As reported[1] if query interval is set too low and we have multiple bridges or even a single bridge with multiple querier vlans configured we can crash the machine. Add a 1 second minimum which must be enforced by overwriting the value if set lower (i.e. without returning an error) to avoid breaking user-space. If that happens a log message is emitted to let the administrator know that the interval has been set to the minimum. The issue has been present since these intervals could be user-controlled. 
[1] https://lore.kernel.org/netdev/e8b9ce41-57b9-b6e2-a46a-ff9c791cf0ba@gmail.com/ Fixes: d902eee43f19 ("bridge: Add multicast count/interval sysfs entries") Reported-by: Eric Dumazet Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 16 ++++++++++++++++ net/bridge/br_netlink.c | 2 +- net/bridge/br_private.h | 3 +++ net/bridge/br_sysfs_br.c | 2 +- net/bridge/br_vlan_options.c | 2 +- 5 files changed, 22 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index f3d751105343..998da4a2d209 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4522,6 +4522,22 @@ int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx, } #endif +void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, + unsigned long val) +{ + unsigned long intvl_jiffies = clock_t_to_jiffies(val); + + if (intvl_jiffies < BR_MULTICAST_QUERY_INTVL_MIN) { + br_info(brmctx->br, + "trying to set multicast query interval below minimum, setting to %lu (%ums)\n", + jiffies_to_clock_t(BR_MULTICAST_QUERY_INTVL_MIN), + jiffies_to_msecs(BR_MULTICAST_QUERY_INTVL_MIN)); + intvl_jiffies = BR_MULTICAST_QUERY_INTVL_MIN; + } + + brmctx->multicast_query_interval = intvl_jiffies; +} + /** * br_multicast_list_adjacent - Returns snooped multicast addresses * @dev: The bridge port adjacent to which to retrieve addresses diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0c8b5f1a15bc..701dd8b8455e 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1357,7 +1357,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (data[IFLA_BR_MCAST_QUERY_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]); - br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val); + br_multicast_set_query_intvl(&br->multicast_ctx, val); } if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c0efd697865a..4ed7f11042e8 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -28,6 +28,7 @@ #define BR_MAX_PORTS (1<multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val); + br_multicast_set_query_intvl(&br->multicast_ctx, val); return 0; } diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index 8ffd4ed2563c..bf1ac0874279 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -521,7 +521,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br, u64 val; val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]); - v->br_mcast_ctx.multicast_query_interval = clock_t_to_jiffies(val); + br_multicast_set_query_intvl(&v->br_mcast_ctx, val); *changed = true; } if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]) { -- cgit From f83a112bd91a494cdee671aec74e777470fb4a07 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 27 Dec 2021 19:21:16 +0200 Subject: net: bridge: mcast: add and enforce startup query interval minimum As reported[1] if startup query interval is set too low in combination with large number of startup queries and we have multiple bridges or even a single bridge with multiple querier vlans configured we can crash the machine. Add a 1 second minimum which must be enforced by overwriting the value if set lower (i.e. without returning an error) to avoid breaking user-space. If that happens a log message is emitted to let the admin know that the startup interval has been set to the minimum. 
It doesn't make sense to make the startup interval lower than the normal query interval so use the same value of 1 second. The issue has been present since these intervals could be user-controlled. [1] https://lore.kernel.org/netdev/e8b9ce41-57b9-b6e2-a46a-ff9c791cf0ba@gmail.com/ Fixes: d902eee43f19 ("bridge: Add multicast count/interval sysfs entries") Reported-by: Eric Dumazet Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 16 ++++++++++++++++ net/bridge/br_netlink.c | 2 +- net/bridge/br_private.h | 3 +++ net/bridge/br_sysfs_br.c | 2 +- net/bridge/br_vlan_options.c | 2 +- 5 files changed, 22 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 998da4a2d209..de2409889489 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4538,6 +4538,22 @@ void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, brmctx->multicast_query_interval = intvl_jiffies; } +void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx, + unsigned long val) +{ + unsigned long intvl_jiffies = clock_t_to_jiffies(val); + + if (intvl_jiffies < BR_MULTICAST_STARTUP_QUERY_INTVL_MIN) { + br_info(brmctx->br, + "trying to set multicast startup query interval below minimum, setting to %lu (%ums)\n", + jiffies_to_clock_t(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN), + jiffies_to_msecs(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN)); + intvl_jiffies = BR_MULTICAST_STARTUP_QUERY_INTVL_MIN; + } + + brmctx->multicast_startup_query_interval = intvl_jiffies; +} + /** * br_multicast_list_adjacent - Returns snooped multicast addresses * @dev: The bridge port adjacent to which to retrieve addresses diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 701dd8b8455e..2ff83d84230d 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1369,7 +1369,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]); - br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); + br_multicast_set_startup_query_intvl(&br->multicast_ctx, val); } if (data[IFLA_BR_MCAST_STATS_ENABLED]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 4ed7f11042e8..2187a0c3fd22 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -29,6 +29,7 @@ #define BR_MULTICAST_DEFAULT_HASH_MAX 4096 #define BR_MULTICAST_QUERY_INTVL_MIN msecs_to_jiffies(1000) +#define BR_MULTICAST_STARTUP_QUERY_INTVL_MIN BR_MULTICAST_QUERY_INTVL_MIN #define BR_HWDOM_MAX BITS_PER_LONG @@ -966,6 +967,8 @@ size_t br_multicast_querier_state_size(void); size_t br_rports_size(const struct net_bridge_mcast *brmctx); void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, unsigned long val); +void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx, + unsigned long val); static inline bool br_group_is_l2(const struct br_ip *group) { diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index f5bd1114a434..7b0c19772111 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -706,7 +706,7 @@ static ssize_t multicast_startup_query_interval_show( static int set_startup_query_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); + br_multicast_set_startup_query_intvl(&br->multicast_ctx, val); return 0; } diff 
--git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index bf1ac0874279..a6382973b3e7 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -535,7 +535,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br, u64 val; val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]); - v->br_mcast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); + br_multicast_set_startup_query_intvl(&v->br_mcast_ctx, val); *changed = true; } if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]) { -- cgit From 9c1952aeaa98b3cfc49e2a79cb2c7d6a674213e9 Mon Sep 17 00:00:00 2001 From: wujianguo Date: Wed, 29 Dec 2021 18:58:10 +0800 Subject: selftests/net: udpgso_bench_tx: fix dst ip argument udpgso_bench_tx call setup_sockaddr() for dest address before parsing all arguments, if we specify "-p ${dst_port}" after "-D ${dst_ip}", then ${dst_port} will be ignored, and using default cfg_port 8000. This will cause test case "multiple GRO socks" failed in udpgro.sh. Setup sockaddr after parsing all arguments. Fixes: 3a687bef148d ("selftests: udp gso benchmark") Signed-off-by: Jianguo Wu Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/ff620d9f-5b52-06ab-5286-44b945453002@163.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgso_bench_tx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index 17512a43885e..f1fdaa270291 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -419,6 +419,7 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { + const char *bind_addr = NULL; int max_len, hdrlen; int c; @@ -446,7 +447,7 @@ static void parse_opts(int argc, char **argv) cfg_cpu = strtol(optarg, NULL, 0); break; case 'D': - setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + bind_addr = optarg; break; case 'l': cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; @@ -492,6 +493,11 @@ static void parse_opts(int argc, char **argv) } } + if (!bind_addr) + bind_addr = cfg_family == PF_INET6 ? 
"::" : "0.0.0.0"; + + setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr); + if (optind != argc) usage(argv[0]); -- cgit From add25d6d6c85f7b6d00a055ee0a4169acf845681 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Wed, 29 Dec 2021 15:27:30 +0800 Subject: selftests: net: Fix a typo in udpgro_fwd.sh $rvs -> $rcv Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") Signed-off-by: Jianguo Wu Link: https://lore.kernel.org/r/d247d7c8-a03a-0abf-3c71-4006a051d133@163.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgro_fwd.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 7f26591f236b..6a3985b8cd7f 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -132,7 +132,7 @@ run_test() { local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \ sed -e 's/\[//' -e 's/:.*//'` if [ $rcv != $pkts ]; then - echo " fail - received $rvs packets, expected $pkts" + echo " fail - received $rcv packets, expected $pkts" ret=1 return fi -- cgit From e22e45fc9e41bf9fcc1e92cfb78eb92786728ef0 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 28 Dec 2021 18:41:45 +0800 Subject: net: fix use-after-free in tw_timer_handler A real world panic issue was found as follow in Linux 5.4. BUG: unable to handle page fault for address: ffffde49a863de28 PGD 7e6fe62067 P4D 7e6fe62067 PUD 7e6fe63067 PMD f51e064067 PTE 0 RIP: 0010:tw_timer_handler+0x20/0x40 Call Trace: call_timer_fn+0x2b/0x120 run_timer_softirq+0x1ef/0x450 __do_softirq+0x10d/0x2b8 irq_exit+0xc7/0xd0 smp_apic_timer_interrupt+0x68/0x120 apic_timer_interrupt+0xf/0x20 This issue was also reported since 2017 in the thread [1], unfortunately, the issue was still can be reproduced after fixing DCCP. The ipv4_mib_exit_net is called before tcp_sk_exit_batch when a net namespace is destroyed since tcp_sk_ops is registered befrore ipv4_mib_ops, which means tcp_sk_ops is in the front of ipv4_mib_ops in the list of pernet_list. There will be a use-after-free on net->mib.net_statistics in tw_timer_handler after ipv4_mib_exit_net if there are some inflight time-wait timers. This bug is not introduced by commit f2bf415cfed7 ("mib: add net to NET_ADD_STATS_BH") since the net_statistics is a global variable instead of dynamic allocation and freeing. Actually, commit 61a7e26028b9 ("mib: put net statistics on struct net") introduces the bug since it put net statistics on struct net and free it when net namespace is destroyed. Moving init_ipv4_mibs() to the front of tcp_init() to fix this bug and replace pr_crit() with panic() since continuing is meaningless when init_ipv4_mibs() fails. [1] https://groups.google.com/g/syzkaller/c/p1tn-_Kc6l4/m/smuL_FMAAgAJ?pli=1 Fixes: 61a7e26028b9 ("mib: put net statistics on struct net") Signed-off-by: Muchun Song Cc: Cong Wang Cc: Fam Zheng Cc: Link: https://lore.kernel.org/r/20211228104145.9426-1-songmuchun@bytedance.com Signed-off-by: Jakub Kicinski --- net/ipv4/af_inet.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6b5956500436..5f70ffdae1b5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1994,6 +1994,10 @@ static int __init inet_init(void) ip_init(); + /* Initialise per-cpu ipv4 mibs */ + if (init_ipv4_mibs()) + panic("%s: Cannot init ipv4 mibs\n", __func__); + /* Setup TCP slab cache for open requests. 
*/ tcp_init(); @@ -2024,12 +2028,6 @@ static int __init inet_init(void) if (init_inet_pernet_ops()) pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); - /* - * Initialise per-cpu ipv4 mibs - */ - - if (init_ipv4_mibs()) - pr_crit("%s: Cannot init ipv4 mibs\n", __func__); ipv4_proc_init(); -- cgit From 168fed986b3a7ec7b98cab1fe84e2f282b9e6a8f Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 28 Dec 2021 17:31:42 +0200 Subject: net: bridge: mcast: fix br_multicast_ctx_vlan_global_disabled helper We need to first check if the context is a vlan one, then we need to check the global bridge multicast vlan snooping flag, and finally the vlan's multicast flag, otherwise we will unnecessarily enable vlan mcast processing (e.g. querier timers). Fixes: 7b54aaaf53cb ("net: bridge: multicast: add vlan state initialization and control") Signed-off-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20211228153142.536969-1-nikolay@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br_private.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2187a0c3fd22..e8c6ee322c71 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1153,9 +1153,9 @@ br_multicast_port_ctx_get_global(const struct net_bridge_mcast_port *pmctx) static inline bool br_multicast_ctx_vlan_global_disabled(const struct net_bridge_mcast *brmctx) { - return br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && - br_multicast_ctx_is_vlan(brmctx) && - !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED); + return br_multicast_ctx_is_vlan(brmctx) && + (!br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) || + !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)); } static inline bool -- cgit From 92a34ab169f9eefe29cd420ce96b0a0a2a1da853 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 29 Dec 2021 11:21:18 +0800 Subject: net/ncsi: check for error return from call to nla_put_u32 As we can see from the comment of the nla_put() that it could return -EMSGSIZE if the tailroom of the skb is insufficient. Therefore, it should be better to check the return value of the nla_put_u32 and return the error code if error accurs. Also, there are many other functions have the same problem, and if this patch is correct, I will commit a new version to fix all. 
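The shape of the ncsi fix, sketched with made-up attribute names (the real ones appear in the diff that follows): once a nest has been started, a failing nla_put_*() call has to cancel that nest and return the error rather than letting it be silently dropped.

    #include <net/netlink.h>

    enum { DEMO_ATTR_UNSPEC, DEMO_ATTR_NEST, DEMO_ATTR_ID };   /* placeholders */

    static int demo_fill(struct sk_buff *skb, u32 id)
    {
            struct nlattr *nest;
            int rc;

            nest = nla_nest_start_noflag(skb, DEMO_ATTR_NEST);
            if (!nest)
                    return -EMSGSIZE;
            rc = nla_put_u32(skb, DEMO_ATTR_ID, id);    /* can fail with -EMSGSIZE */
            if (rc) {
                    nla_nest_cancel(skb, nest);         /* drop the partial nest */
                    return rc;
            }
            nla_nest_end(skb, nest);
            return 0;
    }

nla_nest_cancel() trims the message back to the state it had before nla_nest_start_noflag(), so the caller can propagate the error without leaving a half-built attribute in the skb.
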
Fixes: 955dc68cb9b2 ("net/ncsi: Add generic netlink family") Signed-off-by: Jiasheng Jiang Link: https://lore.kernel.org/r/20211229032118.1706294-1-jiasheng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- net/ncsi/ncsi-netlink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index bb5f1650f11c..c189b4c8a182 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -112,7 +112,11 @@ static int ncsi_write_package_info(struct sk_buff *skb, pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR); if (!pnest) return -ENOMEM; - nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); + rc = nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); + if (rc) { + nla_nest_cancel(skb, pnest); + return rc; + } if ((0x1 << np->id) == ndp->package_whitelist) nla_put_flag(skb, NCSI_PKG_ATTR_FORCED); cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST); -- cgit From be1c5b53227ba8280f1ebb01c6f5da3c9eebdaad Mon Sep 17 00:00:00 2001 From: xu xin Date: Thu, 30 Dec 2021 03:28:56 +0000 Subject: Documentation: fix outdated interpretation of ip_no_pmtu_disc The updating way of pmtu has changed, but documentation is still in the old way. So this patch updates the interpretation of ip_no_pmtu_disc and min_pmtu. See commit 28d35bcdd3925 ("net: ipv4: don't let PMTU updates increase route MTU") Reported-by: Zeal Robot Signed-off-by: xu xin Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index c04431144f7a..2572eecc3e86 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -25,7 +25,8 @@ ip_default_ttl - INTEGER ip_no_pmtu_disc - INTEGER Disable Path MTU Discovery. If enabled in mode 1 and a fragmentation-required ICMP is received, the PMTU to this - destination will be set to min_pmtu (see below). You will need + destination will be set to the smallest of the old MTU to + this destination and min_pmtu (see below). You will need to raise min_pmtu to the smallest interface MTU on your system manually if you want to avoid locally generated fragments. @@ -49,7 +50,8 @@ ip_no_pmtu_disc - INTEGER Default: FALSE min_pmtu - INTEGER - default 552 - minimum discovered Path MTU + default 552 - minimum Path MTU. Unless this is changed mannually, + each cached pmtu will never be lower than this setting. ip_forward_use_pmtu - BOOLEAN By default we don't trust protocol path MTUs while forwarding -- cgit From 8b3170e07539855ee91bc5e2fa7780a4c9b5c7aa Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Thu, 30 Dec 2021 18:40:29 +0800 Subject: selftests: net: using ping6 for IPv6 in udpgro_fwd.sh udpgro_fwd.sh output following message: ping: 2001:db8:1::100: Address family for hostname not supported Using ping6 when pinging IPv6 addresses. Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") Signed-off-by: Jianguo Wu Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/udpgro_fwd.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 6a3985b8cd7f..3ea73013d956 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -185,6 +185,7 @@ for family in 4 6; do IPT=iptables SUFFIX=24 VXDEV=vxlan + PING=ping if [ $family = 6 ]; then BM_NET=$BM_NET_V6 @@ -192,6 +193,7 @@ for family in 4 6; do SUFFIX="64 nodad" VXDEV=vxlan6 IPT=ip6tables + PING="ping6" fi echo "IPv$family" @@ -237,7 +239,7 @@ for family in 4 6; do # load arp cache before running the test to reduce the amount of # stray traffic on top of the UDP tunnel - ip netns exec $NS_SRC ping -q -c 1 $OL_NET$DST_NAT >/dev/null + ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST cleanup -- cgit From bf2b09fedc17248b315f80fb249087b7d28a69a6 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 30 Dec 2021 12:26:27 +0000 Subject: fsl/fman: Fix missing put_device() call in fman_port_probe The reference taken by 'of_find_device_by_node()' must be released when not needed anymore. Add the corresponding 'put_device()' in the and error handling paths. Fixes: 18a6c85fcc78 ("fsl/fman: Add FMan Port Support") Signed-off-by: Miaoqian Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman_port.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index d9baac0dbc7d..4c9d05c45c03 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1805,7 +1805,7 @@ static int fman_port_probe(struct platform_device *of_dev) fman = dev_get_drvdata(&fm_pdev->dev); if (!fman) { err = -EINVAL; - goto return_err; + goto put_device; } err = of_property_read_u32(port_node, "cell-index", &val); @@ -1813,7 +1813,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: reading cell-index for %pOF failed\n", __func__, port_node); err = -EINVAL; - goto return_err; + goto put_device; } port_id = (u8)val; port->dts_params.id = port_id; @@ -1847,7 +1847,7 @@ static int fman_port_probe(struct platform_device *of_dev) } else { dev_err(port->dev, "%s: Illegal port type\n", __func__); err = -EINVAL; - goto return_err; + goto put_device; } port->dts_params.type = port_type; @@ -1861,7 +1861,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: incorrect qman-channel-id\n", __func__); err = -EINVAL; - goto return_err; + goto put_device; } port->dts_params.qman_channel_id = qman_channel_id; } @@ -1871,7 +1871,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: of_address_to_resource() failed\n", __func__); err = -ENOMEM; - goto return_err; + goto put_device; } port->dts_params.fman = fman; @@ -1896,6 +1896,8 @@ static int fman_port_probe(struct platform_device *of_dev) return 0; +put_device: + put_device(&fm_pdev->dev); return_err: of_node_put(port_node); free_port: -- cgit
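
As a closing illustration of the reference-counting rule the last fix applies, here is a hedged probe fragment with invented names; it is not the fman driver's code. of_find_device_by_node() returns its device with the reference count raised, so every error path taken after a successful lookup needs a matching put_device(), while the success path holds the reference for as long as the device stays in use.

    #include <linux/of.h>
    #include <linux/of_platform.h>

    static int demo_probe(struct device_node *np, struct device_node *fman_np)
    {
            struct platform_device *fm_pdev;
            u32 val;
            int err;

            fm_pdev = of_find_device_by_node(fman_np);  /* takes a reference */
            if (!fm_pdev)
                    return -ENODEV;

            err = of_property_read_u32(np, "cell-index", &val);
            if (err)
                    goto put_device;

            return 0;       /* keep the reference while the port stays bound */

    put_device:
            put_device(&fm_pdev->dev);
            return err;
    }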