summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/devlink-health.txt86
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c297
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c189
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c5
-rw-r--r--include/net/devlink.h211
-rw-r--r--include/trace/events/devlink.h65
-rw-r--r--include/uapi/linux/devlink.h24
-rw-r--r--net/core/devlink.c1008
11 files changed, 1755 insertions, 165 deletions
diff --git a/Documentation/networking/devlink-health.txt b/Documentation/networking/devlink-health.txt
new file mode 100644
index 000000000000..1db3fbea0831
--- /dev/null
+++ b/Documentation/networking/devlink-health.txt
@@ -0,0 +1,86 @@
+The health mechanism is targeted for Real Time Alerting, in order to know when
+something bad had happened to a PCI device
+- Provide alert debug information
+- Self healing
+- If problem needs vendor support, provide a way to gather all needed debugging
+ information.
+
+The main idea is to unify and centralize driver health reports in the
+generic devlink instance and allow the user to set different
+attributes of the health reporting and recovery procedures.
+
+The devlink health reporter:
+Device driver creates a "health reporter" per each error/health type.
+Error/Health type can be a known/generic (eg pci error, fw error, rx/tx error)
+or unknown (driver specific).
+For each registered health reporter a driver can issue error/health reports
+asynchronously. All health reports handling is done by devlink.
+Device driver can provide specific callbacks for each "health reporter", e.g.
+ - Recovery procedures
+ - Diagnostics and object dump procedures
+ - OOB initial parameters
+Different parts of the driver can register different types of health reporters
+with different handlers.
+
+Once an error is reported, devlink health will do the following actions:
+ * A log is being send to the kernel trace events buffer
+ * Health status and statistics are being updated for the reporter instance
+ * Object dump is being taken and saved at the reporter instance (as long as
+ there is no other dump which is already stored)
+ * Auto recovery attempt is being done. Depends on:
+ - Auto-recovery configuration
+ - Grace period vs. time passed since last recover
+
+The user interface:
+User can access/change each reporter's parameters and driver specific callbacks
+via devlink, e.g per error type (per health reporter)
+ - Configure reporter's generic parameters (like: disable/enable auto recovery)
+ - Invoke recovery procedure
+ - Run diagnostics
+ - Object dump
+
+The devlink health interface (via netlink):
+DEVLINK_CMD_HEALTH_REPORTER_GET
+ Retrieves status and configuration info per DEV and reporter.
+DEVLINK_CMD_HEALTH_REPORTER_SET
+ Allows reporter-related configuration setting.
+DEVLINK_CMD_HEALTH_REPORTER_RECOVER
+ Triggers a reporter's recovery procedure.
+DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE
+ Retrieves diagnostics data from a reporter on a device.
+DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET
+ Retrieves the last stored dump. Devlink health
+ saves a single dump. If an dump is not already stored by the devlink
+ for this reporter, devlink generates a new dump.
+ dump output is defined by the reporter.
+DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR
+ Clears the last saved dump file for the specified reporter.
+
+
+ netlink
+ +--------------------------+
+ | |
+ | + |
+ | | |
+ +--------------------------+
+ |request for ops
+ |(diagnose,
+ mlx5_core devlink |recover,
+ |dump)
++--------+ +--------------------------+
+| | | reporter| |
+| | | +---------v----------+ |
+| | ops execution | | | |
+| <----------------------------------+ | |
+| | | | | |
+| | | + ^------------------+ |
+| | | | request for ops |
+| | | | (recover, dump) |
+| | | | |
+| | | +-+------------------+ |
+| | health report | | health handler | |
+| +-------------------------------> | |
+| | | +--------------------+ |
+| | health reporter create | |
+| +----------------------------> |
++--------+ +--------------------------+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 9de9abacf7f6..6bb2a860b15b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -22,7 +22,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
#
mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
- en_selftest.o en/port.o en/monitor_stats.o
+ en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o
#
# Netdev extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 6dd74ef69389..66e510b16243 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -387,10 +387,7 @@ struct mlx5e_txqsq {
struct mlx5e_channel *channel;
int txq_ix;
u32 rate_limit;
- struct mlx5e_txqsq_recover {
- struct work_struct recover_work;
- u64 last_recover;
- } recover;
+ struct work_struct recover_work;
} ____cacheline_aligned_in_smp;
struct mlx5e_dma_info {
@@ -683,6 +680,13 @@ struct mlx5e_rss_params {
u8 hfunc;
};
+struct mlx5e_modify_sq_param {
+ int curr_state;
+ int next_state;
+ int rl_update;
+ int rl_index;
+};
+
struct mlx5e_priv {
/* priv data path fields - start */
struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
@@ -738,6 +742,7 @@ struct mlx5e_priv {
#ifdef CONFIG_MLX5_EN_TLS
struct mlx5e_tls *tls;
#endif
+ struct devlink_health_reporter *tx_reporter;
};
struct mlx5e_profile {
@@ -868,6 +873,11 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
+int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
+ struct mlx5e_modify_sq_param *p);
+void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq);
+void mlx5e_tx_disable_queue(struct netdev_queue *txq);
+
static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
{
return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) &&
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
new file mode 100644
index 000000000000..e78e92753d73
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5E_EN_REPORTER_H
+#define __MLX5E_EN_REPORTER_H
+
+#include <linux/mlx5/driver.h>
+#include "en.h"
+
+int mlx5e_tx_reporter_create(struct mlx5e_priv *priv);
+void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv);
+void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq);
+int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
new file mode 100644
index 000000000000..0aebfb377cf0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <net/devlink.h>
+#include "reporter.h"
+#include "lib/eq.h"
+
+#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256
+
+struct mlx5e_tx_err_ctx {
+ int (*recover)(struct mlx5e_txqsq *sq);
+ struct mlx5e_txqsq *sq;
+};
+
+static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
+{
+ unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
+
+ while (time_before(jiffies, exp_time)) {
+ if (sq->cc == sq->pc)
+ return 0;
+
+ msleep(20);
+ }
+
+ netdev_err(sq->channel->netdev,
+ "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
+ sq->sqn, sq->cc, sq->pc);
+
+ return -ETIMEDOUT;
+}
+
+static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
+{
+ WARN_ONCE(sq->cc != sq->pc,
+ "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
+ sq->sqn, sq->cc, sq->pc);
+ sq->cc = 0;
+ sq->dma_fifo_cc = 0;
+ sq->pc = 0;
+}
+
+static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
+{
+ struct mlx5_core_dev *mdev = sq->channel->mdev;
+ struct net_device *dev = sq->channel->netdev;
+ struct mlx5e_modify_sq_param msp = {0};
+ int err;
+
+ msp.curr_state = curr_state;
+ msp.next_state = MLX5_SQC_STATE_RST;
+
+ err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+ if (err) {
+ netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
+ return err;
+ }
+
+ memset(&msp, 0, sizeof(msp));
+ msp.curr_state = MLX5_SQC_STATE_RST;
+ msp.next_state = MLX5_SQC_STATE_RDY;
+
+ err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+ if (err) {
+ netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq)
+{
+ struct mlx5_core_dev *mdev = sq->channel->mdev;
+ struct net_device *dev = sq->channel->netdev;
+ u8 state;
+ int err;
+
+ if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
+ return 0;
+
+ err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
+ if (err) {
+ netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
+ sq->sqn, err);
+ return err;
+ }
+
+ if (state != MLX5_SQC_STATE_ERR) {
+ netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
+ return -EINVAL;
+ }
+
+ mlx5e_tx_disable_queue(sq->txq);
+
+ err = mlx5e_wait_for_sq_flush(sq);
+ if (err)
+ return err;
+
+ /* At this point, no new packets will arrive from the stack as TXQ is
+ * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
+ * pending WQEs. SQ can safely reset the SQ.
+ */
+
+ err = mlx5e_sq_to_ready(sq, state);
+ if (err)
+ return err;
+
+ mlx5e_reset_txqsq_cc_pc(sq);
+ sq->stats->recover++;
+ mlx5e_activate_txqsq(sq);
+
+ return 0;
+}
+
+void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq)
+{
+ char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
+ struct mlx5e_tx_err_ctx err_ctx = {0};
+
+ err_ctx.sq = sq;
+ err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
+ sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn);
+
+ devlink_health_report(sq->channel->priv->tx_reporter, err_str,
+ &err_ctx);
+}
+
+static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq)
+{
+ struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
+ u32 eqe_count;
+ int ret;
+
+ netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
+ eq->core.eqn, eq->core.cons_index, eq->core.irqn);
+
+ eqe_count = mlx5_eq_poll_irq_disabled(eq);
+ ret = eqe_count ? true : false;
+ if (!eqe_count) {
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ return ret;
+ }
+
+ netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n",
+ eqe_count, eq->core.eqn);
+ sq->channel->stats->eq_rearm++;
+ return ret;
+}
+
+int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq)
+{
+ char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
+ struct mlx5e_tx_err_ctx err_ctx;
+
+ err_ctx.sq = sq;
+ err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
+ sprintf(err_str,
+ "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
+ sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
+ jiffies_to_usecs(jiffies - sq->txq->trans_start));
+
+ return devlink_health_report(sq->channel->priv->tx_reporter, err_str,
+ &err_ctx);
+}
+
+/* state lock cannot be grabbed within this function.
+ * It can cause a dead lock or a read-after-free.
+ */
+int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx)
+{
+ return err_ctx->recover(err_ctx->sq);
+}
+
+static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv)
+{
+ int err;
+
+ rtnl_lock();
+ mutex_lock(&priv->state_lock);
+ mlx5e_close_locked(priv->netdev);
+ err = mlx5e_open_locked(priv->netdev);
+ mutex_unlock(&priv->state_lock);
+ rtnl_unlock();
+
+ return err;
+}
+
+static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
+ void *context)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ struct mlx5e_tx_err_ctx *err_ctx = context;
+
+ return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) :
+ mlx5e_tx_reporter_recover_all(priv);
+}
+
+static int
+mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
+ u32 sqn, u8 state, bool stopped)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sqn);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+ int i, err = 0;
+
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
+ if (err)
+ goto unlock;
+
+ for (i = 0; i < priv->channels.num * priv->channels.params.num_tc;
+ i++) {
+ struct mlx5e_txqsq *sq = priv->txq2sq[i];
+ u8 state;
+
+ err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
+ if (err)
+ break;
+
+ err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq->sqn,
+ state,
+ netif_xmit_stopped(sq->txq));
+ if (err)
+ break;
+ }
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+ if (err)
+ goto unlock;
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
+ .name = "tx",
+ .recover = mlx5e_tx_reporter_recover,
+ .diagnose = mlx5e_tx_reporter_diagnose,
+};
+
+#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
+
+int mlx5e_tx_reporter_create(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct devlink *devlink = priv_to_devlink(mdev);
+
+ priv->tx_reporter =
+ devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
+ MLX5_REPORTER_TX_GRACEFUL_PERIOD,
+ true, priv);
+ if (IS_ERR_OR_NULL(priv->tx_reporter))
+ netdev_warn(priv->netdev,
+ "Failed to create tx reporter, err = %ld\n",
+ PTR_ERR(priv->tx_reporter));
+ return PTR_ERR_OR_ZERO(priv->tx_reporter);
+}
+
+void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv)
+{
+ if (IS_ERR_OR_NULL(priv->tx_reporter))
+ return;
+
+ devlink_health_reporter_destroy(priv->tx_reporter);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 099d307e6f25..d81ebba7c04e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -51,6 +51,7 @@
#include "en/xdp.h"
#include "lib/eq.h"
#include "en/monitor_stats.h"
+#include "en/reporter.h"
struct mlx5e_rq_param {
u32 rqc[MLX5_ST_SZ_DW(rqc)];
@@ -1159,7 +1160,7 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
return 0;
}
-static void mlx5e_sq_recover(struct work_struct *work);
+static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
int txq_ix,
struct mlx5e_params *params,
@@ -1181,7 +1182,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->stats = &c->priv->channel_stats[c->ix].sq[tc];
- INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover);
+ INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
if (MLX5_IPSEC_DEV(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
if (mlx5_accel_is_tls_device(c->priv->mdev))
@@ -1269,15 +1270,8 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
return err;
}
-struct mlx5e_modify_sq_param {
- int curr_state;
- int next_state;
- bool rl_update;
- int rl_index;
-};
-
-static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
- struct mlx5e_modify_sq_param *p)
+int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
+ struct mlx5e_modify_sq_param *p)
{
void *in;
void *sqc;
@@ -1375,17 +1369,7 @@ err_free_txqsq:
return err;
}
-static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
-{
- WARN_ONCE(sq->cc != sq->pc,
- "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
- sq->sqn, sq->cc, sq->pc);
- sq->cc = 0;
- sq->dma_fifo_cc = 0;
- sq->pc = 0;
-}
-
-static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
+void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
{
sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
@@ -1394,7 +1378,7 @@ static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
netif_tx_start_queue(sq->txq);
}
-static inline void netif_tx_disable_queue(struct netdev_queue *txq)
+void mlx5e_tx_disable_queue(struct netdev_queue *txq)
{
__netif_tx_lock_bh(txq);
netif_tx_stop_queue(txq);
@@ -1410,7 +1394,7 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
/* prevent netif_tx_wake_queue */
napi_synchronize(&c->napi);
- netif_tx_disable_queue(sq->txq);
+ mlx5e_tx_disable_queue(sq->txq);
/* last doorbell out, godspeed .. */
if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
@@ -1430,6 +1414,7 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
struct mlx5_rate_limit rl = {0};
cancel_work_sync(&sq->dim.work);
+ cancel_work_sync(&sq->recover_work);
mlx5e_destroy_sq(mdev, sq->sqn);
if (sq->rate_limit) {
rl.rate = sq->rate_limit;
@@ -1439,105 +1424,12 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
mlx5e_free_txqsq(sq);
}
-static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
-{
- unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
-
- while (time_before(jiffies, exp_time)) {
- if (sq->cc == sq->pc)
- return 0;
-
- msleep(20);
- }
-
- netdev_err(sq->channel->netdev,
- "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
- sq->sqn, sq->cc, sq->pc);
-
- return -ETIMEDOUT;
-}
-
-static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
-{
- struct mlx5_core_dev *mdev = sq->channel->mdev;
- struct net_device *dev = sq->channel->netdev;
- struct mlx5e_modify_sq_param msp = {0};
- int err;
-
- msp.curr_state = curr_state;
- msp.next_state = MLX5_SQC_STATE_RST;
-
- err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
- if (err) {
- netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
- return err;
- }
-
- memset(&msp, 0, sizeof(msp));
- msp.curr_state = MLX5_SQC_STATE_RST;
- msp.next_state = MLX5_SQC_STATE_RDY;
-
- err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
- if (err) {
- netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
- return err;
- }
-
- return 0;
-}
-
-static void mlx5e_sq_recover(struct work_struct *work)
+static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
{
- struct mlx5e_txqsq_recover *recover =
- container_of(work, struct mlx5e_txqsq_recover,
- recover_work);
- struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq,
- recover);
- struct mlx5_core_dev *mdev = sq->channel->mdev;
- struct net_device *dev = sq->channel->netdev;
- u8 state;
- int err;
-
- err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
- if (err) {
- netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
- sq->sqn, err);
- return;
- }
-
- if (state != MLX5_RQC_STATE_ERR) {
- netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
- return;
- }
-
- netif_tx_disable_queue(sq->txq);
-
- if (mlx5e_wait_for_sq_flush(sq))
- return;
-
- /* If the interval between two consecutive recovers per SQ is too
- * short, don't recover to avoid infinite loop of ERR_CQE -> recover.
- * If we reached this state, there is probably a bug that needs to be
- * fixed. let's keep the queue close and let tx timeout cleanup.
- */
- if (jiffies_to_msecs(jiffies - recover->last_recover) <
- MLX5E_SQ_RECOVER_MIN_INTERVAL) {
- netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n",
- sq->sqn);
- return;
- }
-
- /* At this point, no new packets will arrive from the stack as TXQ is
- * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
- * pending WQEs. SQ can safely reset the SQ.
- */
- if (mlx5e_sq_to_ready(sq, state))
- return;
+ struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq,
+ recover_work);
- mlx5e_reset_txqsq_cc_pc(sq);
- sq->stats->recover++;
- recover->last_recover = jiffies;
- mlx5e_activate_txqsq(sq);
+ mlx5e_tx_reporter_err_cqe(sq);
}
static int mlx5e_open_icosq(struct mlx5e_channel *c,
@@ -3236,6 +3128,7 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
{
int tc;
+ mlx5e_tx_reporter_destroy(priv);
for (tc = 0; tc < priv->profile->max_tc; tc++)
mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
}
@@ -4223,31 +4116,13 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb,
return features;
}
-static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
- struct mlx5e_txqsq *sq)
-{
- struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
- u32 eqe_count;
-
- netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
- eq->core.eqn, eq->core.cons_index, eq->core.irqn);
-
- eqe_count = mlx5_eq_poll_irq_disabled(eq);
- if (!eqe_count)
- return false;
-
- netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn);
- sq->channel->stats->eq_rearm++;
- return true;
-}
-
static void mlx5e_tx_timeout_work(struct work_struct *work)
{
struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
tx_timeout_work);
- struct net_device *dev = priv->netdev;
- bool reopen_channels = false;
- int i, err;
+ bool report_failed = false;
+ int err;
+ int i;
rtnl_lock();
mutex_lock(&priv->state_lock);
@@ -4256,31 +4131,22 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
goto unlock;
for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) {
- struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i);
+ struct netdev_queue *dev_queue =
+ netdev_get_tx_queue(priv->netdev, i);
struct mlx5e_txqsq *sq = priv->txq2sq[i];
if (!netif_xmit_stopped(dev_queue))
continue;
- netdev_err(dev,
- "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
- i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
- jiffies_to_usecs(jiffies - dev_queue->trans_start));
-
- /* If we recover a lost interrupt, most likely TX timeout will
- * be resolved, skip reopening channels
- */
- if (!mlx5e_tx_timeout_eq_recover(dev, sq)) {
- clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
- reopen_channels = true;
- }
+ if (mlx5e_tx_reporter_timeout(sq))
+ report_failed = true;
}
- if (!reopen_channels)
+ if (!report_failed)
goto unlock;
- mlx5e_close_locked(dev);
- err = mlx5e_open_locked(dev);
+ mlx5e_close_locked(priv->netdev);
+ err = mlx5e_open_locked(priv->netdev);
if (err)
netdev_err(priv->netdev,
"mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
@@ -4296,6 +4162,12 @@ static void mlx5e_tx_timeout(struct net_device *dev)
struct mlx5e_priv *priv = netdev_priv(dev);
netdev_err(dev, "TX timeout detected\n");
+
+ if (IS_ERR_OR_NULL(priv->tx_reporter)) {
+ netdev_err_once(priv->netdev, "tx timeout will not be handled, no valid tx reporter\n");
+ return;
+ }
+
queue_work(priv->wq, &priv->tx_timeout_work);
}
@@ -4953,6 +4825,7 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_initialize(priv);
#endif
+ mlx5e_tx_reporter_create(priv);
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 598ad7e4d5c9..189211295e48 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -513,8 +513,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
&sq->state)) {
mlx5e_dump_error_cqe(sq,
(struct mlx5_err_cqe *)cqe);
- queue_work(cq->channel->priv->wq,
- &sq->recover.recover_work);
+ if (!IS_ERR_OR_NULL(cq->channel->priv->tx_reporter))
+ queue_work(cq->channel->priv->wq,
+ &sq->recover_work);
}
stats->cqe_err++;
}
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 74d992a68a06..c12ad6e9095d 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -30,6 +30,7 @@ struct devlink {
struct list_head param_list;
struct list_head region_list;
u32 snapshot_id;
+ struct list_head reporter_list;
struct devlink_dpipe_headers *dpipe_headers;
const struct devlink_ops *ops;
struct device *dev;
@@ -448,6 +449,29 @@ struct devlink_info_req;
typedef void devlink_snapshot_data_dest_t(const void *data);
+struct devlink_fmsg;
+struct devlink_health_reporter;
+
+/**
+ * struct devlink_health_reporter_ops - Reporter operations
+ * @name: reporter name
+ * @recover: callback to recover from reported error
+ * if priv_ctx is NULL, run a full recover
+ * @dump: callback to dump an object
+ * if priv_ctx is NULL, run a full dump
+ * @diagnose: callback to diagnose the current status
+ */
+
+struct devlink_health_reporter_ops {
+ char *name;
+ int (*recover)(struct devlink_health_reporter *reporter,
+ void *priv_ctx);
+ int (*dump)(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg, void *priv_ctx);
+ int (*diagnose)(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg);
+};
+
struct devlink_ops {
int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
int (*port_type_set)(struct devlink_port *devlink_port,
@@ -639,6 +663,50 @@ int devlink_info_version_running_put(struct devlink_info_req *req,
const char *version_name,
const char *version_value);
+int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg);
+int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg);
+
+int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name);
+int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg);
+
+int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
+ const char *name);
+int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg);
+
+int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value);
+int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value);
+int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value);
+int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value);
+int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value);
+int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
+ u16 value_len);
+
+int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ bool value);
+int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u8 value);
+int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u32 value);
+int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u64 value);
+int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const char *value);
+int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const void *value, u16 value_len);
+
+struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, bool auto_recover,
+ void *priv);
+void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
+
+void *
+devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
+int devlink_health_report(struct devlink_health_reporter *reporter,
+ const char *msg, void *priv_ctx);
+
#else
static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
@@ -971,6 +1039,149 @@ devlink_info_version_running_put(struct devlink_info_req *req,
{
return 0;
}
+
+static inline int
+devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
+ const char *name)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
+ u16 value_len)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ bool value)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u8 value)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u32 value)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u64 value)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const char *value)
+{
+ return 0;
+}
+
+static inline int
+devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const void *value, u16 value_len)
+{
+ return 0;
+}
+
+static inline struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, bool auto_recover,
+ void *priv)
+{
+ return NULL;
+}
+
+static inline void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+}
+
+static inline void *
+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
+{
+ return NULL;
+}
+
+static inline int
+devlink_health_report(struct devlink_health_reporter *reporter,
+ const char *msg, void *priv_ctx)
+{
+ return 0;
+}
#endif
#if IS_REACHABLE(CONFIG_NET_DEVLINK)
diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h
index 40705364a50f..191ddf67d769 100644
--- a/include/trace/events/devlink.h
+++ b/include/trace/events/devlink.h
@@ -75,6 +75,71 @@ TRACE_EVENT(devlink_hwerr,
__get_str(driver_name), __entry->err, __get_str(msg))
);
+/*
+ * Tracepoint for devlink health message:
+ */
+TRACE_EVENT(devlink_health_report,
+ TP_PROTO(const struct devlink *devlink, const char *reporter_name,
+ const char *msg),
+
+ TP_ARGS(devlink, reporter_name, msg),
+
+ TP_STRUCT__entry(
+ __string(bus_name, devlink->dev->bus->name)
+ __string(dev_name, dev_name(devlink->dev))
+ __string(driver_name, devlink->dev->driver->name)
+ __string(reporter_name, msg)
+ __string(msg, msg)
+ ),
+
+ TP_fast_assign(
+ __assign_str(bus_name, devlink->dev->bus->name);
+ __assign_str(dev_name, dev_name(devlink->dev));
+ __assign_str(driver_name, devlink->dev->driver->name);
+ __assign_str(reporter_name, reporter_name);
+ __assign_str(msg, msg);
+ ),
+
+ TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: %s",
+ __get_str(bus_name), __get_str(dev_name),
+ __get_str(driver_name), __get_str(reporter_name),
+ __get_str(msg))
+);
+
+/*
+ * Tracepoint for devlink health recover aborted message:
+ */
+TRACE_EVENT(devlink_health_recover_aborted,
+ TP_PROTO(const struct devlink *devlink, const char *reporter_name,
+ bool health_state, u64 time_since_last_recover),
+
+ TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover),
+
+ TP_STRUCT__entry(
+ __string(bus_name, devlink->dev->bus->name)
+ __string(dev_name, dev_name(devlink->dev))
+ __string(driver_name, devlink->dev->driver->name)
+ __string(reporter_name, reporter_name)
+ __field(bool, health_state)
+ __field(u64, time_since_last_recover)
+ ),
+
+ TP_fast_assign(
+ __assign_str(bus_name, devlink->dev->bus->name);
+ __assign_str(dev_name, dev_name(devlink->dev));
+ __assign_str(driver_name, devlink->dev->driver->name);
+ __assign_str(reporter_name, reporter_name);
+ __entry->health_state = health_state;
+ __entry->time_since_last_recover = time_since_last_recover;
+ ),
+
+ TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: health_state=%d time_since_last_recover=%llu recover aborted",
+ __get_str(bus_name), __get_str(dev_name),
+ __get_str(driver_name), __get_str(reporter_name),
+ __entry->health_state,
+ __entry->time_since_last_recover)
+);
+
#endif /* _TRACE_DEVLINK_H */
/* This part must be outside protection */
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 054b2d1a4537..72d9f7c89190 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -96,6 +96,13 @@ enum devlink_command {
DEVLINK_CMD_INFO_GET, /* can dump */
+ DEVLINK_CMD_HEALTH_REPORTER_GET,
+ DEVLINK_CMD_HEALTH_REPORTER_SET,
+ DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+ DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+ DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
+ DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
+
/* add new commands above here */
__DEVLINK_CMD_MAX,
DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -302,6 +309,23 @@ enum devlink_attr {
DEVLINK_ATTR_SB_POOL_CELL_SIZE, /* u32 */
+ DEVLINK_ATTR_FMSG, /* nested */
+ DEVLINK_ATTR_FMSG_OBJ_NEST_START, /* flag */
+ DEVLINK_ATTR_FMSG_PAIR_NEST_START, /* flag */
+ DEVLINK_ATTR_FMSG_ARR_NEST_START, /* flag */
+ DEVLINK_ATTR_FMSG_NEST_END, /* flag */
+ DEVLINK_ATTR_FMSG_OBJ_NAME, /* string */
+ DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE, /* u8 */
+ DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA, /* dynamic */
+
+ DEVLINK_ATTR_HEALTH_REPORTER, /* nested */
+ DEVLINK_ATTR_HEALTH_REPORTER_NAME, /* string */
+ DEVLINK_ATTR_HEALTH_REPORTER_STATE, /* u8 */
+ DEVLINK_ATTR_HEALTH_REPORTER_ERR, /* u64 */
+ DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, /* u64 */
+ DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, /* u64 */
+ DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, /* u64 */
+ DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, /* u8 */
/* add new attributes above here, update the policy in devlink.c */
__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index cd0d393bc62d..7fbdba547d4f 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3879,6 +3879,965 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
return msg->len;
}
+struct devlink_fmsg_item {
+ struct list_head list;
+ int attrtype;
+ u8 nla_type;
+ u16 len;
+ int value[0];
+};
+
+struct devlink_fmsg {
+ struct list_head item_list;
+};
+
+static struct devlink_fmsg *devlink_fmsg_alloc(void)
+{
+ struct devlink_fmsg *fmsg;
+
+ fmsg = kzalloc(sizeof(*fmsg), GFP_KERNEL);
+ if (!fmsg)
+ return NULL;
+
+ INIT_LIST_HEAD(&fmsg->item_list);
+
+ return fmsg;
+}
+
+static void devlink_fmsg_free(struct devlink_fmsg *fmsg)
+{
+ struct devlink_fmsg_item *item, *tmp;
+
+ list_for_each_entry_safe(item, tmp, &fmsg->item_list, list) {
+ list_del(&item->list);
+ kfree(item);
+ }
+ kfree(fmsg);
+}
+
+static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg,
+ int attrtype)
+{
+ struct devlink_fmsg_item *item;
+
+ item = kzalloc(sizeof(*item), GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+
+ item->attrtype = attrtype;
+ list_add_tail(&item->list, &fmsg->item_list);
+
+ return 0;
+}
+
+int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg)
+{
+ return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start);
+
+static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg)
+{
+ return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END);
+}
+
+int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg)
+{
+ return devlink_fmsg_nest_end(fmsg);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end);
+
+#define DEVLINK_FMSG_MAX_SIZE (GENLMSG_DEFAULT_SIZE - GENL_HDRLEN - NLA_HDRLEN)
+
+static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name)
+{
+ struct devlink_fmsg_item *item;
+
+ if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE)
+ return -EMSGSIZE;
+
+ item = kzalloc(sizeof(*item) + strlen(name) + 1, GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+
+ item->nla_type = NLA_NUL_STRING;
+ item->len = strlen(name) + 1;
+ item->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME;
+ memcpy(&item->value, name, item->len);
+ list_add_tail(&item->list, &fmsg->item_list);
+
+ return 0;
+}
+
+int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name)
+{
+ int err;
+
+ err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_put_name(fmsg, name);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start);
+
+int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+ return devlink_fmsg_nest_end(fmsg);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end);
+
+int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
+ const char *name)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start);
+
+int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ err = devlink_fmsg_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end);
+
+static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg,
+ const void *value, u16 value_len,
+ u8 value_nla_type)
+{
+ struct devlink_fmsg_item *item;
+
+ if (value_len > DEVLINK_FMSG_MAX_SIZE)
+ return -EMSGSIZE;
+
+ item = kzalloc(sizeof(*item) + value_len, GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+
+ item->nla_type = value_nla_type;
+ item->len = value_len;
+ item->attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
+ memcpy(&item->value, value, item->len);
+ list_add_tail(&item->list, &fmsg->item_list);
+
+ return 0;
+}
+
+int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
+{
+ return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_bool_put);
+
+int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
+{
+ return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u8_put);
+
+int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
+{
+ return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put);
+
+int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
+{
+ return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u64_put);
+
+int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
+{
+ return devlink_fmsg_put_value(fmsg, value, strlen(value) + 1,
+ NLA_NUL_STRING);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_string_put);
+
+int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
+ u16 value_len)
+{
+ return devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put);
+
+int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ bool value)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_bool_put(fmsg, value);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put);
+
+int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u8 value)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_put(fmsg, value);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put);
+
+int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u32 value)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_put(fmsg, value);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put);
+
+int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u64 value)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_put(fmsg, value);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put);
+
+int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const char *value)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_put(fmsg, value);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put);
+
+int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const void *value, u16 value_len)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_binary_put(fmsg, value, value_len);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put);
+
+static int
+devlink_fmsg_item_fill_type(struct devlink_fmsg_item *msg, struct sk_buff *skb)
+{
+ switch (msg->nla_type) {
+ case NLA_FLAG:
+ case NLA_U8:
+ case NLA_U32:
+ case NLA_U64:
+ case NLA_NUL_STRING:
+ case NLA_BINARY:
+ return nla_put_u8(skb, DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE,
+ msg->nla_type);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int
+devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb)
+{
+ int attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
+ u8 tmp;
+
+ switch (msg->nla_type) {
+ case NLA_FLAG:
+ /* Always provide flag data, regardless of its value */
+ tmp = *(bool *) msg->value;
+
+ return nla_put_u8(skb, attrtype, tmp);
+ case NLA_U8:
+ return nla_put_u8(skb, attrtype, *(u8 *) msg->value);
+ case NLA_U32:
+ return nla_put_u32(skb, attrtype, *(u32 *) msg->value);
+ case NLA_U64:
+ return nla_put_u64_64bit(skb, attrtype, *(u64 *) msg->value,
+ DEVLINK_ATTR_PAD);
+ case NLA_NUL_STRING:
+ return nla_put_string(skb, attrtype, (char *) &msg->value);
+ case NLA_BINARY:
+ return nla_put(skb, attrtype, msg->len, (void *) &msg->value);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int
+devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb,
+ int *start)
+{
+ struct devlink_fmsg_item *item;
+ struct nlattr *fmsg_nlattr;
+ int i = 0;
+ int err;
+
+ fmsg_nlattr = nla_nest_start(skb, DEVLINK_ATTR_FMSG);
+ if (!fmsg_nlattr)
+ return -EMSGSIZE;
+
+ list_for_each_entry(item, &fmsg->item_list, list) {
+ if (i < *start) {
+ i++;
+ continue;
+ }
+
+ switch (item->attrtype) {
+ case DEVLINK_ATTR_FMSG_OBJ_NEST_START:
+ case DEVLINK_ATTR_FMSG_PAIR_NEST_START:
+ case DEVLINK_ATTR_FMSG_ARR_NEST_START:
+ case DEVLINK_ATTR_FMSG_NEST_END:
+ err = nla_put_flag(skb, item->attrtype);
+ break;
+ case DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA:
+ err = devlink_fmsg_item_fill_type(item, skb);
+ if (err)
+ break;
+ err = devlink_fmsg_item_fill_data(item, skb);
+ break;
+ case DEVLINK_ATTR_FMSG_OBJ_NAME:
+ err = nla_put_string(skb, item->attrtype,
+ (char *) &item->value);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+ if (!err)
+ *start = ++i;
+ else
+ break;
+ }
+
+ nla_nest_end(skb, fmsg_nlattr);
+ return err;
+}
+
+static int devlink_fmsg_snd(struct devlink_fmsg *fmsg,
+ struct genl_info *info,
+ enum devlink_command cmd, int flags)
+{
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ bool last = false;
+ int index = 0;
+ void *hdr;
+ int err;
+
+ while (!last) {
+ int tmp_index = index;
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, flags | NLM_F_MULTI, cmd);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto nla_put_failure;
+ }
+
+ err = devlink_fmsg_prepare_skb(fmsg, skb, &index);
+ if (!err)
+ last = true;
+ else if (err != -EMSGSIZE || tmp_index == index)
+ goto nla_put_failure;
+
+ genlmsg_end(skb, hdr);
+ err = genlmsg_reply(skb, info);
+ if (err)
+ return err;
+ }
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = -EMSGSIZE;
+ goto nla_put_failure;
+ }
+ err = genlmsg_reply(skb, info);
+ if (err)
+ return err;
+
+ return 0;
+
+nla_put_failure:
+ nlmsg_free(skb);
+ return err;
+}
+
+struct devlink_health_reporter {
+ struct list_head list;
+ void *priv;
+ const struct devlink_health_reporter_ops *ops;
+ struct devlink *devlink;
+ struct devlink_fmsg *dump_fmsg;
+ struct mutex dump_lock; /* lock parallel read/write from dump buffers */
+ u64 graceful_period;
+ bool auto_recover;
+ u8 health_state;
+ u64 dump_ts;
+ u64 error_count;
+ u64 recovery_count;
+ u64 last_recovery_ts;
+};
+
+enum devlink_health_reporter_state {
+ DEVLINK_HEALTH_REPORTER_STATE_HEALTHY,
+ DEVLINK_HEALTH_REPORTER_STATE_ERROR,
+};
+
+void *
+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
+{
+ return reporter->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
+
+static struct devlink_health_reporter *
+devlink_health_reporter_find_by_name(struct devlink *devlink,
+ const char *reporter_name)
+{
+ struct devlink_health_reporter *reporter;
+
+ list_for_each_entry(reporter, &devlink->reporter_list, list)
+ if (!strcmp(reporter->ops->name, reporter_name))
+ return reporter;
+ return NULL;
+}
+
+/**
+ * devlink_health_reporter_create - create devlink health reporter
+ *
+ * @devlink: devlink
+ * @ops: ops
+ * @graceful_period: to avoid recovery loops, in msecs
+ * @auto_recover: auto recover when error occurs
+ * @priv: priv
+ */
+struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, bool auto_recover,
+ void *priv)
+{
+ struct devlink_health_reporter *reporter;
+
+ mutex_lock(&devlink->lock);
+ if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
+ reporter = ERR_PTR(-EEXIST);
+ goto unlock;
+ }
+
+ if (WARN_ON(auto_recover && !ops->recover) ||
+ WARN_ON(graceful_period && !ops->recover)) {
+ reporter = ERR_PTR(-EINVAL);
+ goto unlock;
+ }
+
+ reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
+ if (!reporter) {
+ reporter = ERR_PTR(-ENOMEM);
+ goto unlock;
+ }
+
+ reporter->priv = priv;
+ reporter->ops = ops;
+ reporter->devlink = devlink;
+ reporter->graceful_period = graceful_period;
+ reporter->auto_recover = auto_recover;
+ mutex_init(&reporter->dump_lock);
+ list_add_tail(&reporter->list, &devlink->reporter_list);
+unlock:
+ mutex_unlock(&devlink->lock);
+ return reporter;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
+
+/**
+ * devlink_health_reporter_destroy - destroy devlink health reporter
+ *
+ * @reporter: devlink health reporter to destroy
+ */
+void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+ mutex_lock(&reporter->devlink->lock);
+ list_del(&reporter->list);
+ mutex_unlock(&reporter->devlink->lock);
+ if (reporter->dump_fmsg)
+ devlink_fmsg_free(reporter->dump_fmsg);
+ kfree(reporter);
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
+
+static int
+devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
+ void *priv_ctx)
+{
+ int err;
+
+ if (!reporter->ops->recover)
+ return -EOPNOTSUPP;
+
+ err = reporter->ops->recover(reporter, priv_ctx);
+ if (err)
+ return err;
+
+ reporter->recovery_count++;
+ reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
+ reporter->last_recovery_ts = jiffies;
+
+ return 0;
+}
+
+static void
+devlink_health_dump_clear(struct devlink_health_reporter *reporter)
+{
+ if (!reporter->dump_fmsg)
+ return;
+ devlink_fmsg_free(reporter->dump_fmsg);
+ reporter->dump_fmsg = NULL;
+}
+
+static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
+ void *priv_ctx)
+{
+ int err;
+
+ if (!reporter->ops->dump)
+ return 0;
+
+ if (reporter->dump_fmsg)
+ return 0;
+
+ reporter->dump_fmsg = devlink_fmsg_alloc();
+ if (!reporter->dump_fmsg) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg);
+ if (err)
+ goto dump_err;
+
+ err = reporter->ops->dump(reporter, reporter->dump_fmsg,
+ priv_ctx);
+ if (err)
+ goto dump_err;
+
+ err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg);
+ if (err)
+ goto dump_err;
+
+ reporter->dump_ts = jiffies;
+
+ return 0;
+
+dump_err:
+ devlink_health_dump_clear(reporter);
+ return err;
+}
+
+int devlink_health_report(struct devlink_health_reporter *reporter,
+ const char *msg, void *priv_ctx)
+{
+ struct devlink *devlink = reporter->devlink;
+
+ /* write a log message of the current error */
+ WARN_ON(!msg);
+ trace_devlink_health_report(devlink, reporter->ops->name, msg);
+ reporter->error_count++;
+
+ /* abort if the previous error wasn't recovered */
+ if (reporter->auto_recover &&
+ (reporter->health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
+ jiffies - reporter->last_recovery_ts <
+ msecs_to_jiffies(reporter->graceful_period))) {
+ trace_devlink_health_recover_aborted(devlink,
+ reporter->ops->name,
+ reporter->health_state,
+ jiffies -
+ reporter->last_recovery_ts);
+ return -ECANCELED;
+ }
+
+ reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
+
+ mutex_lock(&reporter->dump_lock);
+ /* store current dump of current error, for later analysis */
+ devlink_health_do_dump(reporter, priv_ctx);
+ mutex_unlock(&reporter->dump_lock);
+
+ if (reporter->auto_recover)
+ return devlink_health_reporter_recover(reporter, priv_ctx);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_health_report);
+
+static struct devlink_health_reporter *
+devlink_health_reporter_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ char *reporter_name;
+
+ if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
+ return NULL;
+
+ reporter_name =
+ nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
+ return devlink_health_reporter_find_by_name(devlink, reporter_name);
+}
+
+static int
+devlink_nl_health_reporter_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_health_reporter *reporter,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ struct nlattr *reporter_attr;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto genlmsg_cancel;
+
+ reporter_attr = nla_nest_start(msg, DEVLINK_ATTR_HEALTH_REPORTER);
+ if (!reporter_attr)
+ goto genlmsg_cancel;
+ if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+ reporter->ops->name))
+ goto reporter_nest_cancel;
+ if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
+ reporter->health_state))
+ goto reporter_nest_cancel;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR,
+ reporter->error_count, DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER,
+ reporter->recovery_count, DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
+ reporter->graceful_period,
+ DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
+ reporter->auto_recover))
+ goto reporter_nest_cancel;
+ if (reporter->dump_fmsg &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
+ jiffies_to_msecs(reporter->dump_ts),
+ DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+
+ nla_nest_end(msg, reporter_attr);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+reporter_nest_cancel:
+ nla_nest_end(msg, reporter_attr);
+genlmsg_cancel:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+ struct sk_buff *msg;
+ int err;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
+ DEVLINK_CMD_HEALTH_REPORTER_GET,
+ info->snd_portid, info->snd_seq,
+ 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink_health_reporter *reporter;
+ struct devlink *devlink;
+ int start = cb->args[0];
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue;
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(reporter, &devlink->reporter_list,
+ list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_health_reporter_fill(msg, devlink,
+ reporter,
+ DEVLINK_CMD_HEALTH_REPORTER_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ mutex_unlock(&devlink->lock);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&devlink->lock);
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int
+devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ if (!reporter->ops->recover &&
+ (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] ||
+ info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]))
+ return -EOPNOTSUPP;
+
+ if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
+ reporter->graceful_period =
+ nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]);
+
+ if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])
+ reporter->auto_recover =
+ nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
+
+ return 0;
+}
+
+static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ return devlink_health_reporter_recover(reporter, NULL);
+}
+
+static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+ struct devlink_fmsg *fmsg;
+ int err;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ if (!reporter->ops->diagnose)
+ return -EOPNOTSUPP;
+
+ fmsg = devlink_fmsg_alloc();
+ if (!fmsg)
+ return -ENOMEM;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ goto out;
+
+ err = reporter->ops->diagnose(reporter, fmsg);
+ if (err)
+ goto out;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ goto out;
+
+ err = devlink_fmsg_snd(fmsg, info,
+ DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0);
+
+out:
+ devlink_fmsg_free(fmsg);
+ return err;
+}
+
+static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+ int err;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ if (!reporter->ops->dump)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&reporter->dump_lock);
+ err = devlink_health_do_dump(reporter, NULL);
+ if (err)
+ goto out;
+
+ err = devlink_fmsg_snd(reporter->dump_fmsg, info,
+ DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, 0);
+
+out:
+ mutex_unlock(&reporter->dump_lock);
+ return err;
+}
+
+static int
+devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ if (!reporter->ops->dump)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&reporter->dump_lock);
+ devlink_health_dump_clear(reporter);
+ mutex_unlock(&reporter->dump_lock);
+ return 0;
+}
+
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -3904,6 +4863,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -4147,6 +5109,51 @@ static const struct genl_ops devlink_nl_ops[] = {
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
/* can be retrieved by unprivileged users */
},
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
+ .doit = devlink_nl_cmd_health_reporter_get_doit,
+ .dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
+ .policy = devlink_nl_policy,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
+ .doit = devlink_nl_cmd_health_reporter_set_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+ .doit = devlink_nl_cmd_health_reporter_recover_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+ .doit = devlink_nl_cmd_health_reporter_diagnose_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
+ .doit = devlink_nl_cmd_health_reporter_dump_get_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NO_LOCK,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
+ .doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NO_LOCK,
+ },
};
static struct genl_family devlink_nl_family __ro_after_init = {
@@ -4187,6 +5194,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
INIT_LIST_HEAD(&devlink->resource_list);
INIT_LIST_HEAD(&devlink->param_list);
INIT_LIST_HEAD(&devlink->region_list);
+ INIT_LIST_HEAD(&devlink->reporter_list);
mutex_init(&devlink->lock);
return devlink;
}