summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
diff options
context:
space:
mode:
authorJiaran Zhang <zhangjiaran@huawei.com>2021-06-08 21:08:28 +0800
committerDavid S. Miller <davem@davemloft.net>2021-06-08 14:43:30 -0700
commit2e2deee7618b062efe3aba9fcb017dadcf148819 (patch)
tree9ae5d5c225a936965375dca2dc3c21ff4a178728 /drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
parent17f59244029bf9c0673725efdd0386ed95e127a7 (diff)
net: hns3: add the RAS compatibility adaptation solution
To adapt to hardware modification and ensure that the driver is compatible with the original error handling content, we need to add the RAS compatibility adaptation solution. Add a processing branch to the driver during error handling. In the new processing branch, NIC fault information is integrated by the IMP. An interaction command is added between the driver and IMP to query and clear the fault source and interrupt source. The IMP integrates error information and reports the highest reset level to the driver. Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com> Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c')
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c320
1 files changed, 292 insertions, 28 deletions
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 540dd15d7771..36f8055bd859 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -631,6 +631,98 @@ static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = {
{ /* sentinel */ }
};
+static const struct hclge_hw_module_id hclge_hw_module_id_st[] = {
+ {
+ .module_id = MODULE_NONE,
+ .msg = "MODULE_NONE"
+ }, {
+ .module_id = MODULE_BIOS_COMMON,
+ .msg = "MODULE_BIOS_COMMON"
+ }, {
+ .module_id = MODULE_GE,
+ .msg = "MODULE_GE"
+ }, {
+ .module_id = MODULE_IGU_EGU,
+ .msg = "MODULE_IGU_EGU"
+ }, {
+ .module_id = MODULE_LGE,
+ .msg = "MODULE_LGE"
+ }, {
+ .module_id = MODULE_NCSI,
+ .msg = "MODULE_NCSI"
+ }, {
+ .module_id = MODULE_PPP,
+ .msg = "MODULE_PPP"
+ }, {
+ .module_id = MODULE_QCN,
+ .msg = "MODULE_QCN"
+ }, {
+ .module_id = MODULE_RCB_RX,
+ .msg = "MODULE_RCB_RX"
+ }, {
+ .module_id = MODULE_RTC,
+ .msg = "MODULE_RTC"
+ }, {
+ .module_id = MODULE_SSU,
+ .msg = "MODULE_SSU"
+ }, {
+ .module_id = MODULE_TM,
+ .msg = "MODULE_TM"
+ }, {
+ .module_id = MODULE_RCB_TX,
+ .msg = "MODULE_RCB_TX"
+ }, {
+ .module_id = MODULE_TXDMA,
+ .msg = "MODULE_TXDMA"
+ }, {
+ .module_id = MODULE_MASTER,
+ .msg = "MODULE_MASTER"
+ }
+};
+
+static const struct hclge_hw_type_id hclge_hw_type_id_st[] = {
+ {
+ .type_id = NONE_ERROR,
+ .msg = "none_error"
+ }, {
+ .type_id = FIFO_ERROR,
+ .msg = "fifo_error"
+ }, {
+ .type_id = MEMORY_ERROR,
+ .msg = "memory_error"
+ }, {
+ .type_id = POISON_ERROR,
+ .msg = "poison_error"
+ }, {
+ .type_id = MSIX_ECC_ERROR,
+ .msg = "msix_ecc_error"
+ }, {
+ .type_id = TQP_INT_ECC_ERROR,
+ .msg = "tqp_int_ecc_error"
+ }, {
+ .type_id = PF_ABNORMAL_INT_ERROR,
+ .msg = "pf_abnormal_int_error"
+ }, {
+ .type_id = MPF_ABNORMAL_INT_ERROR,
+ .msg = "mpf_abnormal_int_error"
+ }, {
+ .type_id = COMMON_ERROR,
+ .msg = "common_error"
+ }, {
+ .type_id = PORT_ERROR,
+ .msg = "port_error"
+ }, {
+ .type_id = ETS_ERROR,
+ .msg = "ets_error"
+ }, {
+ .type_id = NCSI_ERROR,
+ .msg = "ncsi_error"
+ }, {
+ .type_id = GLB_ERROR,
+ .msg = "glb_error"
+ }
+};
+
static void hclge_log_error(struct device *dev, char *reg,
const struct hclge_hw_error *err,
u32 err_sts, unsigned long *reset_requests)
@@ -1892,11 +1984,8 @@ static int hclge_handle_pf_msix_error(struct hclge_dev *hdev,
static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev,
unsigned long *reset_requests)
{
- struct hclge_mac_tnl_stats mac_tnl_stats;
- struct device *dev = &hdev->pdev->dev;
u32 mpf_bd_num, pf_bd_num, bd_num;
struct hclge_desc *desc;
- u32 status;
int ret;
/* query the number of bds for the MSIx int status */
@@ -1919,29 +2008,7 @@ static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev,
if (ret)
goto msi_error;
- /* query and clear mac tnl interruptions */
- hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_MAC_TNL_INT,
- true);
- ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
- if (ret) {
- dev_err(dev, "query mac tnl int cmd failed (%d)\n", ret);
- goto msi_error;
- }
-
- status = le32_to_cpu(desc->data[0]);
- if (status) {
- /* When mac tnl interrupt occurs, we record current time and
- * register status here in a fifo, then clear the status. So
- * that if link status changes suddenly at some time, we can
- * query them by debugfs.
- */
- mac_tnl_stats.time = local_clock();
- mac_tnl_stats.status = status;
- kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
- ret = hclge_clear_mac_tnl_int(hdev);
- if (ret)
- dev_err(dev, "clear mac tnl int failed (%d)\n", ret);
- }
+ ret = hclge_handle_mac_tnl(hdev);
msi_error:
kfree(desc);
@@ -1963,10 +2030,43 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
return hclge_handle_all_hw_msix_error(hdev, reset_requests);
}
-void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
+int hclge_handle_mac_tnl(struct hclge_dev *hdev)
{
-#define HCLGE_DESC_NO_DATA_LEN 8
+ struct hclge_mac_tnl_stats mac_tnl_stats;
+ struct device *dev = &hdev->pdev->dev;
+ struct hclge_desc desc;
+ u32 status;
+ int ret;
+ /* query and clear mac tnl interruptions */
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_MAC_TNL_INT, true);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(dev, "failed to query mac tnl int, ret = %d.\n", ret);
+ return ret;
+ }
+
+ status = le32_to_cpu(desc.data[0]);
+ if (status) {
+ /* When mac tnl interrupt occurs, we record current time and
+ * register status here in a fifo, then clear the status. So
+ * that if link status changes suddenly at some time, we can
+ * query them by debugfs.
+ */
+ mac_tnl_stats.time = local_clock();
+ mac_tnl_stats.status = status;
+ kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
+ ret = hclge_clear_mac_tnl_int(hdev);
+ if (ret)
+ dev_err(dev, "failed to clear mac tnl int, ret = %d.\n",
+ ret);
+ }
+
+ return ret;
+}
+
+void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
+{
struct hclge_dev *hdev = ae_dev->priv;
struct device *dev = &hdev->pdev->dev;
u32 mpf_bd_num, pf_bd_num, bd_num;
@@ -2015,3 +2115,167 @@ void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
msi_error:
kfree(desc);
}
+
+static void
+hclge_handle_error_type_reg_log(struct device *dev,
+ struct hclge_mod_err_info *mod_info,
+ struct hclge_type_reg_err_info *type_reg_info)
+{
+#define HCLGE_ERR_TYPE_MASK 0x7F
+#define HCLGE_ERR_TYPE_IS_RAS_OFFSET 7
+
+ u8 mod_id, total_module, type_id, total_type, i, is_ras;
+
+ mod_id = mod_info->mod_id;
+ type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK;
+ is_ras = type_reg_info->type_id >> HCLGE_ERR_TYPE_IS_RAS_OFFSET;
+
+ total_module = ARRAY_SIZE(hclge_hw_module_id_st);
+ total_type = ARRAY_SIZE(hclge_hw_type_id_st);
+
+ if (mod_id < total_module && type_id < total_type)
+ dev_err(dev,
+ "found %s %s, is %s error.\n",
+ hclge_hw_module_id_st[mod_id].msg,
+ hclge_hw_type_id_st[type_id].msg,
+ is_ras ? "ras" : "msix");
+ else
+ dev_err(dev,
+ "unknown module[%u] or type[%u].\n", mod_id, type_id);
+
+ dev_err(dev, "reg_value:\n");
+ for (i = 0; i < type_reg_info->reg_num; i++)
+ dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]);
+}
+
+static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev,
+ const u32 *buf, u32 buf_size)
+{
+ struct hclge_type_reg_err_info *type_reg_info;
+ struct hclge_dev *hdev = ae_dev->priv;
+ struct device *dev = &hdev->pdev->dev;
+ struct hclge_mod_err_info *mod_info;
+ struct hclge_sum_err_info *sum_info;
+ u8 mod_num, err_num, i;
+ u32 offset = 0;
+
+ sum_info = (struct hclge_sum_err_info *)&buf[offset++];
+ if (sum_info->reset_type &&
+ sum_info->reset_type != HNAE3_NONE_RESET)
+ set_bit(sum_info->reset_type, &ae_dev->hw_err_reset_req);
+ mod_num = sum_info->mod_num;
+
+ while (mod_num--) {
+ if (offset >= buf_size) {
+ dev_err(dev, "The offset(%u) exceeds buf's size(%u).\n",
+ offset, buf_size);
+ return;
+ }
+ mod_info = (struct hclge_mod_err_info *)&buf[offset++];
+ err_num = mod_info->err_num;
+
+ for (i = 0; i < err_num; i++) {
+ if (offset >= buf_size) {
+ dev_err(dev,
+ "The offset(%u) exceeds buf size(%u).\n",
+ offset, buf_size);
+ return;
+ }
+
+ type_reg_info = (struct hclge_type_reg_err_info *)
+ &buf[offset++];
+ hclge_handle_error_type_reg_log(dev, mod_info,
+ type_reg_info);
+
+ offset += type_reg_info->reg_num;
+ }
+ }
+}
+
+static int hclge_query_all_err_bd_num(struct hclge_dev *hdev, u32 *bd_num)
+{
+ struct device *dev = &hdev->pdev->dev;
+ struct hclge_desc desc_bd;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_ALL_ERR_BD_NUM, true);
+ ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
+ if (ret) {
+ dev_err(dev, "failed to query error bd_num, ret = %d.\n", ret);
+ return ret;
+ }
+
+ *bd_num = le32_to_cpu(desc_bd.data[0]);
+ if (!(*bd_num)) {
+ dev_err(dev, "The value of bd_num is 0!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int hclge_query_all_err_info(struct hclge_dev *hdev,
+ struct hclge_desc *desc, u32 bd_num)
+{
+ struct device *dev = &hdev->pdev->dev;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(desc, HCLGE_QUERY_ALL_ERR_INFO, true);
+ ret = hclge_cmd_send(&hdev->hw, desc, bd_num);
+ if (ret)
+ dev_err(dev, "failed to query error info, ret = %d.\n", ret);
+
+ return ret;
+}
+
+int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev)
+{
+ u32 bd_num, desc_len, buf_len, buf_size, i;
+ struct hclge_dev *hdev = ae_dev->priv;
+ struct hclge_desc *desc;
+ __le32 *desc_data;
+ u32 *buf;
+ int ret;
+
+ ret = hclge_query_all_err_bd_num(hdev, &bd_num);
+ if (ret)
+ goto out;
+
+ desc_len = bd_num * sizeof(struct hclge_desc);
+ desc = kzalloc(desc_len, GFP_KERNEL);
+ if (!desc) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = hclge_query_all_err_info(hdev, desc, bd_num);
+ if (ret)
+ goto err_desc;
+
+ buf_len = bd_num * sizeof(struct hclge_desc) - HCLGE_DESC_NO_DATA_LEN;
+ buf_size = buf_len / sizeof(u32);
+
+ desc_data = kzalloc(buf_len, GFP_KERNEL);
+ if (!desc_data)
+ return -ENOMEM;
+
+ buf = kzalloc(buf_len, GFP_KERNEL);
+ if (!buf) {
+ ret = -ENOMEM;
+ goto err_buf_alloc;
+ }
+
+ memcpy(desc_data, &desc[0].data[0], buf_len);
+ for (i = 0; i < buf_size; i++)
+ buf[i] = le32_to_cpu(desc_data[i]);
+
+ hclge_handle_error_module_log(ae_dev, buf, buf_size);
+ kfree(buf);
+
+err_buf_alloc:
+ kfree(desc_data);
+err_desc:
+ kfree(desc);
+out:
+ return ret;
+}