summaryrefslogtreecommitdiff
path: root/include/ras/ras_event.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/ras/ras_event.h')
-rw-r--r--include/ras/ras_event.h293
1 files changed, 288 insertions, 5 deletions
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 21cdb0b7b0fb..eaecc3c5f772 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM ras
#define TRACE_INCLUDE_FILE ras_event
@@ -8,6 +9,72 @@
#include <linux/tracepoint.h>
#include <linux/edac.h>
#include <linux/ktime.h>
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/cper.h>
+
+/*
+ * MCE Extended Error Log trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event.
+ */
+
+/* memory trace event */
+
+#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE)
+TRACE_EVENT(extlog_mem_event,
+ TP_PROTO(struct cper_sec_mem_err *mem,
+ u32 err_seq,
+ const guid_t *fru_id,
+ const char *fru_text,
+ u8 sev),
+
+ TP_ARGS(mem, err_seq, fru_id, fru_text, sev),
+
+ TP_STRUCT__entry(
+ __field(u32, err_seq)
+ __field(u8, etype)
+ __field(u8, sev)
+ __field(u64, pa)
+ __field(u8, pa_mask_lsb)
+ __field_struct(guid_t, fru_id)
+ __string(fru_text, fru_text)
+ __field_struct(struct cper_mem_err_compact, data)
+ ),
+
+ TP_fast_assign(
+ __entry->err_seq = err_seq;
+ if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
+ __entry->etype = mem->error_type;
+ else
+ __entry->etype = ~0;
+ __entry->sev = sev;
+ if (mem->validation_bits & CPER_MEM_VALID_PA)
+ __entry->pa = mem->physical_addr;
+ else
+ __entry->pa = ~0ull;
+
+ if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
+ __entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask);
+ else
+ __entry->pa_mask_lsb = ~0;
+ __entry->fru_id = *fru_id;
+ __assign_str(fru_text);
+ cper_mem_err_pack(mem, &__entry->data);
+ ),
+
+ TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s",
+ __entry->err_seq,
+ cper_severity_str(__entry->sev),
+ cper_mem_err_type_str(__entry->etype),
+ __entry->pa,
+ __entry->pa_mask_lsb,
+ cper_mem_err_unpack(p, &__entry->data),
+ &__entry->fru_id,
+ __get_str(fru_text))
+);
+#endif
/*
* Hardware Events Report
@@ -63,8 +130,8 @@ TRACE_EVENT(mc_event,
TP_fast_assign(
__entry->error_type = err_type;
- __assign_str(msg, error_msg);
- __assign_str(label, label);
+ __assign_str(msg);
+ __assign_str(label);
__entry->error_count = error_count;
__entry->mc_index = mc_index;
__entry->top_layer = top_layer;
@@ -73,14 +140,14 @@ TRACE_EVENT(mc_event,
__entry->address = address;
__entry->grain_bits = grain_bits;
__entry->syndrome = syndrome;
- __assign_str(driver_detail, driver_detail);
+ __assign_str(driver_detail);
),
TP_printk("%d %s error%s:%s%s on %s (mc:%d location:%d:%d:%d address:0x%08lx grain:%d syndrome:0x%08lx%s%s)",
__entry->error_count,
mc_event_error_type(__entry->error_type),
__entry->error_count > 1 ? "s" : "",
- ((char *)__get_str(msg))[0] ? " " : "",
+ __get_str(msg)[0] ? " " : "",
__get_str(msg),
__get_str(label),
__entry->mc_index,
@@ -90,10 +157,226 @@ TRACE_EVENT(mc_event,
__entry->address,
1 << __entry->grain_bits,
__entry->syndrome,
- ((char *)__get_str(driver_detail))[0] ? " " : "",
+ __get_str(driver_detail)[0] ? " " : "",
__get_str(driver_detail))
);
+/*
+ * ARM Processor Events Report
+ *
+ * This event is generated when hardware detects an ARM processor error
+ * has occurred. UEFI 2.6 spec section N.2.4.4.
+ */
+#define APEIL "ARM Processor Err Info data len"
+#define APEID "ARM Processor Err Info raw data"
+#define APECIL "ARM Processor Err Context Info data len"
+#define APECID "ARM Processor Err Context Info raw data"
+#define VSEIL "Vendor Specific Err Info data len"
+#define VSEID "Vendor Specific Err Info raw data"
+TRACE_EVENT(arm_event,
+
+ TP_PROTO(const struct cper_sec_proc_arm *proc,
+ const u8 *pei_err,
+ const u32 pei_len,
+ const u8 *ctx_err,
+ const u32 ctx_len,
+ const u8 *oem,
+ const u32 oem_len,
+ u8 sev,
+ int cpu),
+
+ TP_ARGS(proc, pei_err, pei_len, ctx_err, ctx_len, oem, oem_len, sev, cpu),
+
+ TP_STRUCT__entry(
+ __field(u64, mpidr)
+ __field(u64, midr)
+ __field(u32, running_state)
+ __field(u32, psci_state)
+ __field(u8, affinity)
+ __field(u32, pei_len)
+ __dynamic_array(u8, pei_buf, pei_len)
+ __field(u32, ctx_len)
+ __dynamic_array(u8, ctx_buf, ctx_len)
+ __field(u32, oem_len)
+ __dynamic_array(u8, oem_buf, oem_len)
+ __field(u8, sev)
+ __field(int, cpu)
+ ),
+
+ TP_fast_assign(
+ if (proc->validation_bits & CPER_ARM_VALID_AFFINITY_LEVEL)
+ __entry->affinity = proc->affinity_level;
+ else
+ __entry->affinity = ~0;
+ if (proc->validation_bits & CPER_ARM_VALID_MPIDR)
+ __entry->mpidr = proc->mpidr;
+ else
+ __entry->mpidr = 0ULL;
+ __entry->midr = proc->midr;
+ if (proc->validation_bits & CPER_ARM_VALID_RUNNING_STATE) {
+ __entry->running_state = proc->running_state;
+ __entry->psci_state = proc->psci_state;
+ } else {
+ __entry->running_state = ~0;
+ __entry->psci_state = ~0;
+ }
+ __entry->pei_len = pei_len;
+ memcpy(__get_dynamic_array(pei_buf), pei_err, pei_len);
+ __entry->ctx_len = ctx_len;
+ memcpy(__get_dynamic_array(ctx_buf), ctx_err, ctx_len);
+ __entry->oem_len = oem_len;
+ memcpy(__get_dynamic_array(oem_buf), oem, oem_len);
+ __entry->sev = sev;
+ __entry->cpu = cpu;
+ ),
+
+ TP_printk("cpu: %d; error: %d; affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
+ "running state: %d; PSCI state: %d; "
+ "%s: %d; %s: %s; %s: %d; %s: %s; %s: %d; %s: %s",
+ __entry->cpu,
+ __entry->sev,
+ __entry->affinity, __entry->mpidr, __entry->midr,
+ __entry->running_state, __entry->psci_state,
+ APEIL, __entry->pei_len, APEID,
+ __print_hex(__get_dynamic_array(pei_buf), __entry->pei_len),
+ APECIL, __entry->ctx_len, APECID,
+ __print_hex(__get_dynamic_array(ctx_buf), __entry->ctx_len),
+ VSEIL, __entry->oem_len, VSEID,
+ __print_hex(__get_dynamic_array(oem_buf), __entry->oem_len))
+);
+
+/*
+ * Non-Standard Section Report
+ *
+ * This event is generated when hardware detected a hardware
+ * error event, which may be of non-standard section as defined
+ * in UEFI spec appendix "Common Platform Error Record", or may
+ * be of sections for which TRACE_EVENT is not defined.
+ *
+ */
+TRACE_EVENT(non_standard_event,
+
+ TP_PROTO(const guid_t *sec_type,
+ const guid_t *fru_id,
+ const char *fru_text,
+ const u8 sev,
+ const u8 *err,
+ const u32 len),
+
+ TP_ARGS(sec_type, fru_id, fru_text, sev, err, len),
+
+ TP_STRUCT__entry(
+ __array(char, sec_type, UUID_SIZE)
+ __array(char, fru_id, UUID_SIZE)
+ __string(fru_text, fru_text)
+ __field(u8, sev)
+ __field(u32, len)
+ __dynamic_array(u8, buf, len)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->sec_type, sec_type, UUID_SIZE);
+ memcpy(__entry->fru_id, fru_id, UUID_SIZE);
+ __assign_str(fru_text);
+ __entry->sev = sev;
+ __entry->len = len;
+ memcpy(__get_dynamic_array(buf), err, len);
+ ),
+
+ TP_printk("severity: %d; sec type:%pU; FRU: %pU %s; data len:%d; raw data:%s",
+ __entry->sev, __entry->sec_type,
+ __entry->fru_id, __get_str(fru_text),
+ __entry->len,
+ __print_hex(__get_dynamic_array(buf), __entry->len))
+);
+
+#ifdef CONFIG_PCIEAER
+/*
+ * PCIe AER Trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event on a PCIe device. The event report has
+ * the following structure:
+ *
+ * char * dev_name - The name of the slot where the device resides
+ * ([domain:]bus:device.function).
+ * u32 status - Either the correctable or uncorrectable register
+ * indicating what error or errors have been seen
+ * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED
+ */
+
+#define aer_correctable_errors \
+ {PCI_ERR_COR_RCVR, "Receiver Error"}, \
+ {PCI_ERR_COR_BAD_TLP, "Bad TLP"}, \
+ {PCI_ERR_COR_BAD_DLLP, "Bad DLLP"}, \
+ {PCI_ERR_COR_REP_ROLL, "RELAY_NUM Rollover"}, \
+ {PCI_ERR_COR_REP_TIMER, "Replay Timer Timeout"}, \
+ {PCI_ERR_COR_ADV_NFAT, "Advisory Non-Fatal Error"}, \
+ {PCI_ERR_COR_INTERNAL, "Corrected Internal Error"}, \
+ {PCI_ERR_COR_LOG_OVER, "Header Log Overflow"}
+
+#define aer_uncorrectable_errors \
+ {PCI_ERR_UNC_UND, "Undefined"}, \
+ {PCI_ERR_UNC_DLP, "Data Link Protocol Error"}, \
+ {PCI_ERR_UNC_SURPDN, "Surprise Down Error"}, \
+ {PCI_ERR_UNC_POISON_TLP,"Poisoned TLP"}, \
+ {PCI_ERR_UNC_FCP, "Flow Control Protocol Error"}, \
+ {PCI_ERR_UNC_COMP_TIME, "Completion Timeout"}, \
+ {PCI_ERR_UNC_COMP_ABORT,"Completer Abort"}, \
+ {PCI_ERR_UNC_UNX_COMP, "Unexpected Completion"}, \
+ {PCI_ERR_UNC_RX_OVER, "Receiver Overflow"}, \
+ {PCI_ERR_UNC_MALF_TLP, "Malformed TLP"}, \
+ {PCI_ERR_UNC_ECRC, "ECRC Error"}, \
+ {PCI_ERR_UNC_UNSUP, "Unsupported Request Error"}, \
+ {PCI_ERR_UNC_ACSV, "ACS Violation"}, \
+ {PCI_ERR_UNC_INTN, "Uncorrectable Internal Error"},\
+ {PCI_ERR_UNC_MCBTLP, "MC Blocked TLP"}, \
+ {PCI_ERR_UNC_ATOMEG, "AtomicOp Egress Blocked"}, \
+ {PCI_ERR_UNC_TLPPRE, "TLP Prefix Blocked Error"}
+
+TRACE_EVENT(aer_event,
+ TP_PROTO(const char *dev_name,
+ const u32 status,
+ const u8 severity,
+ const u8 tlp_header_valid,
+ struct pcie_tlp_log *tlp),
+
+ TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp),
+
+ TP_STRUCT__entry(
+ __string( dev_name, dev_name )
+ __field( u32, status )
+ __field( u8, severity )
+ __field( u8, tlp_header_valid)
+ __array( u32, tlp_header, PCIE_STD_MAX_TLP_HEADERLOG)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev_name);
+ __entry->status = status;
+ __entry->severity = severity;
+ __entry->tlp_header_valid = tlp_header_valid;
+ if (tlp_header_valid) {
+ int i;
+
+ for (i = 0; i < PCIE_STD_MAX_TLP_HEADERLOG; i++)
+ __entry->tlp_header[i] = tlp->dw[i];
+ }
+ ),
+
+ TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n",
+ __get_str(dev_name),
+ __entry->severity == AER_CORRECTABLE ? "Corrected" :
+ __entry->severity == AER_FATAL ?
+ "Fatal" : "Uncorrected, non-fatal",
+ __entry->severity == AER_CORRECTABLE ?
+ __print_flags(__entry->status, "|", aer_correctable_errors) :
+ __print_flags(__entry->status, "|", aer_uncorrectable_errors),
+ __entry->tlp_header_valid ?
+ __print_array(__entry->tlp_header, PCIE_STD_MAX_TLP_HEADERLOG, 4) :
+ "Not available")
+);
+#endif /* CONFIG_PCIEAER */
#endif /* _TRACE_HW_EVENT_MC_H */
/* This part must be outside protection */