From 4a20bc3e207488064e08fc5d7220d6acf95c80dd Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 8 Dec 2022 09:02:00 -0800 Subject: cxl/pci: Move tracepoint definitions to drivers/cxl/core/ CXL is using tracepoints for reporting RAS capability register payloads for AER events, and has plans to use tracepoints for the output payload of Get Poison List and Get Event Records commands. For organization purposes it would be nice to keep those all under a single + local CXL trace system. This also organization also potentially helps in the future when CXL drivers expand beyond generic memory expanders, however that would also entail a move away from the expander-specific cxl_dev_state context, save that for later. Note that the powerpc-specific drivers/misc/cxl/ also defines a 'cxl' trace system, however, it is unlikely that a single platform will ever load both drivers simultaneously. Cc: Steven Rostedt Tested-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/167051869176.436579.9728373544811641087.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/Makefile | 3 ++ drivers/cxl/core/pci.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/core/trace.c | 5 +++ drivers/cxl/core/trace.h | 109 ++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 2 + drivers/cxl/cxlpci.h | 3 ++ drivers/cxl/pci.c | 111 --------------------------------------------- 7 files changed, 234 insertions(+), 111 deletions(-) create mode 100644 drivers/cxl/core/trace.c create mode 100644 drivers/cxl/core/trace.h (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 79c7257f4107..ca4ae31d8f57 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -3,6 +3,8 @@ obj-$(CONFIG_CXL_BUS) += cxl_core.o obj-$(CONFIG_CXL_SUSPEND) += suspend.o ccflags-y += -I$(srctree)/drivers/cxl +CFLAGS_trace.o = -DTRACE_INCLUDE_PATH=. -I$(src) + cxl_core-y := port.o cxl_core-y += pmem.o cxl_core-y += regs.o @@ -10,4 +12,5 @@ cxl_core-y += memdev.o cxl_core-y += mbox.o cxl_core-y += pci.o cxl_core-y += hdm.o +cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 57764e9cd19d..1d1492440287 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -9,6 +9,7 @@ #include #include #include "core.h" +#include "trace.h" /** * DOC: cxl core pci @@ -622,3 +623,114 @@ void read_cdat_data(struct cxl_port *port) } } EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); + +void cxl_cor_error_detected(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + void __iomem *addr; + u32 status; + + if (!cxlds->regs.ras) + return; + + addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); + trace_cxl_aer_correctable_error(dev, status); + } +} +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); + +/* CXL spec rev3.0 8.2.4.16.1 */ +static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) +{ + void __iomem *addr; + u32 *log_addr; + int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); + + addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET; + log_addr = log; + + for (i = 0; i < log_u32_size; i++) { + *log_addr = readl(addr); + log_addr++; + addr += sizeof(u32); + } +} + +/* + * Log the state of the RAS status registers and prepare them to log the + * next error status. Return 1 if reset needed. + */ +static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) +{ + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + u32 hl[CXL_HEADERLOG_SIZE_U32]; + void __iomem *addr; + u32 status; + u32 fe; + + if (!cxlds->regs.ras) + return false; + + addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) + return false; + + /* If multiple errors, log header points to first error from ctrl reg */ + if (hweight32(status) > 1) { + addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr))); + } else { + fe = status; + } + + header_log_copy(cxlds, hl); + trace_cxl_aer_uncorrectable_error(dev, status, fe, hl); + writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); + + return true; +} + +pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + bool ue; + + /* + * A frozen channel indicates an impending reset which is fatal to + * CXL.mem operation, and will likely crash the system. On the off + * chance the situation is recoverable dump the status of the RAS + * capability registers and bounce the active state of the memdev. + */ + ue = cxl_report_and_clear(cxlds); + + switch (state) { + case pci_channel_io_normal: + if (ue) { + device_release_driver(dev); + return PCI_ERS_RESULT_NEED_RESET; + } + return PCI_ERS_RESULT_CAN_RECOVER; + case pci_channel_io_frozen: + dev_warn(&pdev->dev, + "%s: frozen state error detected, disable CXL.mem\n", + dev_name(dev)); + device_release_driver(dev); + return PCI_ERS_RESULT_NEED_RESET; + case pci_channel_io_perm_failure: + dev_warn(&pdev->dev, + "failure state error detected, request disconnect\n"); + return PCI_ERS_RESULT_DISCONNECT; + } + return PCI_ERS_RESULT_NEED_RESET; +} +EXPORT_SYMBOL_NS_GPL(cxl_error_detected, CXL); diff --git a/drivers/cxl/core/trace.c b/drivers/cxl/core/trace.c new file mode 100644 index 000000000000..29ae7ce81dc5 --- /dev/null +++ b/drivers/cxl/core/trace.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ + +#define CREATE_TRACE_POINTS +#include "trace.h" diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h new file mode 100644 index 000000000000..20ca2fe2ca8e --- /dev/null +++ b/drivers/cxl/core/trace.h @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cxl + +#if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _CXL_EVENTS_H + +#include +#include + +#define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0) +#define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1) +#define CXL_RAS_UC_CACHE_BE_PARITY BIT(2) +#define CXL_RAS_UC_CACHE_DATA_ECC BIT(3) +#define CXL_RAS_UC_MEM_DATA_PARITY BIT(4) +#define CXL_RAS_UC_MEM_ADDR_PARITY BIT(5) +#define CXL_RAS_UC_MEM_BE_PARITY BIT(6) +#define CXL_RAS_UC_MEM_DATA_ECC BIT(7) +#define CXL_RAS_UC_REINIT_THRESH BIT(8) +#define CXL_RAS_UC_RSVD_ENCODE BIT(9) +#define CXL_RAS_UC_POISON BIT(10) +#define CXL_RAS_UC_RECV_OVERFLOW BIT(11) +#define CXL_RAS_UC_INTERNAL_ERR BIT(14) +#define CXL_RAS_UC_IDE_TX_ERR BIT(15) +#define CXL_RAS_UC_IDE_RX_ERR BIT(16) + +#define show_uc_errs(status) __print_flags(status, " | ", \ + { CXL_RAS_UC_CACHE_DATA_PARITY, "Cache Data Parity Error" }, \ + { CXL_RAS_UC_CACHE_ADDR_PARITY, "Cache Address Parity Error" }, \ + { CXL_RAS_UC_CACHE_BE_PARITY, "Cache Byte Enable Parity Error" }, \ + { CXL_RAS_UC_CACHE_DATA_ECC, "Cache Data ECC Error" }, \ + { CXL_RAS_UC_MEM_DATA_PARITY, "Memory Data Parity Error" }, \ + { CXL_RAS_UC_MEM_ADDR_PARITY, "Memory Address Parity Error" }, \ + { CXL_RAS_UC_MEM_BE_PARITY, "Memory Byte Enable Parity Error" }, \ + { CXL_RAS_UC_MEM_DATA_ECC, "Memory Data ECC Error" }, \ + { CXL_RAS_UC_REINIT_THRESH, "REINIT Threshold Hit" }, \ + { CXL_RAS_UC_RSVD_ENCODE, "Received Unrecognized Encoding" }, \ + { CXL_RAS_UC_POISON, "Received Poison From Peer" }, \ + { CXL_RAS_UC_RECV_OVERFLOW, "Receiver Overflow" }, \ + { CXL_RAS_UC_INTERNAL_ERR, "Component Specific Error" }, \ + { CXL_RAS_UC_IDE_TX_ERR, "IDE Tx Error" }, \ + { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \ +) + +TRACE_EVENT(cxl_aer_uncorrectable_error, + TP_PROTO(const struct device *dev, u32 status, u32 fe, u32 *hl), + TP_ARGS(dev, status, fe, hl), + TP_STRUCT__entry( + __string(dev_name, dev_name(dev)) + __field(u32, status) + __field(u32, first_error) + __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) + ), + TP_fast_assign( + __assign_str(dev_name, dev_name(dev)); + __entry->status = status; + __entry->first_error = fe; + /* + * Embed the 512B headerlog data for user app retrieval and + * parsing, but no need to print this in the trace buffer. + */ + memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); + ), + TP_printk("%s: status: '%s' first_error: '%s'", + __get_str(dev_name), + show_uc_errs(__entry->status), + show_uc_errs(__entry->first_error) + ) +); + +#define CXL_RAS_CE_CACHE_DATA_ECC BIT(0) +#define CXL_RAS_CE_MEM_DATA_ECC BIT(1) +#define CXL_RAS_CE_CRC_THRESH BIT(2) +#define CLX_RAS_CE_RETRY_THRESH BIT(3) +#define CXL_RAS_CE_CACHE_POISON BIT(4) +#define CXL_RAS_CE_MEM_POISON BIT(5) +#define CXL_RAS_CE_PHYS_LAYER_ERR BIT(6) + +#define show_ce_errs(status) __print_flags(status, " | ", \ + { CXL_RAS_CE_CACHE_DATA_ECC, "Cache Data ECC Error" }, \ + { CXL_RAS_CE_MEM_DATA_ECC, "Memory Data ECC Error" }, \ + { CXL_RAS_CE_CRC_THRESH, "CRC Threshold Hit" }, \ + { CLX_RAS_CE_RETRY_THRESH, "Retry Threshold" }, \ + { CXL_RAS_CE_CACHE_POISON, "Received Cache Poison From Peer" }, \ + { CXL_RAS_CE_MEM_POISON, "Received Memory Poison From Peer" }, \ + { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \ +) + +TRACE_EVENT(cxl_aer_correctable_error, + TP_PROTO(const struct device *dev, u32 status), + TP_ARGS(dev, status), + TP_STRUCT__entry( + __string(dev_name, dev_name(dev)) + __field(u32, status) + ), + TP_fast_assign( + __assign_str(dev_name, dev_name(dev)); + __entry->status = status; + ), + TP_printk("%s: status: '%s'", + __get_str(dev_name), show_ce_errs(__entry->status) + ) +); + +#endif /* _CXL_EVENTS_H */ + +#define TRACE_INCLUDE_FILE trace +#include diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 1b1cf459ac77..aa3af3bb73b2 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -140,6 +140,8 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) #define CXL_RAS_CAP_CONTROL_FE_MASK GENMASK(5, 0) #define CXL_RAS_HEADER_LOG_OFFSET 0x18 #define CXL_RAS_CAPABILITY_LENGTH 0x58 +#define CXL_HEADERLOG_SIZE SZ_512 +#define CXL_HEADERLOG_SIZE_U32 SZ_512 / sizeof(u32) /* CXL 2.0 8.2.8.1 Device Capabilities Array Register */ #define CXLDEV_CAP_ARRAY_OFFSET 0x0 diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 920909791bb9..77dbdb980b12 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -66,4 +66,7 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm); void read_cdat_data(struct cxl_port *port); +void cxl_cor_error_detected(struct pci_dev *pdev); +pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, + pci_channel_state_t state); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 33083a522fd1..3a66aadb4df0 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -14,8 +14,6 @@ #include "cxlmem.h" #include "cxlpci.h" #include "cxl.h" -#define CREATE_TRACE_POINTS -#include /** * DOC: cxl pci @@ -514,96 +512,6 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl); -/* CXL spec rev3.0 8.2.4.16.1 */ -static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) -{ - void __iomem *addr; - u32 *log_addr; - int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); - - addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET; - log_addr = log; - - for (i = 0; i < log_u32_size; i++) { - *log_addr = readl(addr); - log_addr++; - addr += sizeof(u32); - } -} - -/* - * Log the state of the RAS status registers and prepare them to log the - * next error status. Return 1 if reset needed. - */ -static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) -{ - struct cxl_memdev *cxlmd = cxlds->cxlmd; - struct device *dev = &cxlmd->dev; - u32 hl[CXL_HEADERLOG_SIZE_U32]; - void __iomem *addr; - u32 status; - u32 fe; - - if (!cxlds->regs.ras) - return false; - - addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; - status = readl(addr); - if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) - return false; - - /* If multiple errors, log header points to first error from ctrl reg */ - if (hweight32(status) > 1) { - addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; - fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr))); - } else { - fe = status; - } - - header_log_copy(cxlds, hl); - trace_cxl_aer_uncorrectable_error(dev, status, fe, hl); - writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); - - return true; -} - -static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct cxl_memdev *cxlmd = cxlds->cxlmd; - struct device *dev = &cxlmd->dev; - bool ue; - - /* - * A frozen channel indicates an impending reset which is fatal to - * CXL.mem operation, and will likely crash the system. On the off - * chance the situation is recoverable dump the status of the RAS - * capability registers and bounce the active state of the memdev. - */ - ue = cxl_report_and_clear(cxlds); - - switch (state) { - case pci_channel_io_normal: - if (ue) { - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - } - return PCI_ERS_RESULT_CAN_RECOVER; - case pci_channel_io_frozen: - dev_warn(&pdev->dev, - "%s: frozen state error detected, disable CXL.mem\n", - dev_name(dev)); - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - case pci_channel_io_perm_failure: - dev_warn(&pdev->dev, - "failure state error detected, request disconnect\n"); - return PCI_ERS_RESULT_DISCONNECT; - } - return PCI_ERS_RESULT_NEED_RESET; -} - static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev) { struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); @@ -628,25 +536,6 @@ static void cxl_error_resume(struct pci_dev *pdev) dev->driver ? "successful" : "failed"); } -static void cxl_cor_error_detected(struct pci_dev *pdev) -{ - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct cxl_memdev *cxlmd = cxlds->cxlmd; - struct device *dev = &cxlmd->dev; - void __iomem *addr; - u32 status; - - if (!cxlds->regs.ras) - return; - - addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; - status = readl(addr); - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); - trace_cxl_aer_correctable_error(dev, status); - } -} - static const struct pci_error_handlers cxl_error_handlers = { .error_detected = cxl_error_detected, .slot_reset = cxl_slot_reset, -- cgit From e520d52d7cabf6817b2c56d2681e9618bc5540ae Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 15 Dec 2022 10:38:36 -0800 Subject: cxl/region: Only warn about cpu_cache_invalidate_memregion() once No need for more than once per module load. Signed-off-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20221215183836.24136-1-dave@stgolabs.net Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 02f28da519e3..c11a6ab5e48d 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1967,7 +1967,7 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) if (!cpu_cache_has_invalidate_memregion()) { if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) { - dev_warn( + dev_warn_once( &cxlr->dev, "Bypassing cpu_cache_invalidate_memregion() for testing!\n"); clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags); -- cgit From 852db33c6c180a48268eca52e25b267ed32de3ab Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 3 Jan 2023 22:01:50 +0100 Subject: cxl/pci: Show opcode in debug messages when sending a command For debugging it is very helpful to see which commands are sent. Add it to the debug message. Signed-off-by: Robert Richter Link: https://lore.kernel.org/r/20230103210151.1126873-1-rrichter@amd.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 3a66aadb4df0..2bbebbc7e032 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -160,7 +160,7 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET); /* #4 */ - dev_dbg(dev, "Sending command\n"); + dev_dbg(dev, "Sending command: 0x%04x\n", mbox_cmd->opcode); writel(CXLDEV_MBOX_CTRL_DOORBELL, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); -- cgit From af3ea9ab61d728d5a8be01bbec6d5cf7551b9600 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 16 Dec 2022 17:33:38 -0800 Subject: cxl/region: Clarify when a cxld->commit() callback is mandatory Both cxl_switch_decoders() and cxl_endpoint_decoders() are considered by cxl_region_decode_commit(). Flag cases where cxl_switch_decoders with multiple targets, or cxl_endpoint_decoders do not have a commit callback set. The switch case is unlikely to happen since switches are only enumerated by the CXL core, but the endpoint case may support decoders defined by drivers outside of drivers/cxl, like accerator drivers. Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167124081824.1626103.1555704405392757219.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index c11a6ab5e48d..60828d01972a 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -156,6 +156,22 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) return 0; } +static int commit_decoder(struct cxl_decoder *cxld) +{ + struct cxl_switch_decoder *cxlsd = NULL; + + if (cxld->commit) + return cxld->commit(cxld); + + if (is_switch_decoder(&cxld->dev)) + cxlsd = to_cxl_switch_decoder(&cxld->dev); + + if (dev_WARN_ONCE(&cxld->dev, !cxlsd || cxlsd->nr_targets > 1, + "->commit() is required\n")) + return -ENXIO; + return 0; +} + static int cxl_region_decode_commit(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; @@ -174,8 +190,7 @@ static int cxl_region_decode_commit(struct cxl_region *cxlr) iter = to_cxl_port(iter->dev.parent)) { cxl_rr = cxl_rr_load(iter, cxlr); cxld = cxl_rr->decoder; - if (cxld->commit) - rc = cxld->commit(cxld); + rc = commit_decoder(cxld); if (rc) break; } -- cgit From 172738bbccdb4ef76bdd72fc72a315c741c39161 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 25 Jan 2023 15:32:57 -0800 Subject: cxl/port: Link the 'parent_dport' in portX/ and endpointX/ sysfs Similar to the justification in: 1b58b4cac6fc ("cxl/port: Record parent dport when adding ports") ...userspace wants to know the routing information for ports for calculating the memdev order for region creation among other things. Cache the information the kernel discovers at enumeration time in a 'parent_dport' attribute to save userspace the time of trawling sysfs to recover the same information. Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167124082375.1626103.6047000000121298560.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index b631a0520456..410c036c09fa 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -583,6 +583,29 @@ static int devm_cxl_link_uport(struct device *host, struct cxl_port *port) return devm_add_action_or_reset(host, cxl_unlink_uport, port); } +static void cxl_unlink_parent_dport(void *_port) +{ + struct cxl_port *port = _port; + + sysfs_remove_link(&port->dev.kobj, "parent_dport"); +} + +static int devm_cxl_link_parent_dport(struct device *host, + struct cxl_port *port, + struct cxl_dport *parent_dport) +{ + int rc; + + if (!parent_dport) + return 0; + + rc = sysfs_create_link(&port->dev.kobj, &parent_dport->dport->kobj, + "parent_dport"); + if (rc) + return rc; + return devm_add_action_or_reset(host, cxl_unlink_parent_dport, port); +} + static struct lock_class_key cxl_port_key; static struct cxl_port *cxl_port_alloc(struct device *uport, @@ -692,6 +715,10 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host, if (rc) return ERR_PTR(rc); + rc = devm_cxl_link_parent_dport(host, port, parent_dport); + if (rc) + return ERR_PTR(rc); + return port; err: @@ -1164,6 +1191,7 @@ static void delete_endpoint(void *data) device_lock(parent); if (parent->driver && !endpoint->dead) { + devm_release_action(parent, cxl_unlink_parent_dport, endpoint); devm_release_action(parent, cxl_unlink_uport, endpoint); devm_release_action(parent, unregister_port, endpoint); } @@ -1194,6 +1222,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL); */ static void delete_switch_port(struct cxl_port *port) { + devm_release_action(port->dev.parent, cxl_unlink_parent_dport, port); devm_release_action(port->dev.parent, cxl_unlink_uport, port); devm_release_action(port->dev.parent, unregister_port, port); } -- cgit From 6ebe28f9ec7228e1a35df6074695ac11d7cdcf68 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 17 Jan 2023 21:53:36 -0800 Subject: cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 148 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/core/trace.h | 121 +++++++++++++++++++++++++++++++++++++- drivers/cxl/cxl.h | 12 ++++ drivers/cxl/cxlmem.h | 79 +++++++++++++++++++++++++ drivers/cxl/pci.c | 33 +++++++++++ 5 files changed, 392 insertions(+), 1 deletion(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index b03fba212799..c329c1e67a2c 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -8,6 +8,7 @@ #include #include "core.h" +#include "trace.h" static bool cxl_raw_allow_all; @@ -717,6 +718,152 @@ out: } EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL); +static int cxl_clear_event_record(struct cxl_dev_state *cxlds, + enum cxl_event_log_type log, + struct cxl_get_event_payload *get_pl) +{ + struct cxl_mbox_clear_event_payload *payload; + u16 total = le16_to_cpu(get_pl->record_count); + u8 max_handles = CXL_CLEAR_EVENT_MAX_HANDLES; + size_t pl_size = struct_size(payload, handles, max_handles); + struct cxl_mbox_cmd mbox_cmd; + u16 cnt; + int rc = 0; + int i; + + /* Payload size may limit the max handles */ + if (pl_size > cxlds->payload_size) { + max_handles = (cxlds->payload_size - sizeof(*payload)) / + sizeof(__le16); + pl_size = struct_size(payload, handles, max_handles); + } + + payload = kvzalloc(pl_size, GFP_KERNEL); + if (!payload) + return -ENOMEM; + + *payload = (struct cxl_mbox_clear_event_payload) { + .event_log = log, + }; + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_CLEAR_EVENT_RECORD, + .payload_in = payload, + .size_in = pl_size, + }; + + /* + * Clear Event Records uses u8 for the handle cnt while Get Event + * Record can return up to 0xffff records. + */ + i = 0; + for (cnt = 0; cnt < total; cnt++) { + payload->handles[i++] = get_pl->records[cnt].hdr.handle; + dev_dbg(cxlds->dev, "Event log '%d': Clearing %u\n", + log, le16_to_cpu(payload->handles[i])); + + if (i == max_handles) { + payload->nr_recs = i; + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + if (rc) + goto free_pl; + i = 0; + } + } + + /* Clear what is left if any */ + if (i) { + payload->nr_recs = i; + mbox_cmd.size_in = struct_size(payload, handles, i); + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + if (rc) + goto free_pl; + } + +free_pl: + kvfree(payload); + return rc; +} + +static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, + enum cxl_event_log_type type) +{ + struct cxl_get_event_payload *payload; + struct cxl_mbox_cmd mbox_cmd; + u8 log_type = type; + u16 nr_rec; + + mutex_lock(&cxlds->event.log_lock); + payload = cxlds->event.buf; + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_EVENT_RECORD, + .payload_in = &log_type, + .size_in = sizeof(log_type), + .payload_out = payload, + .size_out = cxlds->payload_size, + .min_out = struct_size(payload, records, 0), + }; + + do { + int rc, i; + + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + if (rc) { + dev_err_ratelimited(cxlds->dev, + "Event log '%d': Failed to query event records : %d", + type, rc); + break; + } + + nr_rec = le16_to_cpu(payload->record_count); + if (!nr_rec) + break; + + for (i = 0; i < nr_rec; i++) + trace_cxl_generic_event(cxlds->dev, type, + &payload->records[i]); + + if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW) + trace_cxl_overflow(cxlds->dev, type, payload); + + rc = cxl_clear_event_record(cxlds, type, payload); + if (rc) { + dev_err_ratelimited(cxlds->dev, + "Event log '%d': Failed to clear events : %d", + type, rc); + break; + } + } while (nr_rec); + + mutex_unlock(&cxlds->event.log_lock); +} + +/** + * cxl_mem_get_event_records - Get Event Records from the device + * @cxlds: The device data for the operation + * + * Retrieve all event records available on the device, report them as trace + * events, and clear them. + * + * See CXL rev 3.0 @8.2.9.2.2 Get Event Records + * See CXL rev 3.0 @8.2.9.2.3 Clear Event Records + */ +void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status) +{ + dev_dbg(cxlds->dev, "Reading event logs: %x\n", status); + + if (status & CXLDEV_EVENT_STATUS_FATAL) + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FATAL); + if (status & CXLDEV_EVENT_STATUS_FAIL) + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL); + if (status & CXLDEV_EVENT_STATUS_WARN) + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN); + if (status & CXLDEV_EVENT_STATUS_INFO) + cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO); +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL); + /** * cxl_mem_get_partition_info - Get partition info * @cxlds: The device data for the operation @@ -868,6 +1015,7 @@ struct cxl_dev_state *cxl_dev_state_create(struct device *dev) } mutex_init(&cxlds->mbox_mutex); + mutex_init(&cxlds->event.log_lock); cxlds->dev = dev; return cxlds; diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 20ca2fe2ca8e..1805936e2e6a 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -6,8 +6,11 @@ #if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ) #define _CXL_EVENTS_H -#include #include +#include + +#include +#include #define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0) #define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1) @@ -103,6 +106,122 @@ TRACE_EVENT(cxl_aer_correctable_error, ) ); +#define cxl_event_log_type_str(type) \ + __print_symbolic(type, \ + { CXL_EVENT_TYPE_INFO, "Informational" }, \ + { CXL_EVENT_TYPE_WARN, "Warning" }, \ + { CXL_EVENT_TYPE_FAIL, "Failure" }, \ + { CXL_EVENT_TYPE_FATAL, "Fatal" }) + +TRACE_EVENT(cxl_overflow, + + TP_PROTO(const struct device *dev, enum cxl_event_log_type log, + struct cxl_get_event_payload *payload), + + TP_ARGS(dev, log, payload), + + TP_STRUCT__entry( + __string(dev_name, dev_name(dev)) + __field(int, log) + __field(u64, first_ts) + __field(u64, last_ts) + __field(u16, count) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name(dev)); + __entry->log = log; + __entry->count = le16_to_cpu(payload->overflow_err_count); + __entry->first_ts = le64_to_cpu(payload->first_overflow_timestamp); + __entry->last_ts = le64_to_cpu(payload->last_overflow_timestamp); + ), + + TP_printk("%s: log=%s : %u records from %llu to %llu", + __get_str(dev_name), cxl_event_log_type_str(__entry->log), + __entry->count, __entry->first_ts, __entry->last_ts) + +); + +/* + * Common Event Record Format + * CXL 3.0 section 8.2.9.2.1; Table 8-42 + */ +#define CXL_EVENT_RECORD_FLAG_PERMANENT BIT(2) +#define CXL_EVENT_RECORD_FLAG_MAINT_NEEDED BIT(3) +#define CXL_EVENT_RECORD_FLAG_PERF_DEGRADED BIT(4) +#define CXL_EVENT_RECORD_FLAG_HW_REPLACE BIT(5) +#define show_hdr_flags(flags) __print_flags(flags, " | ", \ + { CXL_EVENT_RECORD_FLAG_PERMANENT, "PERMANENT_CONDITION" }, \ + { CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, "MAINTENANCE_NEEDED" }, \ + { CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, "PERFORMANCE_DEGRADED" }, \ + { CXL_EVENT_RECORD_FLAG_HW_REPLACE, "HARDWARE_REPLACEMENT_NEEDED" } \ +) + +/* + * Define macros for the common header of each CXL event. + * + * Tracepoints using these macros must do 3 things: + * + * 1) Add CXL_EVT_TP_entry to TP_STRUCT__entry + * 2) Use CXL_EVT_TP_fast_assign within TP_fast_assign; + * pass the dev, log, and CXL event header + * 3) Use CXL_EVT_TP_printk() instead of TP_printk() + * + * See the generic_event tracepoint as an example. + */ +#define CXL_EVT_TP_entry \ + __string(dev_name, dev_name(dev)) \ + __field(int, log) \ + __field_struct(uuid_t, hdr_uuid) \ + __field(u32, hdr_flags) \ + __field(u16, hdr_handle) \ + __field(u16, hdr_related_handle) \ + __field(u64, hdr_timestamp) \ + __field(u8, hdr_length) \ + __field(u8, hdr_maint_op_class) + +#define CXL_EVT_TP_fast_assign(dev, l, hdr) \ + __assign_str(dev_name, dev_name(dev)); \ + __entry->log = (l); \ + memcpy(&__entry->hdr_uuid, &(hdr).id, sizeof(uuid_t)); \ + __entry->hdr_length = (hdr).length; \ + __entry->hdr_flags = get_unaligned_le24((hdr).flags); \ + __entry->hdr_handle = le16_to_cpu((hdr).handle); \ + __entry->hdr_related_handle = le16_to_cpu((hdr).related_handle); \ + __entry->hdr_timestamp = le64_to_cpu((hdr).timestamp); \ + __entry->hdr_maint_op_class = (hdr).maint_op_class + +#define CXL_EVT_TP_printk(fmt, ...) \ + TP_printk("%s log=%s : time=%llu uuid=%pUb len=%d flags='%s' " \ + "handle=%x related_handle=%x maint_op_class=%u" \ + " : " fmt, \ + __get_str(dev_name), cxl_event_log_type_str(__entry->log), \ + __entry->hdr_timestamp, &__entry->hdr_uuid, __entry->hdr_length,\ + show_hdr_flags(__entry->hdr_flags), __entry->hdr_handle, \ + __entry->hdr_related_handle, __entry->hdr_maint_op_class, \ + ##__VA_ARGS__) + +TRACE_EVENT(cxl_generic_event, + + TP_PROTO(const struct device *dev, enum cxl_event_log_type log, + struct cxl_event_record_raw *rec), + + TP_ARGS(dev, log, rec), + + TP_STRUCT__entry( + CXL_EVT_TP_entry + __array(u8, data, CXL_EVENT_RECORD_DATA_LENGTH) + ), + + TP_fast_assign( + CXL_EVT_TP_fast_assign(dev, log, rec->hdr); + memcpy(__entry->data, &rec->data, CXL_EVENT_RECORD_DATA_LENGTH); + ), + + CXL_EVT_TP_printk("%s", + __print_hex(__entry->data, CXL_EVENT_RECORD_DATA_LENGTH)) +); + #endif /* _CXL_EVENTS_H */ #define TRACE_INCLUDE_FILE trace diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index aa3af3bb73b2..5974d1082210 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -156,6 +156,18 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) #define CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX 0x3 #define CXLDEV_CAP_CAP_ID_MEMDEV 0x4000 +/* CXL 3.0 8.2.8.3.1 Event Status Register */ +#define CXLDEV_DEV_EVENT_STATUS_OFFSET 0x00 +#define CXLDEV_EVENT_STATUS_INFO BIT(0) +#define CXLDEV_EVENT_STATUS_WARN BIT(1) +#define CXLDEV_EVENT_STATUS_FAIL BIT(2) +#define CXLDEV_EVENT_STATUS_FATAL BIT(3) + +#define CXLDEV_EVENT_STATUS_ALL (CXLDEV_EVENT_STATUS_INFO | \ + CXLDEV_EVENT_STATUS_WARN | \ + CXLDEV_EVENT_STATUS_FAIL | \ + CXLDEV_EVENT_STATUS_FATAL) + /* CXL 2.0 8.2.8.4 Mailbox Registers */ #define CXLDEV_MBOX_CAPS_OFFSET 0x00 #define CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0) diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index ab138004f644..b74013a5f528 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -4,6 +4,7 @@ #define __CXL_MEM_H__ #include #include +#include #include "cxl.h" /* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */ @@ -193,6 +194,17 @@ struct cxl_endpoint_dvsec_info { struct range dvsec_range[2]; }; +/** + * struct cxl_event_state - Event log driver state + * + * @event_buf: Buffer to receive event data + * @event_log_lock: Serialize event_buf and log use + */ +struct cxl_event_state { + struct cxl_get_event_payload *buf; + struct mutex log_lock; +}; + /** * struct cxl_dev_state - The driver device state * @@ -266,12 +278,16 @@ struct cxl_dev_state { struct xarray doe_mbs; + struct cxl_event_state event; + int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); }; enum cxl_opcode { CXL_MBOX_OP_INVALID = 0x0000, CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID, + CXL_MBOX_OP_GET_EVENT_RECORD = 0x0100, + CXL_MBOX_OP_CLEAR_EVENT_RECORD = 0x0101, CXL_MBOX_OP_GET_FW_INFO = 0x0200, CXL_MBOX_OP_ACTIVATE_FW = 0x0202, CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400, @@ -347,6 +363,68 @@ struct cxl_mbox_identify { u8 qos_telemetry_caps; } __packed; +/* + * Common Event Record Format + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_hdr { + uuid_t id; + u8 length; + u8 flags[3]; + __le16 handle; + __le16 related_handle; + __le64 timestamp; + u8 maint_op_class; + u8 reserved[15]; +} __packed; + +#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 +struct cxl_event_record_raw { + struct cxl_event_record_hdr hdr; + u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; +} __packed; + +/* + * Get Event Records output payload + * CXL rev 3.0 section 8.2.9.2.2; Table 8-50 + */ +#define CXL_GET_EVENT_FLAG_OVERFLOW BIT(0) +#define CXL_GET_EVENT_FLAG_MORE_RECORDS BIT(1) +struct cxl_get_event_payload { + u8 flags; + u8 reserved1; + __le16 overflow_err_count; + __le64 first_overflow_timestamp; + __le64 last_overflow_timestamp; + __le16 record_count; + u8 reserved2[10]; + struct cxl_event_record_raw records[]; +} __packed; + +/* + * CXL rev 3.0 section 8.2.9.2.2; Table 8-49 + */ +enum cxl_event_log_type { + CXL_EVENT_TYPE_INFO = 0x00, + CXL_EVENT_TYPE_WARN, + CXL_EVENT_TYPE_FAIL, + CXL_EVENT_TYPE_FATAL, + CXL_EVENT_TYPE_MAX +}; + +/* + * Clear Event Records input payload + * CXL rev 3.0 section 8.2.9.2.3; Table 8-51 + */ +struct cxl_mbox_clear_event_payload { + u8 event_log; /* enum cxl_event_log_type */ + u8 clear_flags; + u8 nr_recs; + u8 reserved[3]; + __le16 handles[]; +} __packed; +#define CXL_CLEAR_EVENT_MAX_HANDLES U8_MAX + struct cxl_mbox_get_partition_info { __le64 active_volatile_cap; __le64 active_persistent_cap; @@ -441,6 +519,7 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); struct cxl_dev_state *cxl_dev_state_create(struct device *dev); void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); +void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status); #ifdef CONFIG_CXL_SUSPEND void cxl_mem_active_inc(void); void cxl_mem_active_dec(void); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 2bbebbc7e032..0a24bc7da1e4 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -417,8 +417,30 @@ static void disable_aer(void *pdev) pci_disable_pcie_error_reporting(pdev); } +static void free_event_buf(void *buf) +{ + kvfree(buf); +} + +/* + * There is a single buffer for reading event logs from the mailbox. All logs + * share this buffer protected by the cxlds->event_log_lock. + */ +static int cxl_mem_alloc_event_buf(struct cxl_dev_state *cxlds) +{ + struct cxl_get_event_payload *buf; + + buf = kvmalloc(cxlds->payload_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + cxlds->event.buf = buf; + + return devm_add_action_or_reset(cxlds->dev, free_event_buf, buf); +} + static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { + struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); struct cxl_register_map map; struct cxl_memdev *cxlmd; struct cxl_dev_state *cxlds; @@ -494,6 +516,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); + /* + * When BIOS maintains CXL error reporting control, it will process + * event records. Only one agent can do so. + */ + if (host_bridge->native_cxl_error) { + rc = cxl_mem_alloc_event_buf(cxlds); + if (rc) + return rc; + cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_ALL); + } + if (cxlds->regs.ras) { pci_enable_pcie_error_reporting(pdev); rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev); -- cgit From ee611e5e6616f7981939cc0441a5fb531312c2b6 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 25 Jan 2023 09:57:28 +0100 Subject: cxl/mbox: Add debug messages for enabled mailbox commands Only unsupported mailbox commands are reported in debug messages. A list of enabled commands is useful too. Change debug messages to also report the opcodes of enabled commands. Esp. if card initialization fails there is no way to get this information from userland. On that occasion also add missing trailing newlines. Signed-off-by: Robert Richter Reviewed-by: Alison Schofield Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20230125085728.234697-1-rrichter@amd.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index b03fba212799..cae43cea00cd 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -610,11 +610,12 @@ static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel) if (!cmd) { dev_dbg(cxlds->dev, - "Opcode 0x%04x unsupported by driver", opcode); + "Opcode 0x%04x unsupported by driver\n", opcode); continue; } set_bit(cmd->info.id, cxlds->enabled_cmds); + dev_dbg(cxlds->dev, "Opcode 0x%04x enabled\n", opcode); } } -- cgit From cbbd05d036e91b9dd976af4382f6c8d91b69b38a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 24 Jan 2023 19:22:21 -0800 Subject: cxl: fix spelling mistakes Correct spelling mistakes (reported by codespell). Signed-off-by: Randy Dunlap Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Cc: Ben Widawsky Cc: Dan Williams Cc: linux-cxl@vger.kernel.org Reviewed-by: Vishal Verma Reviewed-by: Alison Schofield Link: https://lore.kernel.org/r/20230125032221.21277-1-rdunlap@infradead.org Signed-off-by: Dan Williams --- drivers/cxl/Kconfig | 2 +- drivers/cxl/acpi.c | 2 +- drivers/cxl/core/port.c | 2 +- drivers/cxl/core/region.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 0ac53c422c31..9e709ecba50f 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -116,7 +116,7 @@ config CXL_REGION_INVALIDATION_TEST depends on CXL_REGION help CXL Region management and security operations potentially invalidate - the content of CPU caches without notifiying those caches to + the content of CPU caches without notifying those caches to invalidate the affected cachelines. The CXL Region driver attempts to invalidate caches when those events occur. If that invalidation fails the region will fail to enable. Reasons for cache diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index ad0849af42d7..6927149f2a16 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -19,7 +19,7 @@ struct cxl_cxims_data { /* * Find a targets entry (n) in the host bridge interleave list. - * CXL Specfication 3.0 Table 9-22 + * CXL Specification 3.0 Table 9-22 */ static int cxl_xor_calc_n(u64 hpa, struct cxl_cxims_data *cximsd, int iw, int ig) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 410c036c09fa..609aa6801b14 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1164,7 +1164,7 @@ static struct cxl_port *find_cxl_port_at(struct cxl_port *parent_port, } /* - * All users of grandparent() are using it to walk PCIe-like swich port + * All users of grandparent() are using it to walk PCIe-like switch port * hierarchy. A PCIe switch is comprised of a bridge device representing the * upstream switch port and N bridges representing downstream switch ports. When * bridges stack the grand-parent of a downstream switch port is another diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 60828d01972a..3482a9e6d2f2 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -414,7 +414,7 @@ static ssize_t interleave_granularity_store(struct device *dev, * When the host-bridge is interleaved, disallow region granularity != * root granularity. Regions with a granularity less than the root * interleave result in needing multiple endpoints to support a single - * slot in the interleave (possible to suport in the future). Regions + * slot in the interleave (possible to support in the future). Regions * with a granularity greater than the root interleave result in invalid * DPA translations (invalid to support). */ -- cgit From a49aa8141b657a7fd33d86f7bed3ca07567ab37a Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 17 Jan 2023 21:53:37 -0800 Subject: cxl/mem: Wire up event interrupts Currently the only CXL features targeted for irq support require their message numbers to be within the first 16 entries. The device may however support less than 16 entries depending on the support it provides. Attempt to allocate these 16 irq vectors. If the device supports less then the PCI infrastructure will allocate that number. Upon successful allocation, users can plug in their respective isr at any point thereafter. CXL device events are signaled via interrupts. Each event log may have a different interrupt message number. These message numbers are reported in the Get Event Interrupt Policy mailbox command. Add interrupt support for event logs. Interrupts are allocated as shared interrupts. Therefore, all or some event logs can share the same message number. In addition all logs are queried on any interrupt in order of the most to least severe based on the status register. Finally place all event configuration logic into cxl_event_config(). Previously the logic was a simple 'read all' on start up. But interrupts must be configured prior to any reads to ensure no events are missed. A single event configuration function results in a cleaner over all implementation. Cc: Bjorn Helgaas Cc: Jonathan Cameron Co-developed-by: Ira Weiny Signed-off-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-2-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams --- drivers/cxl/cxl.h | 4 + drivers/cxl/cxlmem.h | 19 +++++ drivers/cxl/cxlpci.h | 6 ++ drivers/cxl/pci.c | 221 ++++++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 240 insertions(+), 10 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 5974d1082210..b3964149c77b 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -168,6 +168,10 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) CXLDEV_EVENT_STATUS_FAIL | \ CXLDEV_EVENT_STATUS_FATAL) +/* CXL rev 3.0 section 8.2.9.2.4; Table 8-52 */ +#define CXLDEV_EVENT_INT_MODE_MASK GENMASK(1, 0) +#define CXLDEV_EVENT_INT_MSGNUM_MASK GENMASK(7, 4) + /* CXL 2.0 8.2.8.4 Mailbox Registers */ #define CXLDEV_MBOX_CAPS_OFFSET 0x00 #define CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0) diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index b74013a5f528..c7db2fa4d7a4 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -194,6 +194,23 @@ struct cxl_endpoint_dvsec_info { struct range dvsec_range[2]; }; +/** + * Event Interrupt Policy + * + * CXL rev 3.0 section 8.2.9.2.4; Table 8-52 + */ +enum cxl_event_int_mode { + CXL_INT_NONE = 0x00, + CXL_INT_MSI_MSIX = 0x01, + CXL_INT_FW = 0x02 +}; +struct cxl_event_interrupt_policy { + u8 info_settings; + u8 warn_settings; + u8 failure_settings; + u8 fatal_settings; +} __packed; + /** * struct cxl_event_state - Event log driver state * @@ -288,6 +305,8 @@ enum cxl_opcode { CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID, CXL_MBOX_OP_GET_EVENT_RECORD = 0x0100, CXL_MBOX_OP_CLEAR_EVENT_RECORD = 0x0101, + CXL_MBOX_OP_GET_EVT_INT_POLICY = 0x0102, + CXL_MBOX_OP_SET_EVT_INT_POLICY = 0x0103, CXL_MBOX_OP_GET_FW_INFO = 0x0200, CXL_MBOX_OP_ACTIVATE_FW = 0x0202, CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400, diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 77dbdb980b12..a8ea04f536ab 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -53,6 +53,12 @@ #define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK GENMASK(15, 8) #define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK GENMASK(31, 16) +/* + * NOTE: Currently all the functions which are enabled for CXL require their + * vectors to be in the first 16. Use this as the default max. + */ +#define CXL_PCI_DEFAULT_MAX_VECTORS 16 + /* Register Block Identifier (RBI) */ enum cxl_regloc_type { CXL_REGLOC_RBI_EMPTY = 0, diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 0a24bc7da1e4..d47005223c4f 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -438,6 +438,209 @@ static int cxl_mem_alloc_event_buf(struct cxl_dev_state *cxlds) return devm_add_action_or_reset(cxlds->dev, free_event_buf, buf); } +static int cxl_alloc_irq_vectors(struct pci_dev *pdev) +{ + int nvecs; + + /* + * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must + * not generate INTx messages if that function participates in + * CXL.cache or CXL.mem. + * + * Additionally pci_alloc_irq_vectors() handles calling + * pci_free_irq_vectors() automatically despite not being called + * pcim_*. See pci_setup_msi_context(). + */ + nvecs = pci_alloc_irq_vectors(pdev, 1, CXL_PCI_DEFAULT_MAX_VECTORS, + PCI_IRQ_MSIX | PCI_IRQ_MSI); + if (nvecs < 1) { + dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs); + return -ENXIO; + } + return 0; +} + +struct cxl_dev_id { + struct cxl_dev_state *cxlds; +}; + +static irqreturn_t cxl_event_thread(int irq, void *id) +{ + struct cxl_dev_id *dev_id = id; + struct cxl_dev_state *cxlds = dev_id->cxlds; + u32 status; + + do { + /* + * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status; + * ignore the reserved upper 32 bits + */ + status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET); + /* Ignore logs unknown to the driver */ + status &= CXLDEV_EVENT_STATUS_ALL; + if (!status) + break; + cxl_mem_get_event_records(cxlds, status); + cond_resched(); + } while (status); + + return IRQ_HANDLED; +} + +static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting) +{ + struct device *dev = cxlds->dev; + struct pci_dev *pdev = to_pci_dev(dev); + struct cxl_dev_id *dev_id; + int irq; + + if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX) + return -ENXIO; + + /* dev_id must be globally unique and must contain the cxlds */ + dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL); + if (!dev_id) + return -ENOMEM; + dev_id->cxlds = cxlds; + + irq = pci_irq_vector(pdev, + FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting)); + if (irq < 0) + return irq; + + return devm_request_threaded_irq(dev, irq, NULL, cxl_event_thread, + IRQF_SHARED, NULL, dev_id); +} + +static int cxl_event_get_int_policy(struct cxl_dev_state *cxlds, + struct cxl_event_interrupt_policy *policy) +{ + struct cxl_mbox_cmd mbox_cmd = { + .opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY, + .payload_out = policy, + .size_out = sizeof(*policy), + }; + int rc; + + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + if (rc < 0) + dev_err(cxlds->dev, "Failed to get event interrupt policy : %d", + rc); + + return rc; +} + +static int cxl_event_config_msgnums(struct cxl_dev_state *cxlds, + struct cxl_event_interrupt_policy *policy) +{ + struct cxl_mbox_cmd mbox_cmd; + int rc; + + *policy = (struct cxl_event_interrupt_policy) { + .info_settings = CXL_INT_MSI_MSIX, + .warn_settings = CXL_INT_MSI_MSIX, + .failure_settings = CXL_INT_MSI_MSIX, + .fatal_settings = CXL_INT_MSI_MSIX, + }; + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY, + .payload_in = policy, + .size_in = sizeof(*policy), + }; + + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + if (rc < 0) { + dev_err(cxlds->dev, "Failed to set event interrupt policy : %d", + rc); + return rc; + } + + /* Retrieve final interrupt settings */ + return cxl_event_get_int_policy(cxlds, policy); +} + +static int cxl_event_irqsetup(struct cxl_dev_state *cxlds) +{ + struct cxl_event_interrupt_policy policy; + int rc; + + rc = cxl_event_config_msgnums(cxlds, &policy); + if (rc) + return rc; + + rc = cxl_event_req_irq(cxlds, policy.info_settings); + if (rc) { + dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n"); + return rc; + } + + rc = cxl_event_req_irq(cxlds, policy.warn_settings); + if (rc) { + dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n"); + return rc; + } + + rc = cxl_event_req_irq(cxlds, policy.failure_settings); + if (rc) { + dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n"); + return rc; + } + + rc = cxl_event_req_irq(cxlds, policy.fatal_settings); + if (rc) { + dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n"); + return rc; + } + + return 0; +} + +static bool cxl_event_int_is_fw(u8 setting) +{ + u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting); + + return mode == CXL_INT_FW; +} + +static int cxl_event_config(struct pci_host_bridge *host_bridge, + struct cxl_dev_state *cxlds) +{ + struct cxl_event_interrupt_policy policy; + int rc; + + /* + * When BIOS maintains CXL error reporting control, it will process + * event records. Only one agent can do so. + */ + if (!host_bridge->native_cxl_error) + return 0; + + rc = cxl_mem_alloc_event_buf(cxlds); + if (rc) + return rc; + + rc = cxl_event_get_int_policy(cxlds, &policy); + if (rc) + return rc; + + if (cxl_event_int_is_fw(policy.info_settings) || + cxl_event_int_is_fw(policy.warn_settings) || + cxl_event_int_is_fw(policy.failure_settings) || + cxl_event_int_is_fw(policy.fatal_settings)) { + dev_err(cxlds->dev, "FW still in control of Event Logs despite _OSC settings\n"); + return -EBUSY; + } + + rc = cxl_event_irqsetup(cxlds); + if (rc) + return rc; + + cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_ALL); + + return 0; +} + static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); @@ -456,6 +659,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) rc = pcim_enable_device(pdev); if (rc) return rc; + pci_set_master(pdev); cxlds = cxl_dev_state_create(&pdev->dev); if (IS_ERR(cxlds)) @@ -512,20 +716,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + rc = cxl_alloc_irq_vectors(pdev); + if (rc) + return rc; + cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); - /* - * When BIOS maintains CXL error reporting control, it will process - * event records. Only one agent can do so. - */ - if (host_bridge->native_cxl_error) { - rc = cxl_mem_alloc_event_buf(cxlds); - if (rc) - return rc; - cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_ALL); - } + rc = cxl_event_config(host_bridge, cxlds); + if (rc) + return rc; if (cxlds->regs.ras) { pci_enable_pcie_error_reporting(pdev); -- cgit From d54a531a430b775ae505563934a03720329fa5bd Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 17 Jan 2023 21:53:38 -0800 Subject: cxl/mem: Trace General Media Event Record CXL rev 3.0 section 8.2.9.2.1.1 defines the General Media Event Record. Determine if the event read is a general media record and if so trace the record as a General Media Event Record. Reviewed-by: Dan Williams Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-3-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 29 ++++++++++- drivers/cxl/core/trace.h | 124 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxlmem.h | 19 ++++++++ 3 files changed, 170 insertions(+), 2 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index c329c1e67a2c..d5fe2f2a23ad 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -718,6 +718,31 @@ out: } EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL); +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +static const uuid_t gen_media_event_uuid = + UUID_INIT(0xfbcd0a77, 0xc260, 0x417f, + 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6); + +static void cxl_event_trace_record(const struct device *dev, + enum cxl_event_log_type type, + struct cxl_event_record_raw *record) +{ + uuid_t *id = &record->hdr.id; + + if (uuid_equal(id, &gen_media_event_uuid)) { + struct cxl_event_gen_media *rec = + (struct cxl_event_gen_media *)record; + + trace_cxl_general_media(dev, type, rec); + } else { + /* For unknown record types print just the header */ + trace_cxl_generic_event(dev, type, record); + } +} + static int cxl_clear_event_record(struct cxl_dev_state *cxlds, enum cxl_event_log_type log, struct cxl_get_event_payload *get_pl) @@ -821,8 +846,8 @@ static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, break; for (i = 0; i < nr_rec; i++) - trace_cxl_generic_event(cxlds->dev, type, - &payload->records[i]); + cxl_event_trace_record(cxlds->dev, type, + &payload->records[i]); if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW) trace_cxl_overflow(cxlds->dev, type, payload); diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 1805936e2e6a..ece4161d6620 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -222,6 +222,130 @@ TRACE_EVENT(cxl_generic_event, __print_hex(__entry->data, CXL_EVENT_RECORD_DATA_LENGTH)) ); +/* + * Physical Address field masks + * + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + * + * DRAM Event Record + * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + */ +#define CXL_DPA_FLAGS_MASK 0x3F +#define CXL_DPA_MASK (~CXL_DPA_FLAGS_MASK) + +#define CXL_DPA_VOLATILE BIT(0) +#define CXL_DPA_NOT_REPAIRABLE BIT(1) +#define show_dpa_flags(flags) __print_flags(flags, "|", \ + { CXL_DPA_VOLATILE, "VOLATILE" }, \ + { CXL_DPA_NOT_REPAIRABLE, "NOT_REPAIRABLE" } \ +) + +/* + * General Media Event Record - GMER + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT BIT(0) +#define CXL_GMER_EVT_DESC_THRESHOLD_EVENT BIT(1) +#define CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW BIT(2) +#define show_event_desc_flags(flags) __print_flags(flags, "|", \ + { CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, "UNCORRECTABLE_EVENT" }, \ + { CXL_GMER_EVT_DESC_THRESHOLD_EVENT, "THRESHOLD_EVENT" }, \ + { CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW, "POISON_LIST_OVERFLOW" } \ +) + +#define CXL_GMER_MEM_EVT_TYPE_ECC_ERROR 0x00 +#define CXL_GMER_MEM_EVT_TYPE_INV_ADDR 0x01 +#define CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR 0x02 +#define show_mem_event_type(type) __print_symbolic(type, \ + { CXL_GMER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \ + { CXL_GMER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \ + { CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \ +) + +#define CXL_GMER_TRANS_UNKNOWN 0x00 +#define CXL_GMER_TRANS_HOST_READ 0x01 +#define CXL_GMER_TRANS_HOST_WRITE 0x02 +#define CXL_GMER_TRANS_HOST_SCAN_MEDIA 0x03 +#define CXL_GMER_TRANS_HOST_INJECT_POISON 0x04 +#define CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB 0x05 +#define CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT 0x06 +#define show_trans_type(type) __print_symbolic(type, \ + { CXL_GMER_TRANS_UNKNOWN, "Unknown" }, \ + { CXL_GMER_TRANS_HOST_READ, "Host Read" }, \ + { CXL_GMER_TRANS_HOST_WRITE, "Host Write" }, \ + { CXL_GMER_TRANS_HOST_SCAN_MEDIA, "Host Scan Media" }, \ + { CXL_GMER_TRANS_HOST_INJECT_POISON, "Host Inject Poison" }, \ + { CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, "Internal Media Scrub" }, \ + { CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT, "Internal Media Management" } \ +) + +#define CXL_GMER_VALID_CHANNEL BIT(0) +#define CXL_GMER_VALID_RANK BIT(1) +#define CXL_GMER_VALID_DEVICE BIT(2) +#define CXL_GMER_VALID_COMPONENT BIT(3) +#define show_valid_flags(flags) __print_flags(flags, "|", \ + { CXL_GMER_VALID_CHANNEL, "CHANNEL" }, \ + { CXL_GMER_VALID_RANK, "RANK" }, \ + { CXL_GMER_VALID_DEVICE, "DEVICE" }, \ + { CXL_GMER_VALID_COMPONENT, "COMPONENT" } \ +) + +TRACE_EVENT(cxl_general_media, + + TP_PROTO(const struct device *dev, enum cxl_event_log_type log, + struct cxl_event_gen_media *rec), + + TP_ARGS(dev, log, rec), + + TP_STRUCT__entry( + CXL_EVT_TP_entry + /* General Media */ + __field(u64, dpa) + __field(u8, descriptor) + __field(u8, type) + __field(u8, transaction_type) + __field(u8, channel) + __field(u32, device) + __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE) + __field(u16, validity_flags) + /* Following are out of order to pack trace record */ + __field(u8, rank) + __field(u8, dpa_flags) + ), + + TP_fast_assign( + CXL_EVT_TP_fast_assign(dev, log, rec->hdr); + + /* General Media */ + __entry->dpa = le64_to_cpu(rec->phys_addr); + __entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK; + /* Mask after flags have been parsed */ + __entry->dpa &= CXL_DPA_MASK; + __entry->descriptor = rec->descriptor; + __entry->type = rec->type; + __entry->transaction_type = rec->transaction_type; + __entry->channel = rec->channel; + __entry->rank = rec->rank; + __entry->device = get_unaligned_le24(rec->device); + memcpy(__entry->comp_id, &rec->component_id, + CXL_EVENT_GEN_MED_COMP_ID_SIZE); + __entry->validity_flags = get_unaligned_le16(&rec->validity_flags); + ), + + CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' " \ + "descriptor='%s' type='%s' transaction_type='%s' channel=%u rank=%u " \ + "device=%x comp_id=%s validity_flags='%s'", + __entry->dpa, show_dpa_flags(__entry->dpa_flags), + show_event_desc_flags(__entry->descriptor), + show_mem_event_type(__entry->type), + show_trans_type(__entry->transaction_type), + __entry->channel, __entry->rank, __entry->device, + __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE), + show_valid_flags(__entry->validity_flags) + ) +); + #endif /* _CXL_EVENTS_H */ #define TRACE_INCLUDE_FILE trace diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index c7db2fa4d7a4..1713b4330ad4 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -444,6 +444,25 @@ struct cxl_mbox_clear_event_payload { } __packed; #define CXL_CLEAR_EVENT_MAX_HANDLES U8_MAX +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 +struct cxl_event_gen_media { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + u8 validity_flags[2]; + u8 channel; + u8 rank; + u8 device[3]; + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + u8 reserved[46]; +} __packed; + struct cxl_mbox_get_partition_info { __le64 active_volatile_cap; __le64 active_persistent_cap; -- cgit From 2d6c1e6d60ba3030450ca8396a332da84e8b7614 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 17 Jan 2023 21:53:39 -0800 Subject: cxl/mem: Trace DRAM Event Record CXL rev 3.0 section 8.2.9.2.1.2 defines the DRAM Event Record. Determine if the event read is a DRAM event record and if so trace the record. Reviewed-by: Dan Williams Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-4-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 12 +++++++ drivers/cxl/core/trace.h | 92 ++++++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxlmem.h | 23 ++++++++++++ 3 files changed, 127 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index d5fe2f2a23ad..552d98bc9f93 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -726,6 +726,14 @@ static const uuid_t gen_media_event_uuid = UUID_INIT(0xfbcd0a77, 0xc260, 0x417f, 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6); +/* + * DRAM Event Record + * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + */ +static const uuid_t dram_event_uuid = + UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, + 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24); + static void cxl_event_trace_record(const struct device *dev, enum cxl_event_log_type type, struct cxl_event_record_raw *record) @@ -737,6 +745,10 @@ static void cxl_event_trace_record(const struct device *dev, (struct cxl_event_gen_media *)record; trace_cxl_general_media(dev, type, rec); + } else if (uuid_equal(id, &dram_event_uuid)) { + struct cxl_event_dram *rec = (struct cxl_event_dram *)record; + + trace_cxl_dram(dev, type, rec); } else { /* For unknown record types print just the header */ trace_cxl_generic_event(dev, type, record); diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index ece4161d6620..f4bf7dfaf7fd 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -346,6 +346,98 @@ TRACE_EVENT(cxl_general_media, ) ); +/* + * DRAM Event Record - DER + * + * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + */ +/* + * DRAM Event Record defines many fields the same as the General Media Event + * Record. Reuse those definitions as appropriate. + */ +#define CXL_DER_VALID_CHANNEL BIT(0) +#define CXL_DER_VALID_RANK BIT(1) +#define CXL_DER_VALID_NIBBLE BIT(2) +#define CXL_DER_VALID_BANK_GROUP BIT(3) +#define CXL_DER_VALID_BANK BIT(4) +#define CXL_DER_VALID_ROW BIT(5) +#define CXL_DER_VALID_COLUMN BIT(6) +#define CXL_DER_VALID_CORRECTION_MASK BIT(7) +#define show_dram_valid_flags(flags) __print_flags(flags, "|", \ + { CXL_DER_VALID_CHANNEL, "CHANNEL" }, \ + { CXL_DER_VALID_RANK, "RANK" }, \ + { CXL_DER_VALID_NIBBLE, "NIBBLE" }, \ + { CXL_DER_VALID_BANK_GROUP, "BANK GROUP" }, \ + { CXL_DER_VALID_BANK, "BANK" }, \ + { CXL_DER_VALID_ROW, "ROW" }, \ + { CXL_DER_VALID_COLUMN, "COLUMN" }, \ + { CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" } \ +) + +TRACE_EVENT(cxl_dram, + + TP_PROTO(const struct device *dev, enum cxl_event_log_type log, + struct cxl_event_dram *rec), + + TP_ARGS(dev, log, rec), + + TP_STRUCT__entry( + CXL_EVT_TP_entry + /* DRAM */ + __field(u64, dpa) + __field(u8, descriptor) + __field(u8, type) + __field(u8, transaction_type) + __field(u8, channel) + __field(u16, validity_flags) + __field(u16, column) /* Out of order to pack trace record */ + __field(u32, nibble_mask) + __field(u32, row) + __array(u8, cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE) + __field(u8, rank) /* Out of order to pack trace record */ + __field(u8, bank_group) /* Out of order to pack trace record */ + __field(u8, bank) /* Out of order to pack trace record */ + __field(u8, dpa_flags) /* Out of order to pack trace record */ + ), + + TP_fast_assign( + CXL_EVT_TP_fast_assign(dev, log, rec->hdr); + + /* DRAM */ + __entry->dpa = le64_to_cpu(rec->phys_addr); + __entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK; + __entry->dpa &= CXL_DPA_MASK; + __entry->descriptor = rec->descriptor; + __entry->type = rec->type; + __entry->transaction_type = rec->transaction_type; + __entry->validity_flags = get_unaligned_le16(rec->validity_flags); + __entry->channel = rec->channel; + __entry->rank = rec->rank; + __entry->nibble_mask = get_unaligned_le24(rec->nibble_mask); + __entry->bank_group = rec->bank_group; + __entry->bank = rec->bank; + __entry->row = get_unaligned_le24(rec->row); + __entry->column = get_unaligned_le16(rec->column); + memcpy(__entry->cor_mask, &rec->correction_mask, + CXL_EVENT_DER_CORRECTION_MASK_SIZE); + ), + + CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' " \ + "transaction_type='%s' channel=%u rank=%u nibble_mask=%x " \ + "bank_group=%u bank=%u row=%u column=%u cor_mask=%s " \ + "validity_flags='%s'", + __entry->dpa, show_dpa_flags(__entry->dpa_flags), + show_event_desc_flags(__entry->descriptor), + show_mem_event_type(__entry->type), + show_trans_type(__entry->transaction_type), + __entry->channel, __entry->rank, __entry->nibble_mask, + __entry->bank_group, __entry->bank, + __entry->row, __entry->column, + __print_hex(__entry->cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE), + show_dram_valid_flags(__entry->validity_flags) + ) +); + #endif /* _CXL_EVENTS_H */ #define TRACE_INCLUDE_FILE trace diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 1713b4330ad4..9aacedda13f4 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -463,6 +463,29 @@ struct cxl_event_gen_media { u8 reserved[46]; } __packed; +/* + * DRAM Event Record - DER + * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44 + */ +#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 +struct cxl_event_dram { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + u8 validity_flags[2]; + u8 channel; + u8 rank; + u8 nibble_mask[3]; + u8 bank_group; + u8 bank; + u8 row[3]; + u8 column[2]; + u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; + u8 reserved[0x17]; +} __packed; + struct cxl_mbox_get_partition_info { __le64 active_volatile_cap; __le64 active_persistent_cap; -- cgit From 95b4947992858f725ad6a65a2087a3533749660f Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 17 Jan 2023 21:53:40 -0800 Subject: cxl/mem: Trace Memory Module Event Record CXL rev 3.0 section 8.2.9.2.1.3 defines the Memory Module Event Record. Determine if the event read is memory module record and if so trace the record. Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-5-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 13 +++++ drivers/cxl/core/trace.h | 143 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxlmem.h | 26 +++++++++ 3 files changed, 182 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 552d98bc9f93..6ed8e3654939 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -734,6 +734,14 @@ static const uuid_t dram_event_uuid = UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24); +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +static const uuid_t mem_mod_event_uuid = + UUID_INIT(0xfe927475, 0xdd59, 0x4339, + 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74); + static void cxl_event_trace_record(const struct device *dev, enum cxl_event_log_type type, struct cxl_event_record_raw *record) @@ -749,6 +757,11 @@ static void cxl_event_trace_record(const struct device *dev, struct cxl_event_dram *rec = (struct cxl_event_dram *)record; trace_cxl_dram(dev, type, rec); + } else if (uuid_equal(id, &mem_mod_event_uuid)) { + struct cxl_event_mem_module *rec = + (struct cxl_event_mem_module *)record; + + trace_cxl_memory_module(dev, type, rec); } else { /* For unknown record types print just the header */ trace_cxl_generic_event(dev, type, record); diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index f4bf7dfaf7fd..c72ef9321cfe 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -438,6 +438,149 @@ TRACE_EVENT(cxl_dram, ) ); +/* + * Memory Module Event Record - MMER + * + * CXL res 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +#define CXL_MMER_HEALTH_STATUS_CHANGE 0x00 +#define CXL_MMER_MEDIA_STATUS_CHANGE 0x01 +#define CXL_MMER_LIFE_USED_CHANGE 0x02 +#define CXL_MMER_TEMP_CHANGE 0x03 +#define CXL_MMER_DATA_PATH_ERROR 0x04 +#define CXL_MMER_LSA_ERROR 0x05 +#define show_dev_evt_type(type) __print_symbolic(type, \ + { CXL_MMER_HEALTH_STATUS_CHANGE, "Health Status Change" }, \ + { CXL_MMER_MEDIA_STATUS_CHANGE, "Media Status Change" }, \ + { CXL_MMER_LIFE_USED_CHANGE, "Life Used Change" }, \ + { CXL_MMER_TEMP_CHANGE, "Temperature Change" }, \ + { CXL_MMER_DATA_PATH_ERROR, "Data Path Error" }, \ + { CXL_MMER_LSA_ERROR, "LSA Error" } \ +) + +/* + * Device Health Information - DHI + * + * CXL res 3.0 section 8.2.9.8.3.1; Table 8-100 + */ +#define CXL_DHI_HS_MAINTENANCE_NEEDED BIT(0) +#define CXL_DHI_HS_PERFORMANCE_DEGRADED BIT(1) +#define CXL_DHI_HS_HW_REPLACEMENT_NEEDED BIT(2) +#define show_health_status_flags(flags) __print_flags(flags, "|", \ + { CXL_DHI_HS_MAINTENANCE_NEEDED, "MAINTENANCE_NEEDED" }, \ + { CXL_DHI_HS_PERFORMANCE_DEGRADED, "PERFORMANCE_DEGRADED" }, \ + { CXL_DHI_HS_HW_REPLACEMENT_NEEDED, "REPLACEMENT_NEEDED" } \ +) + +#define CXL_DHI_MS_NORMAL 0x00 +#define CXL_DHI_MS_NOT_READY 0x01 +#define CXL_DHI_MS_WRITE_PERSISTENCY_LOST 0x02 +#define CXL_DHI_MS_ALL_DATA_LOST 0x03 +#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS 0x04 +#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN 0x05 +#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT 0x06 +#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS 0x07 +#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN 0x08 +#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT 0x09 +#define show_media_status(ms) __print_symbolic(ms, \ + { CXL_DHI_MS_NORMAL, \ + "Normal" }, \ + { CXL_DHI_MS_NOT_READY, \ + "Not Ready" }, \ + { CXL_DHI_MS_WRITE_PERSISTENCY_LOST, \ + "Write Persistency Lost" }, \ + { CXL_DHI_MS_ALL_DATA_LOST, \ + "All Data Lost" }, \ + { CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS, \ + "Write Persistency Loss in the Event of Power Loss" }, \ + { CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN, \ + "Write Persistency Loss in Event of Shutdown" }, \ + { CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT, \ + "Write Persistency Loss Imminent" }, \ + { CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS, \ + "All Data Loss in Event of Power Loss" }, \ + { CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN, \ + "All Data loss in the Event of Shutdown" }, \ + { CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT, \ + "All Data Loss Imminent" } \ +) + +#define CXL_DHI_AS_NORMAL 0x0 +#define CXL_DHI_AS_WARNING 0x1 +#define CXL_DHI_AS_CRITICAL 0x2 +#define show_two_bit_status(as) __print_symbolic(as, \ + { CXL_DHI_AS_NORMAL, "Normal" }, \ + { CXL_DHI_AS_WARNING, "Warning" }, \ + { CXL_DHI_AS_CRITICAL, "Critical" } \ +) +#define show_one_bit_status(as) __print_symbolic(as, \ + { CXL_DHI_AS_NORMAL, "Normal" }, \ + { CXL_DHI_AS_WARNING, "Warning" } \ +) + +#define CXL_DHI_AS_LIFE_USED(as) (as & 0x3) +#define CXL_DHI_AS_DEV_TEMP(as) ((as & 0xC) >> 2) +#define CXL_DHI_AS_COR_VOL_ERR_CNT(as) ((as & 0x10) >> 4) +#define CXL_DHI_AS_COR_PER_ERR_CNT(as) ((as & 0x20) >> 5) + +TRACE_EVENT(cxl_memory_module, + + TP_PROTO(const struct device *dev, enum cxl_event_log_type log, + struct cxl_event_mem_module *rec), + + TP_ARGS(dev, log, rec), + + TP_STRUCT__entry( + CXL_EVT_TP_entry + + /* Memory Module Event */ + __field(u8, event_type) + + /* Device Health Info */ + __field(u8, health_status) + __field(u8, media_status) + __field(u8, life_used) + __field(u32, dirty_shutdown_cnt) + __field(u32, cor_vol_err_cnt) + __field(u32, cor_per_err_cnt) + __field(s16, device_temp) + __field(u8, add_status) + ), + + TP_fast_assign( + CXL_EVT_TP_fast_assign(dev, log, rec->hdr); + + /* Memory Module Event */ + __entry->event_type = rec->event_type; + + /* Device Health Info */ + __entry->health_status = rec->info.health_status; + __entry->media_status = rec->info.media_status; + __entry->life_used = rec->info.life_used; + __entry->dirty_shutdown_cnt = get_unaligned_le32(rec->info.dirty_shutdown_cnt); + __entry->cor_vol_err_cnt = get_unaligned_le32(rec->info.cor_vol_err_cnt); + __entry->cor_per_err_cnt = get_unaligned_le32(rec->info.cor_per_err_cnt); + __entry->device_temp = get_unaligned_le16(rec->info.device_temp); + __entry->add_status = rec->info.add_status; + ), + + CXL_EVT_TP_printk("event_type='%s' health_status='%s' media_status='%s' " \ + "as_life_used=%s as_dev_temp=%s as_cor_vol_err_cnt=%s " \ + "as_cor_per_err_cnt=%s life_used=%u device_temp=%d " \ + "dirty_shutdown_cnt=%u cor_vol_err_cnt=%u cor_per_err_cnt=%u", + show_dev_evt_type(__entry->event_type), + show_health_status_flags(__entry->health_status), + show_media_status(__entry->media_status), + show_two_bit_status(CXL_DHI_AS_LIFE_USED(__entry->add_status)), + show_two_bit_status(CXL_DHI_AS_DEV_TEMP(__entry->add_status)), + show_one_bit_status(CXL_DHI_AS_COR_VOL_ERR_CNT(__entry->add_status)), + show_one_bit_status(CXL_DHI_AS_COR_PER_ERR_CNT(__entry->add_status)), + __entry->life_used, __entry->device_temp, + __entry->dirty_shutdown_cnt, __entry->cor_vol_err_cnt, + __entry->cor_per_err_cnt + ) +); + #endif /* _CXL_EVENTS_H */ #define TRACE_INCLUDE_FILE trace diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 9aacedda13f4..f21692ccb7a2 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -486,6 +486,32 @@ struct cxl_event_dram { u8 reserved[0x17]; } __packed; +/* + * Get Health Info Record + * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100 + */ +struct cxl_get_health_info { + u8 health_status; + u8 media_status; + u8 add_status; + u8 life_used; + u8 device_temp[2]; + u8 dirty_shutdown_cnt[4]; + u8 cor_vol_err_cnt[4]; + u8 cor_per_err_cnt[4]; +} __packed; + +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +struct cxl_event_mem_module { + struct cxl_event_record_hdr hdr; + u8 event_type; + struct cxl_get_health_info info; + u8 reserved[0x3d]; +} __packed; + struct cxl_mbox_get_partition_info { __le64 active_volatile_cap; __le64 active_persistent_cap; -- cgit From 623c0751336e4035ab0047f2c152a02bd26b612b Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 19 Jan 2023 10:49:34 +0100 Subject: cxl/mbox: Fix Payload Length check for Get Log command Commit 2aeaf663b85e introduced strict checking for variable length payload size validation. The payload length of received data must match the size of the requested data by the caller except for the case where the min_out value is set. The Get Log command does not have a header with a length field set. The Log size is determined by the Get Supported Logs command (CXL 3.0, 8.2.9.5.1). However, the actual size can be smaller and the number of valid bytes in the payload output must be determined reading the Payload Length field (CXL 3.0, Table 8-36, Note 2). Two issues arise: The command can successfully complete with a payload length of zero. And, the valid payload length must then also be consumed by the caller. Change cxl_xfer_log() to pass the number of payload bytes back to the caller to determine the number of log entries. Implement the payload handling as a special case where mbox_cmd->size_out is consulted when cxl_internal_send_cmd() returns -EIO. A WARN_ONCE() is added to check that -EIO is only returned in case of an unexpected output size. Logs can be bigger than the maximum payload length and multiple Get Log commands can be issued. If the received payload size is smaller than the maximum payload size we can assume all valid bytes have been fetched. Stop sending further Get Log commands then. On that occasion, change debug messages to also report the opcodes of supported commands. The variable payload commands GET_LSA and SET_LSA are not affected by this strict check: SET_LSA cannot be broken because SET_LSA does not return an output payload, and GET_LSA never expects short reads. Fixes: 2aeaf663b85e ("cxl/mbox: Add variable output size validation for internal commands") Signed-off-by: Robert Richter Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20230119094934.86067-1-rrichter@amd.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index cae43cea00cd..202d49dd9911 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -170,6 +170,12 @@ int cxl_internal_send_cmd(struct cxl_dev_state *cxlds, out_size = mbox_cmd->size_out; min_out = mbox_cmd->min_out; rc = cxlds->mbox_send(cxlds, mbox_cmd); + /* + * EIO is reserved for a payload size mismatch and mbox_send() + * may not return this error. + */ + if (WARN_ONCE(rc == -EIO, "Bad return code: -EIO")) + return -ENXIO; if (rc) return rc; @@ -550,9 +556,9 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) return 0; } -static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 size, u8 *out) +static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 *size, u8 *out) { - u32 remaining = size; + u32 remaining = *size; u32 offset = 0; while (remaining) { @@ -576,6 +582,17 @@ static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 size, u8 }; rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + + /* + * The output payload length that indicates the number + * of valid bytes can be smaller than the Log buffer + * size. + */ + if (rc == -EIO && mbox_cmd.size_out < xfer_size) { + offset += mbox_cmd.size_out; + break; + } + if (rc < 0) return rc; @@ -584,6 +601,8 @@ static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 size, u8 offset += xfer_size; } + *size = offset; + return 0; } @@ -695,7 +714,7 @@ int cxl_enumerate_cmds(struct cxl_dev_state *cxlds) goto out; } - rc = cxl_xfer_log(cxlds, &uuid, size, log); + rc = cxl_xfer_log(cxlds, &uuid, &size, log); if (rc) { kvfree(log); goto out; -- cgit From 7ebf38c9115ec3b4502c73b4377a7648374d700f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 30 Jan 2023 15:34:37 +0000 Subject: cxl/mbox: Add missing parameter to docs. Kernel-doc should be complete, so add documentation for the status parameter. Signed-off-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230130153437.3153-1-Jonathan.Cameron@huawei.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 6ed8e3654939..14fa09452f9f 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -892,6 +892,7 @@ static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, /** * cxl_mem_get_event_records - Get Event Records from the device * @cxlds: The device data for the operation + * @status: Event Status register value identifying which events are available. * * Retrieve all event records available on the device, report them as trace * events, and clear them. -- cgit From fa8843451bec55f900b8673d9ddc0be02a61528a Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 30 Jan 2023 15:13:27 +0000 Subject: cxl/pci: Set the device timestamp CXL r3.0 section 8.2.9.4.2 "Set Timestamp" recommends that the host sets the timestamp after every Conventional or CXL Reset to ensure accurate timestamps. This should include on initial boot up. The time base that is being set is used by a device for the poison list overflow timestamp and all event timestamps. Note that the command is optional and if not supported and the device cannot return accurate timestamps it will fill the fields in with an appropriate marker (see the specification description of each timestamp). Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20230130151327.32415-1-Jonathan.Cameron@huawei.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 27 +++++++++++++++++++++++++++ drivers/cxl/cxlmem.h | 9 +++++++++ drivers/cxl/pci.c | 4 ++++ 3 files changed, 40 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 14fa09452f9f..a12255907852 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -1055,6 +1056,32 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL); +int cxl_set_timestamp(struct cxl_dev_state *cxlds) +{ + struct cxl_mbox_cmd mbox_cmd; + struct cxl_mbox_set_timestamp_in pi; + int rc; + + pi.timestamp = cpu_to_le64(ktime_get_real_ns()); + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_SET_TIMESTAMP, + .size_in = sizeof(pi), + .payload_in = &pi, + }; + + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + /* + * Command is optional. Devices may have another way of providing + * a timestamp, or may return all 0s in timestamp fields. + * Don't report an error if this command isn't supported + */ + if (rc && (mbox_cmd.return_code != CXL_MBOX_CMD_RC_UNSUPPORTED)) + return rc; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL); + struct cxl_dev_state *cxl_dev_state_create(struct device *dev) { struct cxl_dev_state *cxlds; diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index f21692ccb7a2..802b5b396daf 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -309,6 +309,7 @@ enum cxl_opcode { CXL_MBOX_OP_SET_EVT_INT_POLICY = 0x0103, CXL_MBOX_OP_GET_FW_INFO = 0x0200, CXL_MBOX_OP_ACTIVATE_FW = 0x0202, + CXL_MBOX_OP_SET_TIMESTAMP = 0x0301, CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400, CXL_MBOX_OP_GET_LOG = 0x0401, CXL_MBOX_OP_IDENTIFY = 0x4000, @@ -537,6 +538,12 @@ struct cxl_mbox_set_partition_info { #define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0) +/* Set Timestamp CXL 3.0 Spec 8.2.9.4.2 */ +struct cxl_mbox_set_timestamp_in { + __le64 timestamp; + +} __packed; + /** * struct cxl_mem_command - Driver representation of a memory device command * @info: Command information as it exists for the UAPI @@ -607,6 +614,8 @@ struct cxl_dev_state *cxl_dev_state_create(struct device *dev); void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status); +int cxl_set_timestamp(struct cxl_dev_state *cxlds); + #ifdef CONFIG_CXL_SUSPEND void cxl_mem_active_inc(void); void cxl_mem_active_dec(void); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index d47005223c4f..ad2ebe7bfaeb 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -708,6 +708,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + rc = cxl_set_timestamp(cxlds); + if (rc) + return rc; + rc = cxl_dev_state_identify(cxlds); if (rc) return rc; -- cgit From 5a84711fd734c09b7d991b00657ba61a96612254 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 30 Jan 2023 15:39:26 -0800 Subject: cxl/pci: Fix irq oneshot expectations The IRQ core expects that users of the default hardirq handler specify IRQF_ONESHOT to keep interrupts disabled until the threaded handler runs. That meets the CXL driver's expectations since it is an edge triggered MSI and this flag would have been passed by default using pci_request_irq() instead of devm_request_threaded_irq(). Fixes: a49aa8141b65 ("cxl/mem: Wire up event interrupts") Reported-by: kernel test robot Reported-by: Julia Lawall Cc: Davidlohr Bueso Cc: Ira Weiny Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index ad2ebe7bfaeb..4cf9a2191602 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -509,7 +509,8 @@ static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting) return irq; return devm_request_threaded_irq(dev, irq, NULL, cxl_event_thread, - IRQF_SHARED, NULL, dev_id); + IRQF_SHARED | IRQF_ONESHOT, NULL, + dev_id); } static int cxl_event_get_int_policy(struct cxl_dev_state *cxlds, -- cgit From d874297bc7c8bb69f9fcbe6422ac5623c5897977 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 8 Feb 2023 10:19:44 -0800 Subject: cxl/mem: Correct full ID range allocation For ID allocations we want 0-(max-1), ie: smatch complains: error: Calling ida_alloc_range() with a 'max' argument which is a power of 2. -1 missing? Correct this and also replace the call to use the max() flavor instead. Signed-off-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230208181944.240261-1-dave@stgolabs.net Signed-off-by: Dan Williams --- drivers/cxl/core/memdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index a74a93310d26..12bd9ddaba22 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -242,7 +242,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, if (!cxlmd) return ERR_PTR(-ENOMEM); - rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL); + rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL); if (rc < 0) goto err; cxlmd->id = rc; -- cgit From 2345df54249c6fb7779e2a72b427ee79ed3eaad5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 17:29:09 -0800 Subject: cxl/memdev: Fix endpoint port removal Testing of ram region support [1], stimulates a long standing bug in cxl_detach_ep() where some cxl_ep_remove() cleanup is skipped due to inability to walk ports after dports have been unregistered. That results in a failure to re-register a memdev after the port is re-enabled leading to a crash like the following: cxl_port_setup_targets: cxl region4: cxl_host_bridge.0:port4 iw: 1 ig: 256 general protection fault, ... [..] RIP: 0010:cxl_region_setup_targets+0x897/0x9e0 [cxl_core] dev_name at include/linux/device.h:700 (inlined by) cxl_port_setup_targets at drivers/cxl/core/region.c:1155 (inlined by) cxl_region_setup_targets at drivers/cxl/core/region.c:1249 [..] Call Trace: attach_target+0x39a/0x760 [cxl_core] ? __mutex_unlock_slowpath+0x3a/0x290 cxl_add_to_region+0xb8/0x340 [cxl_core] ? lockdep_hardirqs_on+0x7d/0x100 discover_region+0x4b/0x80 [cxl_port] ? __pfx_discover_region+0x10/0x10 [cxl_port] device_for_each_child+0x58/0x90 cxl_port_probe+0x10e/0x130 [cxl_port] cxl_bus_probe+0x17/0x50 [cxl_core] Change the port ancestry walk to be by depth rather than by dport. This ensures that even if a port has unregistered its dports a deferred memdev cleanup will still be able to cleanup the memdev's interest in that port. The parent_port->dev.driver check is only needed for determining if the bottom up removal beat the top-down removal, but cxl_ep_remove() can always proceed given the port is pinned. That is, the two sources of cxl_ep_remove() are in cxl_detach_ep() and cxl_port_release(), and cxl_port_release() can not run if cxl_detach_ep() holds a reference. Fixes: 2703c16c75ae ("cxl/core/port: Add switch port enumeration") Link: http://lore.kernel.org/r/167564534874.847146.5222419648551436750.stgit@dwillia2-xfh.jf.intel.com [1] Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/167601992789.1924368.8083994227892600608.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/memdev.c | 1 + drivers/cxl/core/port.c | 58 ++++++++++++++++++++++++++--------------------- drivers/cxl/cxlmem.h | 2 ++ 3 files changed, 35 insertions(+), 26 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index a74a93310d26..3a8bc2b06047 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -246,6 +246,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, if (rc < 0) goto err; cxlmd->id = rc; + cxlmd->depth = -1; dev = &cxlmd->dev; device_initialize(dev); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 410c036c09fa..317bcf4dbd9d 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1207,6 +1207,7 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) get_device(&endpoint->dev); dev_set_drvdata(dev, endpoint); + cxlmd->depth = endpoint->depth; return devm_add_action_or_reset(dev, delete_endpoint, cxlmd); } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL); @@ -1241,50 +1242,55 @@ static void reap_dports(struct cxl_port *port) } } +struct detach_ctx { + struct cxl_memdev *cxlmd; + int depth; +}; + +static int port_has_memdev(struct device *dev, const void *data) +{ + const struct detach_ctx *ctx = data; + struct cxl_port *port; + + if (!is_cxl_port(dev)) + return 0; + + port = to_cxl_port(dev); + if (port->depth != ctx->depth) + return 0; + + return !!cxl_ep_load(port, ctx->cxlmd); +} + static void cxl_detach_ep(void *data) { struct cxl_memdev *cxlmd = data; - struct device *iter; - for (iter = &cxlmd->dev; iter; iter = grandparent(iter)) { - struct device *dport_dev = grandparent(iter); + for (int i = cxlmd->depth - 1; i >= 1; i--) { struct cxl_port *port, *parent_port; + struct detach_ctx ctx = { + .cxlmd = cxlmd, + .depth = i, + }; + struct device *dev; struct cxl_ep *ep; bool died = false; - if (!dport_dev) - break; - - port = find_cxl_port(dport_dev, NULL); - if (!port) - continue; - - if (is_cxl_root(port)) { - put_device(&port->dev); + dev = bus_find_device(&cxl_bus_type, NULL, &ctx, + port_has_memdev); + if (!dev) continue; - } + port = to_cxl_port(dev); parent_port = to_cxl_port(port->dev.parent); device_lock(&parent_port->dev); - if (!parent_port->dev.driver) { - /* - * The bottom-up race to delete the port lost to a - * top-down port disable, give up here, because the - * parent_port ->remove() will have cleaned up all - * descendants. - */ - device_unlock(&parent_port->dev); - put_device(&port->dev); - continue; - } - device_lock(&port->dev); ep = cxl_ep_load(port, cxlmd); dev_dbg(&cxlmd->dev, "disconnect %s from %s\n", ep ? dev_name(ep->ep) : "", dev_name(&port->dev)); cxl_ep_remove(port, ep); if (ep && !port->dead && xa_empty(&port->endpoints) && - !is_cxl_root(parent_port)) { + !is_cxl_root(parent_port) && parent_port->dev.driver) { /* * This was the last ep attached to a dynamically * enumerated port. Block new cxl_add_ep() and garbage diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index ab138004f644..c9da3c699a21 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -38,6 +38,7 @@ * @cxl_nvb: coordinate removal of @cxl_nvd if present * @cxl_nvd: optional bridge to an nvdimm if the device supports pmem * @id: id number of this memdev instance. + * @depth: endpoint port depth */ struct cxl_memdev { struct device dev; @@ -47,6 +48,7 @@ struct cxl_memdev { struct cxl_nvdimm_bridge *cxl_nvb; struct cxl_nvdimm *cxl_nvd; int id; + int depth; }; static inline struct cxl_memdev *to_cxl_memdev(struct device *dev) -- cgit From 7d505f982f53189da819581ad4cc13c8cef76803 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:05:39 -0800 Subject: cxl/region: Add a mode attribute for regions In preparation for a new region type, "ram" regions, add a mode attribute to clarify the mode of the decoders that can be added to a region. Share the internals of mode_show() (for decoders) with the region case. Reviewed-by: Vishal Verma Reviewed-by: Dave Jiang Reviewed-by: Gregory Price Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Tested-by: Fan Ni Link: https://lore.kernel.org/r/167601993930.1924368.4305018565539515665.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 12 +----------- drivers/cxl/core/region.c | 10 ++++++++++ drivers/cxl/cxl.h | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 11 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 317bcf4dbd9d..1e541956f605 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -180,17 +180,7 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr, { struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); - switch (cxled->mode) { - case CXL_DECODER_RAM: - return sysfs_emit(buf, "ram\n"); - case CXL_DECODER_PMEM: - return sysfs_emit(buf, "pmem\n"); - case CXL_DECODER_NONE: - return sysfs_emit(buf, "none\n"); - case CXL_DECODER_MIXED: - default: - return sysfs_emit(buf, "mixed\n"); - } + return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxled->mode)); } static ssize_t mode_store(struct device *dev, struct device_attribute *attr, diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 60828d01972a..17d2d0c12725 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -458,6 +458,15 @@ static ssize_t resource_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(resource); +static ssize_t mode_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + + return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode)); +} +static DEVICE_ATTR_RO(mode); + static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); @@ -585,6 +594,7 @@ static struct attribute *cxl_region_attrs[] = { &dev_attr_interleave_granularity.attr, &dev_attr_resource.attr, &dev_attr_size.attr, + &dev_attr_mode.attr, NULL, }; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index aa3af3bb73b2..ca76879af1de 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -320,6 +320,20 @@ enum cxl_decoder_mode { CXL_DECODER_DEAD, }; +static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode) +{ + static const char * const names[] = { + [CXL_DECODER_NONE] = "none", + [CXL_DECODER_RAM] = "ram", + [CXL_DECODER_PMEM] = "pmem", + [CXL_DECODER_MIXED] = "mixed", + }; + + if (mode >= CXL_DECODER_NONE && mode <= CXL_DECODER_MIXED) + return names[mode]; + return "mixed"; +} + /** * struct cxl_endpoint_decoder - Endpoint / SPA to DPA decoder * @cxld: base cxl_decoder_object -- cgit From a8e7d558f7e9c2921de8b487baa01d23f068aa93 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:05:45 -0800 Subject: cxl/region: Support empty uuids for non-pmem regions Shipping versions of the cxl-cli utility expect all regions to have a 'uuid' attribute. In preparation for 'ram' regions, update the 'uuid' attribute to return an empty string which satisfies the current expectations of 'cxl list -R'. Otherwise, 'cxl list -R' fails in the presence of regions with the 'uuid' attribute missing. Force the attribute to be read-only as there is no facility or expectation for a 'ram' region to recall its uuid from one boot to the next. Reviewed-by: Vishal Verma Tested-by: Fan Ni Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167601994558.1924368.12612811533724694444.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 17d2d0c12725..0fc80478ff6b 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -45,7 +45,10 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, rc = down_read_interruptible(&cxl_region_rwsem); if (rc) return rc; - rc = sysfs_emit(buf, "%pUb\n", &p->uuid); + if (cxlr->mode != CXL_DECODER_PMEM) + rc = sysfs_emit(buf, "\n"); + else + rc = sysfs_emit(buf, "%pUb\n", &p->uuid); up_read(&cxl_region_rwsem); return rc; @@ -300,8 +303,12 @@ static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a, struct device *dev = kobj_to_dev(kobj); struct cxl_region *cxlr = to_cxl_region(dev); + /* + * Support tooling that expects to find a 'uuid' attribute for all + * regions regardless of mode. + */ if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM) - return 0; + return 0444; return a->mode; } -- cgit From 1b9b7a6fd618239db47a83da39dff9e725a5865a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:05:51 -0800 Subject: cxl/region: Validate region mode vs decoder mode In preparation for a new region mode, do not, for example, allow 'ram' decoders to be assigned to 'pmem' regions and vice versa. Reviewed-by: Vishal Verma Reviewed-by: Gregory Price Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Tested-by: Fan Ni Link: https://lore.kernel.org/r/167601995111.1924368.7459128614177994602.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 0fc80478ff6b..285835145e9b 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1221,6 +1221,12 @@ static int cxl_region_attach(struct cxl_region *cxlr, struct cxl_dport *dport; int i, rc = -ENXIO; + if (cxled->mode != cxlr->mode) { + dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n", + dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode); + return -EINVAL; + } + if (cxled->mode == CXL_DECODER_DEAD) { dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev)); return -ENODEV; -- cgit From 6e099264185d05f50400ea494f5029264a4fe995 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:05:57 -0800 Subject: cxl/region: Add volatile region creation support Expand the region creation infrastructure to enable 'ram' (volatile-memory) regions. The internals of create_pmem_region_store() and create_pmem_region_show() are factored out into helpers __create_region() and __create_region_show() for the 'ram' case to reuse. Reviewed-by: Vishal Verma Reviewed-by: Gregory Price Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Tested-by: Fan Ni Link: https://lore.kernel.org/r/167601995775.1924368.352616146815830591.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/core.h | 1 + drivers/cxl/core/port.c | 14 +++++++++- drivers/cxl/core/region.c | 71 ++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 72 insertions(+), 14 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 8c04672dca56..5eb873da5a30 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -11,6 +11,7 @@ extern struct attribute_group cxl_base_attribute_group; #ifdef CONFIG_CXL_REGION extern struct device_attribute dev_attr_create_pmem_region; +extern struct device_attribute dev_attr_create_ram_region; extern struct device_attribute dev_attr_delete_region; extern struct device_attribute dev_attr_region; extern const struct device_type cxl_pmem_region_type; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 1e541956f605..9e5df64ea6b5 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -294,6 +294,7 @@ static struct attribute *cxl_decoder_root_attrs[] = { &dev_attr_cap_type3.attr, &dev_attr_target_list.attr, SET_CXL_REGION_ATTR(create_pmem_region) + SET_CXL_REGION_ATTR(create_ram_region) SET_CXL_REGION_ATTR(delete_region) NULL, }; @@ -305,6 +306,13 @@ static bool can_create_pmem(struct cxl_root_decoder *cxlrd) return (cxlrd->cxlsd.cxld.flags & flags) == flags; } +static bool can_create_ram(struct cxl_root_decoder *cxlrd) +{ + unsigned long flags = CXL_DECODER_F_TYPE3 | CXL_DECODER_F_RAM; + + return (cxlrd->cxlsd.cxld.flags & flags) == flags; +} + static umode_t cxl_root_decoder_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = kobj_to_dev(kobj); @@ -313,7 +321,11 @@ static umode_t cxl_root_decoder_visible(struct kobject *kobj, struct attribute * if (a == CXL_REGION_ATTR(create_pmem_region) && !can_create_pmem(cxlrd)) return 0; - if (a == CXL_REGION_ATTR(delete_region) && !can_create_pmem(cxlrd)) + if (a == CXL_REGION_ATTR(create_ram_region) && !can_create_ram(cxlrd)) + return 0; + + if (a == CXL_REGION_ATTR(delete_region) && + !(can_create_pmem(cxlrd) || can_create_ram(cxlrd))) return 0; return a->mode; diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 285835145e9b..e440db8611a4 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1689,6 +1689,15 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, struct device *dev; int rc; + switch (mode) { + case CXL_DECODER_RAM: + case CXL_DECODER_PMEM: + break; + default: + dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode); + return ERR_PTR(-EINVAL); + } + cxlr = cxl_region_alloc(cxlrd, id); if (IS_ERR(cxlr)) return cxlr; @@ -1717,12 +1726,38 @@ err: return ERR_PTR(rc); } +static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf) +{ + return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id)); +} + static ssize_t create_pmem_region_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); + return __create_region_show(to_cxl_root_decoder(dev), buf); +} - return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id)); +static ssize_t create_ram_region_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return __create_region_show(to_cxl_root_decoder(dev), buf); +} + +static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, + enum cxl_decoder_mode mode, int id) +{ + int rc; + + rc = memregion_alloc(GFP_KERNEL); + if (rc < 0) + return ERR_PTR(rc); + + if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) { + memregion_free(rc); + return ERR_PTR(-EBUSY); + } + + return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_EXPANDER); } static ssize_t create_pmem_region_store(struct device *dev, @@ -1731,29 +1766,39 @@ static ssize_t create_pmem_region_store(struct device *dev, { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); struct cxl_region *cxlr; - int id, rc; + int rc, id; rc = sscanf(buf, "region%d\n", &id); if (rc != 1) return -EINVAL; - rc = memregion_alloc(GFP_KERNEL); - if (rc < 0) - return rc; + cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id); + if (IS_ERR(cxlr)) + return PTR_ERR(cxlr); - if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) { - memregion_free(rc); - return -EBUSY; - } + return len; +} +DEVICE_ATTR_RW(create_pmem_region); + +static ssize_t create_ram_region_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); + struct cxl_region *cxlr; + int rc, id; - cxlr = devm_cxl_add_region(cxlrd, id, CXL_DECODER_PMEM, - CXL_DECODER_EXPANDER); + rc = sscanf(buf, "region%d\n", &id); + if (rc != 1) + return -EINVAL; + + cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id); if (IS_ERR(cxlr)) return PTR_ERR(cxlr); return len; } -DEVICE_ATTR_RW(create_pmem_region); +DEVICE_ATTR_RW(create_ram_region); static ssize_t region_show(struct device *dev, struct device_attribute *attr, char *buf) -- cgit From 3528b1e101a40ff75c8130e0b94e9555d2976f45 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:04 -0800 Subject: cxl/region: Refactor attach_target() for autodiscovery Region autodiscovery is the process of kernel creating 'struct cxl_region' object to represent active CXL memory ranges it finds already active in hardware when the driver loads. Typically this happens when platform firmware establishes CXL memory regions and then publishes them in the memory map. However, this can also happen in the case of kexec-reboot after the kernel has created regions. In the autodiscovery case the region creation process starts with a known endpoint decoder. Refactor attach_target() into a helper that is suitable to be called from either sysfs, for runtime region creation, or from cxl_port_probe() after it has enumerated all endpoint decoders. The cxl_port_probe() context is an async device-core probing context, so it is not appropriate to allow SIGTERM to interrupt the assembly process. Refactor attach_target() to take @cxled and @state as arguments where @state indicates whether waiting from the region rwsem is interruptible or not. No behavior change is intended. Reviewed-by: Vishal Verma Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Tested-by: Fan Ni Link: https://lore.kernel.org/r/167601996393.1924368.2202255054618600069.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 47 ++++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 19 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index e440db8611a4..040bbd39c81d 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1422,31 +1422,25 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) up_write(&cxl_region_rwsem); } -static int attach_target(struct cxl_region *cxlr, const char *decoder, int pos) +static int attach_target(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos, + unsigned int state) { - struct device *dev; - int rc; - - dev = bus_find_device_by_name(&cxl_bus_type, NULL, decoder); - if (!dev) - return -ENODEV; - - if (!is_endpoint_decoder(dev)) { - put_device(dev); - return -EINVAL; - } + int rc = 0; - rc = down_write_killable(&cxl_region_rwsem); + if (state == TASK_INTERRUPTIBLE) + rc = down_write_killable(&cxl_region_rwsem); + else + down_write(&cxl_region_rwsem); if (rc) - goto out; + return rc; + down_read(&cxl_dpa_rwsem); - rc = cxl_region_attach(cxlr, to_cxl_endpoint_decoder(dev), pos); + rc = cxl_region_attach(cxlr, cxled, pos); if (rc == 0) set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags); up_read(&cxl_dpa_rwsem); up_write(&cxl_region_rwsem); -out: - put_device(dev); return rc; } @@ -1484,8 +1478,23 @@ static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos, if (sysfs_streq(buf, "\n")) rc = detach_target(cxlr, pos); - else - rc = attach_target(cxlr, buf, pos); + else { + struct device *dev; + + dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf); + if (!dev) + return -ENODEV; + + if (!is_endpoint_decoder(dev)) { + rc = -EINVAL; + goto out; + } + + rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos, + TASK_INTERRUPTIBLE); +out: + put_device(dev); + } if (rc < 0) return rc; -- cgit From 86987c766276acf1289700cd38bd6d5b5a167fea Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:09 -0800 Subject: cxl/region: Cleanup target list on attach error Jonathan noticed that the target list setup is not unwound completely upon error. Undo all the setup in the 'err_decrement:' exit path. Fixes: 27b3f8d13830 ("cxl/region: Program target lists") Reported-by: Jonathan Cameron Link: http://lore.kernel.org/r/20230208123031.00006990@Huawei.com Reviewed-by: Ira Weiny Reviewed-by: Vishal Verma Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167601996980.1924368.390423634911157277.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 040bbd39c81d..ae7d3adcd41a 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1347,6 +1347,8 @@ static int cxl_region_attach(struct cxl_region *cxlr, err_decrement: p->nr_targets--; + cxled->pos = -1; + p->targets[pos] = NULL; err: for (iter = ep_port; !is_cxl_root(iter); iter = to_cxl_port(iter->dev.parent)) -- cgit From 9995576cef48dcbb0ba3de068292ed14f72fa0eb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:15 -0800 Subject: cxl/region: Move region-position validation to a helper In preparation for region autodiscovery, that needs all devices discovered before their relative position in the region can be determined, consolidate all position dependent validation in a helper. Recall that in the on-demand region creation flow the end-user picks the position of a given endpoint decoder in a region. In the autodiscovery case the position of an endpoint decoder can only be determined after all other endpoint decoders that claim to decode the region's address range have been enumerated and attached. So, in the autodiscovery case endpoint decoders may be attached before their relative position is known. Once all decoders arrive, then positions can be determined and validated with cxl_region_validate_position() the same as user initiated on-demand creation. Reviewed-by: Vishal Verma Tested-by: Fan Ni Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167601997584.1924368.4615769326126138969.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 119 +++++++++++++++++++++++++++++----------------- 1 file changed, 76 insertions(+), 43 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index ae7d3adcd41a..691605f1e120 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1211,35 +1211,13 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr) return 0; } -static int cxl_region_attach(struct cxl_region *cxlr, - struct cxl_endpoint_decoder *cxled, int pos) +static int cxl_region_validate_position(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, + int pos) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_port *ep_port, *root_port, *iter; struct cxl_region_params *p = &cxlr->params; - struct cxl_dport *dport; - int i, rc = -ENXIO; - - if (cxled->mode != cxlr->mode) { - dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n", - dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode); - return -EINVAL; - } - - if (cxled->mode == CXL_DECODER_DEAD) { - dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev)); - return -ENODEV; - } - - /* all full of members, or interleave config not established? */ - if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) { - dev_dbg(&cxlr->dev, "region already active\n"); - return -EBUSY; - } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) { - dev_dbg(&cxlr->dev, "interleave config missing\n"); - return -ENXIO; - } + int i; if (pos < 0 || pos >= p->interleave_ways) { dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos, @@ -1278,6 +1256,71 @@ static int cxl_region_attach(struct cxl_region *cxlr, } } + return 0; +} + +static int cxl_region_attach_position(struct cxl_region *cxlr, + struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder *cxled, + const struct cxl_dport *dport, int pos) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_port *iter; + int rc; + + if (cxlrd->calc_hb(cxlrd, pos) != dport) { + dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + dev_name(&cxlrd->cxlsd.cxld.dev)); + return -ENXIO; + } + + for (iter = cxled_to_port(cxled); !is_cxl_root(iter); + iter = to_cxl_port(iter->dev.parent)) { + rc = cxl_port_attach_region(iter, cxlr, cxled, pos); + if (rc) + goto err; + } + + return 0; + +err: + for (iter = cxled_to_port(cxled); !is_cxl_root(iter); + iter = to_cxl_port(iter->dev.parent)) + cxl_port_detach_region(iter, cxlr, cxled); + return rc; +} + +static int cxl_region_attach(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_region_params *p = &cxlr->params; + struct cxl_port *ep_port, *root_port; + struct cxl_dport *dport; + int rc = -ENXIO; + + if (cxled->mode != cxlr->mode) { + dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n", + dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode); + return -EINVAL; + } + + if (cxled->mode == CXL_DECODER_DEAD) { + dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev)); + return -ENODEV; + } + + /* all full of members, or interleave config not established? */ + if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) { + dev_dbg(&cxlr->dev, "region already active\n"); + return -EBUSY; + } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) { + dev_dbg(&cxlr->dev, "interleave config missing\n"); + return -ENXIO; + } + ep_port = cxled_to_port(cxled); root_port = cxlrd_to_port(cxlrd); dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge); @@ -1288,13 +1331,6 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -ENXIO; } - if (cxlrd->calc_hb(cxlrd, pos) != dport) { - dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n", - dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), - dev_name(&cxlrd->cxlsd.cxld.dev)); - return -ENXIO; - } - if (cxled->cxld.target_type != cxlr->type) { dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), @@ -1318,12 +1354,13 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -EINVAL; } - for (iter = ep_port; !is_cxl_root(iter); - iter = to_cxl_port(iter->dev.parent)) { - rc = cxl_port_attach_region(iter, cxlr, cxled, pos); - if (rc) - goto err; - } + rc = cxl_region_validate_position(cxlr, cxled, pos); + if (rc) + return rc; + + rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos); + if (rc) + return rc; p->targets[pos] = cxled; cxled->pos = pos; @@ -1349,10 +1386,6 @@ err_decrement: p->nr_targets--; cxled->pos = -1; p->targets[pos] = NULL; -err: - for (iter = ep_port; !is_cxl_root(iter); - iter = to_cxl_port(iter->dev.parent)) - cxl_port_detach_region(iter, cxlr, cxled); return rc; } -- cgit From 93c177fd6ff0655a5fa43ec945a57d7b0200ad80 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:21 -0800 Subject: kernel/range: Uplevel the cxl subsystem's range_contains() helper In support of the CXL subsystem's use of 'struct range' to track decode address ranges, add a common range_contains() implementation with identical semantics as resource_contains(); The existing 'range_contains()' in lib/stackinit_kunit.c is namespaced with a 'stackinit_' prefix. Cc: Kees Cook Reviewed-by: Vishal Verma Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Gregory Price Reviewed-by: Ira Weiny Tested-by: Fan Ni Link: https://lore.kernel.org/r/167601998163.1924368.6067392174077323935.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 1d1492440287..9ed2120dbf8a 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -214,11 +214,6 @@ static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds) return devm_add_action_or_reset(host, clear_mem_enable, cxlds); } -static bool range_contains(struct range *r1, struct range *r2) -{ - return r1->start <= r2->start && r1->end >= r2->end; -} - /* require dvsec ranges to be covered by a locked platform window */ static int dvsec_range_allowed(struct device *dev, void *arg) { -- cgit From 45d235c56b2bc51749af9cc8fe6ace18aa8b81be Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:27 -0800 Subject: cxl/region: Enable CONFIG_CXL_REGION to be toggled Add help text and a label so the CXL_REGION config option can be toggled. This is mainly to enable compile testing without region support. Reviewed-by: Vishal Verma Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Gregory Price Tested-by: Fan Ni Link: https://lore.kernel.org/r/167601998765.1924368.258370414771847699.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/Kconfig | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 0ac53c422c31..163c094e67ae 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -104,12 +104,22 @@ config CXL_SUSPEND depends on SUSPEND && CXL_MEM config CXL_REGION - bool + bool "CXL: Region Support" default CXL_BUS # For MAX_PHYSMEM_BITS depends on SPARSEMEM select MEMREGION select GET_FREE_REGION + help + Enable the CXL core to enumerate and provision CXL regions. A CXL + region is defined by one or more CXL expanders that decode a given + system-physical address range. For CXL regions established by + platform-firmware this option enables memory error handling to + identify the devices participating in a given interleaved memory + range. Otherwise, platform-firmware managed CXL is enabled by being + placed in the system address map and does not need a driver. + + If unsure say 'y' config CXL_REGION_INVALIDATION_TEST bool "CXL: Region Cache Management Bypass (TEST)" -- cgit From 32ce3f185bbb3802cd0ac925bc8fddf1797e0ad4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:33 -0800 Subject: cxl/port: Split endpoint and switch port probe Jonathan points out that the shared code between the switch and endpoint case is small. Before adding another is_cxl_endpoint() conditional, just split the two cases. Rather than duplicate the "Couldn't enumerate decoders" error message take the opportunity to improve the error messages in devm_cxl_enumerate_decoders(). Reported-by: Jonathan Cameron Reviewed-by: Vishal Verma Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167601999378.1924368.15071142145866277623.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 11 +++++--- drivers/cxl/port.c | 69 ++++++++++++++++++++++++++++---------------------- 2 files changed, 47 insertions(+), 33 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index dcc16d7cb8f3..a0891c3464f1 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -826,7 +826,8 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) cxled = cxl_endpoint_decoder_alloc(port); if (IS_ERR(cxled)) { dev_warn(&port->dev, - "Failed to allocate the decoder\n"); + "Failed to allocate decoder%d.%d\n", + port->id, i); return PTR_ERR(cxled); } cxld = &cxled->cxld; @@ -836,7 +837,8 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) cxlsd = cxl_switch_decoder_alloc(port, target_count); if (IS_ERR(cxlsd)) { dev_warn(&port->dev, - "Failed to allocate the decoder\n"); + "Failed to allocate decoder%d.%d\n", + port->id, i); return PTR_ERR(cxlsd); } cxld = &cxlsd->cxld; @@ -844,13 +846,16 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) rc = init_hdm_decoder(port, cxld, target_map, hdm, i, &dpa_base); if (rc) { + dev_warn(&port->dev, + "Failed to initialize decoder%d.%d\n", + port->id, i); put_device(&cxld->dev); return rc; } rc = add_hdm_decoder(port, cxld, target_map); if (rc) { dev_warn(&port->dev, - "Failed to add decoder to port\n"); + "Failed to add decoder%d.%d\n", port->id, i); return rc; } } diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 5453771bf330..a8d46a67b45e 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -30,55 +30,64 @@ static void schedule_detach(void *cxlmd) schedule_cxl_memdev_detach(cxlmd); } -static int cxl_port_probe(struct device *dev) +static int cxl_switch_port_probe(struct cxl_port *port) { - struct cxl_port *port = to_cxl_port(dev); struct cxl_hdm *cxlhdm; int rc; + rc = devm_cxl_port_enumerate_dports(port); + if (rc < 0) + return rc; - if (!is_cxl_endpoint(port)) { - rc = devm_cxl_port_enumerate_dports(port); - if (rc < 0) - return rc; - if (rc == 1) - return devm_cxl_add_passthrough_decoder(port); - } + if (rc == 1) + return devm_cxl_add_passthrough_decoder(port); cxlhdm = devm_cxl_setup_hdm(port); if (IS_ERR(cxlhdm)) return PTR_ERR(cxlhdm); - if (is_cxl_endpoint(port)) { - struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); - struct cxl_dev_state *cxlds = cxlmd->cxlds; + return devm_cxl_enumerate_decoders(cxlhdm); +} - /* Cache the data early to ensure is_visible() works */ - read_cdat_data(port); +static int cxl_endpoint_port_probe(struct cxl_port *port) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_hdm *cxlhdm; + int rc; + + cxlhdm = devm_cxl_setup_hdm(port); + if (IS_ERR(cxlhdm)) + return PTR_ERR(cxlhdm); - get_device(&cxlmd->dev); - rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd); - if (rc) - return rc; + /* Cache the data early to ensure is_visible() works */ + read_cdat_data(port); - rc = cxl_hdm_decode_init(cxlds, cxlhdm); - if (rc) - return rc; + get_device(&cxlmd->dev); + rc = devm_add_action_or_reset(&port->dev, schedule_detach, cxlmd); + if (rc) + return rc; - rc = cxl_await_media_ready(cxlds); - if (rc) { - dev_err(dev, "Media not active (%d)\n", rc); - return rc; - } - } + rc = cxl_hdm_decode_init(cxlds, cxlhdm); + if (rc) + return rc; - rc = devm_cxl_enumerate_decoders(cxlhdm); + rc = cxl_await_media_ready(cxlds); if (rc) { - dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc); + dev_err(&port->dev, "Media not active (%d)\n", rc); return rc; } - return 0; + return devm_cxl_enumerate_decoders(cxlhdm); +} + +static int cxl_port_probe(struct device *dev) +{ + struct cxl_port *port = to_cxl_port(dev); + + if (is_cxl_endpoint(port)) + return cxl_endpoint_port_probe(port); + return cxl_switch_port_probe(port); } static ssize_t CDAT_read(struct file *filp, struct kobject *kobj, -- cgit From a32320b71f085f8d82afedcf285f1682c8c00aed Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 17:31:17 -0800 Subject: cxl/region: Add region autodiscovery Region autodiscovery is an asynchronous state machine advanced by cxl_port_probe(). After the decoders on an endpoint port are enumerated they are scanned for actively enabled instances. Each active decoder is flagged for auto-assembly CXL_DECODER_F_AUTO and attached to a region. If a region does not already exist for the address range setting of the decoder one is created. That creation process may race with other decoders of the same region being discovered since cxl_port_probe() is asynchronous. A new 'struct cxl_root_decoder' lock, @range_lock, is introduced to mitigate that race. Once all decoders have arrived, "p->nr_targets == p->interleave_ways", they are sorted by their relative decode position. The sort algorithm involves finding the point in the cxl_port topology where one leg of the decode leads to deviceA and the other deviceB. At that point in the topology the target order in the 'struct cxl_switch_decoder' indicates the relative position of those endpoint decoders in the region. >From that point the region goes through the same setup and validation steps as user-created regions, but instead of programming the decoders it validates that driver would have written the same values to the decoders as were already present. Tested-by: Fan Ni Reviewed-by: Vishal Verma Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167601999958.1924368.9366954455835735048.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 11 + drivers/cxl/core/port.c | 2 + drivers/cxl/core/region.c | 502 +++++++++++++++++++++++++++++++++++++++++++++- drivers/cxl/cxl.h | 29 +++ drivers/cxl/port.c | 47 ++++- 5 files changed, 580 insertions(+), 11 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index a0891c3464f1..8c29026a4b9d 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -676,6 +676,14 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld) port->commit_end--; cxld->flags &= ~CXL_DECODER_F_ENABLE; + /* Userspace is now responsible for reconfiguring this decoder */ + if (is_endpoint_decoder(&cxld->dev)) { + struct cxl_endpoint_decoder *cxled; + + cxled = to_cxl_endpoint_decoder(&cxld->dev); + cxled->state = CXL_DECODER_STATE_MANUAL; + } + return 0; } @@ -783,6 +791,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, return rc; } *dpa_base += dpa_size + skip; + + cxled->state = CXL_DECODER_STATE_AUTO; + return 0; } diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 9e5df64ea6b5..59620528571a 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -446,6 +446,7 @@ bool is_endpoint_decoder(struct device *dev) { return dev->type == &cxl_decoder_endpoint_type; } +EXPORT_SYMBOL_NS_GPL(is_endpoint_decoder, CXL); bool is_root_decoder(struct device *dev) { @@ -1628,6 +1629,7 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, } cxlrd->calc_hb = calc_hb; + mutex_init(&cxlrd->range_lock); cxld = &cxlsd->cxld; cxld->dev.type = &cxl_decoder_root_type; diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 691605f1e120..8c29204279e9 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -524,7 +525,12 @@ static void cxl_region_iomem_release(struct cxl_region *cxlr) if (device_is_registered(&cxlr->dev)) lockdep_assert_held_write(&cxl_region_rwsem); if (p->res) { - remove_resource(p->res); + /* + * Autodiscovered regions may not have been able to insert their + * resource. + */ + if (p->res->parent) + remove_resource(p->res); kfree(p->res); p->res = NULL; } @@ -1105,12 +1111,35 @@ static int cxl_port_setup_targets(struct cxl_port *port, return rc; } - cxld->interleave_ways = iw; - cxld->interleave_granularity = ig; - cxld->hpa_range = (struct range) { - .start = p->res->start, - .end = p->res->end, - }; + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { + if (cxld->interleave_ways != iw || + cxld->interleave_granularity != ig || + cxld->hpa_range.start != p->res->start || + cxld->hpa_range.end != p->res->end || + ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) { + dev_err(&cxlr->dev, + "%s:%s %s expected iw: %d ig: %d %pr\n", + dev_name(port->uport), dev_name(&port->dev), + __func__, iw, ig, p->res); + dev_err(&cxlr->dev, + "%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n", + dev_name(port->uport), dev_name(&port->dev), + __func__, cxld->interleave_ways, + cxld->interleave_granularity, + (cxld->flags & CXL_DECODER_F_ENABLE) ? + "enabled" : + "disabled", + cxld->hpa_range.start, cxld->hpa_range.end); + return -ENXIO; + } + } else { + cxld->interleave_ways = iw; + cxld->interleave_granularity = ig; + cxld->hpa_range = (struct range) { + .start = p->res->start, + .end = p->res->end, + }; + } dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport), dev_name(&port->dev), iw, ig); add_target: @@ -1121,7 +1150,17 @@ add_target: dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos); return -ENXIO; } - cxlsd->target[cxl_rr->nr_targets_set] = ep->dport; + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { + if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) { + dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n", + dev_name(port->uport), dev_name(&port->dev), + dev_name(&cxlsd->cxld.dev), + dev_name(ep->dport->dport), + cxl_rr->nr_targets_set); + return -ENXIO; + } + } else + cxlsd->target[cxl_rr->nr_targets_set] = ep->dport; inc = 1; out_target_set: cxl_rr->nr_targets_set += inc; @@ -1163,6 +1202,13 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr) struct cxl_ep *ep; int i; + /* + * In the auto-discovery case skip automatic teardown since the + * address space is already active + */ + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) + return; + for (i = 0; i < p->nr_targets; i++) { cxled = p->targets[i]; cxlmd = cxled_to_memdev(cxled); @@ -1195,8 +1241,8 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr) iter = to_cxl_port(iter->dev.parent); /* - * Descend the topology tree programming targets while - * looking for conflicts. + * Descend the topology tree programming / validating + * targets while looking for conflicts. */ for (ep = cxl_ep_load(iter, cxlmd); iter; iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) { @@ -1291,6 +1337,191 @@ err: return rc; } +static int cxl_region_attach_auto(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos) +{ + struct cxl_region_params *p = &cxlr->params; + + if (cxled->state != CXL_DECODER_STATE_AUTO) { + dev_err(&cxlr->dev, + "%s: unable to add decoder to autodetected region\n", + dev_name(&cxled->cxld.dev)); + return -EINVAL; + } + + if (pos >= 0) { + dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n", + dev_name(&cxled->cxld.dev), pos); + return -EINVAL; + } + + if (p->nr_targets >= p->interleave_ways) { + dev_err(&cxlr->dev, "%s: no more target slots available\n", + dev_name(&cxled->cxld.dev)); + return -ENXIO; + } + + /* + * Temporarily record the endpoint decoder into the target array. Yes, + * this means that userspace can view devices in the wrong position + * before the region activates, and must be careful to understand when + * it might be racing region autodiscovery. + */ + pos = p->nr_targets; + p->targets[pos] = cxled; + cxled->pos = pos; + p->nr_targets++; + + return 0; +} + +static struct cxl_port *next_port(struct cxl_port *port) +{ + if (!port->parent_dport) + return NULL; + return port->parent_dport->port; +} + +static int decoder_match_range(struct device *dev, void *data) +{ + struct cxl_endpoint_decoder *cxled = data; + struct cxl_switch_decoder *cxlsd; + + if (!is_switch_decoder(dev)) + return 0; + + cxlsd = to_cxl_switch_decoder(dev); + return range_contains(&cxlsd->cxld.hpa_range, &cxled->cxld.hpa_range); +} + +static void find_positions(const struct cxl_switch_decoder *cxlsd, + const struct cxl_port *iter_a, + const struct cxl_port *iter_b, int *a_pos, + int *b_pos) +{ + int i; + + for (i = 0, *a_pos = -1, *b_pos = -1; i < cxlsd->nr_targets; i++) { + if (cxlsd->target[i] == iter_a->parent_dport) + *a_pos = i; + else if (cxlsd->target[i] == iter_b->parent_dport) + *b_pos = i; + if (*a_pos >= 0 && *b_pos >= 0) + break; + } +} + +static int cmp_decode_pos(const void *a, const void *b) +{ + struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a; + struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b; + struct cxl_memdev *cxlmd_a = cxled_to_memdev(cxled_a); + struct cxl_memdev *cxlmd_b = cxled_to_memdev(cxled_b); + struct cxl_port *port_a = cxled_to_port(cxled_a); + struct cxl_port *port_b = cxled_to_port(cxled_b); + struct cxl_port *iter_a, *iter_b, *port = NULL; + struct cxl_switch_decoder *cxlsd; + struct device *dev; + int a_pos, b_pos; + unsigned int seq; + + /* Exit early if any prior sorting failed */ + if (cxled_a->pos < 0 || cxled_b->pos < 0) + return 0; + + /* + * Walk up the hierarchy to find a shared port, find the decoder that + * maps the range, compare the relative position of those dport + * mappings. + */ + for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) { + struct cxl_port *next_a, *next_b; + + next_a = next_port(iter_a); + if (!next_a) + break; + + for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) { + next_b = next_port(iter_b); + if (next_a != next_b) + continue; + port = next_a; + break; + } + + if (port) + break; + } + + if (!port) { + dev_err(cxlmd_a->dev.parent, + "failed to find shared port with %s\n", + dev_name(cxlmd_b->dev.parent)); + goto err; + } + + dev = device_find_child(&port->dev, cxled_a, decoder_match_range); + if (!dev) { + struct range *range = &cxled_a->cxld.hpa_range; + + dev_err(port->uport, + "failed to find decoder that maps %#llx-%#llx\n", + range->start, range->end); + goto err; + } + + cxlsd = to_cxl_switch_decoder(dev); + do { + seq = read_seqbegin(&cxlsd->target_lock); + find_positions(cxlsd, iter_a, iter_b, &a_pos, &b_pos); + } while (read_seqretry(&cxlsd->target_lock, seq)); + + put_device(dev); + + if (a_pos < 0 || b_pos < 0) { + dev_err(port->uport, + "failed to find shared decoder for %s and %s\n", + dev_name(cxlmd_a->dev.parent), + dev_name(cxlmd_b->dev.parent)); + goto err; + } + + dev_dbg(port->uport, "%s comes %s %s\n", dev_name(cxlmd_a->dev.parent), + a_pos - b_pos < 0 ? "before" : "after", + dev_name(cxlmd_b->dev.parent)); + + return a_pos - b_pos; +err: + cxled_a->pos = -1; + return 0; +} + +static int cxl_region_sort_targets(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + int i, rc = 0; + + sort(p->targets, p->nr_targets, sizeof(p->targets[0]), cmp_decode_pos, + NULL); + + for (i = 0; i < p->nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + + /* + * Record that sorting failed, but still continue to restore + * cxled->pos with its ->targets[] position so that follow-on + * code paths can reliably do p->targets[cxled->pos] to + * self-reference their entry. + */ + if (cxled->pos < 0) + rc = -ENXIO; + cxled->pos = i; + } + + dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful"); + return rc; +} + static int cxl_region_attach(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled, int pos) { @@ -1354,6 +1585,50 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -EINVAL; } + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { + int i; + + rc = cxl_region_attach_auto(cxlr, cxled, pos); + if (rc) + return rc; + + /* await more targets to arrive... */ + if (p->nr_targets < p->interleave_ways) + return 0; + + /* + * All targets are here, which implies all PCI enumeration that + * affects this region has been completed. Walk the topology to + * sort the devices into their relative region decode position. + */ + rc = cxl_region_sort_targets(cxlr); + if (rc) + return rc; + + for (i = 0; i < p->nr_targets; i++) { + cxled = p->targets[i]; + ep_port = cxled_to_port(cxled); + dport = cxl_find_dport_by_dev(root_port, + ep_port->host_bridge); + rc = cxl_region_attach_position(cxlr, cxlrd, cxled, + dport, i); + if (rc) + return rc; + } + + rc = cxl_region_setup_targets(cxlr); + if (rc) + return rc; + + /* + * If target setup succeeds in the autodiscovery case + * then the region is already committed. + */ + p->state = CXL_CONFIG_COMMIT; + + return 0; + } + rc = cxl_region_validate_position(cxlr, cxled, pos); if (rc) return rc; @@ -2087,6 +2362,192 @@ err_bridge: return rc; } +static int match_decoder_by_range(struct device *dev, void *data) +{ + struct range *r1, *r2 = data; + struct cxl_root_decoder *cxlrd; + + if (!is_root_decoder(dev)) + return 0; + + cxlrd = to_cxl_root_decoder(dev); + r1 = &cxlrd->cxlsd.cxld.hpa_range; + return range_contains(r1, r2); +} + +static int match_region_by_range(struct device *dev, void *data) +{ + struct cxl_region_params *p; + struct cxl_region *cxlr; + struct range *r = data; + int rc = 0; + + if (!is_cxl_region(dev)) + return 0; + + cxlr = to_cxl_region(dev); + p = &cxlr->params; + + down_read(&cxl_region_rwsem); + if (p->res && p->res->start == r->start && p->res->end == r->end) + rc = 1; + up_read(&cxl_region_rwsem); + + return rc; +} + +/* Establish an empty region covering the given HPA range */ +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder *cxled) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_port *port = cxlrd_to_port(cxlrd); + struct range *hpa = &cxled->cxld.hpa_range; + struct cxl_region_params *p; + struct cxl_region *cxlr; + struct resource *res; + int rc; + + do { + cxlr = __create_region(cxlrd, cxled->mode, + atomic_read(&cxlrd->region_id)); + } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); + + if (IS_ERR(cxlr)) { + dev_err(cxlmd->dev.parent, + "%s:%s: %s failed assign region: %ld\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + __func__, PTR_ERR(cxlr)); + return cxlr; + } + + down_write(&cxl_region_rwsem); + p = &cxlr->params; + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { + dev_err(cxlmd->dev.parent, + "%s:%s: %s autodiscovery interrupted\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + __func__); + rc = -EBUSY; + goto err; + } + + set_bit(CXL_REGION_F_AUTO, &cxlr->flags); + + res = kmalloc(sizeof(*res), GFP_KERNEL); + if (!res) { + rc = -ENOMEM; + goto err; + } + + *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa), + dev_name(&cxlr->dev)); + rc = insert_resource(cxlrd->res, res); + if (rc) { + /* + * Platform-firmware may not have split resources like "System + * RAM" on CXL window boundaries see cxl_region_iomem_release() + */ + dev_warn(cxlmd->dev.parent, + "%s:%s: %s %s cannot insert resource\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + __func__, dev_name(&cxlr->dev)); + } + + p->res = res; + p->interleave_ways = cxled->cxld.interleave_ways; + p->interleave_granularity = cxled->cxld.interleave_granularity; + p->state = CXL_CONFIG_INTERLEAVE_ACTIVE; + + rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group()); + if (rc) + goto err; + + dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__, + dev_name(&cxlr->dev), p->res, p->interleave_ways, + p->interleave_granularity); + + /* ...to match put_device() in cxl_add_to_region() */ + get_device(&cxlr->dev); + up_write(&cxl_region_rwsem); + + return cxlr; + +err: + up_write(&cxl_region_rwsem); + devm_release_action(port->uport, unregister_region, cxlr); + return ERR_PTR(rc); +} + +int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct range *hpa = &cxled->cxld.hpa_range; + struct cxl_decoder *cxld = &cxled->cxld; + struct device *cxlrd_dev, *region_dev; + struct cxl_root_decoder *cxlrd; + struct cxl_region_params *p; + struct cxl_region *cxlr; + bool attach = false; + int rc; + + cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range, + match_decoder_by_range); + if (!cxlrd_dev) { + dev_err(cxlmd->dev.parent, + "%s:%s no CXL window for range %#llx:%#llx\n", + dev_name(&cxlmd->dev), dev_name(&cxld->dev), + cxld->hpa_range.start, cxld->hpa_range.end); + return -ENXIO; + } + + cxlrd = to_cxl_root_decoder(cxlrd_dev); + + /* + * Ensure that if multiple threads race to construct_region() for @hpa + * one does the construction and the others add to that. + */ + mutex_lock(&cxlrd->range_lock); + region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa, + match_region_by_range); + if (!region_dev) { + cxlr = construct_region(cxlrd, cxled); + region_dev = &cxlr->dev; + } else + cxlr = to_cxl_region(region_dev); + mutex_unlock(&cxlrd->range_lock); + + if (IS_ERR(cxlr)) { + rc = PTR_ERR(cxlr); + goto out; + } + + attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE); + + down_read(&cxl_region_rwsem); + p = &cxlr->params; + attach = p->state == CXL_CONFIG_COMMIT; + up_read(&cxl_region_rwsem); + + if (attach) { + /* + * If device_attach() fails the range may still be active via + * the platform-firmware memory map, otherwise the driver for + * regions is local to this file, so driver matching can't fail. + */ + if (device_attach(&cxlr->dev) < 0) + dev_err(&cxlr->dev, "failed to enable, range: %pr\n", + p->res); + } + + put_device(region_dev); +out: + put_device(cxlrd_dev); + return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL); + static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) { if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags)) @@ -2111,6 +2572,15 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) return 0; } +static int is_system_ram(struct resource *res, void *arg) +{ + struct cxl_region *cxlr = arg; + struct cxl_region_params *p = &cxlr->params; + + dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res); + return 1; +} + static int cxl_region_probe(struct device *dev) { struct cxl_region *cxlr = to_cxl_region(dev); @@ -2144,6 +2614,18 @@ out: switch (cxlr->mode) { case CXL_DECODER_PMEM: return devm_cxl_add_pmem_region(cxlr); + case CXL_DECODER_RAM: + /* + * The region can not be manged by CXL if any portion of + * it is already online as 'System RAM' + */ + if (walk_iomem_res_desc(IORES_DESC_NONE, + IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY, + p->res->start, p->res->end, cxlr, + is_system_ram) > 0) + return 0; + dev_dbg(dev, "TODO: hookup devdax\n"); + return 0; default: dev_dbg(&cxlr->dev, "unsupported region mode: %d\n", cxlr->mode); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index ca76879af1de..c8ee4bb8cce6 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -261,6 +261,8 @@ resource_size_t cxl_rcrb_to_component(struct device *dev, * cxl_decoder flags that define the type of memory / devices this * decoder supports as well as configuration lock status See "CXL 2.0 * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details. + * Additionally indicate whether decoder settings were autodetected, + * user customized. */ #define CXL_DECODER_F_RAM BIT(0) #define CXL_DECODER_F_PMEM BIT(1) @@ -334,12 +336,22 @@ static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode) return "mixed"; } +/* + * Track whether this decoder is reserved for region autodiscovery, or + * free for userspace provisioning. + */ +enum cxl_decoder_state { + CXL_DECODER_STATE_MANUAL, + CXL_DECODER_STATE_AUTO, +}; + /** * struct cxl_endpoint_decoder - Endpoint / SPA to DPA decoder * @cxld: base cxl_decoder_object * @dpa_res: actively claimed DPA span of this decoder * @skip: offset into @dpa_res where @cxld.hpa_range maps * @mode: which memory type / access-mode-partition this decoder targets + * @state: autodiscovery state * @pos: interleave position in @cxld.region */ struct cxl_endpoint_decoder { @@ -347,6 +359,7 @@ struct cxl_endpoint_decoder { struct resource *dpa_res; resource_size_t skip; enum cxl_decoder_mode mode; + enum cxl_decoder_state state; int pos; }; @@ -380,6 +393,7 @@ typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd, * @region_id: region id for next region provisioning event * @calc_hb: which host bridge covers the n'th position by granularity * @platform_data: platform specific configuration data + * @range_lock: sync region autodiscovery by address range * @cxlsd: base cxl switch decoder */ struct cxl_root_decoder { @@ -387,6 +401,7 @@ struct cxl_root_decoder { atomic_t region_id; cxl_calc_hb_fn calc_hb; void *platform_data; + struct mutex range_lock; struct cxl_switch_decoder cxlsd; }; @@ -436,6 +451,13 @@ struct cxl_region_params { */ #define CXL_REGION_F_INCOHERENT 0 +/* + * Indicate whether this region has been assembled by autodetection or + * userspace assembly. Prevent endpoint decoders outside of automatic + * detection from being added to the region. + */ +#define CXL_REGION_F_AUTO 1 + /** * struct cxl_region - CXL region * @dev: This region's device @@ -699,6 +721,8 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev); #ifdef CONFIG_CXL_REGION bool is_cxl_pmem_region(struct device *dev); struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev); +int cxl_add_to_region(struct cxl_port *root, + struct cxl_endpoint_decoder *cxled); #else static inline bool is_cxl_pmem_region(struct device *dev) { @@ -708,6 +732,11 @@ static inline struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev) { return NULL; } +static inline int cxl_add_to_region(struct cxl_port *root, + struct cxl_endpoint_decoder *cxled) +{ + return 0; +} #endif /* diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index a8d46a67b45e..d6c151dabaa7 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -30,6 +30,33 @@ static void schedule_detach(void *cxlmd) schedule_cxl_memdev_detach(cxlmd); } +static int discover_region(struct device *dev, void *root) +{ + struct cxl_endpoint_decoder *cxled; + int rc; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + if ((cxled->cxld.flags & CXL_DECODER_F_ENABLE) == 0) + return 0; + + if (cxled->state != CXL_DECODER_STATE_AUTO) + return 0; + + /* + * Region enumeration is opportunistic, if this add-event fails, + * continue to the next endpoint decoder. + */ + rc = cxl_add_to_region(root, cxled); + if (rc) + dev_dbg(dev, "failed to add to region: %#llx-%#llx\n", + cxled->cxld.hpa_range.start, cxled->cxld.hpa_range.end); + + return 0; +} + static int cxl_switch_port_probe(struct cxl_port *port) { struct cxl_hdm *cxlhdm; @@ -54,6 +81,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_hdm *cxlhdm; + struct cxl_port *root; int rc; cxlhdm = devm_cxl_setup_hdm(port); @@ -78,7 +106,24 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) return rc; } - return devm_cxl_enumerate_decoders(cxlhdm); + rc = devm_cxl_enumerate_decoders(cxlhdm); + if (rc) + return rc; + + /* + * This can't fail in practice as CXL root exit unregisters all + * descendant ports and that in turn synchronizes with cxl_port_probe() + */ + root = find_cxl_root(&cxlmd->dev); + + /* + * Now that all endpoint decoders are successfully enumerated, try to + * assemble regions from committed decoders + */ + device_for_each_child(&port->dev, root, discover_region); + put_device(&root->dev); + + return 0; } static int cxl_port_probe(struct device *dev) -- cgit From 3d8f7ccaa611a743ae3a1e6f605346993d37c513 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:45 -0800 Subject: tools/testing/cxl: Define a fixed volatile configuration to parse Take two endpoints attached to the first switch on the first host-bridge in the cxl_test topology and define a pre-initialized region. This is a x2 interleave underneath a x1 CXL Window. $ modprobe cxl_test $ # cxl list -Ru { "region":"region3", "resource":"0xf010000000", "size":"512.00 MiB (536.87 MB)", "interleave_ways":2, "interleave_granularity":4096, "decode_state":"commit" } Tested-by: Fan Ni Reviewed-by: Vishal Verma Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167602000547.1924368.11613151863880268868.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/core.h | 3 --- drivers/cxl/core/hdm.c | 3 ++- drivers/cxl/core/port.c | 2 ++ drivers/cxl/cxl.h | 2 ++ drivers/cxl/cxlmem.h | 3 +++ 5 files changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 5eb873da5a30..479f01da6d35 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -57,9 +57,6 @@ resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled); resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled); extern struct rw_semaphore cxl_dpa_rwsem; -bool is_switch_decoder(struct device *dev); -struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev); - int cxl_memdev_init(void); void cxl_memdev_exit(void); void cxl_mbox_init(void); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 8c29026a4b9d..80eccae6ba9e 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -279,7 +279,7 @@ success: return 0; } -static int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, +int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped) { @@ -295,6 +295,7 @@ static int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled); } +EXPORT_SYMBOL_NS_GPL(devm_cxl_dpa_reserve, CXL); resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled) { diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 59620528571a..b45d2796ef35 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -458,6 +458,7 @@ bool is_switch_decoder(struct device *dev) { return is_root_decoder(dev) || dev->type == &cxl_decoder_switch_type; } +EXPORT_SYMBOL_NS_GPL(is_switch_decoder, CXL); struct cxl_decoder *to_cxl_decoder(struct device *dev) { @@ -485,6 +486,7 @@ struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev) return NULL; return container_of(dev, struct cxl_switch_decoder, cxld.dev); } +EXPORT_SYMBOL_NS_GPL(to_cxl_switch_decoder, CXL); static void cxl_ep_release(struct cxl_ep *ep) { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index c8ee4bb8cce6..2ac344235235 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -653,8 +653,10 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port, struct cxl_decoder *to_cxl_decoder(struct device *dev); struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev); +struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev); struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev); bool is_root_decoder(struct device *dev); +bool is_switch_decoder(struct device *dev); bool is_endpoint_decoder(struct device *dev); struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, unsigned int nr_targets, diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index c9da3c699a21..bf7d4c5c8612 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -81,6 +81,9 @@ static inline bool is_cxl_endpoint(struct cxl_port *port) } struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds); +int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, + resource_size_t base, resource_size_t len, + resource_size_t skipped); static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port, struct cxl_memdev *cxlmd) -- cgit From 09d09e04d2fcf88c4620dd28097e0e2a8f720eac Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:07:19 -0800 Subject: cxl/dax: Create dax devices for CXL RAM regions While platform firmware takes some responsibility for mapping the RAM capacity of CXL devices present at boot, the OS is responsible for mapping the remainder and hot-added devices. Platform firmware is also responsible for identifying the platform general purpose memory pool, typically DDR attached DRAM, and arranging for the remainder to be 'Soft Reserved'. That reservation allows the CXL subsystem to route the memory to core-mm via memory-hotplug (dax_kmem), or leave it for dedicated access (device-dax). The new 'struct cxl_dax_region' object allows for a CXL memory resource (region) to be published, but also allow for udev and module policy to act on that event. It also prevents cxl_core.ko from having a module loading dependency on any drivers/dax/ modules. Tested-by: Fan Ni Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167602003896.1924368.10335442077318970468.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 3 +- drivers/cxl/core/core.h | 3 ++ drivers/cxl/core/port.c | 4 +- drivers/cxl/core/region.c | 108 +++++++++++++++++++++++++++++++++++++++++++++- drivers/cxl/cxl.h | 12 ++++++ 5 files changed, 126 insertions(+), 4 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index ad0849af42d7..8ebb9a74790d 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -731,7 +731,8 @@ static void __exit cxl_acpi_exit(void) cxl_bus_drain(); } -module_init(cxl_acpi_init); +/* load before dax_hmem sees 'Soft Reserved' CXL ranges */ +subsys_initcall(cxl_acpi_init); module_exit(cxl_acpi_exit); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 479f01da6d35..cde475e13216 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -15,12 +15,14 @@ extern struct device_attribute dev_attr_create_ram_region; extern struct device_attribute dev_attr_delete_region; extern struct device_attribute dev_attr_region; extern const struct device_type cxl_pmem_region_type; +extern const struct device_type cxl_dax_region_type; extern const struct device_type cxl_region_type; void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled); #define CXL_REGION_ATTR(x) (&dev_attr_##x.attr) #define CXL_REGION_TYPE(x) (&cxl_region_type) #define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr), #define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type) +#define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type) int cxl_region_init(void); void cxl_region_exit(void); #else @@ -38,6 +40,7 @@ static inline void cxl_region_exit(void) #define CXL_REGION_TYPE(x) NULL #define SET_CXL_REGION_ATTR(x) #define CXL_PMEM_REGION_TYPE(x) NULL +#define CXL_DAX_REGION_TYPE(x) NULL #endif struct cxl_send_command; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index b45d2796ef35..0bb7a5ff724b 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -46,6 +46,8 @@ static int cxl_device_id(struct device *dev) return CXL_DEVICE_NVDIMM; if (dev->type == CXL_PMEM_REGION_TYPE()) return CXL_DEVICE_PMEM_REGION; + if (dev->type == CXL_DAX_REGION_TYPE()) + return CXL_DEVICE_DAX_REGION; if (is_cxl_port(dev)) { if (is_cxl_root(to_cxl_port(dev))) return CXL_DEVICE_ROOT; @@ -2015,6 +2017,6 @@ static void cxl_core_exit(void) debugfs_remove_recursive(cxl_debugfs); } -module_init(cxl_core_init); +subsys_initcall(cxl_core_init); module_exit(cxl_core_exit); MODULE_LICENSE("GPL v2"); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 8c29204279e9..91bb9ac881ff 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2278,6 +2278,75 @@ out: return cxlr_pmem; } +static void cxl_dax_region_release(struct device *dev) +{ + struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev); + + kfree(cxlr_dax); +} + +static const struct attribute_group *cxl_dax_region_attribute_groups[] = { + &cxl_base_attribute_group, + NULL, +}; + +const struct device_type cxl_dax_region_type = { + .name = "cxl_dax_region", + .release = cxl_dax_region_release, + .groups = cxl_dax_region_attribute_groups, +}; + +static bool is_cxl_dax_region(struct device *dev) +{ + return dev->type == &cxl_dax_region_type; +} + +struct cxl_dax_region *to_cxl_dax_region(struct device *dev) +{ + if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev), + "not a cxl_dax_region device\n")) + return NULL; + return container_of(dev, struct cxl_dax_region, dev); +} +EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL); + +static struct lock_class_key cxl_dax_region_key; + +static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_dax_region *cxlr_dax; + struct device *dev; + + down_read(&cxl_region_rwsem); + if (p->state != CXL_CONFIG_COMMIT) { + cxlr_dax = ERR_PTR(-ENXIO); + goto out; + } + + cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL); + if (!cxlr_dax) { + cxlr_dax = ERR_PTR(-ENOMEM); + goto out; + } + + cxlr_dax->hpa_range.start = p->res->start; + cxlr_dax->hpa_range.end = p->res->end; + + dev = &cxlr_dax->dev; + cxlr_dax->cxlr = cxlr; + device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_dax_region_key); + device_set_pm_not_required(dev); + dev->parent = &cxlr->dev; + dev->bus = &cxl_bus_type; + dev->type = &cxl_dax_region_type; +out: + up_read(&cxl_region_rwsem); + + return cxlr_dax; +} + static void cxlr_pmem_unregister(void *_cxlr_pmem) { struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem; @@ -2362,6 +2431,42 @@ err_bridge: return rc; } +static void cxlr_dax_unregister(void *_cxlr_dax) +{ + struct cxl_dax_region *cxlr_dax = _cxlr_dax; + + device_unregister(&cxlr_dax->dev); +} + +static int devm_cxl_add_dax_region(struct cxl_region *cxlr) +{ + struct cxl_dax_region *cxlr_dax; + struct device *dev; + int rc; + + cxlr_dax = cxl_dax_region_alloc(cxlr); + if (IS_ERR(cxlr_dax)) + return PTR_ERR(cxlr_dax); + + dev = &cxlr_dax->dev; + rc = dev_set_name(dev, "dax_region%d", cxlr->id); + if (rc) + goto err; + + rc = device_add(dev); + if (rc) + goto err; + + dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent), + dev_name(dev)); + + return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister, + cxlr_dax); +err: + put_device(dev); + return rc; +} + static int match_decoder_by_range(struct device *dev, void *data) { struct range *r1, *r2 = data; @@ -2624,8 +2729,7 @@ out: p->res->start, p->res->end, cxlr, is_system_ram) > 0) return 0; - dev_dbg(dev, "TODO: hookup devdax\n"); - return 0; + return devm_cxl_add_dax_region(cxlr); default: dev_dbg(&cxlr->dev, "unsupported region mode: %d\n", cxlr->mode); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 2ac344235235..b1395c46baec 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -513,6 +513,12 @@ struct cxl_pmem_region { struct cxl_pmem_region_mapping mapping[]; }; +struct cxl_dax_region { + struct device dev; + struct cxl_region *cxlr; + struct range hpa_range; +}; + /** * struct cxl_port - logical collection of upstream port devices and * downstream port devices to construct a CXL memory @@ -707,6 +713,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv); #define CXL_DEVICE_MEMORY_EXPANDER 5 #define CXL_DEVICE_REGION 6 #define CXL_DEVICE_PMEM_REGION 7 +#define CXL_DEVICE_DAX_REGION 8 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*") #define CXL_MODALIAS_FMT "cxl:t%d" @@ -725,6 +732,7 @@ bool is_cxl_pmem_region(struct device *dev); struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev); int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled); +struct cxl_dax_region *to_cxl_dax_region(struct device *dev); #else static inline bool is_cxl_pmem_region(struct device *dev) { @@ -739,6 +747,10 @@ static inline int cxl_add_to_region(struct cxl_port *root, { return 0; } +static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev) +{ + return NULL; +} #endif /* -- cgit From 860334e59003f684d85c519179038bd655eeb9ef Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 2 Feb 2023 17:04:24 -0800 Subject: cxl/mem: Remove unused CXL_CMD_FLAG_NONE define CXL_CMD_FLAG_NONE is not used, remove it. Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20221222-cxl-misc-v4-1-62f701c1cdd1@intel.com Signed-off-by: Dan Williams --- drivers/cxl/cxlmem.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index ab138004f644..2d85776236dd 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -393,7 +393,6 @@ struct cxl_mem_command { struct cxl_command_info info; enum cxl_opcode opcode; u32 flags; -#define CXL_CMD_FLAG_NONE 0 #define CXL_CMD_FLAG_FORCE_ENABLE BIT(0) }; -- cgit From 814a15f3b4131d3205bd47e23b50ccc6c666ce1d Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 2 Feb 2023 17:04:26 -0800 Subject: cxl/uapi: Tag commands from cxl_query_cmd() It was pointed out that commands not supported by the device or excluded by the kernel were being returned in cxl_query_cmd().[1] While libcxl correctly handles failing commands, it is more efficient to not issue an invalid command in the first place. This can't be done without additional information being returned from cxl_query_cmd(). In addition, information about the availability of commands can be useful for debugging. Add flags to struct cxl_command_info which reflect if a command is enabled and/or exclusive to the kernel. [1] https://lore.kernel.org/all/63b4ec4e37cc1_5178e2941d@dwillia2-xfh.jf.intel.com.notmuch/ Suggested-by: Dan Williams Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20221222-cxl-misc-v4-3-62f701c1cdd1@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 202d49dd9911..fd3b13b0fb7f 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -451,9 +451,14 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, * structures. */ cxl_for_each_cmd(cmd) { - const struct cxl_command_info *info = &cmd->info; + struct cxl_command_info info = cmd->info; - if (copy_to_user(&q->commands[j++], info, sizeof(*info))) + if (test_bit(info.id, cxlmd->cxlds->enabled_cmds)) + info.flags |= CXL_MEM_COMMAND_FLAG_ENABLED; + if (test_bit(info.id, cxlmd->cxlds->exclusive_cmds)) + info.flags |= CXL_MEM_COMMAND_FLAG_EXCLUSIVE; + + if (copy_to_user(&q->commands[j++], &info, sizeof(info))) return -EFAULT; if (j == n_commands) -- cgit From f57aec443c24d2e8e1f3b5b4856aea12ddda4254 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 13 Feb 2023 17:01:05 -0800 Subject: cxl/pmem: Fix nvdimm registration races A loop of the form: while true; do modprobe cxl_pci; modprobe -r cxl_pci; done ...fails with the following crash signature: BUG: kernel NULL pointer dereference, address: 0000000000000040 [..] RIP: 0010:cxl_internal_send_cmd+0x5/0xb0 [cxl_core] [..] Call Trace: cxl_pmem_ctl+0x121/0x240 [cxl_pmem] nvdimm_get_config_data+0xd6/0x1a0 [libnvdimm] nd_label_data_init+0x135/0x7e0 [libnvdimm] nvdimm_probe+0xd6/0x1c0 [libnvdimm] nvdimm_bus_probe+0x7a/0x1e0 [libnvdimm] really_probe+0xde/0x380 __driver_probe_device+0x78/0x170 driver_probe_device+0x1f/0x90 __device_attach_driver+0x85/0x110 bus_for_each_drv+0x7d/0xc0 __device_attach+0xb4/0x1e0 bus_probe_device+0x9f/0xc0 device_add+0x445/0x9c0 nd_async_device_register+0xe/0x40 [libnvdimm] async_run_entry_fn+0x30/0x130 ...namely that the bottom half of async nvdimm device registration runs after the CXL has already torn down the context that cxl_pmem_ctl() needs. Unlike the ACPI NFIT case that benefits from launching multiple nvdimm device registrations in parallel from those listed in the table, CXL is already marked PROBE_PREFER_ASYNCHRONOUS. So provide for a synchronous registration path to preclude this scenario. Fixes: 21083f51521f ("cxl/pmem: Register 'pmem' / cxl_nvdimm devices") Cc: Reported-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/cxl/pmem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 08bbbac9a6d0..71cfa1fdf902 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -76,6 +76,7 @@ static int cxl_nvdimm_probe(struct device *dev) return rc; set_bit(NDD_LABELING, &flags); + set_bit(NDD_REGISTER_SYNC, &flags); set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask); -- cgit From 7abcb0b10668eaf3c174ff383f3b2a7a8c95fb34 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Feb 2023 11:12:11 +0100 Subject: cxl: avoid returning uninitialized error code The new cxl_add_to_region() function returns an uninitialized value on success: drivers/cxl/core/region.c:2628:6: error: variable 'rc' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] if (IS_ERR(cxlr)) { ^~~~~~~~~~~~ drivers/cxl/core/region.c:2654:9: note: uninitialized use occurs here return rc; Simplify the logic to have the rc variable always initialized in the same place. Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230213101220.3821689-1-arnd@kernel.org Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 91bb9ac881ff..8ba71ca4135c 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2623,10 +2623,9 @@ int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled) cxlr = to_cxl_region(region_dev); mutex_unlock(&cxlrd->range_lock); - if (IS_ERR(cxlr)) { - rc = PTR_ERR(cxlr); + rc = PTR_ERR_OR_ZERO(cxlr); + if (rc) goto out; - } attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE); -- cgit From 1922a6dc0502ed3fd0786f57cc9e5f515c902009 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Feb 2023 13:28:42 -0700 Subject: cxl: remove unnecessary calling of pci_enable_pcie_error_reporting() With this [1] commit upstream, pci_enable_pci_error_report() is no longer necessary for the driver to call. Remove call and related cleanups. [1]: f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/167632012093.4153151.5360778069735064322.stgit@djiang5-mobl3.local Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 2bbebbc7e032..cc8b5a766a82 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -412,11 +412,6 @@ static bool is_cxl_restricted(struct pci_dev *pdev) return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; } -static void disable_aer(void *pdev) -{ - pci_disable_pcie_error_reporting(pdev); -} - static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct cxl_register_map map; @@ -494,12 +489,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); - if (cxlds->regs.ras) { - pci_enable_pcie_error_reporting(pdev); - rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev); - if (rc) - return rc; - } pci_save_state(pdev); return rc; -- cgit From 248529edc86f8d7d390a15a86bd1904951311665 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 14 Feb 2023 10:00:24 -0700 Subject: cxl: add RAS status unmasking for CXL By default the CXL RAS mask registers bits are defaulted to 1's and suppress all error reporting. If the kernel has negotiated ownership of error handling for CXL then unmask the mask registers by writing 0s. PCI_EXP_DEVCTL capability is checked to see uncorrectable or correctable errors bits are set before unmasking the respective errors. Acked-by: Bjorn Helgaas # pci_regs.h Reviewed-by: Jonathan Cameron Signed-off-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/167639402301.778884.12556849214955646539.stgit@djiang5-mobl3.local Signed-off-by: Dan Williams --- drivers/cxl/cxl.h | 1 + drivers/cxl/pci.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index aa3af3bb73b2..197ecffce4d0 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -130,6 +130,7 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) #define CXL_RAS_UNCORRECTABLE_STATUS_MASK (GENMASK(16, 14) | GENMASK(11, 0)) #define CXL_RAS_UNCORRECTABLE_MASK_OFFSET 0x4 #define CXL_RAS_UNCORRECTABLE_MASK_MASK (GENMASK(16, 14) | GENMASK(11, 0)) +#define CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK BIT(8) #define CXL_RAS_UNCORRECTABLE_SEVERITY_OFFSET 0x8 #define CXL_RAS_UNCORRECTABLE_SEVERITY_MASK (GENMASK(16, 14) | GENMASK(11, 0)) #define CXL_RAS_CORRECTABLE_STATUS_OFFSET 0xC diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index cc8b5a766a82..a509640994d7 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -412,6 +412,67 @@ static bool is_cxl_restricted(struct pci_dev *pdev) return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; } +/* + * CXL v3.0 6.2.3 Table 6-4 + * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits + * mode, otherwise it's 68B flits mode. + */ +static bool cxl_pci_flit_256(struct pci_dev *pdev) +{ + u16 lnksta2; + + pcie_capability_read_word(pdev, PCI_EXP_LNKSTA2, &lnksta2); + return lnksta2 & PCI_EXP_LNKSTA2_FLIT; +} + +static int cxl_pci_ras_unmask(struct pci_dev *pdev) +{ + struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + void __iomem *addr; + u32 orig_val, val, mask; + u16 cap; + int rc; + + if (!cxlds->regs.ras) { + dev_dbg(&pdev->dev, "No RAS registers.\n"); + return 0; + } + + /* BIOS has CXL error control */ + if (!host_bridge->native_cxl_error) + return -ENXIO; + + rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap); + if (rc) + return rc; + + if (cap & PCI_EXP_DEVCTL_URRE) { + addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET; + orig_val = readl(addr); + + mask = CXL_RAS_UNCORRECTABLE_MASK_MASK; + if (!cxl_pci_flit_256(pdev)) + mask &= ~CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK; + val = orig_val & ~mask; + writel(val, addr); + dev_dbg(&pdev->dev, + "Uncorrectable RAS Errors Mask: %#x -> %#x\n", + orig_val, val); + } + + if (cap & PCI_EXP_DEVCTL_CERE) { + addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET; + orig_val = readl(addr); + val = orig_val & ~CXL_RAS_CORRECTABLE_MASK_MASK; + writel(val, addr); + dev_dbg(&pdev->dev, "Correctable RAS Errors Mask: %#x -> %#x\n", + orig_val, val); + } + + return 0; +} + static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct cxl_register_map map; @@ -489,6 +550,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); + rc = cxl_pci_ras_unmask(pdev); + if (rc) + dev_dbg(&pdev->dev, "No RAS reporting unmasked\n"); + pci_save_state(pdev); return rc; -- cgit From 1acba6e9206c655f8eb6736c7cafbf022492f36d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 14 Feb 2023 11:41:08 -0800 Subject: cxl/pci: Break out range register decoding from cxl_hdm_decode_init() There are 2 scenarios that requires additional handling. 1. A device that has active ranges in DVSEC range registers (RR) but no HDM decoder register block. 2. A device that has both RR active and HDM, but the HDM decoders are not programmed. The goal is to create emulated decoder software structs based on the RR. Move the CXL DVSEC range register decoding code block from cxl_hdm_decode_init() to its own function. Refactor code in preparation for the HDM decoder emulation. There is no functionality change to the code. Name the new function to cxl_dvsec_rr_decode(). The only change is to set range->start and range->end to CXL_RESOURCE_NONE and skipping the reading of base registers if the range size is 0, which equates to range not active. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/167640366839.935665.11816388524993234329.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 64 +++++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 24 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 57764e9cd19d..52bf6b4d093e 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -141,11 +141,10 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL); -static int wait_for_valid(struct cxl_dev_state *cxlds) +static int wait_for_valid(struct pci_dev *pdev, int d) { - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - int d = cxlds->cxl_dvsec, rc; u32 val; + int rc; /* * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high @@ -334,20 +333,11 @@ static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, return true; } -/** - * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint - * @cxlds: Device state - * @cxlhdm: Mapped HDM decoder Capability - * - * Try to enable the endpoint's HDM Decoder Capability - */ -int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm) +static int cxl_dvsec_rr_decode(struct device *dev, int d, + struct cxl_endpoint_dvsec_info *info) { - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - struct cxl_endpoint_dvsec_info info = { 0 }; + struct pci_dev *pdev = to_pci_dev(dev); int hdm_count, rc, i, ranges = 0; - struct device *dev = &pdev->dev; - int d = cxlds->cxl_dvsec; u16 cap, ctrl; if (!d) { @@ -378,7 +368,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm) if (!hdm_count || hdm_count > 2) return -EINVAL; - rc = wait_for_valid(cxlds); + rc = wait_for_valid(pdev, d); if (rc) { dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc); return rc; @@ -389,9 +379,9 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm) * disabled, and they will remain moot after the HDM Decoder * capability is enabled. */ - info.mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); - if (!info.mem_enabled) - goto hdm_init; + info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + if (!info->mem_enabled) + return 0; for (i = 0; i < hdm_count; i++) { u64 base, size; @@ -410,6 +400,13 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm) return rc; size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; + if (!size) { + info->dvsec_range[i] = (struct range) { + .start = 0, + .end = CXL_RESOURCE_NONE, + }; + continue; + } rc = pci_read_config_dword( pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); @@ -425,22 +422,41 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm) base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; - info.dvsec_range[i] = (struct range) { + info->dvsec_range[i] = (struct range) { .start = base, .end = base + size - 1 }; - if (size) - ranges++; + ranges++; } - info.ranges = ranges; + info->ranges = ranges; + + return 0; +} + +/** + * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint + * @cxlds: Device state + * @cxlhdm: Mapped HDM decoder Capability + * + * Try to enable the endpoint's HDM Decoder Capability + */ +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm) +{ + struct cxl_endpoint_dvsec_info info = { 0 }; + struct device *dev = cxlds->dev; + int d = cxlds->cxl_dvsec; + int rc; + + rc = cxl_dvsec_rr_decode(dev, d, &info); + if (rc < 0) + return rc; /* * If DVSEC ranges are being used instead of HDM decoder registers there * is no use in trying to manage those. */ -hdm_init: if (!__cxl_hdm_decode_init(cxlds, cxlhdm, &info)) { dev_err(dev, "Legacy range registers configuration prevents HDM operation.\n"); -- cgit From 59c3368b2e69eb7da7f271286a0bd80930dfc070 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 14 Feb 2023 11:41:13 -0800 Subject: cxl/port: Export cxl_dvsec_rr_decode() to cxl_port Call cxl_dvsec_rr_decode() in the beginning of cxl_port_probe() and preserve the decoded information in a local 'struct cxl_endpoint_dvsec_info'. This info can be passed to various functions later on in order to support the HDM decoder emulation. The invocation of cxl_dvsec_rr_decode() in cxl_hdm_decode_init() is removed and a pointer to the 'struct cxl_endpoint_dvsec_info' is passed in. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/167640367377.935665.2848747799651019676.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 18 +++++++----------- drivers/cxl/cxl.h | 14 ++++++++++++++ drivers/cxl/cxlmem.h | 12 ------------ drivers/cxl/cxlpci.h | 3 ++- drivers/cxl/port.c | 20 +++++++++++++------- 5 files changed, 36 insertions(+), 31 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 52bf6b4d093e..948fa3724a0f 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -333,8 +333,8 @@ static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, return true; } -static int cxl_dvsec_rr_decode(struct device *dev, int d, - struct cxl_endpoint_dvsec_info *info) +int cxl_dvsec_rr_decode(struct device *dev, int d, + struct cxl_endpoint_dvsec_info *info) { struct pci_dev *pdev = to_pci_dev(dev); int hdm_count, rc, i, ranges = 0; @@ -434,30 +434,26 @@ static int cxl_dvsec_rr_decode(struct device *dev, int d, return 0; } +EXPORT_SYMBOL_NS_GPL(cxl_dvsec_rr_decode, CXL); /** * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint * @cxlds: Device state * @cxlhdm: Mapped HDM decoder Capability + * @info: Cached DVSEC range registers info * * Try to enable the endpoint's HDM Decoder Capability */ -int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm) +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, + struct cxl_endpoint_dvsec_info *info) { - struct cxl_endpoint_dvsec_info info = { 0 }; struct device *dev = cxlds->dev; - int d = cxlds->cxl_dvsec; - int rc; - - rc = cxl_dvsec_rr_decode(dev, d, &info); - if (rc < 0) - return rc; /* * If DVSEC ranges are being used instead of HDM decoder registers there * is no use in trying to manage those. */ - if (!__cxl_hdm_decode_init(cxlds, cxlhdm, &info)) { + if (!__cxl_hdm_decode_init(cxlds, cxlhdm, info)) { dev_err(dev, "Legacy range registers configuration prevents HDM operation.\n"); return -EBUSY; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 1b1cf459ac77..fc01ce96d326 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -630,10 +630,24 @@ int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map); int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld); int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint); +/** + * struct cxl_endpoint_dvsec_info - Cached DVSEC info + * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE + * @ranges: Number of active HDM ranges this device uses. + * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE + */ +struct cxl_endpoint_dvsec_info { + bool mem_enabled; + int ranges; + struct range dvsec_range[2]; +}; + struct cxl_hdm; struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port); int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm); int devm_cxl_add_passthrough_decoder(struct cxl_port *port); +int cxl_dvsec_rr_decode(struct device *dev, int dvsec, + struct cxl_endpoint_dvsec_info *info); bool is_cxl_region(struct device *dev); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index ab138004f644..187a310780a9 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -181,18 +181,6 @@ static inline int cxl_mbox_cmd_rc2errno(struct cxl_mbox_cmd *mbox_cmd) */ #define CXL_CAPACITY_MULTIPLIER SZ_256M -/** - * struct cxl_endpoint_dvsec_info - Cached DVSEC info - * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE - * @ranges: Number of active HDM ranges this device uses. - * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE - */ -struct cxl_endpoint_dvsec_info { - bool mem_enabled; - int ranges; - struct range dvsec_range[2]; -}; - /** * struct cxl_dev_state - The driver device state * diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 920909791bb9..430e23345a16 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -64,6 +64,7 @@ enum cxl_regloc_type { int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; -int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm); +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, + struct cxl_endpoint_dvsec_info *info); void read_cdat_data(struct cxl_port *port); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 5453771bf330..9e09728b20d9 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -32,12 +32,21 @@ static void schedule_detach(void *cxlmd) static int cxl_port_probe(struct device *dev) { + struct cxl_endpoint_dvsec_info info = { 0 }; struct cxl_port *port = to_cxl_port(dev); + bool is_ep = is_cxl_endpoint(port); + struct cxl_dev_state *cxlds; + struct cxl_memdev *cxlmd; struct cxl_hdm *cxlhdm; int rc; - - if (!is_cxl_endpoint(port)) { + if (is_ep) { + cxlmd = to_cxl_memdev(port->uport); + cxlds = cxlmd->cxlds; + rc = cxl_dvsec_rr_decode(cxlds->dev, cxlds->cxl_dvsec, &info); + if (rc < 0) + return rc; + } else { rc = devm_cxl_port_enumerate_dports(port); if (rc < 0) return rc; @@ -49,10 +58,7 @@ static int cxl_port_probe(struct device *dev) if (IS_ERR(cxlhdm)) return PTR_ERR(cxlhdm); - if (is_cxl_endpoint(port)) { - struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); - struct cxl_dev_state *cxlds = cxlmd->cxlds; - + if (is_ep) { /* Cache the data early to ensure is_visible() works */ read_cdat_data(port); @@ -61,7 +67,7 @@ static int cxl_port_probe(struct device *dev) if (rc) return rc; - rc = cxl_hdm_decode_init(cxlds, cxlhdm); + rc = cxl_hdm_decode_init(cxlds, cxlhdm, &info); if (rc) return rc; -- cgit From 9de321e93c3b3fd7fd2621a2557c42fa7d43e314 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 14 Feb 2023 11:41:19 -0800 Subject: cxl/pci: Refactor cxl_hdm_decode_init() With the previous refactoring of DVSEC range registers out of cxl_hdm_decode_init(), it basically becomes a skeleton function. Squash __cxl_hdm_decode_init() with cxl_hdm_decode_init() to simplify the code. cxl_hdm_decode_init() now returns more error codes than just -EBUSY. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/167640367916.935665.12898404758336059003.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 135 ++++++++++++++++++++----------------------------- 1 file changed, 54 insertions(+), 81 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 948fa3724a0f..d0b25481bdce 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -259,80 +259,6 @@ static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm) return devm_add_action_or_reset(host, disable_hdm, cxlhdm); } -static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, - struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info) -{ - void __iomem *hdm = cxlhdm->regs.hdm_decoder; - struct cxl_port *port = cxlhdm->port; - struct device *dev = cxlds->dev; - struct cxl_port *root; - int i, rc, allowed; - u32 global_ctrl; - - global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); - - /* - * If the HDM Decoder Capability is already enabled then assume - * that some other agent like platform firmware set it up. - */ - if (global_ctrl & CXL_HDM_DECODER_ENABLE) { - rc = devm_cxl_enable_mem(&port->dev, cxlds); - if (rc) - return false; - return true; - } - - root = to_cxl_port(port->dev.parent); - while (!is_cxl_root(root) && is_cxl_port(root->dev.parent)) - root = to_cxl_port(root->dev.parent); - if (!is_cxl_root(root)) { - dev_err(dev, "Failed to acquire root port for HDM enable\n"); - return false; - } - - for (i = 0, allowed = 0; info->mem_enabled && i < info->ranges; i++) { - struct device *cxld_dev; - - cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i], - dvsec_range_allowed); - if (!cxld_dev) { - dev_dbg(dev, "DVSEC Range%d denied by platform\n", i); - continue; - } - dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i); - put_device(cxld_dev); - allowed++; - } - - if (!allowed) { - cxl_set_mem_enable(cxlds, 0); - info->mem_enabled = 0; - } - - /* - * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base - * [High,Low] when HDM operation is enabled the range register values - * are ignored by the device, but the spec also recommends matching the - * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges - * are expected even though Linux does not require or maintain that - * match. If at least one DVSEC range is enabled and allowed, skip HDM - * Decoder Capability Enable. - */ - if (info->mem_enabled) - return false; - - rc = devm_cxl_enable_hdm(&port->dev, cxlhdm); - if (rc) - return false; - - rc = devm_cxl_enable_mem(&port->dev, cxlds); - if (rc) - return false; - - return true; -} - int cxl_dvsec_rr_decode(struct device *dev, int d, struct cxl_endpoint_dvsec_info *info) { @@ -447,19 +373,66 @@ EXPORT_SYMBOL_NS_GPL(cxl_dvsec_rr_decode, CXL); int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, struct cxl_endpoint_dvsec_info *info) { + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + struct cxl_port *port = cxlhdm->port; struct device *dev = cxlds->dev; + struct cxl_port *root; + int i, rc, allowed; + u32 global_ctrl; + + global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); /* - * If DVSEC ranges are being used instead of HDM decoder registers there - * is no use in trying to manage those. + * If the HDM Decoder Capability is already enabled then assume + * that some other agent like platform firmware set it up. */ - if (!__cxl_hdm_decode_init(cxlds, cxlhdm, info)) { - dev_err(dev, - "Legacy range registers configuration prevents HDM operation.\n"); - return -EBUSY; + if (global_ctrl & CXL_HDM_DECODER_ENABLE) + return devm_cxl_enable_mem(&port->dev, cxlds); + + root = to_cxl_port(port->dev.parent); + while (!is_cxl_root(root) && is_cxl_port(root->dev.parent)) + root = to_cxl_port(root->dev.parent); + if (!is_cxl_root(root)) { + dev_err(dev, "Failed to acquire root port for HDM enable\n"); + return -ENODEV; } - return 0; + for (i = 0, allowed = 0; info->mem_enabled && i < info->ranges; i++) { + struct device *cxld_dev; + + cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i], + dvsec_range_allowed); + if (!cxld_dev) { + dev_dbg(dev, "DVSEC Range%d denied by platform\n", i); + continue; + } + dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i); + put_device(cxld_dev); + allowed++; + } + + if (!allowed) { + cxl_set_mem_enable(cxlds, 0); + info->mem_enabled = 0; + } + + /* + * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base + * [High,Low] when HDM operation is enabled the range register values + * are ignored by the device, but the spec also recommends matching the + * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges + * are expected even though Linux does not require or maintain that + * match. If at least one DVSEC range is enabled and allowed, skip HDM + * Decoder Capability Enable. + */ + if (info->mem_enabled) + return -EBUSY; + + rc = devm_cxl_enable_hdm(&port->dev, cxlhdm); + if (rc) + return rc; + + return devm_cxl_enable_mem(&port->dev, cxlds); } EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL); -- cgit From b777e9bec960a29374dc486d47784c73b7ac4cef Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 14 Feb 2023 11:41:24 -0800 Subject: cxl/hdm: Emulate HDM decoder from DVSEC range registers In the case where HDM decoder register block exists but is not programmed and at the same time the DVSEC range register range is active, populate the CXL decoder object 'cxl_decoder' with info from DVSEC range registers. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/167640368454.935665.13806415120298330717.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 36 +++++++++++++++++++++++++++++++++--- drivers/cxl/core/pci.c | 2 +- drivers/cxl/cxl.h | 3 ++- drivers/cxl/port.c | 2 +- 4 files changed, 37 insertions(+), 6 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index dcc16d7cb8f3..c0f224454447 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -679,9 +679,34 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld) return 0; } +static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port, + struct cxl_decoder *cxld, int which, + struct cxl_endpoint_dvsec_info *info) +{ + if (!is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + if (!range_len(&info->dvsec_range[which])) + return -ENOENT; + + cxld->target_type = CXL_DECODER_EXPANDER; + cxld->commit = NULL; + cxld->reset = NULL; + cxld->hpa_range = info->dvsec_range[which]; + + /* + * Set the emulated decoder as locked pending additional support to + * change the range registers at run time. + */ + cxld->flags |= CXL_DECODER_F_ENABLE | CXL_DECODER_F_LOCK; + port->commit_end = cxld->id; + + return 0; +} + static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, int *target_map, void __iomem *hdm, int which, - u64 *dpa_base) + u64 *dpa_base, struct cxl_endpoint_dvsec_info *info) { struct cxl_endpoint_decoder *cxled = NULL; u64 size, base, skip, dpa_size; @@ -717,6 +742,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, .end = base + size - 1, }; + if (cxled && !committed && range_len(&info->dvsec_range[which])) + return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info); + /* decoders are enabled if committed */ if (committed) { cxld->flags |= CXL_DECODER_F_ENABLE; @@ -790,7 +818,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, * devm_cxl_enumerate_decoders - add decoder objects per HDM register set * @cxlhdm: Structure to populate with HDM capabilities */ -int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) +int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, + struct cxl_endpoint_dvsec_info *info) { void __iomem *hdm = cxlhdm->regs.hdm_decoder; struct cxl_port *port = cxlhdm->port; @@ -842,7 +871,8 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) cxld = &cxlsd->cxld; } - rc = init_hdm_decoder(port, cxld, target_map, hdm, i, &dpa_base); + rc = init_hdm_decoder(port, cxld, target_map, hdm, i, + &dpa_base, info); if (rc) { put_device(&cxld->dev); return rc; diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index d0b25481bdce..4df0b35c9b1a 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -426,7 +426,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, * Decoder Capability Enable. */ if (info->mem_enabled) - return -EBUSY; + return 0; rc = devm_cxl_enable_hdm(&port->dev, cxlhdm); if (rc) diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index fc01ce96d326..fe9d75989c8a 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -644,7 +644,8 @@ struct cxl_endpoint_dvsec_info { struct cxl_hdm; struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port); -int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm); +int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, + struct cxl_endpoint_dvsec_info *info); int devm_cxl_add_passthrough_decoder(struct cxl_port *port); int cxl_dvsec_rr_decode(struct device *dev, int dvsec, struct cxl_endpoint_dvsec_info *info); diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 9e09728b20d9..d3a708e32565 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -78,7 +78,7 @@ static int cxl_port_probe(struct device *dev) } } - rc = devm_cxl_enumerate_decoders(cxlhdm); + rc = devm_cxl_enumerate_decoders(cxlhdm, &info); if (rc) { dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc); return rc; -- cgit From 4474ce565ee4490fb4e6d8443b617a9d98ae10ff Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 14 Feb 2023 11:41:30 -0800 Subject: cxl/hdm: Create emulated cxl_hdm for devices that do not have HDM decoders CXL rev3 spec 8.1.3 RCDs may not have HDM register blocks. Create a fake HDM with information from the CXL PCIe DVSEC registers. The decoder count will be set to the HDM count retrieved from the DVSEC cap register. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/167640368994.935665.15831225724059704620.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 58 +++++++++++++++++++++++++++++++++++++++++--------- drivers/cxl/core/pci.c | 9 +++++--- drivers/cxl/cxl.h | 3 ++- drivers/cxl/port.c | 2 +- 4 files changed, 57 insertions(+), 15 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index c0f224454447..a49543f22dca 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -101,11 +101,34 @@ static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb, BIT(CXL_CM_CAP_CAP_ID_HDM)); } +static struct cxl_hdm *devm_cxl_setup_emulated_hdm(struct cxl_port *port, + struct cxl_endpoint_dvsec_info *info) +{ + struct device *dev = &port->dev; + struct cxl_hdm *cxlhdm; + + if (!info->mem_enabled) + return ERR_PTR(-ENODEV); + + cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL); + if (!cxlhdm) + return ERR_PTR(-ENOMEM); + + cxlhdm->port = port; + cxlhdm->decoder_count = info->ranges; + cxlhdm->target_count = info->ranges; + dev_set_drvdata(&port->dev, cxlhdm); + + return cxlhdm; +} + /** * devm_cxl_setup_hdm - map HDM decoder component registers * @port: cxl_port to map + * @info: cached DVSEC range register info */ -struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port) +struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, + struct cxl_endpoint_dvsec_info *info) { struct device *dev = &port->dev; struct cxl_hdm *cxlhdm; @@ -119,6 +142,9 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port) cxlhdm->port = port; crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); if (!crb) { + if (info->mem_enabled) + return devm_cxl_setup_emulated_hdm(port, info); + dev_err(dev, "No component registers mapped\n"); return ERR_PTR(-ENXIO); } @@ -814,19 +840,15 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, return 0; } -/** - * devm_cxl_enumerate_decoders - add decoder objects per HDM register set - * @cxlhdm: Structure to populate with HDM capabilities - */ -int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info) +static void cxl_settle_decoders(struct cxl_hdm *cxlhdm) { void __iomem *hdm = cxlhdm->regs.hdm_decoder; - struct cxl_port *port = cxlhdm->port; - int i, committed; - u64 dpa_base = 0; + int committed, i; u32 ctrl; + if (!hdm) + return; + /* * Since the register resource was recently claimed via request_region() * be careful about trusting the "not-committed" status until the commit @@ -843,6 +865,22 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, /* ensure that future checks of committed can be trusted */ if (committed != cxlhdm->decoder_count) msleep(20); +} + +/** + * devm_cxl_enumerate_decoders - add decoder objects per HDM register set + * @cxlhdm: Structure to populate with HDM capabilities + * @info: cached DVSEC range register info + */ +int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, + struct cxl_endpoint_dvsec_info *info) +{ + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + struct cxl_port *port = cxlhdm->port; + int i; + u64 dpa_base = 0; + + cxl_settle_decoders(cxlhdm); for (i = 0; i < cxlhdm->decoder_count; i++) { int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 }; diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 4df0b35c9b1a..4eb34dee7c95 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -378,16 +378,19 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, struct device *dev = cxlds->dev; struct cxl_port *root; int i, rc, allowed; - u32 global_ctrl; + u32 global_ctrl = 0; - global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); + if (hdm) + global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); /* * If the HDM Decoder Capability is already enabled then assume * that some other agent like platform firmware set it up. */ - if (global_ctrl & CXL_HDM_DECODER_ENABLE) + if (global_ctrl & CXL_HDM_DECODER_ENABLE || (!hdm && info->mem_enabled)) return devm_cxl_enable_mem(&port->dev, cxlds); + else if (!hdm) + return -ENODEV; root = to_cxl_port(port->dev.parent); while (!is_cxl_root(root) && is_cxl_port(root->dev.parent)) diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index fe9d75989c8a..f8cbc5275451 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -643,7 +643,8 @@ struct cxl_endpoint_dvsec_info { }; struct cxl_hdm; -struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port); +struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, + struct cxl_endpoint_dvsec_info *info); int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, struct cxl_endpoint_dvsec_info *info); int devm_cxl_add_passthrough_decoder(struct cxl_port *port); diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index d3a708e32565..9f9cc268b597 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -54,7 +54,7 @@ static int cxl_port_probe(struct device *dev) return devm_cxl_add_passthrough_decoder(port); } - cxlhdm = devm_cxl_setup_hdm(port); + cxlhdm = devm_cxl_setup_hdm(port, &info); if (IS_ERR(cxlhdm)) return PTR_ERR(cxlhdm); -- cgit From d7a2153762c7369534c432ac7e86996063e246df Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 14 Feb 2023 11:41:35 -0800 Subject: cxl/hdm: Add emulation when HDM decoders are not committed For the case where DVSEC range register(s) are active and HDM decoders are not committed, use RR to provide emulation. A first pass is done to note whether any decoders are committed. If there are no committed endpoint decoders, then DVSEC ranges will be used for emulation. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/167640369536.935665.611974113442400127.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index a49543f22dca..39e02f28b6a6 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -730,6 +730,32 @@ static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port, return 0; } +static bool should_emulate_decoders(struct cxl_port *port) +{ + struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + u32 ctrl; + int i; + + if (!is_cxl_endpoint(cxlhdm->port)) + return false; + + if (!hdm) + return true; + + /* + * If any decoders are committed already, there should not be any + * emulated DVSEC decoders. + */ + for (i = 0; i < cxlhdm->decoder_count; i++) { + ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); + if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl)) + return false; + } + + return true; +} + static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, int *target_map, void __iomem *hdm, int which, u64 *dpa_base, struct cxl_endpoint_dvsec_info *info) @@ -745,6 +771,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, unsigned char target_id[8]; } target_list; + if (should_emulate_decoders(port)) + return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info); + if (is_endpoint_decoder(&cxld->dev)) cxled = to_cxl_endpoint_decoder(&cxld->dev); -- cgit From 6980daaa3ed5959bf4fe2719d96b1da437026b58 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 14 Feb 2023 11:41:40 -0800 Subject: cxl/pci: Remove locked check for dvsec_range_allowed() Remove the CXL_DECODER_F_LOCK check to be permissive of platform BIOSes that allow CXL.mem to be remapped. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/167640370085.935665.13128321011001358077.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 4eb34dee7c95..c18ed1bbb54d 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -228,8 +228,6 @@ static int dvsec_range_allowed(struct device *dev, void *arg) cxld = to_cxl_decoder(dev); - if (!(cxld->flags & CXL_DECODER_F_LOCK)) - return 0; if (!(cxld->flags & CXL_DECODER_F_RAM)) return 0; -- cgit From 0c8393dcdb1ef4dc358d95a2523297175135874b Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 14 Feb 2023 20:12:43 -0800 Subject: cxl/trace: Standardize device information output The trace points were written to take a struct device input for the trace. In CXL multiple device objects are associated with each CXL hardware device. Using different device objects in the trace point can lead to confusion for users. The PCIe device is nice to have, but the user space tooling relies on the memory device naming. It is better to have those device names reported. Change all trace points to take struct cxl_memdev as a standard and report that name. Furthermore, standardize on the name 'memdev' in both /sys/kernel/tracing/trace and cxl-cli monitor output. Cc: Steven Rostedt Cc: Dave Jiang Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20230208-cxl-event-names-v2-1-fca130c2c68b@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 14 +++++----- drivers/cxl/core/pci.c | 8 ++---- drivers/cxl/core/trace.h | 70 ++++++++++++++++++++++++------------------------ 3 files changed, 44 insertions(+), 48 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index a12255907852..eec100f0007b 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -743,7 +743,7 @@ static const uuid_t mem_mod_event_uuid = UUID_INIT(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74); -static void cxl_event_trace_record(const struct device *dev, +static void cxl_event_trace_record(const struct cxl_memdev *cxlmd, enum cxl_event_log_type type, struct cxl_event_record_raw *record) { @@ -753,19 +753,19 @@ static void cxl_event_trace_record(const struct device *dev, struct cxl_event_gen_media *rec = (struct cxl_event_gen_media *)record; - trace_cxl_general_media(dev, type, rec); + trace_cxl_general_media(cxlmd, type, rec); } else if (uuid_equal(id, &dram_event_uuid)) { struct cxl_event_dram *rec = (struct cxl_event_dram *)record; - trace_cxl_dram(dev, type, rec); + trace_cxl_dram(cxlmd, type, rec); } else if (uuid_equal(id, &mem_mod_event_uuid)) { struct cxl_event_mem_module *rec = (struct cxl_event_mem_module *)record; - trace_cxl_memory_module(dev, type, rec); + trace_cxl_memory_module(cxlmd, type, rec); } else { /* For unknown record types print just the header */ - trace_cxl_generic_event(dev, type, record); + trace_cxl_generic_event(cxlmd, type, record); } } @@ -872,11 +872,11 @@ static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds, break; for (i = 0; i < nr_rec; i++) - cxl_event_trace_record(cxlds->dev, type, + cxl_event_trace_record(cxlds->cxlmd, type, &payload->records[i]); if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW) - trace_cxl_overflow(cxlds->dev, type, payload); + trace_cxl_overflow(cxlds->cxlmd, type, payload); rc = cxl_clear_event_record(cxlds, type, payload); if (rc) { diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 1d1492440287..fa263370d65a 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -627,8 +627,6 @@ EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); void cxl_cor_error_detected(struct pci_dev *pdev) { struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct cxl_memdev *cxlmd = cxlds->cxlmd; - struct device *dev = &cxlmd->dev; void __iomem *addr; u32 status; @@ -639,7 +637,7 @@ void cxl_cor_error_detected(struct pci_dev *pdev) status = readl(addr); if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); - trace_cxl_aer_correctable_error(dev, status); + trace_cxl_aer_correctable_error(cxlds->cxlmd, status); } } EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); @@ -667,8 +665,6 @@ static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) */ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) { - struct cxl_memdev *cxlmd = cxlds->cxlmd; - struct device *dev = &cxlmd->dev; u32 hl[CXL_HEADERLOG_SIZE_U32]; void __iomem *addr; u32 status; @@ -691,7 +687,7 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) } header_log_copy(cxlds, hl); - trace_cxl_aer_uncorrectable_error(dev, status, fe, hl); + trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); return true; diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index c72ef9321cfe..93ac651a2f21 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -47,16 +47,16 @@ ) TRACE_EVENT(cxl_aer_uncorrectable_error, - TP_PROTO(const struct device *dev, u32 status, u32 fe, u32 *hl), - TP_ARGS(dev, status, fe, hl), + TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 *hl), + TP_ARGS(cxlmd, status, fe, hl), TP_STRUCT__entry( - __string(dev_name, dev_name(dev)) + __string(memdev, dev_name(&cxlmd->dev)) __field(u32, status) __field(u32, first_error) __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) ), TP_fast_assign( - __assign_str(dev_name, dev_name(dev)); + __assign_str(memdev, dev_name(&cxlmd->dev)); __entry->status = status; __entry->first_error = fe; /* @@ -65,8 +65,8 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, */ memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); ), - TP_printk("%s: status: '%s' first_error: '%s'", - __get_str(dev_name), + TP_printk("memdev=%s: status: '%s' first_error: '%s'", + __get_str(memdev), show_uc_errs(__entry->status), show_uc_errs(__entry->first_error) ) @@ -91,18 +91,18 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, ) TRACE_EVENT(cxl_aer_correctable_error, - TP_PROTO(const struct device *dev, u32 status), - TP_ARGS(dev, status), + TP_PROTO(const struct cxl_memdev *cxlmd, u32 status), + TP_ARGS(cxlmd, status), TP_STRUCT__entry( - __string(dev_name, dev_name(dev)) + __string(memdev, dev_name(&cxlmd->dev)) __field(u32, status) ), TP_fast_assign( - __assign_str(dev_name, dev_name(dev)); + __assign_str(memdev, dev_name(&cxlmd->dev)); __entry->status = status; ), - TP_printk("%s: status: '%s'", - __get_str(dev_name), show_ce_errs(__entry->status) + TP_printk("memdev=%s: status: '%s'", + __get_str(memdev), show_ce_errs(__entry->status) ) ); @@ -115,13 +115,13 @@ TRACE_EVENT(cxl_aer_correctable_error, TRACE_EVENT(cxl_overflow, - TP_PROTO(const struct device *dev, enum cxl_event_log_type log, + TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, struct cxl_get_event_payload *payload), - TP_ARGS(dev, log, payload), + TP_ARGS(cxlmd, log, payload), TP_STRUCT__entry( - __string(dev_name, dev_name(dev)) + __string(memdev, dev_name(&cxlmd->dev)) __field(int, log) __field(u64, first_ts) __field(u64, last_ts) @@ -129,15 +129,15 @@ TRACE_EVENT(cxl_overflow, ), TP_fast_assign( - __assign_str(dev_name, dev_name(dev)); + __assign_str(memdev, dev_name(&cxlmd->dev)); __entry->log = log; __entry->count = le16_to_cpu(payload->overflow_err_count); __entry->first_ts = le64_to_cpu(payload->first_overflow_timestamp); __entry->last_ts = le64_to_cpu(payload->last_overflow_timestamp); ), - TP_printk("%s: log=%s : %u records from %llu to %llu", - __get_str(dev_name), cxl_event_log_type_str(__entry->log), + TP_printk("memdev=%s: log=%s : %u records from %llu to %llu", + __get_str(memdev), cxl_event_log_type_str(__entry->log), __entry->count, __entry->first_ts, __entry->last_ts) ); @@ -170,7 +170,7 @@ TRACE_EVENT(cxl_overflow, * See the generic_event tracepoint as an example. */ #define CXL_EVT_TP_entry \ - __string(dev_name, dev_name(dev)) \ + __string(memdev, dev_name(&cxlmd->dev)) \ __field(int, log) \ __field_struct(uuid_t, hdr_uuid) \ __field(u32, hdr_flags) \ @@ -180,8 +180,8 @@ TRACE_EVENT(cxl_overflow, __field(u8, hdr_length) \ __field(u8, hdr_maint_op_class) -#define CXL_EVT_TP_fast_assign(dev, l, hdr) \ - __assign_str(dev_name, dev_name(dev)); \ +#define CXL_EVT_TP_fast_assign(cxlmd, l, hdr) \ + __assign_str(memdev, dev_name(&(cxlmd)->dev)); \ __entry->log = (l); \ memcpy(&__entry->hdr_uuid, &(hdr).id, sizeof(uuid_t)); \ __entry->hdr_length = (hdr).length; \ @@ -192,10 +192,10 @@ TRACE_EVENT(cxl_overflow, __entry->hdr_maint_op_class = (hdr).maint_op_class #define CXL_EVT_TP_printk(fmt, ...) \ - TP_printk("%s log=%s : time=%llu uuid=%pUb len=%d flags='%s' " \ + TP_printk("memdev=%s log=%s : time=%llu uuid=%pUb len=%d flags='%s' " \ "handle=%x related_handle=%x maint_op_class=%u" \ " : " fmt, \ - __get_str(dev_name), cxl_event_log_type_str(__entry->log), \ + __get_str(memdev), cxl_event_log_type_str(__entry->log), \ __entry->hdr_timestamp, &__entry->hdr_uuid, __entry->hdr_length,\ show_hdr_flags(__entry->hdr_flags), __entry->hdr_handle, \ __entry->hdr_related_handle, __entry->hdr_maint_op_class, \ @@ -203,10 +203,10 @@ TRACE_EVENT(cxl_overflow, TRACE_EVENT(cxl_generic_event, - TP_PROTO(const struct device *dev, enum cxl_event_log_type log, + TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, struct cxl_event_record_raw *rec), - TP_ARGS(dev, log, rec), + TP_ARGS(cxlmd, log, rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -214,7 +214,7 @@ TRACE_EVENT(cxl_generic_event, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(dev, log, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); memcpy(__entry->data, &rec->data, CXL_EVENT_RECORD_DATA_LENGTH); ), @@ -293,10 +293,10 @@ TRACE_EVENT(cxl_generic_event, TRACE_EVENT(cxl_general_media, - TP_PROTO(const struct device *dev, enum cxl_event_log_type log, + TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, struct cxl_event_gen_media *rec), - TP_ARGS(dev, log, rec), + TP_ARGS(cxlmd, log, rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -315,7 +315,7 @@ TRACE_EVENT(cxl_general_media, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(dev, log, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); /* General Media */ __entry->dpa = le64_to_cpu(rec->phys_addr); @@ -376,10 +376,10 @@ TRACE_EVENT(cxl_general_media, TRACE_EVENT(cxl_dram, - TP_PROTO(const struct device *dev, enum cxl_event_log_type log, + TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, struct cxl_event_dram *rec), - TP_ARGS(dev, log, rec), + TP_ARGS(cxlmd, log, rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -401,7 +401,7 @@ TRACE_EVENT(cxl_dram, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(dev, log, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); /* DRAM */ __entry->dpa = le64_to_cpu(rec->phys_addr); @@ -525,10 +525,10 @@ TRACE_EVENT(cxl_dram, TRACE_EVENT(cxl_memory_module, - TP_PROTO(const struct device *dev, enum cxl_event_log_type log, + TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log, struct cxl_event_mem_module *rec), - TP_ARGS(dev, log, rec), + TP_ARGS(cxlmd, log, rec), TP_STRUCT__entry( CXL_EVT_TP_entry @@ -548,7 +548,7 @@ TRACE_EVENT(cxl_memory_module, ), TP_fast_assign( - CXL_EVT_TP_fast_assign(dev, log, rec->hdr); + CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); /* Memory Module Event */ __entry->event_type = rec->event_type; -- cgit From cd0570172dd8427e0bf36b4d70f0b595616da55d Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 14 Feb 2023 20:12:44 -0800 Subject: cxl/trace: Add host output to trace points The host parameter of where the memdev is connected is useful information. Report host consistently in all trace points. Cc: Steven Rostedt Cc: Dave Jiang Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20230208-cxl-event-names-v2-2-fca130c2c68b@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/trace.h | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 93ac651a2f21..ed69e558187b 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -51,12 +51,14 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, TP_ARGS(cxlmd, status, fe, hl), TP_STRUCT__entry( __string(memdev, dev_name(&cxlmd->dev)) + __string(host, dev_name(cxlmd->dev.parent)) __field(u32, status) __field(u32, first_error) __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) ), TP_fast_assign( __assign_str(memdev, dev_name(&cxlmd->dev)); + __assign_str(host, dev_name(cxlmd->dev.parent)); __entry->status = status; __entry->first_error = fe; /* @@ -65,8 +67,8 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, */ memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); ), - TP_printk("memdev=%s: status: '%s' first_error: '%s'", - __get_str(memdev), + TP_printk("memdev=%s host=%s: status: '%s' first_error: '%s'", + __get_str(memdev), __get_str(host), show_uc_errs(__entry->status), show_uc_errs(__entry->first_error) ) @@ -95,14 +97,17 @@ TRACE_EVENT(cxl_aer_correctable_error, TP_ARGS(cxlmd, status), TP_STRUCT__entry( __string(memdev, dev_name(&cxlmd->dev)) + __string(host, dev_name(cxlmd->dev.parent)) __field(u32, status) ), TP_fast_assign( __assign_str(memdev, dev_name(&cxlmd->dev)); + __assign_str(host, dev_name(cxlmd->dev.parent)); __entry->status = status; ), - TP_printk("memdev=%s: status: '%s'", - __get_str(memdev), show_ce_errs(__entry->status) + TP_printk("memdev=%s host=%s: status: '%s'", + __get_str(memdev), __get_str(host), + show_ce_errs(__entry->status) ) ); @@ -122,6 +127,7 @@ TRACE_EVENT(cxl_overflow, TP_STRUCT__entry( __string(memdev, dev_name(&cxlmd->dev)) + __string(host, dev_name(cxlmd->dev.parent)) __field(int, log) __field(u64, first_ts) __field(u64, last_ts) @@ -130,15 +136,17 @@ TRACE_EVENT(cxl_overflow, TP_fast_assign( __assign_str(memdev, dev_name(&cxlmd->dev)); + __assign_str(host, dev_name(cxlmd->dev.parent)); __entry->log = log; __entry->count = le16_to_cpu(payload->overflow_err_count); __entry->first_ts = le64_to_cpu(payload->first_overflow_timestamp); __entry->last_ts = le64_to_cpu(payload->last_overflow_timestamp); ), - TP_printk("memdev=%s: log=%s : %u records from %llu to %llu", - __get_str(memdev), cxl_event_log_type_str(__entry->log), - __entry->count, __entry->first_ts, __entry->last_ts) + TP_printk("memdev=%s host=%s: log=%s : %u records from %llu to %llu", + __get_str(memdev), __get_str(host), + cxl_event_log_type_str(__entry->log), __entry->count, + __entry->first_ts, __entry->last_ts) ); @@ -171,6 +179,7 @@ TRACE_EVENT(cxl_overflow, */ #define CXL_EVT_TP_entry \ __string(memdev, dev_name(&cxlmd->dev)) \ + __string(host, dev_name(cxlmd->dev.parent)) \ __field(int, log) \ __field_struct(uuid_t, hdr_uuid) \ __field(u32, hdr_flags) \ @@ -182,6 +191,7 @@ TRACE_EVENT(cxl_overflow, #define CXL_EVT_TP_fast_assign(cxlmd, l, hdr) \ __assign_str(memdev, dev_name(&(cxlmd)->dev)); \ + __assign_str(host, dev_name((cxlmd)->dev.parent)); \ __entry->log = (l); \ memcpy(&__entry->hdr_uuid, &(hdr).id, sizeof(uuid_t)); \ __entry->hdr_length = (hdr).length; \ @@ -192,10 +202,11 @@ TRACE_EVENT(cxl_overflow, __entry->hdr_maint_op_class = (hdr).maint_op_class #define CXL_EVT_TP_printk(fmt, ...) \ - TP_printk("memdev=%s log=%s : time=%llu uuid=%pUb len=%d flags='%s' " \ - "handle=%x related_handle=%x maint_op_class=%u" \ + TP_printk("memdev=%s host=%s log=%s : time=%llu uuid=%pUb len=%d " \ + "flags='%s' handle=%x related_handle=%x maint_op_class=%u" \ " : " fmt, \ - __get_str(memdev), cxl_event_log_type_str(__entry->log), \ + __get_str(memdev), __get_str(host), \ + cxl_event_log_type_str(__entry->log), \ __entry->hdr_timestamp, &__entry->hdr_uuid, __entry->hdr_length,\ show_hdr_flags(__entry->hdr_flags), __entry->hdr_handle, \ __entry->hdr_related_handle, __entry->hdr_maint_op_class, \ -- cgit From 279676c9aa5e5f11377dddfdecfd22df526ee9b8 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 14 Feb 2023 20:12:45 -0800 Subject: cxl/trace: Add serial number to trace points Device serial numbers are useful information for the user. Add device serial numbers to all the trace points. Cc: Steven Rostedt Cc: Dave Jiang Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20230208-cxl-event-names-v2-3-fca130c2c68b@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/trace.h | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index ed69e558187b..9b8d3d997834 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -52,6 +52,7 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, TP_STRUCT__entry( __string(memdev, dev_name(&cxlmd->dev)) __string(host, dev_name(cxlmd->dev.parent)) + __field(u64, serial) __field(u32, status) __field(u32, first_error) __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) @@ -59,6 +60,7 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, TP_fast_assign( __assign_str(memdev, dev_name(&cxlmd->dev)); __assign_str(host, dev_name(cxlmd->dev.parent)); + __entry->serial = cxlmd->cxlds->serial; __entry->status = status; __entry->first_error = fe; /* @@ -67,8 +69,8 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, */ memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); ), - TP_printk("memdev=%s host=%s: status: '%s' first_error: '%s'", - __get_str(memdev), __get_str(host), + TP_printk("memdev=%s host=%s serial=%lld: status: '%s' first_error: '%s'", + __get_str(memdev), __get_str(host), __entry->serial, show_uc_errs(__entry->status), show_uc_errs(__entry->first_error) ) @@ -98,15 +100,17 @@ TRACE_EVENT(cxl_aer_correctable_error, TP_STRUCT__entry( __string(memdev, dev_name(&cxlmd->dev)) __string(host, dev_name(cxlmd->dev.parent)) + __field(u64, serial) __field(u32, status) ), TP_fast_assign( __assign_str(memdev, dev_name(&cxlmd->dev)); __assign_str(host, dev_name(cxlmd->dev.parent)); + __entry->serial = cxlmd->cxlds->serial; __entry->status = status; ), - TP_printk("memdev=%s host=%s: status: '%s'", - __get_str(memdev), __get_str(host), + TP_printk("memdev=%s host=%s serial=%lld: status: '%s'", + __get_str(memdev), __get_str(host), __entry->serial, show_ce_errs(__entry->status) ) ); @@ -129,6 +133,7 @@ TRACE_EVENT(cxl_overflow, __string(memdev, dev_name(&cxlmd->dev)) __string(host, dev_name(cxlmd->dev.parent)) __field(int, log) + __field(u64, serial) __field(u64, first_ts) __field(u64, last_ts) __field(u16, count) @@ -137,14 +142,15 @@ TRACE_EVENT(cxl_overflow, TP_fast_assign( __assign_str(memdev, dev_name(&cxlmd->dev)); __assign_str(host, dev_name(cxlmd->dev.parent)); + __entry->serial = cxlmd->cxlds->serial; __entry->log = log; __entry->count = le16_to_cpu(payload->overflow_err_count); __entry->first_ts = le64_to_cpu(payload->first_overflow_timestamp); __entry->last_ts = le64_to_cpu(payload->last_overflow_timestamp); ), - TP_printk("memdev=%s host=%s: log=%s : %u records from %llu to %llu", - __get_str(memdev), __get_str(host), + TP_printk("memdev=%s host=%s serial=%lld: log=%s : %u records from %llu to %llu", + __get_str(memdev), __get_str(host), __entry->serial, cxl_event_log_type_str(__entry->log), __entry->count, __entry->first_ts, __entry->last_ts) @@ -182,6 +188,7 @@ TRACE_EVENT(cxl_overflow, __string(host, dev_name(cxlmd->dev.parent)) \ __field(int, log) \ __field_struct(uuid_t, hdr_uuid) \ + __field(u64, serial) \ __field(u32, hdr_flags) \ __field(u16, hdr_handle) \ __field(u16, hdr_related_handle) \ @@ -193,6 +200,7 @@ TRACE_EVENT(cxl_overflow, __assign_str(memdev, dev_name(&(cxlmd)->dev)); \ __assign_str(host, dev_name((cxlmd)->dev.parent)); \ __entry->log = (l); \ + __entry->serial = (cxlmd)->cxlds->serial; \ memcpy(&__entry->hdr_uuid, &(hdr).id, sizeof(uuid_t)); \ __entry->hdr_length = (hdr).length; \ __entry->hdr_flags = get_unaligned_le24((hdr).flags); \ @@ -202,10 +210,10 @@ TRACE_EVENT(cxl_overflow, __entry->hdr_maint_op_class = (hdr).maint_op_class #define CXL_EVT_TP_printk(fmt, ...) \ - TP_printk("memdev=%s host=%s log=%s : time=%llu uuid=%pUb len=%d " \ - "flags='%s' handle=%x related_handle=%x maint_op_class=%u" \ - " : " fmt, \ - __get_str(memdev), __get_str(host), \ + TP_printk("memdev=%s host=%s serial=%lld log=%s : time=%llu uuid=%pUb " \ + "len=%d flags='%s' handle=%x related_handle=%x " \ + "maint_op_class=%u : " fmt, \ + __get_str(memdev), __get_str(host), __entry->serial, \ cxl_event_log_type_str(__entry->log), \ __entry->hdr_timestamp, &__entry->hdr_uuid, __entry->hdr_length,\ show_hdr_flags(__entry->hdr_flags), __entry->hdr_handle, \ -- cgit From 1bb31131231ddef851bd01299f19603d38a111de Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Thu, 16 Feb 2023 11:24:26 -0800 Subject: cxl/mem: Add kdoc param for event log driver state This makes the kernel-doc for cxl_dev_state complete. Signed-off-by: Alison Schofield Link: https://lore.kernel.org/r/20230216192426.1184606-1-alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/cxlmem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 802b5b396daf..fb160c354a88 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -257,6 +257,7 @@ struct cxl_event_state { * @info: Cached DVSEC information about the device. * @serial: PCIe Device Serial Number * @doe_mbs: PCI DOE mailbox array + * @event: event log driver state * @mbox_send: @dev specific transport for transmitting mailbox commands * * See section 8.2.9.5.2 Capacity Configuration and Label Storage for -- cgit