From 36a1c2ee50f573972aea3c3019555f47ee0094c0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 17 Nov 2023 13:18:48 -0700 Subject: cxl/hdm: Fix a benign lockdep splat The new helper "cxl_num_decoders_committed()" added a lockdep assertion to validate that port->commit_end is protected against modification. That assertion fires in init_hdm_decoder() where it is initializing port->commit_end. Given that it is both accessing and writing that property it ostensibly needs the lock. In practice, CXL decoder commit rules (must commit in order) and the in-order discovery of device decoders makes the manipulation of ->commit_end in init_hdm_decoder() safe. However, rather than rely on the subtle rules of CXL hardware, just make the implementation obviously correct from a software perspective. The Fixes: tag is only for cleaning up a lockdep splat, there is no functional issue addressed by this fix. Fixes: 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170025232811.2147250.16376901801315194121.stgit@djiang5-mobl3 Acked-by: Davidlohr Bueso Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 1cc9be85ba4c..529baa8a1759 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -839,6 +839,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, cxld->target_type = CXL_DECODER_HOSTONLYMEM; else cxld->target_type = CXL_DECODER_DEVMEM; + + guard(rwsem_write)(&cxl_region_rwsem); if (cxld->id != cxl_num_decoders_committed(port)) { dev_warn(&port->dev, "decoder%d.%d: Committed out of order\n", -- cgit From 5558b92e8d39e18aa19619be2ee37274e9592528 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Sun, 26 Nov 2023 16:09:29 -0800 Subject: cxl/core: Always hold region_rwsem while reading poison lists A read of a device poison list is triggered 
via a sysfs attribute and the results are logged as kernel trace events of type cxl_poison. The work is managed by either: a) the region driver when one or more regions map the device, or by b) the memdev driver when no regions map the device. In the case of a) the region driver holds the region_rwsem while reading the poison by committed endpoint decoder mappings and for any unmapped resources. This makes sure that the cxl_poison trace event reports valid region info. (Region name, HPA, and UUID). In the case of b) the memdev driver holds the dpa_rwsem preventing new DPA resources from being attached to a region. However, it leaves a gap between region attach and decoder commit actions. If a DPA in the gap is in the poison list, the cxl_poison trace event will omit the region info. Close the gap by holding the region_rwsem and the dpa_rwsem when reading poison per memdev. Since both methods now hold both locks, down_read both from the caller. Doing so also addresses the lockdep assert that found this issue: Commit 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper") Fixes: f0832a586396 ("cxl/region: Provide region info to the cxl_poison trace event") Signed-off-by: Alison Schofield Reviewed-by: Davidlohr Bueso Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/08e8e7ec9a3413b91d51de39e385653494b1eed0.1701041440.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/memdev.c | 9 ++++++++- drivers/cxl/core/region.c | 5 ----- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index fc5c2b414793..5ad1b13e780a 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -227,10 +227,16 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) if (!port || !is_cxl_endpoint(port)) return -EINVAL; - rc = down_read_interruptible(&cxl_dpa_rwsem); + rc = down_read_interruptible(&cxl_region_rwsem); if (rc) return rc; + rc = 
down_read_interruptible(&cxl_dpa_rwsem); + if (rc) { + up_read(&cxl_region_rwsem); + return rc; + } + if (cxl_num_decoders_committed(port) == 0) { /* No regions mapped to this memdev */ rc = cxl_get_poison_by_memdev(cxlmd); @@ -239,6 +245,7 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) rc = cxl_get_poison_by_endpoint(port); } up_read(&cxl_dpa_rwsem); + up_read(&cxl_region_rwsem); return rc; } diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 56e575c79bb4..3e817a6f94c6 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2467,10 +2467,6 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port) struct cxl_poison_context ctx; int rc = 0; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) - return rc; - ctx = (struct cxl_poison_context) { .port = port }; @@ -2480,7 +2476,6 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port) rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev), &ctx); - up_read(&cxl_region_rwsem); return rc; } -- cgit From 0e33ac9c3ffe5e4f55c68345f44cea7fec2fe750 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Sun, 26 Nov 2023 16:09:30 -0800 Subject: cxl/memdev: Hold region_rwsem during inject and clear poison ops Poison inject and clear are supported via debugfs where a privileged user can inject and clear poison to a device physical address. Commit 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper") added a lockdep assert that highlighted a gap in poison inject and clear functions where holding the dpa_rwsem does not assure that a DPA is not added to a region. The impact for inject and clear is that if the DPA address being injected or cleared has been attached to a region, but not yet committed, the dev_dbg() message intended to alert the debug user that they are acting on a mapped address is not emitted. Also, the cxl_poison trace event that serves as a log of the inject and clear activity will not include region info. 
Close this gap by snapshotting an unchangeable region state during poison inject and clear operations. That means holding both the region_rwsem and the dpa_rwsem during the inject and clear ops. Fixes: d2fbc4865802 ("cxl/memdev: Add support for the Inject Poison mailbox command") Fixes: 9690b07748d1 ("cxl/memdev: Add support for the Clear Poison mailbox command") Signed-off-by: Alison Schofield Reviewed-by: Davidlohr Bueso Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/08721dc1df0a51e4e38fecd02425c3475912dfd5.1701041440.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/memdev.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 5ad1b13e780a..2f43d368ba07 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -331,10 +331,16 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - rc = down_read_interruptible(&cxl_dpa_rwsem); + rc = down_read_interruptible(&cxl_region_rwsem); if (rc) return rc; + rc = down_read_interruptible(&cxl_dpa_rwsem); + if (rc) { + up_read(&cxl_region_rwsem); + return rc; + } + rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) goto out; @@ -362,6 +368,7 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT); out: up_read(&cxl_dpa_rwsem); + up_read(&cxl_region_rwsem); return rc; } @@ -379,10 +386,16 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - rc = down_read_interruptible(&cxl_dpa_rwsem); + rc = down_read_interruptible(&cxl_region_rwsem); if (rc) return rc; + rc = down_read_interruptible(&cxl_dpa_rwsem); + if (rc) { + up_read(&cxl_region_rwsem); + return rc; + } + rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) goto out; @@ -419,6 +432,7 @@ int cxl_clear_poison(struct cxl_memdev 
*cxlmd, u64 dpa) trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR); out: up_read(&cxl_dpa_rwsem); + up_read(&cxl_region_rwsem); return rc; } -- cgit From 6f5c4eca48ffe18307b4e1d375817691c9005c87 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Dec 2023 19:11:14 -0800 Subject: cxl/hdm: Fix dpa translation locking The helper, cxl_dpa_resource_start(), snapshots the dpa-address of an endpoint-decoder after acquiring the cxl_dpa_rwsem. However, it is sufficient to assert that cxl_dpa_rwsem is held rather than acquire it in the helper. Otherwise, it triggers multiple lockdep reports: 1/ Tracing callbacks are in an atomic context that can not acquire sleeping locks: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1525 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1288, name: bash preempt_count: 2, expected: 0 RCU nest depth: 0, expected: 0 [..] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc38 05/24/2023 Call Trace: dump_stack_lvl+0x71/0x90 __might_resched+0x1b2/0x2c0 down_read+0x1a/0x190 cxl_dpa_resource_start+0x15/0x50 [cxl_core] cxl_trace_hpa+0x122/0x300 [cxl_core] trace_event_raw_event_cxl_poison+0x1c9/0x2d0 [cxl_core] 2/ The rwsem is already held in the inject poison path: WARNING: possible recursive locking detected 6.7.0-rc2+ #12 Tainted: G W OE N -------------------------------------------- bash/1288 is trying to acquire lock: ffffffffc05f73d0 (cxl_dpa_rwsem){++++}-{3:3}, at: cxl_dpa_resource_start+0x15/0x50 [cxl_core] but task is already holding lock: ffffffffc05f73d0 (cxl_dpa_rwsem){++++}-{3:3}, at: cxl_inject_poison+0x7d/0x1e0 [cxl_core] [..] 
Call Trace: dump_stack_lvl+0x71/0x90 __might_resched+0x1b2/0x2c0 down_read+0x1a/0x190 cxl_dpa_resource_start+0x15/0x50 [cxl_core] cxl_trace_hpa+0x122/0x300 [cxl_core] trace_event_raw_event_cxl_poison+0x1c9/0x2d0 [cxl_core] __traceiter_cxl_poison+0x5c/0x80 [cxl_core] cxl_inject_poison+0x1bc/0x1e0 [cxl_core] This appears to have been an issue since the initial implementation and uncovered by the new cxl-poison.sh test [1]. That test is now passing with these changes. Fixes: 28a3ae4ff66c ("cxl/trace: Add an HPA to cxl_poison trace events") Link: http://lore.kernel.org/r/e4f2716646918135ddbadf4146e92abb659de734.1700615159.git.alison.schofield@intel.com [1] Cc: Cc: Alison Schofield Cc: Jonathan Cameron Cc: Dave Jiang Cc: Ira Weiny Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 3 +-- drivers/cxl/core/port.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 529baa8a1759..7d97790b893d 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -363,10 +363,9 @@ resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled) { resource_size_t base = -1; - down_read(&cxl_dpa_rwsem); + lockdep_assert_held(&cxl_dpa_rwsem); if (cxled->dpa_res) base = cxled->dpa_res->start; - up_read(&cxl_dpa_rwsem); return base; } diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 38441634e4c6..b7c93bb18f6e 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -226,9 +226,9 @@ static ssize_t dpa_resource_show(struct device *dev, struct device_attribute *at char *buf) { struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); - u64 base = cxl_dpa_resource_start(cxled); - return sysfs_emit(buf, "%#llx\n", base); + guard(rwsem_read)(&cxl_dpa_rwsem); + return sysfs_emit(buf, "%#llx\n", (u64)cxl_dpa_resource_start(cxled)); } static DEVICE_ATTR_RO(dpa_resource); -- cgit From c65efe3685f5d150eeca5599afeabdc85da899d1 Mon Sep 17 
00:00:00 2001 From: Ira Weiny Date: Thu, 16 Nov 2023 16:03:29 -0800 Subject: cxl/cdat: Free correct buffer on checksum error The new 6.7-rc1 kernel now checks the checksum on CDAT data. While using a branch of Fan's DCD qemu work (and specifying DCD devices), the following splat was observed. WARNING: CPU: 1 PID: 1384 at drivers/base/devres.c:1064 devm_kfree+0x4f/0x60 ... RIP: 0010:devm_kfree+0x4f/0x60 ... ? devm_kfree+0x4f/0x60 read_cdat_data+0x1a0/0x2a0 [cxl_core] cxl_port_probe+0xdf/0x200 [cxl_port] ... The issue in qemu is still unknown but the splat is a straightforward bug in the CDAT checksum processing code. Use a CDAT buffer variable to ensure the devm_kfree() works correctly on error. Fixes: 670e4e88f3b1 ("cxl: Add checksum verification to CDAT from CXL") Signed-off-by: Ira Weiny Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Reviewed-by: Robert Richter Link: http://lore.kernel.org/r/20231116-fix-cdat-devm-free-v1-1-b148b40707d7@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index eff20e83d0a6..37e1652afbc7 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -620,7 +620,7 @@ void read_cdat_data(struct cxl_port *port) struct pci_dev *pdev = NULL; struct cxl_memdev *cxlmd; size_t cdat_length; - void *cdat_table; + void *cdat_table, *cdat_buf; int rc; if (is_cxl_memdev(uport)) { @@ -651,16 +651,15 @@ void read_cdat_data(struct cxl_port *port) return; } - cdat_table = devm_kzalloc(dev, cdat_length + sizeof(__le32), - GFP_KERNEL); - if (!cdat_table) + cdat_buf = devm_kzalloc(dev, cdat_length + sizeof(__le32), GFP_KERNEL); + if (!cdat_buf) return; - rc = cxl_cdat_read_table(dev, cdat_doe, cdat_table, &cdat_length); + rc = cxl_cdat_read_table(dev, cdat_doe, cdat_buf, &cdat_length); if (rc) goto err; - cdat_table = cdat_table + sizeof(__le32); + cdat_table = cdat_buf + 
sizeof(__le32); if (cdat_checksum(cdat_table, cdat_length)) goto err; @@ -670,7 +669,7 @@ void read_cdat_data(struct cxl_port *port) err: /* Don't leave table data allocated on error */ - devm_kfree(dev, cdat_table); + devm_kfree(dev, cdat_buf); dev_err(dev, "Failed to read/validate CDAT.\n"); } EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); -- cgit From ef3d5cf9c59cccb012aa6b93d99f4c6eb5d6648e Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 16 Oct 2023 16:25:05 -0700 Subject: cxl/pmu: Ensure put_device on pmu devices The following kmemleaks were detected when removing the cxl module stack: unreferenced object 0xffff88822616b800 (size 1024): ... backtrace: [<00000000bedc6f83>] kmalloc_trace+0x26/0x90 [<00000000448d1afc>] devm_cxl_pmu_add+0x3a/0x110 [cxl_core] [<00000000ca3bfe16>] 0xffffffffa105213b [<00000000ba7f78dc>] local_pci_probe+0x41/0x90 [<000000005bb027ac>] pci_device_probe+0xb0/0x1c0 ... unreferenced object 0xffff8882260abcc0 (size 16): ... hex dump (first 16 bytes): 70 6d 75 5f 6d 65 6d 30 2e 30 00 26 82 88 ff ff pmu_mem0.0.&.... backtrace: ... [<00000000152b5e98>] dev_set_name+0x43/0x50 [<00000000c228798b>] devm_cxl_pmu_add+0x102/0x110 [cxl_core] [<00000000ca3bfe16>] 0xffffffffa105213b [<00000000ba7f78dc>] local_pci_probe+0x41/0x90 [<000000005bb027ac>] pci_device_probe+0xb0/0x1c0 ... unreferenced object 0xffff8882272af200 (size 256): ... backtrace: [<00000000bedc6f83>] kmalloc_trace+0x26/0x90 [<00000000a14d1813>] device_add+0x4ea/0x890 [<00000000a3f07b47>] devm_cxl_pmu_add+0xbe/0x110 [cxl_core] [<00000000ca3bfe16>] 0xffffffffa105213b [<00000000ba7f78dc>] local_pci_probe+0x41/0x90 [<000000005bb027ac>] pci_device_probe+0xb0/0x1c0 ... devm_cxl_pmu_add() correctly registers a device remove function but it only calls device_del() which is only part of device unregistration. Properly call device_unregister() to free up the memory associated with the device. 
Fixes: 1ad3f701c399 ("cxl/pci: Find and register CXL PMU devices") Cc: Jonathan Cameron Signed-off-by: Ira Weiny Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20231016-pmu-unregister-fix-v1-1-1e2eb2fa3c69@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/pmu.c b/drivers/cxl/core/pmu.c index 7684c843e5a5..5d8e06b0ba6e 100644 --- a/drivers/cxl/core/pmu.c +++ b/drivers/cxl/core/pmu.c @@ -23,7 +23,7 @@ const struct device_type cxl_pmu_type = { static void remove_dev(void *dev) { - device_del(dev); + device_unregister(dev); } int devm_cxl_pmu_add(struct device *parent, struct cxl_pmu_regs *regs, -- cgit From ad6f04c0269b0b7908f09621d3b3c90def39a297 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:13 -0700 Subject: cxl: Add callback to parse the DSMAS subtables from CDAT Provide a callback function to the CDAT parser in order to parse the Device Scoped Memory Affinity Structure (DSMAS). Each DSMAS structure contains the DPA range and its associated attributes in each entry. See the CDAT specification for details. The device handle and the DPA range are saved to be associated with the DSLBIS locality data when the DSLBIS entries are parsed. The xarray is a local variable. When the total path performance data is calculated and stored, this xarray can be discarded. 
Coherent Device Attribute Table 1.03 2.1 Device Scoped memory Affinity Structure (DSMAS) Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319619355.2212653.2675953129671561293.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/Kconfig | 3 ++ drivers/cxl/core/Makefile | 1 + drivers/cxl/core/cdat.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 2 ++ drivers/cxl/port.c | 1 + 5 files changed, 99 insertions(+) create mode 100644 drivers/cxl/core/cdat.c (limited to 'drivers/cxl') diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 8ea1d340e438..67998dbd1d46 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -5,6 +5,7 @@ menuconfig CXL_BUS select FW_LOADER select FW_UPLOAD select PCI_DOE + select FIRMWARE_TABLE help CXL is a bus that is electrically compatible with PCI Express, but layers three protocols on that signalling (CXL.io, CXL.cache, and @@ -54,8 +55,10 @@ config CXL_MEM_RAW_COMMANDS config CXL_ACPI tristate "CXL ACPI: Platform Support" depends on ACPI + depends on ACPI_NUMA default CXL_BUS select ACPI_TABLE_LIB + select ACPI_HMAT help Enable support for host managed device memory (HDM) resources published by a platform's ACPI CXL memory layout description. See diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 1f66b5d4d935..9259bcc6773c 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -13,5 +13,6 @@ cxl_core-y += mbox.o cxl_core-y += pci.o cxl_core-y += hdm.o cxl_core-y += pmu.o +cxl_core-y += cdat.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c new file mode 100644 index 000000000000..9bf4f53bf77f --- /dev/null +++ b/drivers/cxl/core/cdat.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation. All rights reserved. 
*/ +#include +#include +#include +#include "cxlpci.h" +#include "cxl.h" + +struct dsmas_entry { + struct range dpa_range; + u8 handle; +}; + +static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg, + const unsigned long end) +{ + struct acpi_cdat_header *hdr = &header->cdat; + struct acpi_cdat_dsmas *dsmas; + int size = sizeof(*hdr) + sizeof(*dsmas); + struct xarray *dsmas_xa = arg; + struct dsmas_entry *dent; + u16 len; + int rc; + + len = le16_to_cpu((__force __le16)hdr->length); + if (len != size || (unsigned long)hdr + len > end) { + pr_warn("Malformed DSMAS table length: (%u:%u)\n", size, len); + return -EINVAL; + } + + /* Skip common header */ + dsmas = (struct acpi_cdat_dsmas *)(hdr + 1); + + dent = kzalloc(sizeof(*dent), GFP_KERNEL); + if (!dent) + return -ENOMEM; + + dent->handle = dsmas->dsmad_handle; + dent->dpa_range.start = le64_to_cpu((__force __le64)dsmas->dpa_base_address); + dent->dpa_range.end = le64_to_cpu((__force __le64)dsmas->dpa_base_address) + + le64_to_cpu((__force __le64)dsmas->dpa_length) - 1; + + rc = xa_insert(dsmas_xa, dent->handle, dent, GFP_KERNEL); + if (rc) { + kfree(dent); + return rc; + } + + return 0; +} + +static int cxl_cdat_endpoint_process(struct cxl_port *port, + struct xarray *dsmas_xa) +{ + return cdat_table_parse(ACPI_CDAT_TYPE_DSMAS, cdat_dsmas_handler, + dsmas_xa, port->cdat.table); +} + +static void discard_dsmas(struct xarray *xa) +{ + unsigned long index; + void *ent; + + xa_for_each(xa, index, ent) { + xa_erase(xa, index); + kfree(ent); + } + xa_destroy(xa); +} +DEFINE_FREE(dsmas, struct xarray *, if (_T) discard_dsmas(_T)) + +void cxl_endpoint_parse_cdat(struct cxl_port *port) +{ + struct xarray __dsmas_xa; + struct xarray *dsmas_xa __free(dsmas) = &__dsmas_xa; + int rc; + + xa_init(&__dsmas_xa); + if (!port->cdat.table) + return; + + rc = cxl_cdat_endpoint_process(port, dsmas_xa); + if (rc < 0) { + dev_dbg(&port->dev, "Failed to parse CDAT: %d\n", rc); + return; + } + + /* Performance data 
processing */ +} +EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL); + +MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 687043ece101..be3b5eda875c 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -839,6 +839,8 @@ static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev) } #endif +void cxl_endpoint_parse_cdat(struct cxl_port *port); + /* * Unit test builds overrides this to __weak, find the 'strong' version * of these symbols in tools/testing/cxl/. diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 47bc8e0b8590..a889c4e6cb27 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -109,6 +109,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) /* Cache the data early to ensure is_visible() works */ read_cdat_data(port); + cxl_endpoint_parse_cdat(port); get_device(&cxlmd->dev); rc = devm_add_action_or_reset(&port->dev, schedule_detach, cxlmd); -- cgit From 63cef81b9dca6ddf1c34d697016f830ddcfadf28 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:20 -0700 Subject: cxl: Add callback to parse the DSLBIS subtable from CDAT Provide a callback to parse the Device Scoped Latency and Bandwidth Information Structure (DSLBIS) in the CDAT structures. The DSLBIS contains the bandwidth and latency information that's tied to a DSMAS handle. The driver will retrieve the read and write latency and bandwidth associated with the DSMAS which is tied to a DPA range. 
Coherent Device Attribute Table 1.03 2.1 Device Scoped Latency and Bandwidth Information Structure (DSLBIS) Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319620005.2212653.7475488478229720542.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/core/cdat.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 2 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 9bf4f53bf77f..97d8ef8848c6 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -3,12 +3,15 @@ #include #include #include +#include +#include #include "cxlpci.h" #include "cxl.h" struct dsmas_entry { struct range dpa_range; u8 handle; + struct access_coordinate coord; }; static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg, @@ -49,11 +52,106 @@ static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg, return 0; } +static void cxl_access_coordinate_set(struct access_coordinate *coord, + int access, unsigned int val) +{ + switch (access) { + case ACPI_HMAT_ACCESS_LATENCY: + coord->read_latency = val; + coord->write_latency = val; + break; + case ACPI_HMAT_READ_LATENCY: + coord->read_latency = val; + break; + case ACPI_HMAT_WRITE_LATENCY: + coord->write_latency = val; + break; + case ACPI_HMAT_ACCESS_BANDWIDTH: + coord->read_bandwidth = val; + coord->write_bandwidth = val; + break; + case ACPI_HMAT_READ_BANDWIDTH: + coord->read_bandwidth = val; + break; + case ACPI_HMAT_WRITE_BANDWIDTH: + coord->write_bandwidth = val; + break; + } +} + +static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg, + const unsigned long end) +{ + struct acpi_cdat_header *hdr = &header->cdat; + struct acpi_cdat_dslbis *dslbis; + int size = sizeof(*hdr) + sizeof(*dslbis); + struct xarray *dsmas_xa = arg; + struct dsmas_entry *dent; + __le64 le_base; + __le16 le_val; + u64 val; + u16 len; + int rc; + + len = 
le16_to_cpu((__force __le16)hdr->length); + if (len != size || (unsigned long)hdr + len > end) { + pr_warn("Malformed DSLBIS table length: (%u:%u)\n", size, len); + return -EINVAL; + } + + /* Skip common header */ + dslbis = (struct acpi_cdat_dslbis *)(hdr + 1); + + /* Skip unrecognized data type */ + if (dslbis->data_type > ACPI_HMAT_WRITE_BANDWIDTH) + return 0; + + /* Not a memory type, skip */ + if ((dslbis->flags & ACPI_HMAT_MEMORY_HIERARCHY) != ACPI_HMAT_MEMORY) + return 0; + + dent = xa_load(dsmas_xa, dslbis->handle); + if (!dent) { + pr_warn("No matching DSMAS entry for DSLBIS entry.\n"); + return 0; + } + + le_base = (__force __le64)dslbis->entry_base_unit; + le_val = (__force __le16)dslbis->entry[0]; + rc = check_mul_overflow(le64_to_cpu(le_base), + le16_to_cpu(le_val), &val); + if (rc) + pr_warn("DSLBIS value overflowed.\n"); + + cxl_access_coordinate_set(&dent->coord, dslbis->data_type, val); + + return 0; +} + +static int cdat_table_parse_output(int rc) +{ + if (rc < 0) + return rc; + if (rc == 0) + return -ENOENT; + + return 0; +} + static int cxl_cdat_endpoint_process(struct cxl_port *port, struct xarray *dsmas_xa) { - return cdat_table_parse(ACPI_CDAT_TYPE_DSMAS, cdat_dsmas_handler, - dsmas_xa, port->cdat.table); + int rc; + + rc = cdat_table_parse(ACPI_CDAT_TYPE_DSMAS, cdat_dsmas_handler, + dsmas_xa, port->cdat.table); + rc = cdat_table_parse_output(rc); + if (rc) + return rc; + + rc = cdat_table_parse(ACPI_CDAT_TYPE_DSLBIS, cdat_dslbis_handler, + dsmas_xa, port->cdat.table); + return cdat_table_parse_output(rc); } static void discard_dsmas(struct xarray *xa) -- cgit From 80aa780dda20618be76162bf991d49cf962fda38 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:26 -0700 Subject: cxl: Add callback to parse the SSLBIS subtable from CDAT Provide a callback to parse the Switched Scoped Latency and Bandwidth Information Structure (SSLBIS) in the CDAT structures. 
The SSLBIS contains the bandwidth and latency information that's tied to the CXL switch that the data table has been read from. The extracted values are stored to the cxl_dport correlated by the port_id depending on the SSLBIS entry. Coherent Device Attribute Table 1.03 2.1 Switched Scoped Latency and Bandwidth Information Structure (SSLBIS) Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319620635.2212653.5194389158785365150.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/core/cdat.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 4 ++ drivers/cxl/port.c | 2 + 3 files changed, 104 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 97d8ef8848c6..b3ab47d250e1 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -187,4 +187,102 @@ void cxl_endpoint_parse_cdat(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL); +static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, + const unsigned long end) +{ + struct acpi_cdat_sslbis *sslbis; + int size = sizeof(header->cdat) + sizeof(*sslbis); + struct cxl_port *port = arg; + struct device *dev = &port->dev; + struct acpi_cdat_sslbe *entry; + int remain, entries, i; + u16 len; + + len = le16_to_cpu((__force __le16)header->cdat.length); + remain = len - size; + if (!remain || remain % sizeof(*entry) || + (unsigned long)header + len > end) { + dev_warn(dev, "Malformed SSLBIS table length: (%u)\n", len); + return -EINVAL; + } + + /* Skip common header */ + sslbis = (struct acpi_cdat_sslbis *)((unsigned long)header + + sizeof(header->cdat)); + + /* Unrecognized data type, we can skip */ + if (sslbis->data_type > ACPI_HMAT_WRITE_BANDWIDTH) + return 0; + + entries = remain / sizeof(*entry); + entry = (struct acpi_cdat_sslbe *)((unsigned long)header + sizeof(*sslbis)); + + for (i = 0; i < entries; i++) { + u16 x = 
le16_to_cpu((__force __le16)entry->portx_id); + u16 y = le16_to_cpu((__force __le16)entry->porty_id); + __le64 le_base; + __le16 le_val; + struct cxl_dport *dport; + unsigned long index; + u16 dsp_id; + u64 val; + + switch (x) { + case ACPI_CDAT_SSLBIS_US_PORT: + dsp_id = y; + break; + case ACPI_CDAT_SSLBIS_ANY_PORT: + switch (y) { + case ACPI_CDAT_SSLBIS_US_PORT: + dsp_id = x; + break; + case ACPI_CDAT_SSLBIS_ANY_PORT: + dsp_id = ACPI_CDAT_SSLBIS_ANY_PORT; + break; + default: + dsp_id = y; + break; + } + break; + default: + dsp_id = x; + break; + } + + le_base = (__force __le64)sslbis->entry_base_unit; + le_val = (__force __le16)entry->latency_or_bandwidth; + + if (check_mul_overflow(le64_to_cpu(le_base), + le16_to_cpu(le_val), &val)) + dev_warn(dev, "SSLBIS value overflowed!\n"); + + xa_for_each(&port->dports, index, dport) { + if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT || + dsp_id == dport->port_id) + cxl_access_coordinate_set(&dport->sw_coord, + sslbis->data_type, + val); + } + + entry++; + } + + return 0; +} + +void cxl_switch_parse_cdat(struct cxl_port *port) +{ + int rc; + + if (!port->cdat.table) + return; + + rc = cdat_table_parse(ACPI_CDAT_TYPE_SSLBIS, cdat_sslbis_handler, + port, port->cdat.table); + rc = cdat_table_parse_output(rc); + if (rc) + dev_dbg(&port->dev, "Failed to parse SSLBIS: %d\n", rc); +} +EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL); + MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index be3b5eda875c..22f664b9f4c6 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -8,6 +8,7 @@ #include #include #include +#include #include /** @@ -634,6 +635,7 @@ struct cxl_rcrb_info { * @rch: Indicate whether this dport was enumerated in RCH or VH mode * @port: reference to cxl_port that contains this downstream port * @regs: Dport parsed register blocks + * @sw_coord: access coordinates (performance) for switch from CDAT */ struct cxl_dport { struct device *dport_dev; @@ -643,6 +645,7 @@ struct cxl_dport { bool rch; 
struct cxl_port *port; struct cxl_regs regs; + struct access_coordinate sw_coord; }; /** @@ -840,6 +843,7 @@ static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev) #endif void cxl_endpoint_parse_cdat(struct cxl_port *port); +void cxl_switch_parse_cdat(struct cxl_port *port); /* * Unit test builds overrides this to __weak, find the 'strong' version diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index a889c4e6cb27..da3c3a08bd62 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -69,6 +69,8 @@ static int cxl_switch_port_probe(struct cxl_port *port) if (rc < 0) return rc; + cxl_switch_parse_cdat(port); + cxlhdm = devm_cxl_setup_hdm(port, NULL); if (!IS_ERR(cxlhdm)) return devm_cxl_enumerate_decoders(cxlhdm, NULL); -- cgit From 790815902ec61ba1715fd67d3cb9036e13c942bc Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:32 -0700 Subject: cxl: Add support for _DSM Function for retrieving QTG ID CXL spec v3.0 9.17.3 CXL Root Device Specific Methods (_DSM) Add support to retrieve QTG ID via ACPI _DSM call. The _DSM call requires an input of an ACPI package with 4 dwords (read latency, write latency, read bandwidth, write bandwidth). The call returns a package with 1 WORD that provides the max supported QTG ID and a package that may contain 0 or more WORDs as the recommended QTG IDs in the recommended order. Create a cxl_root container for the root cxl_port and provide a callback ->get_qos_class() in order to retrieve the QoS class. For the ACPI case, the _DSM helper is used to retrieve the QTG ID and returned. A devm_cxl_add_root() function is added for root port setup and registration of the cxl_root callback operation(s). 
Signed-off-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/170319621294.2212653.1649682083061569256.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++-- drivers/cxl/core/port.c | 49 ++++++++++++++---- drivers/cxl/cxl.h | 25 +++++++++ 3 files changed, 193 insertions(+), 13 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 2034eb4ce83f..2f7de910ce57 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "cxlpci.h" #include "cxl.h" @@ -17,6 +18,10 @@ struct cxl_cxims_data { u64 xormaps[] __counted_by(nr_maps); }; +static const guid_t acpi_cxl_qtg_id_guid = + GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071, + 0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52); + /* * Find a targets entry (n) in the host bridge interleave list. * CXL Specification 3.0 Table 9-22 @@ -194,6 +199,125 @@ struct cxl_cfmws_context { int id; }; +/** + * cxl_acpi_evaluate_qtg_dsm - Retrieve QTG ids via ACPI _DSM + * @handle: ACPI handle + * @coord: performance access coordinates + * @entries: number of QTG IDs to return + * @qos_class: int array provided by caller to return QTG IDs + * + * Return: number of QTG IDs returned, or -errno for errors + * + * Issue QTG _DSM with accompanied bandwidth and latency data in order to get + * the QTG IDs that are suitable for the performance point in order of most + * suitable to least suitable. Write back array of QTG IDs and return the + * actual number of QTG IDs written back. 
+ */ +static int +cxl_acpi_evaluate_qtg_dsm(acpi_handle handle, struct access_coordinate *coord, + int entries, int *qos_class) +{ + union acpi_object *out_obj, *out_buf, *obj; + union acpi_object in_array[4] = { + [0].integer = { ACPI_TYPE_INTEGER, coord->read_latency }, + [1].integer = { ACPI_TYPE_INTEGER, coord->write_latency }, + [2].integer = { ACPI_TYPE_INTEGER, coord->read_bandwidth }, + [3].integer = { ACPI_TYPE_INTEGER, coord->write_bandwidth }, + }; + union acpi_object in_obj = { + .package = { + .type = ACPI_TYPE_PACKAGE, + .count = 4, + .elements = in_array, + }, + }; + int count, pkg_entries, i; + u16 max_qtg; + int rc; + + if (!entries) + return -EINVAL; + + out_obj = acpi_evaluate_dsm(handle, &acpi_cxl_qtg_id_guid, 1, 1, &in_obj); + if (!out_obj) + return -ENXIO; + + if (out_obj->type != ACPI_TYPE_PACKAGE) { + rc = -ENXIO; + goto out; + } + + /* Check Max QTG ID */ + obj = &out_obj->package.elements[0]; + if (obj->type != ACPI_TYPE_INTEGER) { + rc = -ENXIO; + goto out; + } + + max_qtg = obj->integer.value; + + /* It's legal to have 0 QTG entries */ + pkg_entries = out_obj->package.count; + if (pkg_entries <= 1) { + rc = 0; + goto out; + } + + /* Retrieve QTG IDs package */ + obj = &out_obj->package.elements[1]; + if (obj->type != ACPI_TYPE_PACKAGE) { + rc = -ENXIO; + goto out; + } + + pkg_entries = obj->package.count; + count = min(entries, pkg_entries); + for (i = 0; i < count; i++) { + u16 qtg_id; + + out_buf = &obj->package.elements[i]; + if (out_buf->type != ACPI_TYPE_INTEGER) { + rc = -ENXIO; + goto out; + } + + qtg_id = out_buf->integer.value; + if (qtg_id > max_qtg) + pr_warn("QTG ID %u greater than MAX %u\n", + qtg_id, max_qtg); + + qos_class[i] = qtg_id; + } + rc = count; + +out: + ACPI_FREE(out_obj); + return rc; +} + +static int cxl_acpi_qos_class(struct cxl_port *root_port, + struct access_coordinate *coord, int entries, + int *qos_class) +{ + acpi_handle handle; + struct device *dev; + + dev = root_port->uport_dev; + + if 
(!dev_is_platform(dev)) + return -ENODEV; + + handle = ACPI_HANDLE(dev); + if (!handle) + return -ENODEV; + + return cxl_acpi_evaluate_qtg_dsm(handle, coord, entries, qos_class); +} + +static const struct cxl_root_ops acpi_root_ops = { + .qos_class = cxl_acpi_qos_class, +}; + static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, const unsigned long end) { @@ -656,6 +780,7 @@ static int cxl_acpi_probe(struct platform_device *pdev) { int rc; struct resource *cxl_res; + struct cxl_root *cxl_root; struct cxl_port *root_port; struct device *host = &pdev->dev; struct acpi_device *adev = ACPI_COMPANION(host); @@ -675,9 +800,10 @@ static int cxl_acpi_probe(struct platform_device *pdev) cxl_res->end = -1; cxl_res->flags = IORESOURCE_MEM; - root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); - if (IS_ERR(root_port)) - return PTR_ERR(root_port); + cxl_root = devm_cxl_add_root(host, &acpi_root_ops); + if (IS_ERR(cxl_root)) + return PTR_ERR(cxl_root); + root_port = &cxl_root->port; rc = bus_for_each_dev(adev->dev.bus, NULL, root_port, add_host_bridge_dport); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index b7c93bb18f6e..9393cbf04652 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -541,7 +541,10 @@ static void cxl_port_release(struct device *dev) xa_destroy(&port->dports); xa_destroy(&port->regions); ida_free(&cxl_port_ida, port->id); - kfree(port); + if (is_cxl_root(port)) + kfree(to_cxl_root(port)); + else + kfree(port); } static ssize_t decoders_committed_show(struct device *dev, @@ -669,17 +672,31 @@ static struct lock_class_key cxl_port_key; static struct cxl_port *cxl_port_alloc(struct device *uport_dev, struct cxl_dport *parent_dport) { - struct cxl_port *port; + struct cxl_root *cxl_root __free(kfree) = NULL; + struct cxl_port *port, *_port __free(kfree) = NULL; struct device *dev; int rc; - port = kzalloc(sizeof(*port), GFP_KERNEL); - if (!port) - return ERR_PTR(-ENOMEM); + /* No 
parent_dport, root cxl_port */ + if (!parent_dport) { + cxl_root = kzalloc(sizeof(*cxl_root), GFP_KERNEL); + if (!cxl_root) + return ERR_PTR(-ENOMEM); + } else { + _port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!_port) + return ERR_PTR(-ENOMEM); + } rc = ida_alloc(&cxl_port_ida, GFP_KERNEL); if (rc < 0) - goto err; + return ERR_PTR(rc); + + if (cxl_root) + port = &no_free_ptr(cxl_root)->port; + else + port = no_free_ptr(_port); + port->id = rc; port->uport_dev = uport_dev; @@ -731,10 +748,6 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev, dev->type = &cxl_port_type; return port; - -err: - kfree(port); - return ERR_PTR(rc); } static int cxl_setup_comp_regs(struct device *host, struct cxl_register_map *map, @@ -884,6 +897,22 @@ struct cxl_port *devm_cxl_add_port(struct device *host, } EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL); +struct cxl_root *devm_cxl_add_root(struct device *host, + const struct cxl_root_ops *ops) +{ + struct cxl_root *cxl_root; + struct cxl_port *port; + + port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); + if (IS_ERR(port)) + return (struct cxl_root *)port; + + cxl_root = to_cxl_root(port); + cxl_root->ops = ops; + return cxl_root; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_root, CXL); + struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port) { /* There is no pci_bus associated with a CXL platform-root port */ diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 22f664b9f4c6..abbdcd3a7596 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -615,6 +615,29 @@ struct cxl_port { bool cdat_available; }; +struct cxl_root_ops { + int (*qos_class)(struct cxl_port *root_port, + struct access_coordinate *coord, int entries, + int *qos_class); +}; + +/** + * struct cxl_root - logical collection of root cxl_port items + * + * @port: cxl_port member + * @ops: cxl root operations + */ +struct cxl_root { + struct cxl_port port; + const struct cxl_root_ops *ops; +}; + +static inline struct cxl_root * 
+to_cxl_root(const struct cxl_port *port) +{ + return container_of(port, struct cxl_root, port); +} + static inline struct cxl_dport * cxl_find_dport_by_dev(struct cxl_port *port, const struct device *dport_dev) { @@ -703,6 +726,8 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport_dev, resource_size_t component_reg_phys, struct cxl_dport *parent_dport); +struct cxl_root *devm_cxl_add_root(struct device *host, + const struct cxl_root_ops *ops); struct cxl_port *find_cxl_root(struct cxl_port *port); int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd); void cxl_bus_rescan(void); -- cgit From 4d07a05397c8c15c37c8c3abb7afaea1dcd2f0e7 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:39 -0700 Subject: cxl: Calculate and store PCI link latency for the downstream ports The latency is calculated by dividing the flit size over the bandwidth. Add support to retrieve the flit size for the CXL switch device and calculate the latency of the PCIe link. Cache the latency number with cxl_dport. 
Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319621931.2212653.6800240203604822886.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/core/core.h | 2 ++ drivers/cxl/core/pci.c | 36 ++++++++++++++++++++++++++++++++++++ drivers/cxl/core/port.c | 6 ++++++ drivers/cxl/cxl.h | 4 ++++ drivers/cxl/cxlpci.h | 13 +++++++++++++ 5 files changed, 61 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 86d7ba23235e..3b64fb1b9ed0 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -88,4 +88,6 @@ enum cxl_poison_trace_type { CXL_POISON_TRACE_CLEAR, }; +long cxl_pci_get_latency(struct pci_dev *pdev); + #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 37e1652afbc7..6c9c8d92f8f7 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2021 Intel Corporation. All rights reserved. */ +#include #include #include #include @@ -979,3 +980,38 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, return PCI_ERS_RESULT_NEED_RESET; } EXPORT_SYMBOL_NS_GPL(cxl_error_detected, CXL); + +static int cxl_flit_size(struct pci_dev *pdev) +{ + if (cxl_pci_flit_256(pdev)) + return 256; + + return 68; +} + +/** + * cxl_pci_get_latency - calculate the link latency for the PCIe link + * @pdev: PCI device + * + * return: calculated latency or 0 for no latency + * + * CXL Memory Device SW Guide v1.0 2.11.4 Link latency calculation + * Link latency = LinkPropagationLatency + FlitLatency + RetimerLatency + * LinkPropagationLatency is negligible, so 0 will be used + * RetimerLatency is assumed to be negligible and 0 will be used + * FlitLatency = FlitSize / LinkBandwidth + * FlitSize is defined by spec. CXL rev3.0 4.2.1. + * 68B flit is used up to 32GT/s. >32GT/s, 256B flit size is used. + * The FlitLatency is converted to picoseconds. 
+ */ +long cxl_pci_get_latency(struct pci_dev *pdev) +{ + long bw; + + bw = pcie_link_speed_mbps(pdev); + if (bw < 0) + return 0; + bw /= BITS_PER_BYTE; + + return cxl_flit_size(pdev) * MEGA / bw; +} diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 9393cbf04652..b5ad227fe0d8 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -854,6 +854,9 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host, if (rc) return ERR_PTR(rc); + if (parent_dport && dev_is_pci(uport_dev)) + port->pci_latency = cxl_pci_get_latency(to_pci_dev(uport_dev)); + return port; err: @@ -1137,6 +1140,9 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, if (rc) return ERR_PTR(rc); + if (dev_is_pci(dport_dev)) + dport->link_latency = cxl_pci_get_latency(to_pci_dev(dport_dev)); + return dport; } diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index abbdcd3a7596..7da8db919a20 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -591,6 +591,7 @@ struct cxl_dax_region { * @depth: How deep this port is relative to the root. depth 0 is the root. 
* @cdat: Cached CDAT data * @cdat_available: Should a CDAT attribute be available in sysfs + * @pci_latency: Upstream latency in picoseconds */ struct cxl_port { struct device dev; @@ -613,6 +614,7 @@ struct cxl_port { size_t length; } cdat; bool cdat_available; + long pci_latency; }; struct cxl_root_ops { @@ -659,6 +661,7 @@ struct cxl_rcrb_info { * @port: reference to cxl_port that contains this downstream port * @regs: Dport parsed register blocks * @sw_coord: access coordinates (performance) for switch from CDAT + * @link_latency: calculated PCIe downstream latency */ struct cxl_dport { struct device *dport_dev; @@ -669,6 +672,7 @@ struct cxl_dport { struct cxl_port *port; struct cxl_regs regs; struct access_coordinate sw_coord; + long link_latency; }; /** diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 0fa4799ea316..711b05d9a370 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -85,6 +85,19 @@ struct cdat_entry_header { __le16 length; } __packed; +/* + * CXL v3.0 6.2.3 Table 6-4 + * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits + * mode, otherwise it's 68B flits mode. + */ +static inline bool cxl_pci_flit_256(struct pci_dev *pdev) +{ + u16 lnksta2; + + pcie_capability_read_word(pdev, PCI_EXP_LNKSTA2, &lnksta2); + return lnksta2 & PCI_EXP_LNKSTA2_FLIT; +} + int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, -- cgit From 1037b82fccfe9c001ffa7a883651bb4cde7b705c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:51 -0700 Subject: cxl: Store the access coordinates for the generic ports Each CXL host bridge is represented by an ACPI0016 device. A generic port device handle that is an ACPI device is represented by a string of ACPI0016 device HID and UID. Create a device handle from the ACPI device and retrieve the access coordinates from the stored memory targets. 
The access coordinates are stored under the cxl_dport that is associated with the CXL host bridge. The access coordinates struct is dynamically allocated under cxl_dport in order for code later on to detect whether the data exists or not. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319623196.2212653.17916695743464172534.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 25 +++++++++++++++++++++++++ drivers/cxl/cxl.h | 2 ++ 2 files changed, 27 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 2f7de910ce57..afc712264d1c 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -513,8 +513,29 @@ static int cxl_get_chbs(struct device *dev, struct acpi_device *hb, return 0; } +static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport) +{ + struct acpi_device *hb = to_cxl_host_bridge(NULL, dev); + u32 uid; + int rc; + + if (kstrtou32(acpi_device_uid(hb), 0, &uid)) + return -EINVAL; + + rc = acpi_get_genport_coordinates(uid, &dport->hb_coord); + if (rc < 0) + return rc; + + /* Adjust back to picoseconds from nanoseconds */ + dport->hb_coord.read_latency *= 1000; + dport->hb_coord.write_latency *= 1000; + + return 0; +} + static int add_host_bridge_dport(struct device *match, void *arg) { + int ret; acpi_status rc; struct device *bridge; struct cxl_dport *dport; @@ -564,6 +585,10 @@ static int add_host_bridge_dport(struct device *match, void *arg) if (IS_ERR(dport)) return PTR_ERR(dport); + ret = get_genport_coordinates(match, dport); + if (ret) + dev_dbg(match, "Failed to get generic port perf coordinates.\n"); + return 0; } diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 7da8db919a20..dd234f3b9ed4 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -661,6 +661,7 @@ struct cxl_rcrb_info { * @port: reference to cxl_port that contains this downstream port * @regs: Dport parsed register blocks * @sw_coord: access 
coordinates (performance) for switch from CDAT + * @hb_coord: access coordinates (performance) from ACPI generic port (host bridge) * @link_latency: calculated PCIe downstream latency */ struct cxl_dport { @@ -672,6 +673,7 @@ struct cxl_dport { struct cxl_port *port; struct cxl_regs regs; struct access_coordinate sw_coord; + struct access_coordinate hb_coord; long link_latency; }; -- cgit From 14a6960b3e928ccea22d687fb0626237885a20bd Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:03:58 -0700 Subject: cxl: Add helper function that calculate performance data for downstream ports The CDAT information from the switch, Switch Scoped Latency and Bandwidth Information Structure (SSLBIS), is parsed and stored under a cxl_dport based on the correlated downstream port id from the SSLBIS entry. Walk the entire CXL port paths and collect all the performance data. Also pick up the link latency number that's stored under the dports. The entire path PCIe bandwidth can be retrieved using the pcie_bandwidth_available() call. 
Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319623824.2212653.10302079766473698427.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 3 ++ 2 files changed, 78 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index b5ad227fe0d8..8c00fd6be730 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -2094,6 +2095,80 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd) } EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL); +static void combine_coordinates(struct access_coordinate *c1, + struct access_coordinate *c2) +{ + if (c2->write_bandwidth) + c1->write_bandwidth = min(c1->write_bandwidth, + c2->write_bandwidth); + c1->write_latency += c2->write_latency; + + if (c2->read_bandwidth) + c1->read_bandwidth = min(c1->read_bandwidth, + c2->read_bandwidth); + c1->read_latency += c2->read_latency; +} + +/** + * cxl_endpoint_get_perf_coordinates - Retrieve performance numbers stored in dports + * of CXL path + * @port: endpoint cxl_port + * @coord: output performance data + * + * Return: errno on failure, 0 on success. + */ +int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, + struct access_coordinate *coord) +{ + struct access_coordinate c = { + .read_bandwidth = UINT_MAX, + .write_bandwidth = UINT_MAX, + }; + struct cxl_port *iter = port; + struct cxl_dport *dport; + struct pci_dev *pdev; + unsigned int bw; + + if (!is_cxl_endpoint(port)) + return -EINVAL; + + dport = iter->parent_dport; + + /* + * Exit the loop when the parent port of the current port is cxl root. + * The iterative loop starts at the endpoint and gathers the + * latency of the CXL link from the current iter to the next downstream + * port each iteration. 
If the parent is cxl root then there is + * nothing to gather. + */ + while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) { + combine_coordinates(&c, &dport->sw_coord); + c.write_latency += dport->link_latency; + c.read_latency += dport->link_latency; + + iter = to_cxl_port(iter->dev.parent); + dport = iter->parent_dport; + } + + /* Augment with the generic port (host bridge) perf data */ + combine_coordinates(&c, &dport->hb_coord); + + /* Get the calculated PCI paths bandwidth */ + pdev = to_pci_dev(port->uport_dev->parent); + bw = pcie_bandwidth_available(pdev, NULL, NULL, NULL); + if (bw == 0) + return -ENXIO; + bw /= BITS_PER_BYTE; + + c.write_bandwidth = min(c.write_bandwidth, bw); + c.read_bandwidth = min(c.read_bandwidth, bw); + + *coord = c; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_endpoint_get_perf_coordinates, CXL); + /* for user tooling to ensure port disable work has completed */ static ssize_t flush_store(const struct bus_type *bus, const char *buf, size_t count) { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index dd234f3b9ed4..492dbf63935f 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -876,6 +876,9 @@ static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev) void cxl_endpoint_parse_cdat(struct cxl_port *port); void cxl_switch_parse_cdat(struct cxl_port *port); +int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, + struct access_coordinate *coord); + /* * Unit test builds overrides this to __weak, find the 'strong' version * of these symbols in tools/testing/cxl/. -- cgit From 7a4f148dd8d518bc1e012aa738b0ed6244959293 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:04:04 -0700 Subject: cxl: Compute the entire CXL path latency and bandwidth data CXL Memory Device SW Guide [1] rev1.0 2.11.2 provides instruction on how to calculate latency and bandwidth for CXL memory device. Calculate minimum bandwidth and total latency for the path from the CXL device to the root port. 
The QTG id is retrieved by providing the performance data as input and calling the root port callback ->get_qos_class(). The retrieved id is stored with the cxl_port of the CXL device. For example for a device that is directly attached to a host bus: Total Latency = Device Latency (from CDAT) + Dev to Host Bus (HB) Link Latency + Generic Port Latency Min Bandwidth = Min bandwidth for link bandwidth between HB and CXL device, device CDAT bandwidth, and Generic Port Bandwidth For a device that has a switch in between host bus and CXL device: Total Latency = Device (CDAT) Latency + Dev to Switch Link Latency + Switch (CDAT) Latency + Switch to HB Link Latency + Generic Port Latency Min Bandwidth = Min bandwidth for link bandwidth between CXL device to CXL switch, CXL device CDAT bandwidth, CXL switch CDAT bandwidth, CXL switch to HB bandwidth, and Generic Port Bandwidth. [1]: https://cdrdv2-public.intel.com/643805/643805_CXL%20Memory%20Device%20SW%20Guide_Rev1p0.pdf Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319624458.2212653.13252496567443656371.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/core/cdat.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index b3ab47d250e1..43dfef80fb84 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -12,6 +12,9 @@ struct dsmas_entry { struct range dpa_range; u8 handle; struct access_coordinate coord; + + int entries; + int qos_class; }; static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg, @@ -154,6 +157,55 @@ static int cxl_cdat_endpoint_process(struct cxl_port *port, return cdat_table_parse_output(rc); } +static int cxl_port_perf_data_calculate(struct cxl_port *port, + struct xarray *dsmas_xa) +{ + struct access_coordinate c; + struct cxl_port *root_port; + struct cxl_root *cxl_root; 
+ struct dsmas_entry *dent; + int valid_entries = 0; + unsigned long index; + int rc; + + rc = cxl_endpoint_get_perf_coordinates(port, &c); + if (rc) { + dev_dbg(&port->dev, "Failed to retrieve perf coordinates.\n"); + return rc; + } + + root_port = find_cxl_root(port); + cxl_root = to_cxl_root(root_port); + if (!cxl_root->ops || !cxl_root->ops->qos_class) + return -EOPNOTSUPP; + + xa_for_each(dsmas_xa, index, dent) { + int qos_class; + + dent->coord.read_latency = dent->coord.read_latency + + c.read_latency; + dent->coord.write_latency = dent->coord.write_latency + + c.write_latency; + dent->coord.read_bandwidth = min_t(int, c.read_bandwidth, + dent->coord.read_bandwidth); + dent->coord.write_bandwidth = min_t(int, c.write_bandwidth, + dent->coord.write_bandwidth); + + dent->entries = 1; + rc = cxl_root->ops->qos_class(root_port, &dent->coord, 1, &qos_class); + if (rc != 1) + continue; + + valid_entries++; + dent->qos_class = qos_class; + } + + if (!valid_entries) + return -ENOENT; + + return 0; +} + static void discard_dsmas(struct xarray *xa) { unsigned long index; @@ -183,7 +235,12 @@ void cxl_endpoint_parse_cdat(struct cxl_port *port) return; } - /* Performance data processing */ + rc = cxl_port_perf_data_calculate(port, dsmas_xa); + if (rc) { + dev_dbg(&port->dev, "Failed to do perf coord calculations.\n"); + return; + } + } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL); -- cgit From 86557b7edf77d2a3835136c325c8baa6fe803234 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:04:11 -0700 Subject: cxl: Store QTG IDs and related info to the CXL memory device context Once the QTG ID _DSM is executed successfully, the QTG ID is retrieved from the return package. Create a list of entries in the cxl_memdev context and store the QTG ID as qos_class token and the associated DPA range. This information can be exposed to user space via sysfs in order to help region setup for hot-plugged CXL memory devices. 
Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319625109.2212653.11872111896220384056.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/core/cdat.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/core/mbox.c | 2 ++ drivers/cxl/cxlmem.h | 21 +++++++++++++++ 3 files changed, 92 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 43dfef80fb84..6189d967f399 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -6,6 +6,7 @@ #include #include #include "cxlpci.h" +#include "cxlmem.h" #include "cxl.h" struct dsmas_entry { @@ -206,6 +207,71 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port, return 0; } +static void add_perf_entry(struct device *dev, struct dsmas_entry *dent, + struct list_head *list) +{ + struct cxl_dpa_perf *dpa_perf; + + dpa_perf = kzalloc(sizeof(*dpa_perf), GFP_KERNEL); + if (!dpa_perf) + return; + + dpa_perf->dpa_range = dent->dpa_range; + dpa_perf->coord = dent->coord; + dpa_perf->qos_class = dent->qos_class; + list_add_tail(&dpa_perf->list, list); + dev_dbg(dev, + "DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n", + dent->dpa_range.start, dpa_perf->qos_class, + dent->coord.read_bandwidth, dent->coord.write_bandwidth, + dent->coord.read_latency, dent->coord.write_latency); +} + +static void free_perf_ents(void *data) +{ + struct cxl_memdev_state *mds = data; + struct cxl_dpa_perf *dpa_perf, *n; + LIST_HEAD(discard); + + list_splice_tail_init(&mds->ram_perf_list, &discard); + list_splice_tail_init(&mds->pmem_perf_list, &discard); + list_for_each_entry_safe(dpa_perf, n, &discard, list) { + list_del(&dpa_perf->list); + kfree(dpa_perf); + } +} + +static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds, + struct xarray *dsmas_xa) +{ + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct device *dev = cxlds->dev; + struct range pmem_range = 
{ + .start = cxlds->pmem_res.start, + .end = cxlds->pmem_res.end, + }; + struct range ram_range = { + .start = cxlds->ram_res.start, + .end = cxlds->ram_res.end, + }; + struct dsmas_entry *dent; + unsigned long index; + + xa_for_each(dsmas_xa, index, dent) { + if (resource_size(&cxlds->ram_res) && + range_contains(&ram_range, &dent->dpa_range)) + add_perf_entry(dev, dent, &mds->ram_perf_list); + else if (resource_size(&cxlds->pmem_res) && + range_contains(&pmem_range, &dent->dpa_range)) + add_perf_entry(dev, dent, &mds->pmem_perf_list); + else + dev_dbg(dev, "no partition for dsmas dpa: %#llx\n", + dent->dpa_range.start); + } + + devm_add_action_or_reset(&cxlds->cxlmd->dev, free_perf_ents, mds); +} + static void discard_dsmas(struct xarray *xa) { unsigned long index; @@ -221,6 +287,8 @@ DEFINE_FREE(dsmas, struct xarray *, if (_T) discard_dsmas(_T)) void cxl_endpoint_parse_cdat(struct cxl_port *port) { + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; struct xarray __dsmas_xa; struct xarray *dsmas_xa __free(dsmas) = &__dsmas_xa; int rc; @@ -241,6 +309,7 @@ void cxl_endpoint_parse_cdat(struct cxl_port *port) return; } + cxl_memdev_set_qos_class(cxlds, dsmas_xa); } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL); diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 36270dcfb42e..fbaa508ab245 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1404,6 +1404,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) mds->cxlds.reg_map.host = dev; mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; + INIT_LIST_HEAD(&mds->ram_perf_list); + INIT_LIST_HEAD(&mds->pmem_perf_list); return mds; } diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index a2fcbca253f3..205bc2a016b2 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "cxl.h" /* CXL 2.0 
8.2.8.5.1.1 Memory Device Status Register */ @@ -391,6 +392,20 @@ enum cxl_devtype { CXL_DEVTYPE_CLASSMEM, }; +/** + * struct cxl_dpa_perf - DPA performance property entry + * @list - list entry + * @dpa_range - range for DPA address + * @coord - QoS performance data (i.e. latency, bandwidth) + * @qos_class - QoS Class cookies + */ +struct cxl_dpa_perf { + struct list_head list; + struct range dpa_range; + struct access_coordinate coord; + int qos_class; +}; + /** * struct cxl_dev_state - The driver device state * @@ -455,6 +470,8 @@ struct cxl_dev_state { * @security: security driver state info * @fw: firmware upload / activation state * @mbox_send: @dev specific transport for transmitting mailbox commands + * @ram_perf_list: performance data entries matched to RAM + * @pmem_perf_list: performance data entries matched to PMEM * * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for * details on capacity parameters. @@ -475,6 +492,10 @@ struct cxl_memdev_state { u64 active_persistent_bytes; u64 next_volatile_bytes; u64 next_persistent_bytes; + + struct list_head ram_perf_list; + struct list_head pmem_perf_list; + struct cxl_event_state event; struct cxl_poison_state poison; struct cxl_security_state security; -- cgit From 42834b17cf1f00fa79ff1f02134f9c576a125252 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:04:16 -0700 Subject: cxl: Export sysfs attributes for memory device QoS class Export qos_class sysfs attributes for the CXL memory device. The QoS class should show up as /sys/bus/cxl/devices/memX/ram/qos_class for the volatile partition and /sys/bus/cxl/devices/memX/pmem/qos_class for the persistent partition. The QTG ID is retrieved via _DSM after supplying the calculated bandwidth and latency for the entire CXL path from device to the CPU. This ID is used to match up to the root decoder QoS class to determine which CFMWS the memory range of a hotplugged CXL mem device should be assigned under. 
While there may be multiple DSMAS exported by the device CDAT, the driver will only expose the first QTG ID per partition in sysfs for now. In the future when multiple QTG IDs are necessary, they can be exposed. [1] [1]: https://lore.kernel.org/linux-cxl/167571650007.587790.10040913293130712882.stgit@djiang5-mobl3.local/T/#md2a47b1ead3e1ba08f50eab29a4af1aed1d215ab Suggested-by: Dan Williams Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319625698.2212653.17544381274847420961.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 6 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index e087febf9af0..c5c9d8e0d88d 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -215,23 +215,78 @@ static ssize_t trigger_poison_list_store(struct device *dev, } static DEVICE_ATTR_WO(trigger_poison_list); +static ssize_t ram_qos_class_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct cxl_dpa_perf *dpa_perf; + + if (!dev->driver) + return -ENOENT; + + if (list_empty(&mds->ram_perf_list)) + return -ENOENT; + + dpa_perf = list_first_entry(&mds->ram_perf_list, struct cxl_dpa_perf, + list); + + return sysfs_emit(buf, "%d\n", dpa_perf->qos_class); +} + +static struct device_attribute dev_attr_ram_qos_class = + __ATTR(qos_class, 0444, ram_qos_class_show, NULL); + +static ssize_t pmem_qos_class_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct cxl_dpa_perf *dpa_perf; + + if (!dev->driver) + return -ENOENT; + + if 
(list_empty(&mds->pmem_perf_list)) + return -ENOENT; + + dpa_perf = list_first_entry(&mds->pmem_perf_list, struct cxl_dpa_perf, + list); + + return sysfs_emit(buf, "%d\n", dpa_perf->qos_class); +} + +static struct device_attribute dev_attr_pmem_qos_class = + __ATTR(qos_class, 0444, pmem_qos_class_show, NULL); + static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) { - if (a == &dev_attr_trigger_poison_list.attr) { - struct device *dev = kobj_to_dev(kobj); - struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = - to_cxl_memdev_state(cxlmd->cxlds); + struct device *dev = kobj_to_dev(kobj); + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + if (a == &dev_attr_trigger_poison_list.attr) if (!test_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds)) return 0; - } + + if (a == &dev_attr_pmem_qos_class.attr) + if (list_empty(&mds->pmem_perf_list)) + return 0; + + if (a == &dev_attr_ram_qos_class.attr) + if (list_empty(&mds->ram_perf_list)) + return 0; + return a->mode; } static struct attribute *cxl_mem_attrs[] = { &dev_attr_trigger_poison_list.attr, + &dev_attr_ram_qos_class.attr, + &dev_attr_pmem_qos_class.attr, NULL }; -- cgit From 185c1a489f873cb71520fc089401e02dbf302dcd Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 21 Dec 2023 15:04:23 -0700 Subject: cxl: Check qos_class validity on memdev probe Add a check to make sure the qos_class for the device will match one of the root decoders qos_class. If no match is found, then the qos_class for the device is set to invalid. Also add a check to ensure that the device's host bridge matches to one of the root decoder's downstream targets. 
Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/170319626313.2212653.9021004640856081917.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- drivers/cxl/core/cdat.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 6189d967f399..cd84d87f597a 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -272,6 +272,108 @@ static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds, devm_add_action_or_reset(&cxlds->cxlmd->dev, free_perf_ents, mds); } +static int match_cxlrd_qos_class(struct device *dev, void *data) +{ + int dev_qos_class = *(int *)data; + struct cxl_root_decoder *cxlrd; + + if (!is_root_decoder(dev)) + return 0; + + cxlrd = to_cxl_root_decoder(dev); + if (cxlrd->qos_class == CXL_QOS_CLASS_INVALID) + return 0; + + if (cxlrd->qos_class == dev_qos_class) + return 1; + + return 0; +} + +static void cxl_qos_match(struct cxl_port *root_port, + struct list_head *work_list, + struct list_head *discard_list) +{ + struct cxl_dpa_perf *dpa_perf, *n; + + list_for_each_entry_safe(dpa_perf, n, work_list, list) { + int rc; + + if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID) + return; + + rc = device_for_each_child(&root_port->dev, + (void *)&dpa_perf->qos_class, + match_cxlrd_qos_class); + if (!rc) + list_move_tail(&dpa_perf->list, discard_list); + } +} + +static int match_cxlrd_hb(struct device *dev, void *data) +{ + struct device *host_bridge = data; + struct cxl_switch_decoder *cxlsd; + struct cxl_root_decoder *cxlrd; + unsigned int seq; + + if (!is_root_decoder(dev)) + return 0; + + cxlrd = to_cxl_root_decoder(dev); + cxlsd = &cxlrd->cxlsd; + + do { + seq = read_seqbegin(&cxlsd->target_lock); + for (int i = 0; i < cxlsd->nr_targets; i++) { + if (host_bridge == cxlsd->target[i]->dport_dev) + return 1; + } + } while (read_seqretry(&cxlsd->target_lock, seq)); + + return 0; +} + +static void 
discard_dpa_perf(struct list_head *list) +{ + struct cxl_dpa_perf *dpa_perf, *n; + + list_for_each_entry_safe(dpa_perf, n, list, list) { + list_del(&dpa_perf->list); + kfree(dpa_perf); + } +} +DEFINE_FREE(dpa_perf, struct list_head *, if (!list_empty(_T)) discard_dpa_perf(_T)) + +static int cxl_qos_class_verify(struct cxl_memdev *cxlmd) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct cxl_port *root_port __free(put_device) = NULL; + LIST_HEAD(__discard); + struct list_head *discard __free(dpa_perf) = &__discard; + int rc; + + root_port = find_cxl_root(cxlmd->endpoint); + if (!root_port) + return -ENODEV; + + /* Check that the QTG IDs are all sane between end device and root decoders */ + cxl_qos_match(root_port, &mds->ram_perf_list, discard); + cxl_qos_match(root_port, &mds->pmem_perf_list, discard); + + /* Check to make sure that the device's host bridge is under a root decoder */ + rc = device_for_each_child(&root_port->dev, + (void *)cxlmd->endpoint->host_bridge, + match_cxlrd_hb); + if (!rc) { + list_splice_tail_init(&mds->ram_perf_list, discard); + list_splice_tail_init(&mds->pmem_perf_list, discard); + } + + return rc; +} + static void discard_dsmas(struct xarray *xa) { unsigned long index; @@ -310,6 +412,7 @@ void cxl_endpoint_parse_cdat(struct cxl_port *port) } cxl_memdev_set_qos_class(cxlds, dsmas_xa); + cxl_qos_class_verify(cxlmd); } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL); -- cgit