From 2345df54249c6fb7779e2a72b427ee79ed3eaad5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 17:29:09 -0800 Subject: cxl/memdev: Fix endpoint port removal Testing of ram region support [1], stimulates a long standing bug in cxl_detach_ep() where some cxl_ep_remove() cleanup is skipped due to inability to walk ports after dports have been unregistered. That results in a failure to re-register a memdev after the port is re-enabled leading to a crash like the following: cxl_port_setup_targets: cxl region4: cxl_host_bridge.0:port4 iw: 1 ig: 256 general protection fault, ... [..] RIP: 0010:cxl_region_setup_targets+0x897/0x9e0 [cxl_core] dev_name at include/linux/device.h:700 (inlined by) cxl_port_setup_targets at drivers/cxl/core/region.c:1155 (inlined by) cxl_region_setup_targets at drivers/cxl/core/region.c:1249 [..] Call Trace: attach_target+0x39a/0x760 [cxl_core] ? __mutex_unlock_slowpath+0x3a/0x290 cxl_add_to_region+0xb8/0x340 [cxl_core] ? lockdep_hardirqs_on+0x7d/0x100 discover_region+0x4b/0x80 [cxl_port] ? __pfx_discover_region+0x10/0x10 [cxl_port] device_for_each_child+0x58/0x90 cxl_port_probe+0x10e/0x130 [cxl_port] cxl_bus_probe+0x17/0x50 [cxl_core] Change the port ancestry walk to be by depth rather than by dport. This ensures that even if a port has unregistered its dports a deferred memdev cleanup will still be able to cleanup the memdev's interest in that port. The parent_port->dev.driver check is only needed for determining if the bottom up removal beat the top-down removal, but cxl_ep_remove() can always proceed given the port is pinned. That is, the two sources of cxl_ep_remove() are in cxl_detach_ep() and cxl_port_release(), and cxl_port_release() can not run if cxl_detach_ep() holds a reference. 
Fixes: 2703c16c75ae ("cxl/core/port: Add switch port enumeration") Link: http://lore.kernel.org/r/167564534874.847146.5222419648551436750.stgit@dwillia2-xfh.jf.intel.com [1] Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/167601992789.1924368.8083994227892600608.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/memdev.c | 1 + drivers/cxl/core/port.c | 58 ++++++++++++++++++++++++++--------------------- 2 files changed, 33 insertions(+), 26 deletions(-) (limited to 'drivers/cxl/core') diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index a74a93310d26..3a8bc2b06047 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -246,6 +246,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, if (rc < 0) goto err; cxlmd->id = rc; + cxlmd->depth = -1; dev = &cxlmd->dev; device_initialize(dev); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 410c036c09fa..317bcf4dbd9d 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1207,6 +1207,7 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) get_device(&endpoint->dev); dev_set_drvdata(dev, endpoint); + cxlmd->depth = endpoint->depth; return devm_add_action_or_reset(dev, delete_endpoint, cxlmd); } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL); @@ -1241,50 +1242,55 @@ static void reap_dports(struct cxl_port *port) } } +struct detach_ctx { + struct cxl_memdev *cxlmd; + int depth; +}; + +static int port_has_memdev(struct device *dev, const void *data) +{ + const struct detach_ctx *ctx = data; + struct cxl_port *port; + + if (!is_cxl_port(dev)) + return 0; + + port = to_cxl_port(dev); + if (port->depth != ctx->depth) + return 0; + + return !!cxl_ep_load(port, ctx->cxlmd); +} + static void cxl_detach_ep(void *data) { struct cxl_memdev *cxlmd = data; - struct device *iter; - for (iter = &cxlmd->dev; iter; iter = grandparent(iter)) { - struct device *dport_dev = 
grandparent(iter); + for (int i = cxlmd->depth - 1; i >= 1; i--) { struct cxl_port *port, *parent_port; + struct detach_ctx ctx = { + .cxlmd = cxlmd, + .depth = i, + }; + struct device *dev; struct cxl_ep *ep; bool died = false; - if (!dport_dev) - break; - - port = find_cxl_port(dport_dev, NULL); - if (!port) - continue; - - if (is_cxl_root(port)) { - put_device(&port->dev); + dev = bus_find_device(&cxl_bus_type, NULL, &ctx, + port_has_memdev); + if (!dev) continue; - } + port = to_cxl_port(dev); parent_port = to_cxl_port(port->dev.parent); device_lock(&parent_port->dev); - if (!parent_port->dev.driver) { - /* - * The bottom-up race to delete the port lost to a - * top-down port disable, give up here, because the - * parent_port ->remove() will have cleaned up all - * descendants. - */ - device_unlock(&parent_port->dev); - put_device(&port->dev); - continue; - } - device_lock(&port->dev); ep = cxl_ep_load(port, cxlmd); dev_dbg(&cxlmd->dev, "disconnect %s from %s\n", ep ? dev_name(ep->ep) : "", dev_name(&port->dev)); cxl_ep_remove(port, ep); if (ep && !port->dead && xa_empty(&port->endpoints) && - !is_cxl_root(parent_port)) { + !is_cxl_root(parent_port) && parent_port->dev.driver) { /* * This was the last ep attached to a dynamically * enumerated port. Block new cxl_add_ep() and garbage -- cgit From 7d505f982f53189da819581ad4cc13c8cef76803 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:05:39 -0800 Subject: cxl/region: Add a mode attribute for regions In preparation for a new region type, "ram" regions, add a mode attribute to clarify the mode of the decoders that can be added to a region. Share the internals of mode_show() (for decoders) with the region case. 
Reviewed-by: Vishal Verma Reviewed-by: Dave Jiang Reviewed-by: Gregory Price Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Tested-by: Fan Ni Link: https://lore.kernel.org/r/167601993930.1924368.4305018565539515665.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 12 +----------- drivers/cxl/core/region.c | 10 ++++++++++ 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/cxl/core') diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 317bcf4dbd9d..1e541956f605 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -180,17 +180,7 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr, { struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); - switch (cxled->mode) { - case CXL_DECODER_RAM: - return sysfs_emit(buf, "ram\n"); - case CXL_DECODER_PMEM: - return sysfs_emit(buf, "pmem\n"); - case CXL_DECODER_NONE: - return sysfs_emit(buf, "none\n"); - case CXL_DECODER_MIXED: - default: - return sysfs_emit(buf, "mixed\n"); - } + return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxled->mode)); } static ssize_t mode_store(struct device *dev, struct device_attribute *attr, diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 60828d01972a..17d2d0c12725 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -458,6 +458,15 @@ static ssize_t resource_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(resource); +static ssize_t mode_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + + return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode)); +} +static DEVICE_ATTR_RO(mode); + static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); @@ -585,6 +594,7 @@ static struct attribute *cxl_region_attrs[] = { 
&dev_attr_interleave_granularity.attr, &dev_attr_resource.attr, &dev_attr_size.attr, + &dev_attr_mode.attr, NULL, }; -- cgit From a8e7d558f7e9c2921de8b487baa01d23f068aa93 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:05:45 -0800 Subject: cxl/region: Support empty uuids for non-pmem regions Shipping versions of the cxl-cli utility expect all regions to have a 'uuid' attribute. In preparation for 'ram' regions, update the 'uuid' attribute to return an empty string which satisfies the current expectations of 'cxl list -R'. Otherwise, 'cxl list -R' fails in the presence of regions with the 'uuid' attribute missing. Force the attribute to be read-only as there is no facility or expectation for a 'ram' region to recall its uuid from one boot to the next. Reviewed-by: Vishal Verma Tested-by: Fan Ni Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167601994558.1924368.12612811533724694444.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/cxl/core') diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 17d2d0c12725..0fc80478ff6b 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -45,7 +45,10 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, rc = down_read_interruptible(&cxl_region_rwsem); if (rc) return rc; - rc = sysfs_emit(buf, "%pUb\n", &p->uuid); + if (cxlr->mode != CXL_DECODER_PMEM) + rc = sysfs_emit(buf, "\n"); + else + rc = sysfs_emit(buf, "%pUb\n", &p->uuid); up_read(&cxl_region_rwsem); return rc; @@ -300,8 +303,12 @@ static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a, struct device *dev = kobj_to_dev(kobj); struct cxl_region *cxlr = to_cxl_region(dev); + /* + * Support tooling that expects to find a 'uuid' attribute for all + * regions regardless of mode. 
+ */ if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM) - return 0; + return 0444; return a->mode; } -- cgit From 1b9b7a6fd618239db47a83da39dff9e725a5865a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:05:51 -0800 Subject: cxl/region: Validate region mode vs decoder mode In preparation for a new region mode, do not, for example, allow 'ram' decoders to be assigned to 'pmem' regions and vice versa. Reviewed-by: Vishal Verma Reviewed-by: Gregory Price Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Tested-by: Fan Ni Link: https://lore.kernel.org/r/167601995111.1924368.7459128614177994602.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/cxl/core') diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 0fc80478ff6b..285835145e9b 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1221,6 +1221,12 @@ static int cxl_region_attach(struct cxl_region *cxlr, struct cxl_dport *dport; int i, rc = -ENXIO; + if (cxled->mode != cxlr->mode) { + dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n", + dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode); + return -EINVAL; + } + if (cxled->mode == CXL_DECODER_DEAD) { dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev)); return -ENODEV; -- cgit From 6e099264185d05f50400ea494f5029264a4fe995 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:05:57 -0800 Subject: cxl/region: Add volatile region creation support Expand the region creation infrastructure to enable 'ram' (volatile-memory) regions. The internals of create_pmem_region_store() and create_pmem_region_show() are factored out into helpers __create_region() and __create_region_show() for the 'ram' case to reuse. 
Reviewed-by: Vishal Verma Reviewed-by: Gregory Price Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Tested-by: Fan Ni Link: https://lore.kernel.org/r/167601995775.1924368.352616146815830591.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/core.h | 1 + drivers/cxl/core/port.c | 14 +++++++++- drivers/cxl/core/region.c | 71 ++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 72 insertions(+), 14 deletions(-) (limited to 'drivers/cxl/core') diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 8c04672dca56..5eb873da5a30 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -11,6 +11,7 @@ extern struct attribute_group cxl_base_attribute_group; #ifdef CONFIG_CXL_REGION extern struct device_attribute dev_attr_create_pmem_region; +extern struct device_attribute dev_attr_create_ram_region; extern struct device_attribute dev_attr_delete_region; extern struct device_attribute dev_attr_region; extern const struct device_type cxl_pmem_region_type; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 1e541956f605..9e5df64ea6b5 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -294,6 +294,7 @@ static struct attribute *cxl_decoder_root_attrs[] = { &dev_attr_cap_type3.attr, &dev_attr_target_list.attr, SET_CXL_REGION_ATTR(create_pmem_region) + SET_CXL_REGION_ATTR(create_ram_region) SET_CXL_REGION_ATTR(delete_region) NULL, }; @@ -305,6 +306,13 @@ static bool can_create_pmem(struct cxl_root_decoder *cxlrd) return (cxlrd->cxlsd.cxld.flags & flags) == flags; } +static bool can_create_ram(struct cxl_root_decoder *cxlrd) +{ + unsigned long flags = CXL_DECODER_F_TYPE3 | CXL_DECODER_F_RAM; + + return (cxlrd->cxlsd.cxld.flags & flags) == flags; +} + static umode_t cxl_root_decoder_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = kobj_to_dev(kobj); @@ -313,7 +321,11 @@ static umode_t cxl_root_decoder_visible(struct 
kobject *kobj, struct attribute * if (a == CXL_REGION_ATTR(create_pmem_region) && !can_create_pmem(cxlrd)) return 0; - if (a == CXL_REGION_ATTR(delete_region) && !can_create_pmem(cxlrd)) + if (a == CXL_REGION_ATTR(create_ram_region) && !can_create_ram(cxlrd)) + return 0; + + if (a == CXL_REGION_ATTR(delete_region) && + !(can_create_pmem(cxlrd) || can_create_ram(cxlrd))) return 0; return a->mode; diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 285835145e9b..e440db8611a4 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1689,6 +1689,15 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, struct device *dev; int rc; + switch (mode) { + case CXL_DECODER_RAM: + case CXL_DECODER_PMEM: + break; + default: + dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode); + return ERR_PTR(-EINVAL); + } + cxlr = cxl_region_alloc(cxlrd, id); if (IS_ERR(cxlr)) return cxlr; @@ -1717,12 +1726,38 @@ err: return ERR_PTR(rc); } +static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf) +{ + return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id)); +} + static ssize_t create_pmem_region_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); + return __create_region_show(to_cxl_root_decoder(dev), buf); +} - return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id)); +static ssize_t create_ram_region_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return __create_region_show(to_cxl_root_decoder(dev), buf); +} + +static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, + enum cxl_decoder_mode mode, int id) +{ + int rc; + + rc = memregion_alloc(GFP_KERNEL); + if (rc < 0) + return ERR_PTR(rc); + + if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) { + memregion_free(rc); + return ERR_PTR(-EBUSY); + } + + return devm_cxl_add_region(cxlrd, id, mode, 
CXL_DECODER_EXPANDER); } static ssize_t create_pmem_region_store(struct device *dev, @@ -1731,29 +1766,39 @@ static ssize_t create_pmem_region_store(struct device *dev, { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); struct cxl_region *cxlr; - int id, rc; + int rc, id; rc = sscanf(buf, "region%d\n", &id); if (rc != 1) return -EINVAL; - rc = memregion_alloc(GFP_KERNEL); - if (rc < 0) - return rc; + cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id); + if (IS_ERR(cxlr)) + return PTR_ERR(cxlr); - if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) { - memregion_free(rc); - return -EBUSY; - } + return len; +} +DEVICE_ATTR_RW(create_pmem_region); + +static ssize_t create_ram_region_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); + struct cxl_region *cxlr; + int rc, id; - cxlr = devm_cxl_add_region(cxlrd, id, CXL_DECODER_PMEM, - CXL_DECODER_EXPANDER); + rc = sscanf(buf, "region%d\n", &id); + if (rc != 1) + return -EINVAL; + + cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id); if (IS_ERR(cxlr)) return PTR_ERR(cxlr); return len; } -DEVICE_ATTR_RW(create_pmem_region); +DEVICE_ATTR_RW(create_ram_region); static ssize_t region_show(struct device *dev, struct device_attribute *attr, char *buf) -- cgit From 3528b1e101a40ff75c8130e0b94e9555d2976f45 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:04 -0800 Subject: cxl/region: Refactor attach_target() for autodiscovery Region autodiscovery is the process of kernel creating 'struct cxl_region' object to represent active CXL memory ranges it finds already active in hardware when the driver loads. Typically this happens when platform firmware establishes CXL memory regions and then publishes them in the memory map. However, this can also happen in the case of kexec-reboot after the kernel has created regions. 
In the autodiscovery case the region creation process starts with a known endpoint decoder. Refactor attach_target() into a helper that is suitable to be called from either sysfs, for runtime region creation, or from cxl_port_probe() after it has enumerated all endpoint decoders. The cxl_port_probe() context is an async device-core probing context, so it is not appropriate to allow SIGTERM to interrupt the assembly process. Refactor attach_target() to take @cxled and @state as arguments where @state indicates whether waiting for the region rwsem is interruptible or not. No behavior change is intended. Reviewed-by: Vishal Verma Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Tested-by: Fan Ni Link: https://lore.kernel.org/r/167601996393.1924368.2202255054618600069.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 47 ++++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 19 deletions(-) (limited to 'drivers/cxl/core') diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index e440db8611a4..040bbd39c81d 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1422,31 +1422,25 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) up_write(&cxl_region_rwsem); } -static int attach_target(struct cxl_region *cxlr, const char *decoder, int pos) +static int attach_target(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos, + unsigned int state) { - struct device *dev; - int rc; - - dev = bus_find_device_by_name(&cxl_bus_type, NULL, decoder); - if (!dev) - return -ENODEV; - - if (!is_endpoint_decoder(dev)) { - put_device(dev); - return -EINVAL; - } + int rc = 0; - rc = down_write_killable(&cxl_region_rwsem); + if (state == TASK_INTERRUPTIBLE) + rc = down_write_killable(&cxl_region_rwsem); + else + down_write(&cxl_region_rwsem); if (rc) - goto out; + return rc; + down_read(&cxl_dpa_rwsem); - rc = 
cxl_region_attach(cxlr, to_cxl_endpoint_decoder(dev), pos); + rc = cxl_region_attach(cxlr, cxled, pos); if (rc == 0) set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags); up_read(&cxl_dpa_rwsem); up_write(&cxl_region_rwsem); -out: - put_device(dev); return rc; } @@ -1484,8 +1478,23 @@ static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos, if (sysfs_streq(buf, "\n")) rc = detach_target(cxlr, pos); - else - rc = attach_target(cxlr, buf, pos); + else { + struct device *dev; + + dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf); + if (!dev) + return -ENODEV; + + if (!is_endpoint_decoder(dev)) { + rc = -EINVAL; + goto out; + } + + rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos, + TASK_INTERRUPTIBLE); +out: + put_device(dev); + } if (rc < 0) return rc; -- cgit From 86987c766276acf1289700cd38bd6d5b5a167fea Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:09 -0800 Subject: cxl/region: Cleanup target list on attach error Jonathan noticed that the target list setup is not unwound completely upon error. Undo all the setup in the 'err_decrement:' exit path. 
Fixes: 27b3f8d13830 ("cxl/region: Program target lists") Reported-by: Jonathan Cameron Link: http://lore.kernel.org/r/20230208123031.00006990@Huawei.com Reviewed-by: Ira Weiny Reviewed-by: Vishal Verma Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167601996980.1924368.390423634911157277.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/cxl/core') diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 040bbd39c81d..ae7d3adcd41a 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1347,6 +1347,8 @@ static int cxl_region_attach(struct cxl_region *cxlr, err_decrement: p->nr_targets--; + cxled->pos = -1; + p->targets[pos] = NULL; err: for (iter = ep_port; !is_cxl_root(iter); iter = to_cxl_port(iter->dev.parent)) -- cgit From 9995576cef48dcbb0ba3de068292ed14f72fa0eb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:15 -0800 Subject: cxl/region: Move region-position validation to a helper In preparation for region autodiscovery, that needs all devices discovered before their relative position in the region can be determined, consolidate all position dependent validation in a helper. Recall that in the on-demand region creation flow the end-user picks the position of a given endpoint decoder in a region. In the autodiscovery case the position of an endpoint decoder can only be determined after all other endpoint decoders that claim to decode the region's address range have been enumerated and attached. So, in the autodiscovery case endpoint decoders may be attached before their relative position is known. Once all decoders arrive, then positions can be determined and validated with cxl_region_validate_position() the same as user initiated on-demand creation. 
Reviewed-by: Vishal Verma Tested-by: Fan Ni Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167601997584.1924368.4615769326126138969.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 119 +++++++++++++++++++++++++++++----------------- 1 file changed, 76 insertions(+), 43 deletions(-) (limited to 'drivers/cxl/core') diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index ae7d3adcd41a..691605f1e120 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1211,35 +1211,13 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr) return 0; } -static int cxl_region_attach(struct cxl_region *cxlr, - struct cxl_endpoint_decoder *cxled, int pos) +static int cxl_region_validate_position(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, + int pos) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_port *ep_port, *root_port, *iter; struct cxl_region_params *p = &cxlr->params; - struct cxl_dport *dport; - int i, rc = -ENXIO; - - if (cxled->mode != cxlr->mode) { - dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n", - dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode); - return -EINVAL; - } - - if (cxled->mode == CXL_DECODER_DEAD) { - dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev)); - return -ENODEV; - } - - /* all full of members, or interleave config not established? 
*/ - if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) { - dev_dbg(&cxlr->dev, "region already active\n"); - return -EBUSY; - } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) { - dev_dbg(&cxlr->dev, "interleave config missing\n"); - return -ENXIO; - } + int i; if (pos < 0 || pos >= p->interleave_ways) { dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos, @@ -1278,6 +1256,71 @@ static int cxl_region_attach(struct cxl_region *cxlr, } } + return 0; +} + +static int cxl_region_attach_position(struct cxl_region *cxlr, + struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder *cxled, + const struct cxl_dport *dport, int pos) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_port *iter; + int rc; + + if (cxlrd->calc_hb(cxlrd, pos) != dport) { + dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + dev_name(&cxlrd->cxlsd.cxld.dev)); + return -ENXIO; + } + + for (iter = cxled_to_port(cxled); !is_cxl_root(iter); + iter = to_cxl_port(iter->dev.parent)) { + rc = cxl_port_attach_region(iter, cxlr, cxled, pos); + if (rc) + goto err; + } + + return 0; + +err: + for (iter = cxled_to_port(cxled); !is_cxl_root(iter); + iter = to_cxl_port(iter->dev.parent)) + cxl_port_detach_region(iter, cxlr, cxled); + return rc; +} + +static int cxl_region_attach(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_region_params *p = &cxlr->params; + struct cxl_port *ep_port, *root_port; + struct cxl_dport *dport; + int rc = -ENXIO; + + if (cxled->mode != cxlr->mode) { + dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n", + dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode); + return -EINVAL; + } + + if (cxled->mode == CXL_DECODER_DEAD) { + dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev)); + return -ENODEV; + } + + /* all full of 
members, or interleave config not established? */ + if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) { + dev_dbg(&cxlr->dev, "region already active\n"); + return -EBUSY; + } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) { + dev_dbg(&cxlr->dev, "interleave config missing\n"); + return -ENXIO; + } + ep_port = cxled_to_port(cxled); root_port = cxlrd_to_port(cxlrd); dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge); @@ -1288,13 +1331,6 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -ENXIO; } - if (cxlrd->calc_hb(cxlrd, pos) != dport) { - dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n", - dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), - dev_name(&cxlrd->cxlsd.cxld.dev)); - return -ENXIO; - } - if (cxled->cxld.target_type != cxlr->type) { dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), @@ -1318,12 +1354,13 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -EINVAL; } - for (iter = ep_port; !is_cxl_root(iter); - iter = to_cxl_port(iter->dev.parent)) { - rc = cxl_port_attach_region(iter, cxlr, cxled, pos); - if (rc) - goto err; - } + rc = cxl_region_validate_position(cxlr, cxled, pos); + if (rc) + return rc; + + rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos); + if (rc) + return rc; p->targets[pos] = cxled; cxled->pos = pos; @@ -1349,10 +1386,6 @@ err_decrement: p->nr_targets--; cxled->pos = -1; p->targets[pos] = NULL; -err: - for (iter = ep_port; !is_cxl_root(iter); - iter = to_cxl_port(iter->dev.parent)) - cxl_port_detach_region(iter, cxlr, cxled); return rc; } -- cgit From 93c177fd6ff0655a5fa43ec945a57d7b0200ad80 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:21 -0800 Subject: kernel/range: Uplevel the cxl subsystem's range_contains() helper In support of the CXL subsystem's use of 'struct range' to track decode address ranges, add a common range_contains() implementation with identical semantics as 
resource_contains(). The existing 'range_contains()' in lib/stackinit_kunit.c is namespaced with a 'stackinit_' prefix. Cc: Kees Cook Reviewed-by: Vishal Verma Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Gregory Price Reviewed-by: Ira Weiny Tested-by: Fan Ni Link: https://lore.kernel.org/r/167601998163.1924368.6067392174077323935.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/cxl/core') diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 1d1492440287..9ed2120dbf8a 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -214,11 +214,6 @@ static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds) return devm_add_action_or_reset(host, clear_mem_enable, cxlds); } -static bool range_contains(struct range *r1, struct range *r2) -{ - return r1->start <= r2->start && r1->end >= r2->end; -} - /* require dvsec ranges to be covered by a locked platform window */ static int dvsec_range_allowed(struct device *dev, void *arg) { -- cgit From 32ce3f185bbb3802cd0ac925bc8fddf1797e0ad4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:33 -0800 Subject: cxl/port: Split endpoint and switch port probe Jonathan points out that the shared code between the switch and endpoint case is small. Before adding another is_cxl_endpoint() conditional, just split the two cases. Rather than duplicate the "Couldn't enumerate decoders" error message, take the opportunity to improve the error messages in devm_cxl_enumerate_decoders(). 
Reported-by: Jonathan Cameron Reviewed-by: Vishal Verma Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167601999378.1924368.15071142145866277623.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/cxl/core') diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index dcc16d7cb8f3..a0891c3464f1 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -826,7 +826,8 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) cxled = cxl_endpoint_decoder_alloc(port); if (IS_ERR(cxled)) { dev_warn(&port->dev, - "Failed to allocate the decoder\n"); + "Failed to allocate decoder%d.%d\n", + port->id, i); return PTR_ERR(cxled); } cxld = &cxled->cxld; @@ -836,7 +837,8 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) cxlsd = cxl_switch_decoder_alloc(port, target_count); if (IS_ERR(cxlsd)) { dev_warn(&port->dev, - "Failed to allocate the decoder\n"); + "Failed to allocate decoder%d.%d\n", + port->id, i); return PTR_ERR(cxlsd); } cxld = &cxlsd->cxld; @@ -844,13 +846,16 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) rc = init_hdm_decoder(port, cxld, target_map, hdm, i, &dpa_base); if (rc) { + dev_warn(&port->dev, + "Failed to initialize decoder%d.%d\n", + port->id, i); put_device(&cxld->dev); return rc; } rc = add_hdm_decoder(port, cxld, target_map); if (rc) { dev_warn(&port->dev, - "Failed to add decoder to port\n"); + "Failed to add decoder%d.%d\n", port->id, i); return rc; } } -- cgit From a32320b71f085f8d82afedcf285f1682c8c00aed Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 17:31:17 -0800 Subject: cxl/region: Add region autodiscovery Region autodiscovery is an asynchronous state machine advanced by cxl_port_probe(). After the decoders on an endpoint port are enumerated they are scanned for actively enabled instances. 
Each active decoder is flagged for auto-assembly CXL_DECODER_F_AUTO and attached to a region. If a region does not already exist for the address range setting of the decoder, one is created. That creation process may race with other decoders of the same region being discovered since cxl_port_probe() is asynchronous. A new 'struct cxl_root_decoder' lock, @range_lock, is introduced to mitigate that race. Once all decoders have arrived, "p->nr_targets == p->interleave_ways", they are sorted by their relative decode position. The sort algorithm involves finding the point in the cxl_port topology where one leg of the decode leads to deviceA and the other deviceB. At that point in the topology the target order in the 'struct cxl_switch_decoder' indicates the relative position of those endpoint decoders in the region. From that point the region goes through the same setup and validation steps as user-created regions, but instead of programming the decoders it validates that the driver would have written the same values to the decoders as were already present. 
Tested-by: Fan Ni Reviewed-by: Vishal Verma Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167601999958.1924368.9366954455835735048.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 11 + drivers/cxl/core/port.c | 2 + drivers/cxl/core/region.c | 502 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 505 insertions(+), 10 deletions(-) (limited to 'drivers/cxl/core') diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index a0891c3464f1..8c29026a4b9d 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -676,6 +676,14 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld) port->commit_end--; cxld->flags &= ~CXL_DECODER_F_ENABLE; + /* Userspace is now responsible for reconfiguring this decoder */ + if (is_endpoint_decoder(&cxld->dev)) { + struct cxl_endpoint_decoder *cxled; + + cxled = to_cxl_endpoint_decoder(&cxld->dev); + cxled->state = CXL_DECODER_STATE_MANUAL; + } + return 0; } @@ -783,6 +791,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, return rc; } *dpa_base += dpa_size + skip; + + cxled->state = CXL_DECODER_STATE_AUTO; + return 0; } diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 9e5df64ea6b5..59620528571a 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -446,6 +446,7 @@ bool is_endpoint_decoder(struct device *dev) { return dev->type == &cxl_decoder_endpoint_type; } +EXPORT_SYMBOL_NS_GPL(is_endpoint_decoder, CXL); bool is_root_decoder(struct device *dev) { @@ -1628,6 +1629,7 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, } cxlrd->calc_hb = calc_hb; + mutex_init(&cxlrd->range_lock); cxld = &cxlsd->cxld; cxld->dev.type = &cxl_decoder_root_type; diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 691605f1e120..8c29204279e9 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -6,6 +6,7 @@ #include #include #include +#include 
#include #include #include @@ -524,7 +525,12 @@ static void cxl_region_iomem_release(struct cxl_region *cxlr) if (device_is_registered(&cxlr->dev)) lockdep_assert_held_write(&cxl_region_rwsem); if (p->res) { - remove_resource(p->res); + /* + * Autodiscovered regions may not have been able to insert their + * resource. + */ + if (p->res->parent) + remove_resource(p->res); kfree(p->res); p->res = NULL; } @@ -1105,12 +1111,35 @@ static int cxl_port_setup_targets(struct cxl_port *port, return rc; } - cxld->interleave_ways = iw; - cxld->interleave_granularity = ig; - cxld->hpa_range = (struct range) { - .start = p->res->start, - .end = p->res->end, - }; + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { + if (cxld->interleave_ways != iw || + cxld->interleave_granularity != ig || + cxld->hpa_range.start != p->res->start || + cxld->hpa_range.end != p->res->end || + ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) { + dev_err(&cxlr->dev, + "%s:%s %s expected iw: %d ig: %d %pr\n", + dev_name(port->uport), dev_name(&port->dev), + __func__, iw, ig, p->res); + dev_err(&cxlr->dev, + "%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n", + dev_name(port->uport), dev_name(&port->dev), + __func__, cxld->interleave_ways, + cxld->interleave_granularity, + (cxld->flags & CXL_DECODER_F_ENABLE) ? 
+ "enabled" : + "disabled", + cxld->hpa_range.start, cxld->hpa_range.end); + return -ENXIO; + } + } else { + cxld->interleave_ways = iw; + cxld->interleave_granularity = ig; + cxld->hpa_range = (struct range) { + .start = p->res->start, + .end = p->res->end, + }; + } dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport), dev_name(&port->dev), iw, ig); add_target: @@ -1121,7 +1150,17 @@ add_target: dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos); return -ENXIO; } - cxlsd->target[cxl_rr->nr_targets_set] = ep->dport; + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { + if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) { + dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n", + dev_name(port->uport), dev_name(&port->dev), + dev_name(&cxlsd->cxld.dev), + dev_name(ep->dport->dport), + cxl_rr->nr_targets_set); + return -ENXIO; + } + } else + cxlsd->target[cxl_rr->nr_targets_set] = ep->dport; inc = 1; out_target_set: cxl_rr->nr_targets_set += inc; @@ -1163,6 +1202,13 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr) struct cxl_ep *ep; int i; + /* + * In the auto-discovery case skip automatic teardown since the + * address space is already active + */ + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) + return; + for (i = 0; i < p->nr_targets; i++) { cxled = p->targets[i]; cxlmd = cxled_to_memdev(cxled); @@ -1195,8 +1241,8 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr) iter = to_cxl_port(iter->dev.parent); /* - * Descend the topology tree programming targets while - * looking for conflicts. + * Descend the topology tree programming / validating + * targets while looking for conflicts. 
*/ for (ep = cxl_ep_load(iter, cxlmd); iter; iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) { @@ -1291,6 +1337,191 @@ err: return rc; } +static int cxl_region_attach_auto(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos) +{ + struct cxl_region_params *p = &cxlr->params; + + if (cxled->state != CXL_DECODER_STATE_AUTO) { + dev_err(&cxlr->dev, + "%s: unable to add decoder to autodetected region\n", + dev_name(&cxled->cxld.dev)); + return -EINVAL; + } + + if (pos >= 0) { + dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n", + dev_name(&cxled->cxld.dev), pos); + return -EINVAL; + } + + if (p->nr_targets >= p->interleave_ways) { + dev_err(&cxlr->dev, "%s: no more target slots available\n", + dev_name(&cxled->cxld.dev)); + return -ENXIO; + } + + /* + * Temporarily record the endpoint decoder into the target array. Yes, + * this means that userspace can view devices in the wrong position + * before the region activates, and must be careful to understand when + * it might be racing region autodiscovery. 
+ */ + pos = p->nr_targets; + p->targets[pos] = cxled; + cxled->pos = pos; + p->nr_targets++; + + return 0; +} + +static struct cxl_port *next_port(struct cxl_port *port) +{ + if (!port->parent_dport) + return NULL; + return port->parent_dport->port; +} + +static int decoder_match_range(struct device *dev, void *data) +{ + struct cxl_endpoint_decoder *cxled = data; + struct cxl_switch_decoder *cxlsd; + + if (!is_switch_decoder(dev)) + return 0; + + cxlsd = to_cxl_switch_decoder(dev); + return range_contains(&cxlsd->cxld.hpa_range, &cxled->cxld.hpa_range); +} + +static void find_positions(const struct cxl_switch_decoder *cxlsd, + const struct cxl_port *iter_a, + const struct cxl_port *iter_b, int *a_pos, + int *b_pos) +{ + int i; + + for (i = 0, *a_pos = -1, *b_pos = -1; i < cxlsd->nr_targets; i++) { + if (cxlsd->target[i] == iter_a->parent_dport) + *a_pos = i; + else if (cxlsd->target[i] == iter_b->parent_dport) + *b_pos = i; + if (*a_pos >= 0 && *b_pos >= 0) + break; + } +} + +static int cmp_decode_pos(const void *a, const void *b) +{ + struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a; + struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b; + struct cxl_memdev *cxlmd_a = cxled_to_memdev(cxled_a); + struct cxl_memdev *cxlmd_b = cxled_to_memdev(cxled_b); + struct cxl_port *port_a = cxled_to_port(cxled_a); + struct cxl_port *port_b = cxled_to_port(cxled_b); + struct cxl_port *iter_a, *iter_b, *port = NULL; + struct cxl_switch_decoder *cxlsd; + struct device *dev; + int a_pos, b_pos; + unsigned int seq; + + /* Exit early if any prior sorting failed */ + if (cxled_a->pos < 0 || cxled_b->pos < 0) + return 0; + + /* + * Walk up the hierarchy to find a shared port, find the decoder that + * maps the range, compare the relative position of those dport + * mappings. 
+ */ + for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) { + struct cxl_port *next_a, *next_b; + + next_a = next_port(iter_a); + if (!next_a) + break; + + for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) { + next_b = next_port(iter_b); + if (next_a != next_b) + continue; + port = next_a; + break; + } + + if (port) + break; + } + + if (!port) { + dev_err(cxlmd_a->dev.parent, + "failed to find shared port with %s\n", + dev_name(cxlmd_b->dev.parent)); + goto err; + } + + dev = device_find_child(&port->dev, cxled_a, decoder_match_range); + if (!dev) { + struct range *range = &cxled_a->cxld.hpa_range; + + dev_err(port->uport, + "failed to find decoder that maps %#llx-%#llx\n", + range->start, range->end); + goto err; + } + + cxlsd = to_cxl_switch_decoder(dev); + do { + seq = read_seqbegin(&cxlsd->target_lock); + find_positions(cxlsd, iter_a, iter_b, &a_pos, &b_pos); + } while (read_seqretry(&cxlsd->target_lock, seq)); + + put_device(dev); + + if (a_pos < 0 || b_pos < 0) { + dev_err(port->uport, + "failed to find shared decoder for %s and %s\n", + dev_name(cxlmd_a->dev.parent), + dev_name(cxlmd_b->dev.parent)); + goto err; + } + + dev_dbg(port->uport, "%s comes %s %s\n", dev_name(cxlmd_a->dev.parent), + a_pos - b_pos < 0 ? "before" : "after", + dev_name(cxlmd_b->dev.parent)); + + return a_pos - b_pos; +err: + cxled_a->pos = -1; + return 0; +} + +static int cxl_region_sort_targets(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + int i, rc = 0; + + sort(p->targets, p->nr_targets, sizeof(p->targets[0]), cmp_decode_pos, + NULL); + + for (i = 0; i < p->nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + + /* + * Record that sorting failed, but still continue to restore + * cxled->pos with its ->targets[] position so that follow-on + * code paths can reliably do p->targets[cxled->pos] to + * self-reference their entry. 
+ */ + if (cxled->pos < 0) + rc = -ENXIO; + cxled->pos = i; + } + + dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful"); + return rc; +} + static int cxl_region_attach(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled, int pos) { @@ -1354,6 +1585,50 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -EINVAL; } + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { + int i; + + rc = cxl_region_attach_auto(cxlr, cxled, pos); + if (rc) + return rc; + + /* await more targets to arrive... */ + if (p->nr_targets < p->interleave_ways) + return 0; + + /* + * All targets are here, which implies all PCI enumeration that + * affects this region has been completed. Walk the topology to + * sort the devices into their relative region decode position. + */ + rc = cxl_region_sort_targets(cxlr); + if (rc) + return rc; + + for (i = 0; i < p->nr_targets; i++) { + cxled = p->targets[i]; + ep_port = cxled_to_port(cxled); + dport = cxl_find_dport_by_dev(root_port, + ep_port->host_bridge); + rc = cxl_region_attach_position(cxlr, cxlrd, cxled, + dport, i); + if (rc) + return rc; + } + + rc = cxl_region_setup_targets(cxlr); + if (rc) + return rc; + + /* + * If target setup succeeds in the autodiscovery case + * then the region is already committed. 
+ */ + p->state = CXL_CONFIG_COMMIT; + + return 0; + } + rc = cxl_region_validate_position(cxlr, cxled, pos); if (rc) return rc; @@ -2087,6 +2362,192 @@ err_bridge: return rc; } +static int match_decoder_by_range(struct device *dev, void *data) +{ + struct range *r1, *r2 = data; + struct cxl_root_decoder *cxlrd; + + if (!is_root_decoder(dev)) + return 0; + + cxlrd = to_cxl_root_decoder(dev); + r1 = &cxlrd->cxlsd.cxld.hpa_range; + return range_contains(r1, r2); +} + +static int match_region_by_range(struct device *dev, void *data) +{ + struct cxl_region_params *p; + struct cxl_region *cxlr; + struct range *r = data; + int rc = 0; + + if (!is_cxl_region(dev)) + return 0; + + cxlr = to_cxl_region(dev); + p = &cxlr->params; + + down_read(&cxl_region_rwsem); + if (p->res && p->res->start == r->start && p->res->end == r->end) + rc = 1; + up_read(&cxl_region_rwsem); + + return rc; +} + +/* Establish an empty region covering the given HPA range */ +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder *cxled) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_port *port = cxlrd_to_port(cxlrd); + struct range *hpa = &cxled->cxld.hpa_range; + struct cxl_region_params *p; + struct cxl_region *cxlr; + struct resource *res; + int rc; + + do { + cxlr = __create_region(cxlrd, cxled->mode, + atomic_read(&cxlrd->region_id)); + } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); + + if (IS_ERR(cxlr)) { + dev_err(cxlmd->dev.parent, + "%s:%s: %s failed assign region: %ld\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + __func__, PTR_ERR(cxlr)); + return cxlr; + } + + down_write(&cxl_region_rwsem); + p = &cxlr->params; + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { + dev_err(cxlmd->dev.parent, + "%s:%s: %s autodiscovery interrupted\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + __func__); + rc = -EBUSY; + goto err; + } + + set_bit(CXL_REGION_F_AUTO, &cxlr->flags); + + res = 
kmalloc(sizeof(*res), GFP_KERNEL); + if (!res) { + rc = -ENOMEM; + goto err; + } + + *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa), + dev_name(&cxlr->dev)); + rc = insert_resource(cxlrd->res, res); + if (rc) { + /* + * Platform-firmware may not have split resources like "System + * RAM" on CXL window boundaries see cxl_region_iomem_release() + */ + dev_warn(cxlmd->dev.parent, + "%s:%s: %s %s cannot insert resource\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + __func__, dev_name(&cxlr->dev)); + } + + p->res = res; + p->interleave_ways = cxled->cxld.interleave_ways; + p->interleave_granularity = cxled->cxld.interleave_granularity; + p->state = CXL_CONFIG_INTERLEAVE_ACTIVE; + + rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group()); + if (rc) + goto err; + + dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__, + dev_name(&cxlr->dev), p->res, p->interleave_ways, + p->interleave_granularity); + + /* ...to match put_device() in cxl_add_to_region() */ + get_device(&cxlr->dev); + up_write(&cxl_region_rwsem); + + return cxlr; + +err: + up_write(&cxl_region_rwsem); + devm_release_action(port->uport, unregister_region, cxlr); + return ERR_PTR(rc); +} + +int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct range *hpa = &cxled->cxld.hpa_range; + struct cxl_decoder *cxld = &cxled->cxld; + struct device *cxlrd_dev, *region_dev; + struct cxl_root_decoder *cxlrd; + struct cxl_region_params *p; + struct cxl_region *cxlr; + bool attach = false; + int rc; + + cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range, + match_decoder_by_range); + if (!cxlrd_dev) { + dev_err(cxlmd->dev.parent, + "%s:%s no CXL window for range %#llx:%#llx\n", + dev_name(&cxlmd->dev), dev_name(&cxld->dev), + cxld->hpa_range.start, cxld->hpa_range.end); + return -ENXIO; + } + + cxlrd = 
to_cxl_root_decoder(cxlrd_dev); + + /* + * Ensure that if multiple threads race to construct_region() for @hpa + * one does the construction and the others add to that. + */ + mutex_lock(&cxlrd->range_lock); + region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa, + match_region_by_range); + if (!region_dev) { + cxlr = construct_region(cxlrd, cxled); + region_dev = &cxlr->dev; + } else + cxlr = to_cxl_region(region_dev); + mutex_unlock(&cxlrd->range_lock); + + if (IS_ERR(cxlr)) { + rc = PTR_ERR(cxlr); + goto out; + } + + attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE); + + down_read(&cxl_region_rwsem); + p = &cxlr->params; + attach = p->state == CXL_CONFIG_COMMIT; + up_read(&cxl_region_rwsem); + + if (attach) { + /* + * If device_attach() fails the range may still be active via + * the platform-firmware memory map, otherwise the driver for + * regions is local to this file, so driver matching can't fail. + */ + if (device_attach(&cxlr->dev) < 0) + dev_err(&cxlr->dev, "failed to enable, range: %pr\n", + p->res); + } + + put_device(region_dev); +out: + put_device(cxlrd_dev); + return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL); + static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) { if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags)) @@ -2111,6 +2572,15 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) return 0; } +static int is_system_ram(struct resource *res, void *arg) +{ + struct cxl_region *cxlr = arg; + struct cxl_region_params *p = &cxlr->params; + + dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res); + return 1; +} + static int cxl_region_probe(struct device *dev) { struct cxl_region *cxlr = to_cxl_region(dev); @@ -2144,6 +2614,18 @@ out: switch (cxlr->mode) { case CXL_DECODER_PMEM: return devm_cxl_add_pmem_region(cxlr); + case CXL_DECODER_RAM: + /* + * The region can not be manged by CXL if any portion of + * it is already online as 'System RAM' + */ + if 
(walk_iomem_res_desc(IORES_DESC_NONE, + IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY, + p->res->start, p->res->end, cxlr, + is_system_ram) > 0) + return 0; + dev_dbg(dev, "TODO: hookup devdax\n"); + return 0; default: dev_dbg(&cxlr->dev, "unsupported region mode: %d\n", cxlr->mode); -- cgit From 3d8f7ccaa611a743ae3a1e6f605346993d37c513 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:45 -0800 Subject: tools/testing/cxl: Define a fixed volatile configuration to parse Take two endpoints attached to the first switch on the first host-bridge in the cxl_test topology and define a pre-initialized region. This is a x2 interleave underneath a x1 CXL Window. $ modprobe cxl_test $ # cxl list -Ru { "region":"region3", "resource":"0xf010000000", "size":"512.00 MiB (536.87 MB)", "interleave_ways":2, "interleave_granularity":4096, "decode_state":"commit" } Tested-by: Fan Ni Reviewed-by: Vishal Verma Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167602000547.1924368.11613151863880268868.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/core.h | 3 --- drivers/cxl/core/hdm.c | 3 ++- drivers/cxl/core/port.c | 2 ++ 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/cxl/core') diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 5eb873da5a30..479f01da6d35 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -57,9 +57,6 @@ resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled); resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled); extern struct rw_semaphore cxl_dpa_rwsem; -bool is_switch_decoder(struct device *dev); -struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev); - int cxl_memdev_init(void); void cxl_memdev_exit(void); void cxl_mbox_init(void); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 8c29026a4b9d..80eccae6ba9e 100644 --- a/drivers/cxl/core/hdm.c +++ 
b/drivers/cxl/core/hdm.c @@ -279,7 +279,7 @@ success: return 0; } -static int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, +int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped) { @@ -295,6 +295,7 @@ static int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled); } +EXPORT_SYMBOL_NS_GPL(devm_cxl_dpa_reserve, CXL); resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled) { diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 59620528571a..b45d2796ef35 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -458,6 +458,7 @@ bool is_switch_decoder(struct device *dev) { return is_root_decoder(dev) || dev->type == &cxl_decoder_switch_type; } +EXPORT_SYMBOL_NS_GPL(is_switch_decoder, CXL); struct cxl_decoder *to_cxl_decoder(struct device *dev) { @@ -485,6 +486,7 @@ struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev) return NULL; return container_of(dev, struct cxl_switch_decoder, cxld.dev); } +EXPORT_SYMBOL_NS_GPL(to_cxl_switch_decoder, CXL); static void cxl_ep_release(struct cxl_ep *ep) { -- cgit From 09d09e04d2fcf88c4620dd28097e0e2a8f720eac Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:07:19 -0800 Subject: cxl/dax: Create dax devices for CXL RAM regions While platform firmware takes some responsibility for mapping the RAM capacity of CXL devices present at boot, the OS is responsible for mapping the remainder and hot-added devices. Platform firmware is also responsible for identifying the platform general purpose memory pool, typically DDR attached DRAM, and arranging for the remainder to be 'Soft Reserved'. That reservation allows the CXL subsystem to route the memory to core-mm via memory-hotplug (dax_kmem), or leave it for dedicated access (device-dax). 
The new 'struct cxl_dax_region' object allows for a CXL memory resource (region) to be published, but also allow for udev and module policy to act on that event. It also prevents cxl_core.ko from having a module loading dependency on any drivers/dax/ modules. Tested-by: Fan Ni Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167602003896.1924368.10335442077318970468.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/core.h | 3 ++ drivers/cxl/core/port.c | 4 +- drivers/cxl/core/region.c | 108 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 112 insertions(+), 3 deletions(-) (limited to 'drivers/cxl/core') diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 479f01da6d35..cde475e13216 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -15,12 +15,14 @@ extern struct device_attribute dev_attr_create_ram_region; extern struct device_attribute dev_attr_delete_region; extern struct device_attribute dev_attr_region; extern const struct device_type cxl_pmem_region_type; +extern const struct device_type cxl_dax_region_type; extern const struct device_type cxl_region_type; void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled); #define CXL_REGION_ATTR(x) (&dev_attr_##x.attr) #define CXL_REGION_TYPE(x) (&cxl_region_type) #define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr), #define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type) +#define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type) int cxl_region_init(void); void cxl_region_exit(void); #else @@ -38,6 +40,7 @@ static inline void cxl_region_exit(void) #define CXL_REGION_TYPE(x) NULL #define SET_CXL_REGION_ATTR(x) #define CXL_PMEM_REGION_TYPE(x) NULL +#define CXL_DAX_REGION_TYPE(x) NULL #endif struct cxl_send_command; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index b45d2796ef35..0bb7a5ff724b 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -46,6 +46,8 @@ static 
int cxl_device_id(struct device *dev) return CXL_DEVICE_NVDIMM; if (dev->type == CXL_PMEM_REGION_TYPE()) return CXL_DEVICE_PMEM_REGION; + if (dev->type == CXL_DAX_REGION_TYPE()) + return CXL_DEVICE_DAX_REGION; if (is_cxl_port(dev)) { if (is_cxl_root(to_cxl_port(dev))) return CXL_DEVICE_ROOT; @@ -2015,6 +2017,6 @@ static void cxl_core_exit(void) debugfs_remove_recursive(cxl_debugfs); } -module_init(cxl_core_init); +subsys_initcall(cxl_core_init); module_exit(cxl_core_exit); MODULE_LICENSE("GPL v2"); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 8c29204279e9..91bb9ac881ff 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2278,6 +2278,75 @@ out: return cxlr_pmem; } +static void cxl_dax_region_release(struct device *dev) +{ + struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev); + + kfree(cxlr_dax); +} + +static const struct attribute_group *cxl_dax_region_attribute_groups[] = { + &cxl_base_attribute_group, + NULL, +}; + +const struct device_type cxl_dax_region_type = { + .name = "cxl_dax_region", + .release = cxl_dax_region_release, + .groups = cxl_dax_region_attribute_groups, +}; + +static bool is_cxl_dax_region(struct device *dev) +{ + return dev->type == &cxl_dax_region_type; +} + +struct cxl_dax_region *to_cxl_dax_region(struct device *dev) +{ + if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev), + "not a cxl_dax_region device\n")) + return NULL; + return container_of(dev, struct cxl_dax_region, dev); +} +EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL); + +static struct lock_class_key cxl_dax_region_key; + +static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_dax_region *cxlr_dax; + struct device *dev; + + down_read(&cxl_region_rwsem); + if (p->state != CXL_CONFIG_COMMIT) { + cxlr_dax = ERR_PTR(-ENXIO); + goto out; + } + + cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL); + if (!cxlr_dax) { + cxlr_dax = ERR_PTR(-ENOMEM); + goto 
out; + } + + cxlr_dax->hpa_range.start = p->res->start; + cxlr_dax->hpa_range.end = p->res->end; + + dev = &cxlr_dax->dev; + cxlr_dax->cxlr = cxlr; + device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_dax_region_key); + device_set_pm_not_required(dev); + dev->parent = &cxlr->dev; + dev->bus = &cxl_bus_type; + dev->type = &cxl_dax_region_type; +out: + up_read(&cxl_region_rwsem); + + return cxlr_dax; +} + static void cxlr_pmem_unregister(void *_cxlr_pmem) { struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem; @@ -2362,6 +2431,42 @@ err_bridge: return rc; } +static void cxlr_dax_unregister(void *_cxlr_dax) +{ + struct cxl_dax_region *cxlr_dax = _cxlr_dax; + + device_unregister(&cxlr_dax->dev); +} + +static int devm_cxl_add_dax_region(struct cxl_region *cxlr) +{ + struct cxl_dax_region *cxlr_dax; + struct device *dev; + int rc; + + cxlr_dax = cxl_dax_region_alloc(cxlr); + if (IS_ERR(cxlr_dax)) + return PTR_ERR(cxlr_dax); + + dev = &cxlr_dax->dev; + rc = dev_set_name(dev, "dax_region%d", cxlr->id); + if (rc) + goto err; + + rc = device_add(dev); + if (rc) + goto err; + + dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent), + dev_name(dev)); + + return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister, + cxlr_dax); +err: + put_device(dev); + return rc; +} + static int match_decoder_by_range(struct device *dev, void *data) { struct range *r1, *r2 = data; @@ -2624,8 +2729,7 @@ out: p->res->start, p->res->end, cxlr, is_system_ram) > 0) return 0; - dev_dbg(dev, "TODO: hookup devdax\n"); - return 0; + return devm_cxl_add_dax_region(cxlr); default: dev_dbg(&cxlr->dev, "unsupported region mode: %d\n", cxlr->mode); -- cgit