Diffstat (limited to 'drivers/cxl/core/region.c'):
 drivers/cxl/core/region.c | 2373 ++++++++++++++++++++++++++++++++------------
 1 file changed, 1715 insertions(+), 658 deletions(-)
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index e115ba382e04..ae899f68551f 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2,12 +2,16 @@
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
#include <linux/memregion.h>
#include <linux/genalloc.h>
+#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/module.h>
+#include <linux/memory.h>
#include <linux/slab.h>
#include <linux/uuid.h>
#include <linux/sort.h>
#include <linux/idr.h>
+#include <linux/memory-tiers.h>
+#include <linux/string_choices.h>
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"
@@ -29,13 +33,115 @@
*/
/*
- * All changes to the interleave configuration occur with this lock held
- * for write.
+ * Nodemask that records, per node, whether the access_coordinates for that
+ * node have been updated by the CXL memory hotplug notifier.
*/
-static DECLARE_RWSEM(cxl_region_rwsem);
+static nodemask_t nodemask_region_seen = NODE_MASK_NONE;
static struct cxl_region *to_cxl_region(struct device *dev);
+#define __ACCESS_ATTR_RO(_level, _name) { \
+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
+ .show = _name##_access##_level##_show, \
+}
+
+#define ACCESS_DEVICE_ATTR_RO(level, name) \
+ struct device_attribute dev_attr_access##level##_##name = __ACCESS_ATTR_RO(level, name)
+
+#define ACCESS_ATTR_RO(level, attrib) \
+static ssize_t attrib##_access##level##_show(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct cxl_region *cxlr = to_cxl_region(dev); \
+ \
+ if (cxlr->coord[level].attrib == 0) \
+ return -ENOENT; \
+ \
+ return sysfs_emit(buf, "%u\n", cxlr->coord[level].attrib); \
+} \
+static ACCESS_DEVICE_ATTR_RO(level, attrib)
+
+ACCESS_ATTR_RO(0, read_bandwidth);
+ACCESS_ATTR_RO(0, read_latency);
+ACCESS_ATTR_RO(0, write_bandwidth);
+ACCESS_ATTR_RO(0, write_latency);
+
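For readers not used to this sysfs boilerplate, here is a hand-expanded sketch of what ACCESS_ATTR_RO(0, read_bandwidth) generates (an illustrative expansion, not actual preprocessor output):

	static ssize_t read_bandwidth_access0_show(struct device *dev,
						   struct device_attribute *attr,
						   char *buf)
	{
		struct cxl_region *cxlr = to_cxl_region(dev);

		/* coordinate not yet populated */
		if (cxlr->coord[0].read_bandwidth == 0)
			return -ENOENT;

		return sysfs_emit(buf, "%u\n", cxlr->coord[0].read_bandwidth);
	}
	static struct device_attribute dev_attr_access0_read_bandwidth = {
		.attr = { .name = "read_bandwidth", .mode = 0444 },
		.show = read_bandwidth_access0_show,
	};

The same pattern repeats for the other three coordinates and again for level 1 below.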
+#define ACCESS_ATTR_DECLARE(level, attrib) \
+ (&dev_attr_access##level##_##attrib.attr)
+
+static struct attribute *access0_coordinate_attrs[] = {
+ ACCESS_ATTR_DECLARE(0, read_bandwidth),
+ ACCESS_ATTR_DECLARE(0, write_bandwidth),
+ ACCESS_ATTR_DECLARE(0, read_latency),
+ ACCESS_ATTR_DECLARE(0, write_latency),
+ NULL
+};
+
+ACCESS_ATTR_RO(1, read_bandwidth);
+ACCESS_ATTR_RO(1, read_latency);
+ACCESS_ATTR_RO(1, write_bandwidth);
+ACCESS_ATTR_RO(1, write_latency);
+
+static struct attribute *access1_coordinate_attrs[] = {
+ ACCESS_ATTR_DECLARE(1, read_bandwidth),
+ ACCESS_ATTR_DECLARE(1, write_bandwidth),
+ ACCESS_ATTR_DECLARE(1, read_latency),
+ ACCESS_ATTR_DECLARE(1, write_latency),
+ NULL
+};
+
+#define ACCESS_VISIBLE(level) \
+static umode_t cxl_region_access##level##_coordinate_visible( \
+ struct kobject *kobj, struct attribute *a, int n) \
+{ \
+ struct device *dev = kobj_to_dev(kobj); \
+ struct cxl_region *cxlr = to_cxl_region(dev); \
+ \
+ if (a == &dev_attr_access##level##_read_latency.attr && \
+ cxlr->coord[level].read_latency == 0) \
+ return 0; \
+ \
+ if (a == &dev_attr_access##level##_write_latency.attr && \
+ cxlr->coord[level].write_latency == 0) \
+ return 0; \
+ \
+ if (a == &dev_attr_access##level##_read_bandwidth.attr && \
+ cxlr->coord[level].read_bandwidth == 0) \
+ return 0; \
+ \
+ if (a == &dev_attr_access##level##_write_bandwidth.attr && \
+ cxlr->coord[level].write_bandwidth == 0) \
+ return 0; \
+ \
+ return a->mode; \
+}
+
+ACCESS_VISIBLE(0);
+ACCESS_VISIBLE(1);
+
+static const struct attribute_group cxl_region_access0_coordinate_group = {
+ .name = "access0",
+ .attrs = access0_coordinate_attrs,
+ .is_visible = cxl_region_access0_coordinate_visible,
+};
+
+static const struct attribute_group *get_cxl_region_access0_group(void)
+{
+ return &cxl_region_access0_coordinate_group;
+}
+
+static const struct attribute_group cxl_region_access1_coordinate_group = {
+ .name = "access1",
+ .attrs = access1_coordinate_attrs,
+ .is_visible = cxl_region_access1_coordinate_visible,
+};
+
+static const struct attribute_group *get_cxl_region_access1_group(void)
+{
+ return &cxl_region_access1_coordinate_group;
+}
+
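Together with the region_groups[] update later in this patch, these two groups expose the access-class performance coordinates under the region device in sysfs, for example (illustrative paths, assuming a region named region0):

	/sys/bus/cxl/devices/region0/access0/read_bandwidth
	/sys/bus/cxl/devices/region0/access1/write_latency

The is_visible() callbacks above hide any file whose coordinate is still zero, and the zero check in the show() helpers returns -ENOENT should such a file be read anyway.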
static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -43,16 +149,12 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
struct cxl_region_params *p = &cxlr->params;
ssize_t rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
return rc;
- if (cxlr->mode != CXL_DECODER_PMEM)
- rc = sysfs_emit(buf, "\n");
- else
- rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
- up_read(&cxl_region_rwsem);
-
- return rc;
+ if (cxlr->mode != CXL_PARTMODE_PMEM)
+ return sysfs_emit(buf, "\n");
+ return sysfs_emit(buf, "%pUb\n", &p->uuid);
}
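This shows the locking conversion applied throughout the patch: the global cxl_region_rwsem becomes the region member of a cxl_rwsem structure declared outside this diff, and open-coded down_read_interruptible()/up_read() pairs are replaced with scope-based guards so that every early return drops the lock automatically. A minimal before/after sketch of the pattern (ACQUIRE()/ACQUIRE_ERR() are conditional-lock guards defined outside this hunk):

	/* before: manual acquire/release on every return path */
	rc = down_read_interruptible(&cxl_region_rwsem);
	if (rc)
		return rc;
	rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
	up_read(&cxl_region_rwsem);
	return rc;

	/* after: the guard drops cxl_rwsem.region when it goes out of scope */
	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
		return rc;	/* e.g. interrupted while waiting for the lock */
	return sysfs_emit(buf, "%pUb\n", &p->uuid);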
static int is_dup(struct device *match, void *data)
@@ -64,7 +166,7 @@ static int is_dup(struct device *match, void *data)
if (!is_cxl_region(match))
return 0;
- lockdep_assert_held(&cxl_region_rwsem);
+ lockdep_assert_held(&cxl_rwsem.region);
cxlr = to_cxl_region(match);
p = &cxlr->params;
@@ -94,27 +196,22 @@ static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
if (uuid_is_null(&temp))
return -EINVAL;
- rc = down_write_killable(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_write_kill, region_rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &region_rwsem)))
return rc;
if (uuid_equal(&p->uuid, &temp))
- goto out;
+ return len;
- rc = -EBUSY;
if (p->state >= CXL_CONFIG_ACTIVE)
- goto out;
+ return -EBUSY;
rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup);
if (rc < 0)
- goto out;
+ return rc;
uuid_copy(&p->uuid, &temp);
-out:
- up_write(&cxl_region_rwsem);
- if (rc)
- return rc;
return len;
}
static DEVICE_ATTR_RW(uuid);
@@ -129,34 +226,37 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
{
if (!cpu_cache_has_invalidate_memregion()) {
if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
- dev_warn_once(
+ dev_info_once(
&cxlr->dev,
"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
return 0;
- } else {
- dev_err(&cxlr->dev,
- "Failed to synchronize CPU cache state\n");
- return -ENXIO;
}
+ dev_WARN(&cxlr->dev,
+ "Failed to synchronize CPU cache state\n");
+ return -ENXIO;
}
- cpu_cache_invalidate_memregion(IORES_DESC_CXL);
+ if (!cxlr->params.res)
+ return -ENXIO;
+ cpu_cache_invalidate_memregion(cxlr->params.res->start,
+ resource_size(cxlr->params.res));
return 0;
}
-static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
+static void cxl_region_decode_reset(struct cxl_region *cxlr, int count)
{
struct cxl_region_params *p = &cxlr->params;
- int i, rc = 0;
+ int i;
+
+ if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags))
+ return;
/*
- * Before region teardown attempt to flush, and if the flush
- * fails cancel the region teardown for data consistency
- * concerns
+ * Before region teardown attempt to flush, evict any data cached for
+ * this region, or scream loudly about missing arch / platform support
+ * for CXL teardown.
*/
- rc = cxl_region_invalidate_memregion(cxlr);
- if (rc)
- return rc;
+ cxl_region_invalidate_memregion(cxlr);
for (i = count - 1; i >= 0; i--) {
struct cxl_endpoint_decoder *cxled = p->targets[i];
@@ -179,23 +279,17 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
cxl_rr = cxl_rr_load(iter, cxlr);
cxld = cxl_rr->decoder;
if (cxld->reset)
- rc = cxld->reset(cxld);
- if (rc)
- return rc;
+ cxld->reset(cxld);
set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
}
endpoint_reset:
- rc = cxled->cxld.reset(&cxled->cxld);
- if (rc)
- return rc;
+ cxled->cxld.reset(&cxled->cxld);
set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
}
/* all decoders associated with this region have been torn down */
clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
-
- return 0;
}
static int commit_decoder(struct cxl_decoder *cxld)
@@ -260,33 +354,40 @@ err:
return rc;
}
-static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t len)
+static int queue_reset(struct cxl_region *cxlr)
{
- struct cxl_region *cxlr = to_cxl_region(dev);
struct cxl_region_params *p = &cxlr->params;
- bool commit;
- ssize_t rc;
+ int rc;
- rc = kstrtobool(buf, &commit);
- if (rc)
+ ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
return rc;
- rc = down_write_killable(&cxl_region_rwsem);
- if (rc)
+ /* Already in the requested state? */
+ if (p->state < CXL_CONFIG_COMMIT)
+ return 0;
+
+ p->state = CXL_CONFIG_RESET_PENDING;
+
+ return 0;
+}
+
+static int __commit(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ int rc;
+
+ ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
return rc;
/* Already in the requested state? */
- if (commit && p->state >= CXL_CONFIG_COMMIT)
- goto out;
- if (!commit && p->state < CXL_CONFIG_COMMIT)
- goto out;
+ if (p->state >= CXL_CONFIG_COMMIT)
+ return 0;
/* Not ready to commit? */
- if (commit && p->state < CXL_CONFIG_ACTIVE) {
- rc = -ENXIO;
- goto out;
- }
+ if (p->state < CXL_CONFIG_ACTIVE)
+ return -ENXIO;
/*
* Invalidate caches before region setup to drop any speculative
@@ -296,39 +397,62 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
if (rc)
return rc;
- if (commit) {
- rc = cxl_region_decode_commit(cxlr);
- if (rc == 0)
- p->state = CXL_CONFIG_COMMIT;
- } else {
- p->state = CXL_CONFIG_RESET_PENDING;
- up_write(&cxl_region_rwsem);
- device_release_driver(&cxlr->dev);
- down_write(&cxl_region_rwsem);
+ rc = cxl_region_decode_commit(cxlr);
+ if (rc)
+ return rc;
- /*
- * The lock was dropped, so need to revalidate that the reset is
- * still pending.
- */
- if (p->state == CXL_CONFIG_RESET_PENDING) {
- rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
- /*
- * Revert to committed since there may still be active
- * decoders associated with this region, or move forward
- * to active to mark the reset successful
- */
- if (rc)
- p->state = CXL_CONFIG_COMMIT;
- else
- p->state = CXL_CONFIG_ACTIVE;
- }
+ p->state = CXL_CONFIG_COMMIT;
+
+ return 0;
+}
+
+static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ bool commit;
+ ssize_t rc;
+
+ rc = kstrtobool(buf, &commit);
+ if (rc)
+ return rc;
+
+ if (commit) {
+ rc = __commit(cxlr);
+ if (rc)
+ return rc;
+ return len;
}
-out:
- up_write(&cxl_region_rwsem);
+ if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags))
+ return -EPERM;
+ rc = queue_reset(cxlr);
if (rc)
return rc;
+
+ /*
+	 * Unmap the region and depend on the reset-pending state to ensure
+ * it does not go active again until post reset
+ */
+ device_release_driver(&cxlr->dev);
+
+ /*
+ * With the reset pending take cxl_rwsem.region unconditionally
+ * to ensure the reset gets handled before returning.
+ */
+ guard(rwsem_write)(&cxl_rwsem.region);
+
+ /*
+ * Revalidate that the reset is still pending in case another
+ * thread already handled this reset.
+ */
+ if (p->state == CXL_CONFIG_RESET_PENDING) {
+ cxl_region_decode_reset(cxlr, p->interleave_ways);
+ p->state = CXL_CONFIG_ACTIVE;
+ }
+
return len;
}
@@ -339,45 +463,24 @@ static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
struct cxl_region_params *p = &cxlr->params;
ssize_t rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
return rc;
- rc = sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
- up_read(&cxl_region_rwsem);
-
- return rc;
+ return sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
}
static DEVICE_ATTR_RW(commit);
-static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
- int n)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct cxl_region *cxlr = to_cxl_region(dev);
-
- /*
- * Support tooling that expects to find a 'uuid' attribute for all
- * regions regardless of mode.
- */
- if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
- return 0444;
- return a->mode;
-}
-
static ssize_t interleave_ways_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_region *cxlr = to_cxl_region(dev);
struct cxl_region_params *p = &cxlr->params;
- ssize_t rc;
+ int rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
return rc;
- rc = sysfs_emit(buf, "%d\n", p->interleave_ways);
- up_read(&cxl_region_rwsem);
-
- return rc;
+ return sysfs_emit(buf, "%d\n", p->interleave_ways);
}
static const struct attribute_group *get_cxl_region_target_group(void);
@@ -403,7 +506,7 @@ static ssize_t interleave_ways_store(struct device *dev,
return rc;
/*
- * Even for x3, x9, and x12 interleaves the region interleave must be a
+ * Even for x3, x6, and x12 interleaves the region interleave must be a
* power of 2 multiple of the host bridge interleave.
*/
if (!is_power_of_2(val / cxld->interleave_ways) ||
@@ -412,23 +515,21 @@ static ssize_t interleave_ways_store(struct device *dev,
return -EINVAL;
}
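A quick numeric illustration of this check (values chosen for illustration): with a root decoder interleaved x3 across host bridges, region interleave ways of 3, 6, or 12 pass because 3/3 = 1, 6/3 = 2, and 12/3 = 4 are powers of two with no remainder, while 9 is rejected since 9/3 = 3 is not a power of two.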
- rc = down_write_killable(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
return rc;
- if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
- rc = -EBUSY;
- goto out;
- }
+
+ if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
+ return -EBUSY;
save = p->interleave_ways;
p->interleave_ways = val;
rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
- if (rc)
+ if (rc) {
p->interleave_ways = save;
-out:
- up_write(&cxl_region_rwsem);
- if (rc)
return rc;
+ }
+
return len;
}
static DEVICE_ATTR_RW(interleave_ways);
@@ -439,15 +540,12 @@ static ssize_t interleave_granularity_show(struct device *dev,
{
struct cxl_region *cxlr = to_cxl_region(dev);
struct cxl_region_params *p = &cxlr->params;
- ssize_t rc;
+ int rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
return rc;
- rc = sysfs_emit(buf, "%d\n", p->interleave_granularity);
- up_read(&cxl_region_rwsem);
-
- return rc;
+ return sysfs_emit(buf, "%d\n", p->interleave_granularity);
}
static ssize_t interleave_granularity_store(struct device *dev,
@@ -480,19 +578,15 @@ static ssize_t interleave_granularity_store(struct device *dev,
if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
return -EINVAL;
- rc = down_write_killable(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
return rc;
- if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
- rc = -EBUSY;
- goto out;
- }
+
+ if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
+ return -EBUSY;
p->interleave_granularity = val;
-out:
- up_write(&cxl_region_rwsem);
- if (rc)
- return rc;
+
return len;
}
static DEVICE_ATTR_RW(interleave_granularity);
@@ -503,17 +597,15 @@ static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
struct cxl_region *cxlr = to_cxl_region(dev);
struct cxl_region_params *p = &cxlr->params;
u64 resource = -1ULL;
- ssize_t rc;
+ int rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
return rc;
+
if (p->res)
resource = p->res->start;
- rc = sysfs_emit(buf, "%#llx\n", resource);
- up_read(&cxl_region_rwsem);
-
- return rc;
+ return sysfs_emit(buf, "%#llx\n", resource);
}
static DEVICE_ATTR_RO(resource);
@@ -521,8 +613,16 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct cxl_region *cxlr = to_cxl_region(dev);
+ const char *desc;
- return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode));
+ if (cxlr->mode == CXL_PARTMODE_RAM)
+ desc = "ram";
+ else if (cxlr->mode == CXL_PARTMODE_PMEM)
+ desc = "pmem";
+ else
+ desc = "";
+
+ return sysfs_emit(buf, "%s\n", desc);
}
static DEVICE_ATTR_RO(mode);
@@ -531,9 +631,9 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
struct cxl_region_params *p = &cxlr->params;
struct resource *res;
- u32 remainder = 0;
+ u64 remainder = 0;
- lockdep_assert_held_write(&cxl_region_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.region);
/* Nothing to do... */
if (p->res && resource_size(p->res) == size)
@@ -548,18 +648,19 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
/* ways, granularity and uuid (if PMEM) need to be set before HPA */
if (!p->interleave_ways || !p->interleave_granularity ||
- (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
+ (cxlr->mode == CXL_PARTMODE_PMEM && uuid_is_null(&p->uuid)))
return -ENXIO;
- div_u64_rem(size, SZ_256M * p->interleave_ways, &remainder);
+ div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder);
if (remainder)
return -EINVAL;
res = alloc_free_mem_region(cxlrd->res, size, SZ_256M,
dev_name(&cxlr->dev));
if (IS_ERR(res)) {
- dev_dbg(&cxlr->dev, "failed to allocate HPA: %ld\n",
- PTR_ERR(res));
+ dev_dbg(&cxlr->dev,
+ "HPA allocation error (%ld) for size:%pap in %s %pr\n",
+ PTR_ERR(res), &size, cxlrd->res->name, cxlrd->res);
return PTR_ERR(res);
}
@@ -574,7 +675,7 @@ static void cxl_region_iomem_release(struct cxl_region *cxlr)
struct cxl_region_params *p = &cxlr->params;
if (device_is_registered(&cxlr->dev))
- lockdep_assert_held_write(&cxl_region_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.region);
if (p->res) {
/*
* Autodiscovered regions may not have been able to insert their
@@ -591,7 +692,7 @@ static int free_hpa(struct cxl_region *cxlr)
{
struct cxl_region_params *p = &cxlr->params;
- lockdep_assert_held_write(&cxl_region_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.region);
if (!p->res)
return 0;
@@ -615,15 +716,14 @@ static ssize_t size_store(struct device *dev, struct device_attribute *attr,
if (rc)
return rc;
- rc = down_write_killable(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
return rc;
if (val)
rc = alloc_hpa(cxlr, val);
else
rc = free_hpa(cxlr);
- up_write(&cxl_region_rwsem);
if (rc)
return rc;
@@ -639,18 +739,30 @@ static ssize_t size_show(struct device *dev, struct device_attribute *attr,
u64 size = 0;
ssize_t rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
return rc;
if (p->res)
size = resource_size(p->res);
- rc = sysfs_emit(buf, "%#llx\n", size);
- up_read(&cxl_region_rwsem);
-
- return rc;
+ return sysfs_emit(buf, "%#llx\n", size);
}
static DEVICE_ATTR_RW(size);
+static ssize_t extended_linear_cache_size_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ ssize_t rc;
+
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
+ return rc;
+ return sysfs_emit(buf, "%#llx\n", p->cache_size);
+}
+static DEVICE_ATTR_RO(extended_linear_cache_size);
+
static struct attribute *cxl_region_attrs[] = {
&dev_attr_uuid.attr,
&dev_attr_commit.attr,
@@ -659,9 +771,34 @@ static struct attribute *cxl_region_attrs[] = {
&dev_attr_resource.attr,
&dev_attr_size.attr,
&dev_attr_mode.attr,
+ &dev_attr_extended_linear_cache_size.attr,
NULL,
};
+static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
+ int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct cxl_region *cxlr = to_cxl_region(dev);
+
+ /*
+ * Support tooling that expects to find a 'uuid' attribute for all
+ * regions regardless of mode.
+ */
+ if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM)
+ return 0444;
+
+ /*
+ * Don't display extended linear cache attribute if there is no
+ * extended linear cache.
+ */
+ if (a == &dev_attr_extended_linear_cache_size.attr &&
+ cxlr->params.cache_size == 0)
+ return 0;
+
+ return a->mode;
+}
+
static const struct attribute_group cxl_region_group = {
.attrs = cxl_region_attrs,
.is_visible = cxl_region_visible,
@@ -673,71 +810,167 @@ static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
struct cxl_endpoint_decoder *cxled;
int rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
return rc;
if (pos >= p->interleave_ways) {
dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
p->interleave_ways);
- rc = -ENXIO;
- goto out;
+ return -ENXIO;
}
cxled = p->targets[pos];
if (!cxled)
- rc = sysfs_emit(buf, "\n");
- else
- rc = sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
-out:
- up_read(&cxl_region_rwsem);
+ return sysfs_emit(buf, "\n");
+ return sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
+}
- return rc;
+static int check_commit_order(struct device *dev, void *data)
+{
+ struct cxl_decoder *cxld = to_cxl_decoder(dev);
+
+ /*
+ * if port->commit_end is not the only free decoder, then out of
+ * order shutdown has occurred, block further allocations until
+ * that is resolved
+ */
+ if (((cxld->flags & CXL_DECODER_F_ENABLE) == 0))
+ return -EBUSY;
+ return 0;
}
-static int match_free_decoder(struct device *dev, void *data)
+static int match_free_decoder(struct device *dev, const void *data)
{
+ struct cxl_port *port = to_cxl_port(dev->parent);
struct cxl_decoder *cxld;
- int *id = data;
+ int rc;
if (!is_switch_decoder(dev))
return 0;
cxld = to_cxl_decoder(dev);
- /* enforce ordered allocation */
- if (cxld->id != *id)
+ if (cxld->id != port->commit_end + 1)
return 0;
- if (!cxld->region)
- return 1;
+ if (cxld->region) {
+ dev_dbg(dev->parent,
+ "next decoder to commit (%s) is already reserved (%s)\n",
+ dev_name(dev), dev_name(&cxld->region->dev));
+ return 0;
+ }
- (*id)++;
+ rc = device_for_each_child_reverse_from(dev->parent, dev, NULL,
+ check_commit_order);
+ if (rc) {
+ dev_dbg(dev->parent,
+ "unable to allocate %s due to out of order shutdown\n",
+ dev_name(dev));
+ return 0;
+ }
+ return 1;
+}
+
+static bool spa_maps_hpa(const struct cxl_region_params *p,
+ const struct range *range)
+{
+ if (!p->res)
+ return false;
+
+ /*
+ * The extended linear cache region is constructed by a 1:1 ratio
+ * where the SPA maps equal amounts of DRAM and CXL HPA capacity with
+ * CXL decoders at the high end of the SPA range.
+ */
+ return p->res->start + p->cache_size == range->start &&
+ p->res->end == range->end;
+}
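For example (addresses invented for illustration): with a 1:1 extended linear cache where p->res spans SPA 0x1000000000-0x107fffffff (2 GiB) and p->cache_size is 1 GiB, spa_maps_hpa() accepts only a decoder HPA range of 0x1040000000-0x107fffffff, i.e. the CXL capacity backs the upper half of the SPA window while the lower half is the DRAM cache.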
+
+static int match_auto_decoder(struct device *dev, const void *data)
+{
+ const struct cxl_region_params *p = data;
+ struct cxl_decoder *cxld;
+ struct range *r;
+
+ if (!is_switch_decoder(dev))
+ return 0;
+
+ cxld = to_cxl_decoder(dev);
+ r = &cxld->hpa_range;
+
+ if (spa_maps_hpa(p, r))
+ return 1;
return 0;
}
-static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port,
- struct cxl_region *cxlr)
+/**
+ * cxl_port_pick_region_decoder() - assign or lookup a decoder for a region
+ * @port: a port in the ancestry of the endpoint implied by @cxled
+ * @cxled: endpoint decoder to be, or currently, mapped by @port
+ * @cxlr: region to establish, or validate, decode @port
+ *
+ * In the region creation path cxl_port_pick_region_decoder() is an
+ * allocator to find a free port. In the region assembly path, it is
+ * recalling the decoder that platform firmware picked for validation
+ * purposes.
+ *
+ * The result is recorded in a 'struct cxl_region_ref' in @port.
+ */
+static struct cxl_decoder *
+cxl_port_pick_region_decoder(struct cxl_port *port,
+ struct cxl_endpoint_decoder *cxled,
+ struct cxl_region *cxlr)
{
struct device *dev;
- int id = 0;
- dev = device_find_child(&port->dev, &id, match_free_decoder);
+ if (port == cxled_to_port(cxled))
+ return &cxled->cxld;
+
+ if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
+ dev = device_find_child(&port->dev, &cxlr->params,
+ match_auto_decoder);
+ else
+ dev = device_find_child(&port->dev, NULL, match_free_decoder);
if (!dev)
return NULL;
/*
* This decoder is pinned registered as long as the endpoint decoder is
* registered, and endpoint decoder unregistration holds the
- * cxl_region_rwsem over unregister events, so no need to hold on to
+ * cxl_rwsem.region over unregister events, so no need to hold on to
* this extra reference.
*/
put_device(dev);
return to_cxl_decoder(dev);
}
-static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
- struct cxl_region *cxlr)
+static bool auto_order_ok(struct cxl_port *port, struct cxl_region *cxlr_iter,
+ struct cxl_decoder *cxld)
+{
+ struct cxl_region_ref *rr = cxl_rr_load(port, cxlr_iter);
+ struct cxl_decoder *cxld_iter = rr->decoder;
+
+ /*
+ * Allow the out of order assembly of auto-discovered regions.
+ * Per CXL Spec 3.1 8.2.4.20.12 software must commit decoders
+ * in HPA order. Confirm that the decoder with the lesser HPA
+ * starting address has the lesser id.
+ */
+ dev_dbg(&cxld->dev, "check for HPA violation %s:%d < %s:%d\n",
+ dev_name(&cxld->dev), cxld->id,
+ dev_name(&cxld_iter->dev), cxld_iter->id);
+
+ if (cxld_iter->id > cxld->id)
+ return true;
+
+ return false;
+}
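A worked example of that rule (decoder numbers invented): if this port already hosts an auto-discovered region at a higher HPA via decoder3, attaching another auto region at a lower HPA is acceptable when it was handed decoder2 (3 > 2, so commit order still follows HPA order), but would be rejected if it had been handed decoder4.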
+
+static struct cxl_region_ref *
+alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled,
+ struct cxl_decoder *cxld)
{
struct cxl_region_params *p = &cxlr->params;
struct cxl_region_ref *cxl_rr, *iter;
@@ -747,16 +980,18 @@ static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
xa_for_each(&port->regions, index, iter) {
struct cxl_region_params *ip = &iter->region->params;
- if (!ip->res)
+ if (!ip->res || ip->res->start < p->res->start)
continue;
- if (ip->res->start > p->res->start) {
- dev_dbg(&cxlr->dev,
- "%s: HPA order violation %s:%pr vs %pr\n",
- dev_name(&port->dev),
- dev_name(&iter->region->dev), ip->res, p->res);
- return ERR_PTR(-EBUSY);
+ if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
+ if (auto_order_ok(port, iter->region, cxld))
+ continue;
}
+ dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n",
+ dev_name(&port->dev),
+ dev_name(&iter->region->dev), ip->res, p->res);
+
+ return ERR_PTR(-EBUSY);
}
cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL);
@@ -830,22 +1065,11 @@ static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
return 0;
}
-static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
- struct cxl_endpoint_decoder *cxled,
- struct cxl_region_ref *cxl_rr)
+static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled,
+ struct cxl_region_ref *cxl_rr,
+ struct cxl_decoder *cxld)
{
- struct cxl_decoder *cxld;
-
- if (port == cxled_to_port(cxled))
- cxld = &cxled->cxld;
- else
- cxld = cxl_region_find_decoder(port, cxlr);
- if (!cxld) {
- dev_dbg(&cxlr->dev, "%s: no decoder available\n",
- dev_name(&port->dev));
- return -EBUSY;
- }
-
if (cxld->region) {
dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
dev_name(&port->dev), dev_name(&cxld->dev),
@@ -869,6 +1093,16 @@ static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
return 0;
}
+static void cxl_region_set_lock(struct cxl_region *cxlr,
+ struct cxl_decoder *cxld)
+{
+ if (!test_bit(CXL_DECODER_F_LOCK, &cxld->flags))
+ return;
+
+ set_bit(CXL_REGION_F_LOCK, &cxlr->flags);
+ clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
+}
+
/**
* cxl_port_attach_region() - track a region's interest in a port by endpoint
* @port: port to add a new region reference 'struct cxl_region_ref'
@@ -905,7 +1139,7 @@ static int cxl_port_attach_region(struct cxl_port *port,
unsigned long index;
int rc = -EBUSY;
- lockdep_assert_held_write(&cxl_region_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.region);
cxl_rr = cxl_rr_load(port, cxlr);
if (cxl_rr) {
@@ -936,7 +1170,16 @@ static int cxl_port_attach_region(struct cxl_port *port,
nr_targets_inc = true;
}
} else {
- cxl_rr = alloc_region_ref(port, cxlr);
+ struct cxl_decoder *cxld;
+
+ cxld = cxl_port_pick_region_decoder(port, cxled, cxlr);
+ if (!cxld) {
+ dev_dbg(&cxlr->dev, "%s: no decoder available\n",
+ dev_name(&port->dev));
+ return -EBUSY;
+ }
+
+ cxl_rr = alloc_region_ref(port, cxlr, cxled, cxld);
if (IS_ERR(cxl_rr)) {
dev_dbg(&cxlr->dev,
"%s: failed to allocate region reference\n",
@@ -945,12 +1188,34 @@ static int cxl_port_attach_region(struct cxl_port *port,
}
nr_targets_inc = true;
- rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr);
+ rc = cxl_rr_assign_decoder(port, cxlr, cxled, cxl_rr, cxld);
if (rc)
goto out_erase;
}
cxld = cxl_rr->decoder;
+ /*
+ * the number of targets should not exceed the target_count
+ * of the decoder
+ */
+ if (is_switch_decoder(&cxld->dev)) {
+ struct cxl_switch_decoder *cxlsd;
+
+ cxlsd = to_cxl_switch_decoder(&cxld->dev);
+ if (cxl_rr->nr_targets > cxlsd->nr_targets) {
+ dev_dbg(&cxlr->dev,
+ "%s:%s %s add: %s:%s @ %d overflows targets: %d\n",
+ dev_name(port->uport_dev), dev_name(&port->dev),
+ dev_name(&cxld->dev), dev_name(&cxlmd->dev),
+ dev_name(&cxled->cxld.dev), pos,
+ cxlsd->nr_targets);
+ rc = -ENXIO;
+ goto out_erase;
+ }
+ }
+
+ cxl_region_set_lock(cxlr, cxld);
+
rc = cxl_rr_ep_add(cxl_rr, cxled);
if (rc) {
dev_dbg(&cxlr->dev,
@@ -986,7 +1251,7 @@ static void cxl_port_detach_region(struct cxl_port *port,
struct cxl_region_ref *cxl_rr;
struct cxl_ep *ep = NULL;
- lockdep_assert_held_write(&cxl_region_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.region);
cxl_rr = cxl_rr_load(port, cxlr);
if (!cxl_rr)
@@ -1060,12 +1325,56 @@ static int check_last_peer(struct cxl_endpoint_decoder *cxled,
return 0;
}
+static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
+{
+ struct cxl_port *port = to_cxl_port(cxld->dev.parent);
+ struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
+ unsigned int interleave_mask;
+ u8 eiw;
+ u16 eig;
+ int high_pos, low_pos;
+
+ if (!test_bit(iw, &cxlhdm->iw_cap_mask))
+ return -ENXIO;
+ /*
+ * Per CXL specification r3.1(8.2.4.20.13 Decoder Protection),
+ * if eiw < 8:
+ * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + 8 + eiw]
+ * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0]
+ *
+ * When eiw is 0, all bits of HPAOFFSET[51:0] are used and there are no
+ * interleave bits.
+ *
+ * if eiw >= 8:
+ * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + eiw] / 3
+ * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0]
+ *
+ * When eiw is 8, all bits of HPAOFFSET[51:0] are used and there are no
+ * interleave bits.
+ */
+ ways_to_eiw(iw, &eiw);
+ if (eiw == 0 || eiw == 8)
+ return 0;
+
+ granularity_to_eig(ig, &eig);
+ if (eiw > 8)
+ high_pos = eiw + eig - 1;
+ else
+ high_pos = eiw + eig + 7;
+ low_pos = eig + 8;
+ interleave_mask = GENMASK(high_pos, low_pos);
+ if (interleave_mask & ~cxlhdm->interleave_mask)
+ return -ENXIO;
+
+ return 0;
+}
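Worked example with illustrative parameters: iw = 4 and ig = 256 encode to eiw = 2 and eig = 0, so high_pos = 2 + 0 + 7 = 9, low_pos = 0 + 8 = 8, and interleave_mask = GENMASK(9, 8); the check then requires the HDM decoder to support routing on HPA bits [9:8], i.e. those bits must be present in cxlhdm->interleave_mask.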
+
static int cxl_port_setup_targets(struct cxl_port *port,
struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled)
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
- int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos;
+ int parent_iw, parent_ig, ig, iw, rc, pos = cxled->pos;
struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
@@ -1073,6 +1382,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
struct cxl_region_params *p = &cxlr->params;
struct cxl_decoder *cxld = cxl_rr->decoder;
struct cxl_switch_decoder *cxlsd;
+ struct cxl_port *iter = port;
u16 eig, peig;
u8 eiw, peiw;
@@ -1089,16 +1399,26 @@ static int cxl_port_setup_targets(struct cxl_port *port,
cxlsd = to_cxl_switch_decoder(&cxld->dev);
if (cxl_rr->nr_targets_set) {
- int i, distance;
+ int i, distance = 1;
+ struct cxl_region_ref *cxl_rr_iter;
/*
- * Passthrough decoders impose no distance requirements between
- * peers
+ * The "distance" between peer downstream ports represents which
+ * endpoint positions in the region interleave a given port can
+ * host.
+ *
+ * For example, at the root of a hierarchy the distance is
+ * always 1 as every index targets a different host-bridge. At
+ * each subsequent switch level those ports map every Nth region
+ * position, where N is the width of the switch (the distance).
*/
- if (cxl_rr->nr_targets == 1)
- distance = 0;
- else
- distance = p->nr_targets / cxl_rr->nr_targets;
+ do {
+ cxl_rr_iter = cxl_rr_load(iter, cxlr);
+ distance *= cxl_rr_iter->nr_targets;
+ iter = to_cxl_port(iter->dev.parent);
+ } while (!is_cxl_root(iter));
+ distance *= cxlrd->cxlsd.cxld.interleave_ways;
+
for (i = 0; i < cxl_rr->nr_targets_set; i++)
if (ep->dport == cxlsd->target[i]) {
rc = check_last_peer(cxled, ep, cxl_rr,
@@ -1111,7 +1431,14 @@ static int cxl_port_setup_targets(struct cxl_port *port,
}
if (is_cxl_root(parent_port)) {
- parent_ig = cxlrd->cxlsd.cxld.interleave_granularity;
+ /*
+ * Root decoder IG is always set to value in CFMWS which
+ * may be different than this region's IG. We can use the
+ * region's IG here since interleave_granularity_store()
+ * does not allow interleaved host-bridges with
+ * root IG != region IG.
+ */
+ parent_ig = p->interleave_granularity;
parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
/*
* For purposes of address bit routing, use power-of-2 math for
@@ -1154,16 +1481,15 @@ static int cxl_port_setup_targets(struct cxl_port *port,
}
/*
- * If @parent_port is masking address bits, pick the next unused address
- * bit to route @port's targets.
+ * Interleave granularity is a multiple of @parent_port granularity.
+ * Multiplier is the parent port interleave ways.
*/
- if (parent_iw > 1 && cxl_rr->nr_targets > 1) {
- u32 address_bit = max(peig + peiw, eiw + peig);
-
- eig = address_bit - eiw + 1;
- } else {
- eiw = peiw;
- eig = peig;
+ rc = granularity_to_eig(parent_ig * parent_iw, &eig);
+ if (rc) {
+ dev_dbg(&cxlr->dev,
+ "%s: invalid granularity calculation (%d * %d)\n",
+ dev_name(&parent_port->dev), parent_ig, parent_iw);
+ return rc;
}
rc = eig_to_granularity(eig, &ig);
@@ -1174,11 +1500,18 @@ static int cxl_port_setup_targets(struct cxl_port *port,
return rc;
}
+ if (iw > 8 || iw > cxlsd->nr_targets) {
+ dev_dbg(&cxlr->dev,
+ "%s:%s:%s: ways: %d overflows targets: %d\n",
+ dev_name(port->uport_dev), dev_name(&port->dev),
+ dev_name(&cxld->dev), iw, cxlsd->nr_targets);
+ return -ENXIO;
+ }
+
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
if (cxld->interleave_ways != iw ||
- cxld->interleave_granularity != ig ||
- cxld->hpa_range.start != p->res->start ||
- cxld->hpa_range.end != p->res->end ||
+ (iw > 1 && cxld->interleave_granularity != ig) ||
+ !spa_maps_hpa(p, &cxld->hpa_range) ||
((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
dev_err(&cxlr->dev,
"%s:%s %s expected iw: %d ig: %d %pr\n",
@@ -1189,13 +1522,20 @@ static int cxl_port_setup_targets(struct cxl_port *port,
dev_name(port->uport_dev), dev_name(&port->dev),
__func__, cxld->interleave_ways,
cxld->interleave_granularity,
- (cxld->flags & CXL_DECODER_F_ENABLE) ?
- "enabled" :
- "disabled",
+ str_enabled_disabled(cxld->flags & CXL_DECODER_F_ENABLE),
cxld->hpa_range.start, cxld->hpa_range.end);
return -ENXIO;
}
} else {
+ rc = check_interleave_cap(cxld, iw, ig);
+ if (rc) {
+ dev_dbg(&cxlr->dev,
+ "%s:%s iw: %d ig: %d is not supported\n",
+ dev_name(port->uport_dev),
+ dev_name(&port->dev), iw, ig);
+ return rc;
+ }
+
cxld->interleave_ways = iw;
cxld->interleave_granularity = ig;
cxld->hpa_range = (struct range) {
@@ -1222,11 +1562,12 @@ add_target:
cxl_rr->nr_targets_set);
return -ENXIO;
}
- } else
+ } else {
cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
- inc = 1;
+ cxlsd->cxld.target_map[cxl_rr->nr_targets_set] = ep->dport->port_id;
+ }
+ cxl_rr->nr_targets_set++;
out_target_set:
- cxl_rr->nr_targets_set += inc;
dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n",
dev_name(port->uport_dev), dev_name(&port->dev),
cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport_dev),
@@ -1395,10 +1736,13 @@ static int cxl_region_attach_position(struct cxl_region *cxlr,
const struct cxl_dport *dport, int pos)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
+ struct cxl_decoder *cxld = &cxlsd->cxld;
+ int iw = cxld->interleave_ways;
struct cxl_port *iter;
int rc;
- if (cxlrd->calc_hb(cxlrd, pos) != dport) {
+ if (dport != cxlrd->cxlsd.target[pos % iw]) {
dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
dev_name(&cxlrd->cxlsd.cxld.dev));
@@ -1459,126 +1803,144 @@ static int cxl_region_attach_auto(struct cxl_region *cxlr,
return 0;
}
-static struct cxl_port *next_port(struct cxl_port *port)
+static int cmp_interleave_pos(const void *a, const void *b)
{
- if (!port->parent_dport)
- return NULL;
- return port->parent_dport->port;
+ struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
+ struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
+
+ return cxled_a->pos - cxled_b->pos;
}
-static int decoder_match_range(struct device *dev, void *data)
+static int match_switch_decoder_by_range(struct device *dev,
+ const void *data)
{
- struct cxl_endpoint_decoder *cxled = data;
struct cxl_switch_decoder *cxlsd;
+ const struct range *r1, *r2 = data;
+
if (!is_switch_decoder(dev))
return 0;
cxlsd = to_cxl_switch_decoder(dev);
- return range_contains(&cxlsd->cxld.hpa_range, &cxled->cxld.hpa_range);
-}
+ r1 = &cxlsd->cxld.hpa_range;
-static void find_positions(const struct cxl_switch_decoder *cxlsd,
- const struct cxl_port *iter_a,
- const struct cxl_port *iter_b, int *a_pos,
- int *b_pos)
-{
- int i;
-
- for (i = 0, *a_pos = -1, *b_pos = -1; i < cxlsd->nr_targets; i++) {
- if (cxlsd->target[i] == iter_a->parent_dport)
- *a_pos = i;
- else if (cxlsd->target[i] == iter_b->parent_dport)
- *b_pos = i;
- if (*a_pos >= 0 && *b_pos >= 0)
- break;
- }
+ if (is_root_decoder(dev))
+ return range_contains(r1, r2);
+ return (r1->start == r2->start && r1->end == r2->end);
}
-static int cmp_decode_pos(const void *a, const void *b)
+static int find_pos_and_ways(struct cxl_port *port, struct range *range,
+ int *pos, int *ways)
{
- struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
- struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
- struct cxl_memdev *cxlmd_a = cxled_to_memdev(cxled_a);
- struct cxl_memdev *cxlmd_b = cxled_to_memdev(cxled_b);
- struct cxl_port *port_a = cxled_to_port(cxled_a);
- struct cxl_port *port_b = cxled_to_port(cxled_b);
- struct cxl_port *iter_a, *iter_b, *port = NULL;
struct cxl_switch_decoder *cxlsd;
+ struct cxl_port *parent;
struct device *dev;
- int a_pos, b_pos;
- unsigned int seq;
-
- /* Exit early if any prior sorting failed */
- if (cxled_a->pos < 0 || cxled_b->pos < 0)
- return 0;
+ int rc = -ENXIO;
- /*
- * Walk up the hierarchy to find a shared port, find the decoder that
- * maps the range, compare the relative position of those dport
- * mappings.
- */
- for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) {
- struct cxl_port *next_a, *next_b;
+ parent = parent_port_of(port);
+ if (!parent)
+ return rc;
- next_a = next_port(iter_a);
- if (!next_a)
- break;
+ dev = device_find_child(&parent->dev, range,
+ match_switch_decoder_by_range);
+ if (!dev) {
+ dev_err(port->uport_dev,
+ "failed to find decoder mapping %#llx-%#llx\n",
+ range->start, range->end);
+ return rc;
+ }
+ cxlsd = to_cxl_switch_decoder(dev);
+ *ways = cxlsd->cxld.interleave_ways;
- for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) {
- next_b = next_port(iter_b);
- if (next_a != next_b)
- continue;
- port = next_a;
+ for (int i = 0; i < *ways; i++) {
+ if (cxlsd->target[i] == port->parent_dport) {
+ *pos = i;
+ rc = 0;
break;
}
-
- if (port)
- break;
}
+ put_device(dev);
- if (!port) {
- dev_err(cxlmd_a->dev.parent,
- "failed to find shared port with %s\n",
- dev_name(cxlmd_b->dev.parent));
- goto err;
- }
+ if (rc)
+ dev_err(port->uport_dev,
+ "failed to find %s:%s in target list of %s\n",
+ dev_name(&port->dev),
+ dev_name(port->parent_dport->dport_dev),
+ dev_name(&cxlsd->cxld.dev));
- dev = device_find_child(&port->dev, cxled_a, decoder_match_range);
- if (!dev) {
- struct range *range = &cxled_a->cxld.hpa_range;
+ return rc;
+}
- dev_err(port->uport_dev,
- "failed to find decoder that maps %#llx-%#llx\n",
- range->start, range->end);
- goto err;
- }
+/**
+ * cxl_calc_interleave_pos() - calculate an endpoint position in a region
+ * @cxled: endpoint decoder member of given region
+ *
+ * The endpoint position is calculated by traversing the topology from
+ * the endpoint to the root decoder and iteratively applying this
+ * calculation:
+ *
+ * position = position * parent_ways + parent_pos;
+ *
+ * ...where @position is inferred from switch and root decoder target lists.
+ *
+ * Return: position >= 0 on success
+ * -ENXIO on failure
+ */
+static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_port *iter, *port = cxled_to_port(cxled);
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct range *range = &cxled->cxld.hpa_range;
+ int parent_ways = 0, parent_pos = 0, pos = 0;
+ int rc;
- cxlsd = to_cxl_switch_decoder(dev);
- do {
- seq = read_seqbegin(&cxlsd->target_lock);
- find_positions(cxlsd, iter_a, iter_b, &a_pos, &b_pos);
- } while (read_seqretry(&cxlsd->target_lock, seq));
+ /*
+ * Example: the expected interleave order of the 4-way region shown
+ * below is: mem0, mem2, mem1, mem3
+ *
+ *                  root_port
+ *                 /         \
+ *      host_bridge_0       host_bridge_1
+ *        |        |          |        |
+ *       mem0     mem1       mem2     mem3
+ *
+ * In the example the calculator will iterate twice. The first iteration
+ * uses the mem position in the host-bridge and the ways of the host-
+ * bridge to generate the first, or local, position. The second
+ * iteration uses the host-bridge position in the root_port and the ways
+ * of the root_port to refine the position.
+ *
+ * A trace of the calculation per endpoint looks like this:
+ * mem0: pos = 0 * 2 + 0    mem2: pos = 0 * 2 + 0
+ *       pos = 0 * 2 + 0          pos = 0 * 2 + 1
+ *       pos: 0                   pos: 1
+ *
+ * mem1: pos = 0 * 2 + 1    mem3: pos = 0 * 2 + 1
+ *       pos = 1 * 2 + 0          pos = 1 * 2 + 1
+ *       pos: 2                   pos: 3
+ *
+ * Note that while this example is simple, the method applies to more
+ * complex topologies, including those with switches.
+ */
- put_device(dev);
+ /* Iterate from endpoint to root_port refining the position */
+ for (iter = port; iter; iter = parent_port_of(iter)) {
+ if (is_cxl_root(iter))
+ break;
- if (a_pos < 0 || b_pos < 0) {
- dev_err(port->uport_dev,
- "failed to find shared decoder for %s and %s\n",
- dev_name(cxlmd_a->dev.parent),
- dev_name(cxlmd_b->dev.parent));
- goto err;
+ rc = find_pos_and_ways(iter, range, &parent_pos, &parent_ways);
+ if (rc)
+ return rc;
+
+ pos = pos * parent_ways + parent_pos;
}
- dev_dbg(port->uport_dev, "%s comes %s %s\n",
- dev_name(cxlmd_a->dev.parent),
- a_pos - b_pos < 0 ? "before" : "after",
- dev_name(cxlmd_b->dev.parent));
+ dev_dbg(&cxlmd->dev,
+ "decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n",
+ dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent),
+ dev_name(&port->dev), range->start, range->end, pos);
- return a_pos - b_pos;
-err:
- cxled_a->pos = -1;
- return 0;
+ return pos;
}
static int cxl_region_sort_targets(struct cxl_region *cxlr)
@@ -1586,22 +1948,21 @@ static int cxl_region_sort_targets(struct cxl_region *cxlr)
struct cxl_region_params *p = &cxlr->params;
int i, rc = 0;
- sort(p->targets, p->nr_targets, sizeof(p->targets[0]), cmp_decode_pos,
- NULL);
-
for (i = 0; i < p->nr_targets; i++) {
struct cxl_endpoint_decoder *cxled = p->targets[i];
+ cxled->pos = cxl_calc_interleave_pos(cxled);
/*
- * Record that sorting failed, but still continue to restore
- * cxled->pos with its ->targets[] position so that follow-on
- * code paths can reliably do p->targets[cxled->pos] to
- * self-reference their entry.
+ * Record that sorting failed, but still continue to calc
+ * cxled->pos so that follow-on code paths can reliably
+ * do p->targets[cxled->pos] to self-reference their entry.
*/
if (cxled->pos < 0)
rc = -ENXIO;
- cxled->pos = i;
}
+ /* Keep the cxlr target list in interleave position order */
+ sort(p->targets, p->nr_targets, sizeof(p->targets[0]),
+ cmp_interleave_pos, NULL);
dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
return rc;
@@ -1612,31 +1973,49 @@ static int cxl_region_attach(struct cxl_region *cxlr,
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_region_params *p = &cxlr->params;
struct cxl_port *ep_port, *root_port;
struct cxl_dport *dport;
int rc = -ENXIO;
- if (cxled->mode != cxlr->mode) {
- dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
- dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
- return -EINVAL;
+ rc = check_interleave_cap(&cxled->cxld, p->interleave_ways,
+ p->interleave_granularity);
+ if (rc) {
+ dev_dbg(&cxlr->dev, "%s iw: %d ig: %d is not supported\n",
+ dev_name(&cxled->cxld.dev), p->interleave_ways,
+ p->interleave_granularity);
+ return rc;
}
- if (cxled->mode == CXL_DECODER_DEAD) {
+ if (cxled->part < 0) {
dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
return -ENODEV;
}
+ if (cxlds->part[cxled->part].mode != cxlr->mode) {
+ dev_dbg(&cxlr->dev, "%s region mode: %d mismatch\n",
+ dev_name(&cxled->cxld.dev), cxlr->mode);
+ return -EINVAL;
+ }
+
/* all full of members, or interleave config not established? */
if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
dev_dbg(&cxlr->dev, "region already active\n");
return -EBUSY;
- } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ }
+
+ if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
dev_dbg(&cxlr->dev, "interleave config missing\n");
return -ENXIO;
}
+ if (p->nr_targets >= p->interleave_ways) {
+ dev_dbg(&cxlr->dev, "region already has %d endpoints\n",
+ p->nr_targets);
+ return -EINVAL;
+ }
+
ep_port = cxled_to_port(cxled);
root_port = cxlrd_to_port(cxlrd);
dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
@@ -1660,16 +2039,18 @@ static int cxl_region_attach(struct cxl_region *cxlr,
return -ENXIO;
}
- if (resource_size(cxled->dpa_res) * p->interleave_ways !=
+ if (resource_size(cxled->dpa_res) * p->interleave_ways + p->cache_size !=
resource_size(p->res)) {
dev_dbg(&cxlr->dev,
- "%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n",
+ "%s:%s-size-%#llx * ways-%d + cache-%#llx != region-size-%#llx\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
(u64)resource_size(cxled->dpa_res), p->interleave_ways,
- (u64)resource_size(p->res));
+ (u64)p->cache_size, (u64)resource_size(p->res));
return -EINVAL;
}
+ cxl_region_perf_data_calculate(cxlr, cxled);
+
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
int i;
@@ -1710,6 +2091,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
* then the region is already committed.
*/
p->state = CXL_CONFIG_COMMIT;
+ cxl_region_shared_upstream_bandwidth_update(cxlr);
return 0;
}
@@ -1729,8 +2111,9 @@ static int cxl_region_attach(struct cxl_region *cxlr,
if (p->nr_targets == p->interleave_ways) {
rc = cxl_region_setup_targets(cxlr);
if (rc)
- goto err_decrement;
+ return rc;
p->state = CXL_CONFIG_ACTIVE;
+ cxl_region_shared_upstream_bandwidth_update(cxlr);
}
cxled->cxld.interleave_ways = p->interleave_ways;
@@ -1740,42 +2123,66 @@ static int cxl_region_attach(struct cxl_region *cxlr,
.end = p->res->end,
};
- return 0;
+ if (p->nr_targets != p->interleave_ways)
+ return 0;
-err_decrement:
- p->nr_targets--;
- cxled->pos = -1;
- p->targets[pos] = NULL;
- return rc;
+ /*
+ * Test the auto-discovery position calculator function
+ * against this successfully created user-defined region.
+ * A fail message here means that this interleave config
+ * will fail when presented as CXL_REGION_F_AUTO.
+ */
+ for (int i = 0; i < p->nr_targets; i++) {
+ struct cxl_endpoint_decoder *cxled = p->targets[i];
+ int test_pos;
+
+ test_pos = cxl_calc_interleave_pos(cxled);
+ dev_dbg(&cxled->cxld.dev,
+ "Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n",
+ (test_pos == cxled->pos) ? "success" : "fail",
+ test_pos, cxled->pos);
+ }
+
+ return 0;
}
-static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
+static struct cxl_region *
+__cxl_decoder_detach(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos,
+ enum cxl_detach_mode mode)
{
- struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
- struct cxl_region *cxlr = cxled->cxld.region;
struct cxl_region_params *p;
- int rc = 0;
- lockdep_assert_held_write(&cxl_region_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.region);
- if (!cxlr)
- return 0;
+ if (!cxled) {
+ p = &cxlr->params;
- p = &cxlr->params;
- get_device(&cxlr->dev);
+ if (pos >= p->interleave_ways) {
+ dev_dbg(&cxlr->dev, "position %d out of range %d\n",
+ pos, p->interleave_ways);
+ return NULL;
+ }
+
+ if (!p->targets[pos])
+ return NULL;
+ cxled = p->targets[pos];
+ } else {
+ cxlr = cxled->cxld.region;
+ if (!cxlr)
+ return NULL;
+ p = &cxlr->params;
+ }
+
+ if (mode == DETACH_INVALIDATE)
+ cxled->part = -1;
if (p->state > CXL_CONFIG_ACTIVE) {
- /*
- * TODO: tear down all impacted regions if a device is
- * removed out of order
- */
- rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
- if (rc)
- goto out;
+ cxl_region_decode_reset(cxlr, p->interleave_ways);
p->state = CXL_CONFIG_ACTIVE;
}
- for (iter = ep_port; !is_cxl_root(iter);
+ for (struct cxl_port *iter = cxled_to_port(cxled); !is_cxl_root(iter);
iter = to_cxl_port(iter->dev.parent))
cxl_port_detach_region(iter, cxlr, cxled);
@@ -1786,7 +2193,7 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
cxled->pos);
- goto out;
+ return NULL;
}
if (p->state == CXL_CONFIG_ACTIVE) {
@@ -1800,68 +2207,79 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
.end = -1,
};
- /* notify the region driver that one of its targets has departed */
- up_write(&cxl_region_rwsem);
- device_release_driver(&cxlr->dev);
- down_write(&cxl_region_rwsem);
-out:
- put_device(&cxlr->dev);
- return rc;
+ get_device(&cxlr->dev);
+ return cxlr;
+}
+
+/*
+ * Clean up a decoder's interest in a region. There are two cases to
+ * handle: removing an unknown @cxled from a known position in a region
+ * (detach_target()), or removing a known @cxled from an unknown @cxlr
+ * (cxld_unregister()).
+ *
+ * When the detachment finds a region, release the region driver.
+ */
+int cxl_decoder_detach(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos,
+ enum cxl_detach_mode mode)
+{
+ struct cxl_region *detach;
+
+ /* when the decoder is being destroyed lock unconditionally */
+ if (mode == DETACH_INVALIDATE) {
+ guard(rwsem_write)(&cxl_rwsem.region);
+ detach = __cxl_decoder_detach(cxlr, cxled, pos, mode);
+ } else {
+ int rc;
+
+ ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
+ return rc;
+ detach = __cxl_decoder_detach(cxlr, cxled, pos, mode);
+ }
+
+ if (detach) {
+ device_release_driver(&detach->dev);
+ put_device(&detach->dev);
+ }
+ return 0;
}
-void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
+static int __attach_target(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos,
+ unsigned int state)
{
- down_write(&cxl_region_rwsem);
- cxled->mode = CXL_DECODER_DEAD;
- cxl_region_detach(cxled);
- up_write(&cxl_region_rwsem);
+ int rc;
+
+ if (state == TASK_INTERRUPTIBLE) {
+ ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
+ return rc;
+ guard(rwsem_read)(&cxl_rwsem.dpa);
+ return cxl_region_attach(cxlr, cxled, pos);
+ }
+ guard(rwsem_write)(&cxl_rwsem.region);
+ guard(rwsem_read)(&cxl_rwsem.dpa);
+ return cxl_region_attach(cxlr, cxled, pos);
}
static int attach_target(struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled, int pos,
unsigned int state)
{
- int rc = 0;
+ int rc = __attach_target(cxlr, cxled, pos, state);
- if (state == TASK_INTERRUPTIBLE)
- rc = down_write_killable(&cxl_region_rwsem);
- else
- down_write(&cxl_region_rwsem);
- if (rc)
- return rc;
+ if (rc == 0)
+ return 0;
- down_read(&cxl_dpa_rwsem);
- rc = cxl_region_attach(cxlr, cxled, pos);
- up_read(&cxl_dpa_rwsem);
- up_write(&cxl_region_rwsem);
+ dev_warn(cxled->cxld.dev.parent, "failed to attach %s to %s: %d\n",
+ dev_name(&cxled->cxld.dev), dev_name(&cxlr->dev), rc);
return rc;
}
static int detach_target(struct cxl_region *cxlr, int pos)
{
- struct cxl_region_params *p = &cxlr->params;
- int rc;
-
- rc = down_write_killable(&cxl_region_rwsem);
- if (rc)
- return rc;
-
- if (pos >= p->interleave_ways) {
- dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
- p->interleave_ways);
- rc = -ENXIO;
- goto out;
- }
-
- if (!p->targets[pos]) {
- rc = 0;
- goto out;
- }
-
- rc = cxl_region_detach(p->targets[pos]);
-out:
- up_write(&cxl_region_rwsem);
- return rc;
+ return cxl_decoder_detach(cxlr, NULL, pos, DETACH_ONLY);
}
static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
@@ -1971,6 +2389,8 @@ static const struct attribute_group *region_groups[] = {
&cxl_base_attribute_group,
&cxl_region_group,
&cxl_region_target_group,
+ &cxl_region_access0_coordinate_group,
+ &cxl_region_access1_coordinate_group,
NULL,
};
@@ -2007,7 +2427,7 @@ bool is_cxl_region(struct device *dev)
{
return dev->type == &cxl_region_type;
}
-EXPORT_SYMBOL_NS_GPL(is_cxl_region, CXL);
+EXPORT_SYMBOL_NS_GPL(is_cxl_region, "CXL");
static struct cxl_region *to_cxl_region(struct device *dev)
{
@@ -2018,13 +2438,13 @@ static struct cxl_region *to_cxl_region(struct device *dev)
return container_of(dev, struct cxl_region, dev);
}
-static void unregister_region(void *dev)
+static void unregister_region(void *_cxlr)
{
- struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region *cxlr = _cxlr;
struct cxl_region_params *p = &cxlr->params;
int i;
- device_del(dev);
+ device_del(&cxlr->dev);
/*
* Now that region sysfs is shutdown, the parameter block is now
@@ -2035,7 +2455,7 @@ static void unregister_region(void *dev)
detach_target(cxlr, i);
cxl_region_iomem_release(cxlr);
- put_device(dev);
+ put_device(&cxlr->dev);
}
static struct lock_class_key cxl_region_key;
@@ -2064,10 +2484,92 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i
dev->bus = &cxl_bus_type;
dev->type = &cxl_region_type;
cxlr->id = id;
+ cxl_region_set_lock(cxlr, &cxlrd->cxlsd.cxld);
return cxlr;
}
+static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
+{
+ int cset = 0;
+ int rc;
+
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ if (cxlr->coord[i].read_bandwidth) {
+ node_update_perf_attrs(nid, &cxlr->coord[i], i);
+ cset++;
+ }
+ }
+
+ if (!cset)
+ return false;
+
+ rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access0_group());
+ if (rc)
+ dev_dbg(&cxlr->dev, "Failed to update access0 group\n");
+
+ rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access1_group());
+ if (rc)
+ dev_dbg(&cxlr->dev, "Failed to update access1 group\n");
+
+ return true;
+}
+
+static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
+ unsigned long action, void *arg)
+{
+ struct cxl_region *cxlr = container_of(nb, struct cxl_region,
+ node_notifier);
+ struct node_notify *nn = arg;
+ int nid = nn->nid;
+ int region_nid;
+
+ if (action != NODE_ADDED_FIRST_MEMORY)
+ return NOTIFY_DONE;
+
+ /*
+ * No need to hold cxl_rwsem.region; region parameters are stable
+ * within the cxl_region driver.
+ */
+ region_nid = phys_to_target_node(cxlr->params.res->start);
+ if (nid != region_nid)
+ return NOTIFY_DONE;
+
+ /* No action needed if node bit already set */
+ if (node_test_and_set(nid, nodemask_region_seen))
+ return NOTIFY_DONE;
+
+ if (!cxl_region_update_coordinates(cxlr, nid))
+ return NOTIFY_DONE;
+
+ return NOTIFY_OK;
+}
+
+static int cxl_region_calculate_adistance(struct notifier_block *nb,
+ unsigned long nid, void *data)
+{
+ struct cxl_region *cxlr = container_of(nb, struct cxl_region,
+ adist_notifier);
+ struct access_coordinate *perf;
+ int *adist = data;
+ int region_nid;
+
+ /*
+ * No need to hold cxl_rwsem.region; region parameters are stable
+ * within the cxl_region driver.
+ */
+ region_nid = phys_to_target_node(cxlr->params.res->start);
+ if (nid != region_nid)
+ return NOTIFY_OK;
+
+ perf = &cxlr->coord[ACCESS_COORDINATE_CPU];
+
+ if (mt_perf_to_adistance(perf, adist))
+ return NOTIFY_OK;
+
+ return NOTIFY_STOP;
+}
+
/**
* devm_cxl_add_region - Adds a region to a decoder
* @cxlrd: root decoder
@@ -2083,7 +2585,7 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i
*/
static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
int id,
- enum cxl_decoder_mode mode,
+ enum cxl_partition_mode mode,
enum cxl_decoder_type type)
{
struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
@@ -2091,15 +2593,6 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
struct device *dev;
int rc;
- switch (mode) {
- case CXL_DECODER_RAM:
- case CXL_DECODER_PMEM:
- break;
- default:
- dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
- return ERR_PTR(-EINVAL);
- }
-
cxlr = cxl_region_alloc(cxlrd, id);
if (IS_ERR(cxlr))
return cxlr;
@@ -2146,10 +2639,19 @@ static ssize_t create_ram_region_show(struct device *dev,
}
static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
- enum cxl_decoder_mode mode, int id)
+ enum cxl_partition_mode mode, int id)
{
int rc;
+ switch (mode) {
+ case CXL_PARTMODE_RAM:
+ case CXL_PARTMODE_PMEM:
+ break;
+ default:
+ dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
+ return ERR_PTR(-EINVAL);
+ }
+
rc = memregion_alloc(GFP_KERNEL);
if (rc < 0)
return ERR_PTR(rc);
@@ -2162,9 +2664,8 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM);
}
-static ssize_t create_pmem_region_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t len)
+static ssize_t create_region_store(struct device *dev, const char *buf,
+ size_t len, enum cxl_partition_mode mode)
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
struct cxl_region *cxlr;
@@ -2174,31 +2675,26 @@ static ssize_t create_pmem_region_store(struct device *dev,
if (rc != 1)
return -EINVAL;
- cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id);
+ cxlr = __create_region(cxlrd, mode, id);
if (IS_ERR(cxlr))
return PTR_ERR(cxlr);
return len;
}
+
+static ssize_t create_pmem_region_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return create_region_store(dev, buf, len, CXL_PARTMODE_PMEM);
+}
DEVICE_ATTR_RW(create_pmem_region);
static ssize_t create_ram_region_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
- struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
- struct cxl_region *cxlr;
- int rc, id;
-
- rc = sscanf(buf, "region%d\n", &id);
- if (rc != 1)
- return -EINVAL;
-
- cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id);
- if (IS_ERR(cxlr))
- return PTR_ERR(cxlr);
-
- return len;
+ return create_region_store(dev, buf, len, CXL_PARTMODE_RAM);
}
DEVICE_ATTR_RW(create_ram_region);
@@ -2208,17 +2704,13 @@ static ssize_t region_show(struct device *dev, struct device_attribute *attr,
struct cxl_decoder *cxld = to_cxl_decoder(dev);
ssize_t rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
return rc;
if (cxld->region)
- rc = sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
- else
- rc = sysfs_emit(buf, "\n");
- up_read(&cxl_region_rwsem);
-
- return rc;
+ return sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
+ return sysfs_emit(buf, "\n");
}
DEVICE_ATTR_RO(region);
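The conversion of region_show() above is the locking idiom this patch applies throughout the file: the interruptible rwsem acquisition and the explicit up_read()/error paths collapse into a scoped guard that is released on every return. A minimal sketch of that idiom, assuming ACQUIRE()/ACQUIRE_ERR() behave as the uses in this file show (the attribute name is hypothetical):

static ssize_t example_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	ssize_t rc;

	/* Scoped, interruptible read lock; dropped automatically on return */
	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
		return rc;	/* error (e.g. interrupted wait) without an unlock path */

	/* ...emit state that is only stable while the lock is held... */
	return sysfs_emit(buf, "ok\n");
}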
@@ -2283,7 +2775,7 @@ bool is_cxl_pmem_region(struct device *dev)
{
return dev->type == &cxl_pmem_region_type;
}
-EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, CXL);
+EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, "CXL");
struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
{
@@ -2292,11 +2784,11 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
return NULL;
return container_of(dev, struct cxl_pmem_region, dev);
}
-EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL);
+EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, "CXL");
struct cxl_poison_context {
struct cxl_port *port;
- enum cxl_decoder_mode mode;
+ int part;
u64 offset;
};
@@ -2304,47 +2796,45 @@ static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd,
struct cxl_poison_context *ctx)
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ const struct resource *res;
+ struct resource *p, *last;
u64 offset, length;
int rc = 0;
+ if (ctx->part < 0)
+ return 0;
+
/*
- * Collect poison for the remaining unmapped resources
- * after poison is collected by committed endpoints.
- *
- * Knowing that PMEM must always follow RAM, get poison
- * for unmapped resources based on the last decoder's mode:
- * ram: scan remains of ram range, then any pmem range
- * pmem: scan remains of pmem range
+ * Collect poison for the remaining unmapped resources after
+ * poison is collected by committed endpoint decoders.
*/
-
- if (ctx->mode == CXL_DECODER_RAM) {
- offset = ctx->offset;
- length = resource_size(&cxlds->ram_res) - offset;
+ for (int i = ctx->part; i < cxlds->nr_partitions; i++) {
+ res = &cxlds->part[i].res;
+ for (p = res->child, last = NULL; p; p = p->sibling)
+ last = p;
+ if (last)
+ offset = last->end + 1;
+ else
+ offset = res->start;
+ length = res->end - offset + 1;
+ if (!length)
+ break;
rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
- if (rc == -EFAULT)
- rc = 0;
+ if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM)
+ continue;
if (rc)
- return rc;
- }
- if (ctx->mode == CXL_DECODER_PMEM) {
- offset = ctx->offset;
- length = resource_size(&cxlds->dpa_res) - offset;
- if (!length)
- return 0;
- } else if (resource_size(&cxlds->pmem_res)) {
- offset = cxlds->pmem_res.start;
- length = resource_size(&cxlds->pmem_res);
- } else {
- return 0;
+ break;
}
- return cxl_mem_get_poison(cxlmd, offset, length, NULL);
+ return rc;
}
static int poison_by_decoder(struct device *dev, void *arg)
{
struct cxl_poison_context *ctx = arg;
struct cxl_endpoint_decoder *cxled;
+ enum cxl_partition_mode mode;
+ struct cxl_dev_state *cxlds;
struct cxl_memdev *cxlmd;
u64 offset, length;
int rc = 0;
@@ -2353,27 +2843,18 @@ static int poison_by_decoder(struct device *dev, void *arg)
return rc;
cxled = to_cxl_endpoint_decoder(dev);
- if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
+ if (!cxled->dpa_res)
return rc;
- /*
- * Regions are only created with single mode decoders: pmem or ram.
- * Linux does not support mixed mode decoders. This means that
- * reading poison per endpoint decoder adheres to the requirement
- * that poison reads of pmem and ram must be separated.
- * CXL 3.0 Spec 8.2.9.8.4.1
- */
- if (cxled->mode == CXL_DECODER_MIXED) {
- dev_dbg(dev, "poison list read unsupported in mixed mode\n");
- return rc;
- }
-
cxlmd = cxled_to_memdev(cxled);
+ cxlds = cxlmd->cxlds;
+ mode = cxlds->part[cxled->part].mode;
+
if (cxled->skip) {
offset = cxled->dpa_res->start - cxled->skip;
length = cxled->skip;
rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
- if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
+ if (rc == -EFAULT && mode == CXL_PARTMODE_RAM)
rc = 0;
if (rc)
return rc;
@@ -2382,7 +2863,7 @@ static int poison_by_decoder(struct device *dev, void *arg)
offset = cxled->dpa_res->start;
length = cxled->dpa_res->end - offset + 1;
rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region);
- if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
+ if (rc == -EFAULT && mode == CXL_PARTMODE_RAM)
rc = 0;
if (rc)
return rc;
@@ -2390,7 +2871,7 @@ static int poison_by_decoder(struct device *dev, void *arg)
/* Iterate until commit_end is reached */
if (cxled->cxld.id == ctx->port->commit_end) {
ctx->offset = cxled->dpa_res->end + 1;
- ctx->mode = cxled->mode;
+ ctx->part = cxled->part;
return 1;
}
@@ -2402,12 +2883,9 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port)
struct cxl_poison_context ctx;
int rc = 0;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
- return rc;
-
ctx = (struct cxl_poison_context) {
- .port = port
+ .port = port,
+ .part = -1,
};
rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder);
@@ -2415,37 +2893,352 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port)
rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev),
&ctx);
- up_read(&cxl_region_rwsem);
return rc;
}
+struct cxl_dpa_to_region_context {
+ struct cxl_region *cxlr;
+ u64 dpa;
+};
+
+static int __cxl_dpa_to_region(struct device *dev, void *arg)
+{
+ struct cxl_dpa_to_region_context *ctx = arg;
+ struct cxl_endpoint_decoder *cxled;
+ struct cxl_region *cxlr;
+ u64 dpa = ctx->dpa;
+
+ if (!is_endpoint_decoder(dev))
+ return 0;
+
+ cxled = to_cxl_endpoint_decoder(dev);
+ if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res))
+ return 0;
+
+ if (!cxl_resource_contains_addr(cxled->dpa_res, dpa))
+ return 0;
+
+ /*
+ * Stop the region search (return 1) when an endpoint mapping is
+ * found. The region may not be fully constructed so offering
+ * the cxlr in the context structure is not guaranteed.
+ */
+ cxlr = cxled->cxld.region;
+ if (cxlr)
+ dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
+ dev_name(&cxlr->dev));
+ else
+ dev_dbg(dev, "dpa:0x%llx mapped in endpoint:%s\n", dpa,
+ dev_name(dev));
+
+ ctx->cxlr = cxlr;
+
+ return 1;
+}
+
+struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa)
+{
+ struct cxl_dpa_to_region_context ctx;
+ struct cxl_port *port;
+
+ ctx = (struct cxl_dpa_to_region_context) {
+ .dpa = dpa,
+ };
+ port = cxlmd->endpoint;
+ if (port && is_cxl_endpoint(port) && cxl_num_decoders_committed(port))
+ device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);
+
+ return ctx.cxlr;
+}
+
+static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ int gran = p->interleave_granularity;
+ int ways = p->interleave_ways;
+ u64 offset;
+
+ /* Is the hpa in an expected chunk for its pos(-ition) */
+ offset = hpa - p->res->start;
+ offset = do_div(offset, gran * ways);
+ if ((offset >= pos * gran) && (offset < (pos + 1) * gran))
+ return true;
+
+ dev_dbg(&cxlr->dev,
+ "Addr trans fail: hpa 0x%llx not in expected chunk\n", hpa);
+
+ return false;
+}
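To make the chunk check concrete (a sketch with made-up numbers, not kernel code): in a 4-way region with 256-byte granularity, every 1 KiB stripe of HPA space is divided into four 256-byte chunks, so an HPA offset belongs to position 3 only if its offset modulo 1024 falls in [768, 1024). A standalone userspace check of that arithmetic:

#include <stdbool.h>
#include <stdint.h>

/* Userspace illustration of the modulo chunk check in cxl_is_hpa_in_chunk() */
static bool hpa_in_chunk(uint64_t hpa_offset, int gran, int ways, int pos)
{
	uint64_t off = hpa_offset % ((uint64_t)gran * ways);

	return off >= (uint64_t)pos * gran && off < (uint64_t)(pos + 1) * gran;
}

/* hpa_in_chunk(0x1300, 256, 4, 3) is true: 0x1300 % 0x400 == 0x300 (768) */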
+
+#define CXL_POS_ZERO 0
+/**
+ * cxl_validate_translation_params - validate interleave translation parameters
+ * @eiw: encoded interleave ways
+ * @eig: encoded interleave granularity
+ * @pos: position in interleave
+ *
+ * Callers pass CXL_POS_ZERO when no position parameter needs validating.
+ *
+ * Returns: 0 on success, -EINVAL on first invalid parameter
+ */
+int cxl_validate_translation_params(u8 eiw, u16 eig, int pos)
+{
+ int ways, gran;
+
+ if (eiw_to_ways(eiw, &ways)) {
+ pr_debug("%s: invalid eiw=%u\n", __func__, eiw);
+ return -EINVAL;
+ }
+ if (eig_to_granularity(eig, &gran)) {
+ pr_debug("%s: invalid eig=%u\n", __func__, eig);
+ return -EINVAL;
+ }
+ if (pos < 0 || pos >= ways) {
+ pr_debug("%s: invalid pos=%d for ways=%u\n", __func__, pos,
+ ways);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_FOR_MODULES(cxl_validate_translation_params, "cxl_translate");
+
+u64 cxl_calculate_dpa_offset(u64 hpa_offset, u8 eiw, u16 eig)
+{
+ u64 dpa_offset, bits_lower, bits_upper, temp;
+ int ret;
+
+ ret = cxl_validate_translation_params(eiw, eig, CXL_POS_ZERO);
+ if (ret)
+ return ULLONG_MAX;
+
+ /*
+ * DPA offset: CXL Spec 3.2 Section 8.2.4.20.13
+ * Lower bits [IG+7:0] pass through unchanged
+ * (eiw < 8)
+ * Per spec: DPAOffset[51:IG+8] = (HPAOffset[51:IG+IW+8] >> IW)
+ * Clear the position bits to isolate upper section, then
+ * reverse the left shift by eiw that occurred during DPA->HPA
+ * (eiw >= 8)
+ * Per spec: DPAOffset[51:IG+8] = HPAOffset[51:IG+IW] / 3
+ * Extract upper bits from the correct bit range and divide by 3
+ * to recover the original DPA upper bits
+ */
+ bits_lower = hpa_offset & GENMASK_ULL(eig + 7, 0);
+ if (eiw < 8) {
+ temp = hpa_offset &= ~GENMASK_ULL(eig + eiw + 8 - 1, 0);
+ dpa_offset = temp >> eiw;
+ } else {
+ bits_upper = div64_u64(hpa_offset >> (eig + eiw), 3);
+ dpa_offset = bits_upper << (eig + 8);
+ }
+ dpa_offset |= bits_lower;
+
+ return dpa_offset;
+}
+EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_dpa_offset, "cxl_translate");
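A worked example of the eiw < 8 branch (numbers chosen purely for illustration): with eig = 0 (256-byte granularity) and eiw = 2 (4 ways), hpa_offset 0x1300 keeps its low byte (0x00), drops the two position bits at [9:8], and shifts the remaining upper bits right by 2, yielding dpa_offset 0x400. A self-contained userspace check of that arithmetic:

#include <assert.h>
#include <stdint.h>

/* Mirrors the eiw < 8 math of cxl_calculate_dpa_offset(); sketch only */
static uint64_t dpa_offset_lt8(uint64_t hpa_offset, uint8_t eiw, uint16_t eig)
{
	uint64_t lower = hpa_offset & ((1ULL << (eig + 8)) - 1);
	uint64_t upper = hpa_offset & ~((1ULL << (eig + eiw + 8)) - 1);

	return (upper >> eiw) | lower;
}

int main(void)
{
	assert(dpa_offset_lt8(0x1300, 2, 0) == 0x400);
	return 0;
}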
+
+int cxl_calculate_position(u64 hpa_offset, u8 eiw, u16 eig)
+{
+ unsigned int ways = 0;
+ u64 shifted, rem;
+ int pos, ret;
+
+ ret = cxl_validate_translation_params(eiw, eig, CXL_POS_ZERO);
+ if (ret)
+ return ret;
+
+ if (!eiw)
+ /* position is 0 if no interleaving */
+ return 0;
+
+ /*
+ * Interleave position: CXL Spec 3.2 Section 8.2.4.20.13
+ * eiw < 8
+ * Position is in the IW bits at HPA_OFFSET[IG+8+IW-1:IG+8].
+ * Per spec "remove IW bits starting with bit position IG+8"
+ * eiw >= 8
+ * Position is not explicitly stored in HPA_OFFSET bits. It is
+ * derived from the modulo operation of the upper bits using
+ * the total number of interleave ways.
+ */
+ if (eiw < 8) {
+ pos = (hpa_offset >> (eig + 8)) & GENMASK(eiw - 1, 0);
+ } else {
+ shifted = hpa_offset >> (eig + 8);
+ eiw_to_ways(eiw, &ways);
+ div64_u64_rem(shifted, ways, &rem);
+ pos = rem;
+ }
+
+ return pos;
+}
+EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_position, "cxl_translate");
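Continuing the same example: with eig = 0 and eiw = 2 the position lives in HPA offset bits [9:8], so hpa_offset 0x1300 decodes to pos = (0x1300 >> 8) & 0x3 = 3. A minimal check of the eiw < 8 branch (illustrative only):

#include <assert.h>
#include <stdint.h>

static int position_lt8(uint64_t hpa_offset, uint8_t eiw, uint16_t eig)
{
	return (hpa_offset >> (eig + 8)) & ((1U << eiw) - 1);
}

int main(void)
{
	assert(position_lt8(0x1300, 2, 0) == 3);
	return 0;
}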
+
+u64 cxl_calculate_hpa_offset(u64 dpa_offset, int pos, u8 eiw, u16 eig)
+{
+ u64 mask_upper, hpa_offset, bits_upper;
+ int ret;
+
+ ret = cxl_validate_translation_params(eiw, eig, pos);
+ if (ret)
+ return ULLONG_MAX;
+
+ /*
+ * The device position in the region interleave set was removed
+ * from the offset at HPA->DPA translation. To reconstruct the
+ * HPA, place the 'pos' in the offset.
+ *
+ * The placement of 'pos' in the HPA is determined by interleave
+ * ways and granularity and is defined in the CXL Spec 3.0 Section
+ * 8.2.4.19.13 Implementation Note: Device Decode Logic
+ */
+
+ mask_upper = GENMASK_ULL(51, eig + 8);
+
+ if (eiw < 8) {
+ hpa_offset = (dpa_offset & mask_upper) << eiw;
+ hpa_offset |= pos << (eig + 8);
+ } else {
+ bits_upper = (dpa_offset & mask_upper) >> (eig + 8);
+ bits_upper = bits_upper * 3;
+ hpa_offset = ((bits_upper << (eiw - 8)) + pos) << (eig + 8);
+ }
+
+ /* The lower bits remain unchanged */
+ hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0);
+
+ return hpa_offset;
+}
+EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_hpa_offset, "cxl_translate");
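Closing the loop on the running example: dpa_offset 0x400 at pos 3 with eiw = 2 and eig = 0 reconstructs hpa_offset 0x1300, i.e. for the eiw < 8 case this helper is the inverse of the two above. A short round-trip check (userspace sketch, not kernel code):

#include <assert.h>
#include <stdint.h>

static uint64_t hpa_offset_lt8(uint64_t dpa_offset, int pos, uint8_t eiw,
			       uint16_t eig)
{
	uint64_t lower = dpa_offset & ((1ULL << (eig + 8)) - 1);
	uint64_t upper = dpa_offset & ~((1ULL << (eig + 8)) - 1);

	return (upper << eiw) | ((uint64_t)pos << (eig + 8)) | lower;
}

int main(void)
{
	/* DPA offset 0x400 at position 3 maps back to HPA offset 0x1300 */
	assert(hpa_offset_lt8(0x400, 3, 2, 0) == 0x1300);
	return 0;
}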
+
+u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
+ u64 dpa)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_endpoint_decoder *cxled = NULL;
+ u64 dpa_offset, hpa_offset, hpa;
+ u16 eig = 0;
+ u8 eiw = 0;
+ int pos;
+
+ for (int i = 0; i < p->nr_targets; i++) {
+ if (cxlmd == cxled_to_memdev(p->targets[i])) {
+ cxled = p->targets[i];
+ break;
+ }
+ }
+ if (!cxled)
+ return ULLONG_MAX;
+
+ pos = cxled->pos;
+ ways_to_eiw(p->interleave_ways, &eiw);
+ granularity_to_eig(p->interleave_granularity, &eig);
+
+ dpa_offset = dpa - cxl_dpa_resource_start(cxled);
+ hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, eiw, eig);
+
+ /* Apply the hpa_offset to the region base address */
+ hpa = hpa_offset + p->res->start + p->cache_size;
+
+ /* Root decoder translation overrides typical modulo decode */
+ if (cxlrd->ops.hpa_to_spa)
+ hpa = cxlrd->ops.hpa_to_spa(cxlrd, hpa);
+
+ if (!cxl_resource_contains_addr(p->res, hpa)) {
+ dev_dbg(&cxlr->dev,
+ "Addr trans fail: hpa 0x%llx not in region\n", hpa);
+ return ULLONG_MAX;
+ }
+
+ /* Simple chunk check, by pos & gran, only applies to modulo decodes */
+ if (!cxlrd->ops.hpa_to_spa && !cxl_is_hpa_in_chunk(hpa, cxlr, pos))
+ return ULLONG_MAX;
+
+ return hpa;
+}
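Taken together, cxl_dpa_to_region() and cxl_dpa_to_hpa() provide the device-to-system direction of the translation, which is what a poison or error handler needs when a memdev reports a bad DPA. A hedged sketch of that flow (the helper name is hypothetical, error handling is elided, and the caller is assumed to hold whatever locking keeps the region stable, since cxl_dpa_to_region() does not take a reference):

/* Sketch: resolve a device-reported DPA to an SPA, if it is mapped */
static u64 dpa_to_spa_sketch(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_region *cxlr = cxl_dpa_to_region(cxlmd, dpa);

	if (!cxlr)
		return ULLONG_MAX;	/* DPA not mapped by any region */

	return cxl_dpa_to_hpa(cxlr, cxlmd, dpa);
}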
+
+struct dpa_result {
+ struct cxl_memdev *cxlmd;
+ u64 dpa;
+};
+
+static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset,
+ struct dpa_result *result)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+ struct cxl_endpoint_decoder *cxled;
+ u64 hpa, hpa_offset, dpa_offset;
+ u16 eig = 0;
+ u8 eiw = 0;
+ int pos;
+
+ lockdep_assert_held(&cxl_rwsem.region);
+ lockdep_assert_held(&cxl_rwsem.dpa);
+
+ /* Input validation ensures valid ways and gran */
+ granularity_to_eig(p->interleave_granularity, &eig);
+ ways_to_eiw(p->interleave_ways, &eiw);
+
+ /*
+ * If the root decoder has an SPA to CXL HPA callback, use it. Otherwise
+ * CXL HPA is assumed to equal SPA.
+ */
+ if (cxlrd->ops.spa_to_hpa) {
+ hpa = cxlrd->ops.spa_to_hpa(cxlrd, p->res->start + offset);
+ hpa_offset = hpa - p->res->start;
+ } else {
+ hpa_offset = offset;
+ }
+
+ pos = cxl_calculate_position(hpa_offset, eiw, eig);
+ if (pos < 0 || pos >= p->nr_targets) {
+ dev_dbg(&cxlr->dev, "Invalid position %d for %d targets\n",
+ pos, p->nr_targets);
+ return -ENXIO;
+ }
+
+ dpa_offset = cxl_calculate_dpa_offset(hpa_offset, eiw, eig);
+
+ /* Look-up and return the result: a memdev and a DPA */
+ for (int i = 0; i < p->nr_targets; i++) {
+ cxled = p->targets[i];
+ if (cxled->pos != pos)
+ continue;
+ result->cxlmd = cxled_to_memdev(cxled);
+ result->dpa = cxl_dpa_resource_start(cxled) + dpa_offset;
+
+ return 0;
+ }
+ dev_err(&cxlr->dev, "No device found for position %d\n", pos);
+
+ return -ENXIO;
+}
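region_offset_to_dpa_result() is the system-to-device inverse of the path above: given an offset into the region's HPA resource it recovers the interleave position and DPA offset, then matches the position against the targets to name the memdev. In the running 4-way / 256-byte example, a region offset of 0x1300 resolves to the position-3 decoder at that decoder's DPA start plus 0x400. A hedged usage sketch, with both rwsems held as the lockdep assertions require:

	struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL };
	int rc = region_offset_to_dpa_result(cxlr, 0x1300, &result);

	/*
	 * On success: result.cxlmd is the position-3 memdev and result.dpa
	 * is that decoder's DPA start + 0x400.
	 */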
+
static struct lock_class_key cxl_pmem_region_key;
-static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
+static int cxl_pmem_region_alloc(struct cxl_region *cxlr)
{
struct cxl_region_params *p = &cxlr->params;
struct cxl_nvdimm_bridge *cxl_nvb;
- struct cxl_pmem_region *cxlr_pmem;
struct device *dev;
int i;
- down_read(&cxl_region_rwsem);
- if (p->state != CXL_CONFIG_COMMIT) {
- cxlr_pmem = ERR_PTR(-ENXIO);
- goto out;
- }
+ guard(rwsem_read)(&cxl_rwsem.region);
+ if (p->state != CXL_CONFIG_COMMIT)
+ return -ENXIO;
- cxlr_pmem = kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets),
- GFP_KERNEL);
- if (!cxlr_pmem) {
- cxlr_pmem = ERR_PTR(-ENOMEM);
- goto out;
- }
+ struct cxl_pmem_region *cxlr_pmem __free(kfree) =
+ kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets), GFP_KERNEL);
+ if (!cxlr_pmem)
+ return -ENOMEM;
cxlr_pmem->hpa_range.start = p->res->start;
cxlr_pmem->hpa_range.end = p->res->end;
- /* Snapshot the region configuration underneath the cxl_region_rwsem */
+ /* Snapshot the region configuration underneath the cxl_rwsem.region */
cxlr_pmem->nr_mappings = p->nr_targets;
for (i = 0; i < p->nr_targets; i++) {
struct cxl_endpoint_decoder *cxled = p->targets[i];
@@ -2457,11 +3250,9 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
* bridge for one device is the same for all.
*/
if (i == 0) {
- cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
- if (!cxl_nvb) {
- cxlr_pmem = ERR_PTR(-ENODEV);
- goto out;
- }
+ cxl_nvb = cxl_find_nvdimm_bridge(cxlmd->endpoint);
+ if (!cxl_nvb)
+ return -ENODEV;
cxlr->cxl_nvb = cxl_nvb;
}
m->cxlmd = cxlmd;
@@ -2472,18 +3263,16 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
}
dev = &cxlr_pmem->dev;
- cxlr_pmem->cxlr = cxlr;
- cxlr->cxlr_pmem = cxlr_pmem;
device_initialize(dev);
lockdep_set_class(&dev->mutex, &cxl_pmem_region_key);
device_set_pm_not_required(dev);
dev->parent = &cxlr->dev;
dev->bus = &cxl_bus_type;
dev->type = &cxl_pmem_region_type;
-out:
- up_read(&cxl_region_rwsem);
+ cxlr_pmem->cxlr = cxlr;
+ cxlr->cxlr_pmem = no_free_ptr(cxlr_pmem);
- return cxlr_pmem;
+ return 0;
}
static void cxl_dax_region_release(struct device *dev)
@@ -2516,7 +3305,7 @@ struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
return NULL;
return container_of(dev, struct cxl_dax_region, dev);
}
-EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL);
+EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, "CXL");
static struct lock_class_key cxl_dax_region_key;
@@ -2526,17 +3315,13 @@ static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
struct cxl_dax_region *cxlr_dax;
struct device *dev;
- down_read(&cxl_region_rwsem);
- if (p->state != CXL_CONFIG_COMMIT) {
- cxlr_dax = ERR_PTR(-ENXIO);
- goto out;
- }
+ guard(rwsem_read)(&cxl_rwsem.region);
+ if (p->state != CXL_CONFIG_COMMIT)
+ return ERR_PTR(-ENXIO);
cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
- if (!cxlr_dax) {
- cxlr_dax = ERR_PTR(-ENOMEM);
- goto out;
- }
+ if (!cxlr_dax)
+ return ERR_PTR(-ENOMEM);
cxlr_dax->hpa_range.start = p->res->start;
cxlr_dax->hpa_range.end = p->res->end;
@@ -2549,8 +3334,6 @@ static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
dev->parent = &cxlr->dev;
dev->bus = &cxl_bus_type;
dev->type = &cxl_dax_region_type;
-out:
- up_read(&cxl_region_rwsem);
return cxlr_dax;
}
@@ -2578,11 +3361,11 @@ static void cxlr_release_nvdimm(void *_cxlr)
struct cxl_region *cxlr = _cxlr;
struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
- device_lock(&cxl_nvb->dev);
- if (cxlr->cxlr_pmem)
- devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister,
- cxlr->cxlr_pmem);
- device_unlock(&cxl_nvb->dev);
+ scoped_guard(device, &cxl_nvb->dev) {
+ if (cxlr->cxlr_pmem)
+ devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister,
+ cxlr->cxlr_pmem);
+ }
cxlr->cxl_nvb = NULL;
put_device(&cxl_nvb->dev);
}
@@ -2600,9 +3383,10 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
struct device *dev;
int rc;
- cxlr_pmem = cxl_pmem_region_alloc(cxlr);
- if (IS_ERR(cxlr_pmem))
- return PTR_ERR(cxlr_pmem);
+ rc = cxl_pmem_region_alloc(cxlr);
+ if (rc)
+ return rc;
+ cxlr_pmem = cxlr->cxlr_pmem;
cxl_nvb = cxlr->cxl_nvb;
dev = &cxlr_pmem->dev;
@@ -2617,13 +3401,14 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
dev_name(dev));
- device_lock(&cxl_nvb->dev);
- if (cxl_nvb->dev.driver)
- rc = devm_add_action_or_reset(&cxl_nvb->dev,
- cxlr_pmem_unregister, cxlr_pmem);
- else
- rc = -ENXIO;
- device_unlock(&cxl_nvb->dev);
+ scoped_guard(device, &cxl_nvb->dev) {
+ if (cxl_nvb->dev.driver)
+ rc = devm_add_action_or_reset(&cxl_nvb->dev,
+ cxlr_pmem_unregister,
+ cxlr_pmem);
+ else
+ rc = -ENXIO;
+ }
if (rc)
goto err_bridge;
@@ -2675,25 +3460,54 @@ err:
return rc;
}
-static int match_decoder_by_range(struct device *dev, void *data)
+static int match_decoder_by_range(struct device *dev, const void *data)
{
- struct range *r1, *r2 = data;
- struct cxl_root_decoder *cxlrd;
+ const struct range *r1, *r2 = data;
+ struct cxl_decoder *cxld;
- if (!is_root_decoder(dev))
+ if (!is_switch_decoder(dev))
return 0;
- cxlrd = to_cxl_root_decoder(dev);
- r1 = &cxlrd->cxlsd.cxld.hpa_range;
+ cxld = to_cxl_decoder(dev);
+ r1 = &cxld->hpa_range;
return range_contains(r1, r2);
}
-static int match_region_by_range(struct device *dev, void *data)
+static struct cxl_decoder *
+cxl_port_find_switch_decoder(struct cxl_port *port, struct range *hpa)
+{
+ struct device *cxld_dev = device_find_child(&port->dev, hpa,
+ match_decoder_by_range);
+
+ return cxld_dev ? to_cxl_decoder(cxld_dev) : NULL;
+}
+
+static struct cxl_root_decoder *
+cxl_find_root_decoder(struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *port = cxled_to_port(cxled);
+ struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
+ struct cxl_decoder *root, *cxld = &cxled->cxld;
+ struct range *hpa = &cxld->hpa_range;
+
+ root = cxl_port_find_switch_decoder(&cxl_root->port, hpa);
+ if (!root) {
+ dev_err(cxlmd->dev.parent,
+ "%s:%s no CXL window for range %#llx:%#llx\n",
+ dev_name(&cxlmd->dev), dev_name(&cxld->dev),
+ cxld->hpa_range.start, cxld->hpa_range.end);
+ return NULL;
+ }
+
+ return to_cxl_root_decoder(&root->dev);
+}
+
+static int match_region_by_range(struct device *dev, const void *data)
{
struct cxl_region_params *p;
struct cxl_region *cxlr;
- struct range *r = data;
- int rc = 0;
+ const struct range *r = data;
if (!is_cxl_region(dev))
return 0;
@@ -2701,60 +3515,92 @@ static int match_region_by_range(struct device *dev, void *data)
cxlr = to_cxl_region(dev);
p = &cxlr->params;
- down_read(&cxl_region_rwsem);
- if (p->res && p->res->start == r->start && p->res->end == r->end)
- rc = 1;
- up_read(&cxl_region_rwsem);
+ guard(rwsem_read)(&cxl_rwsem.region);
+ return spa_maps_hpa(p, r);
+}
- return rc;
+static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr,
+ struct resource *res)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+ struct cxl_region_params *p = &cxlr->params;
+ resource_size_t size = resource_size(res);
+ resource_size_t cache_size, start;
+
+ cache_size = cxlrd->cache_size;
+ if (!cache_size)
+ return 0;
+
+ if (size != cache_size) {
+ dev_warn(&cxlr->dev,
+ "Extended Linear Cache size %pa != CXL size %pa. No Support!",
+ &cache_size, &size);
+ return -ENXIO;
+ }
+
+ /*
+ * Move the start of the range to where the cache range starts. The
+ * implementation assumes that the cache range is in front of the
+ * CXL range. This is not dictated by the HMAT spec but is how the
+ * current known implementation is configured.
+ *
+ * The cache range is expected to be within the CFMWS. The adjusted
+ * res->start should not be less than cxlrd->res->start.
+ */
+ start = res->start - cache_size;
+ if (start < cxlrd->res->start)
+ return -ENXIO;
+
+ res->start = start;
+ p->cache_size = cache_size;
+
+ return 0;
}
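For illustration of the resize (sizes and addresses made up): with a 64 GiB CFMWS whose front 32 GiB is DRAM acting as the extended linear cache and whose back 32 GiB is the CXL-backed range, cxlrd->cache_size is 32 GiB, the incoming res spans only the upper half, and the adjustment moves res->start down by cache_size so the region resource covers the whole cached span while p->cache_size records the alias distance. The arithmetic, as a standalone check:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t gib = 1ULL << 30;
	uint64_t cfmws_start = 0x1000000000ULL;		/* hypothetical base */
	uint64_t cache_size = 32 * gib;			/* DRAM cache half */
	uint64_t res_start = cfmws_start + cache_size;	/* CXL half */

	/* Region start backs up to cover the cache, staying inside the CFMWS */
	assert(res_start - cache_size == cfmws_start);
	return 0;
}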
-/* Establish an empty region covering the given HPA range */
-static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
- struct cxl_endpoint_decoder *cxled)
+static int __construct_region(struct cxl_region *cxlr,
+ struct cxl_root_decoder *cxlrd,
+ struct cxl_endpoint_decoder *cxled)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- struct cxl_port *port = cxlrd_to_port(cxlrd);
struct range *hpa = &cxled->cxld.hpa_range;
struct cxl_region_params *p;
- struct cxl_region *cxlr;
struct resource *res;
int rc;
- do {
- cxlr = __create_region(cxlrd, cxled->mode,
- atomic_read(&cxlrd->region_id));
- } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
-
- if (IS_ERR(cxlr)) {
- dev_err(cxlmd->dev.parent,
- "%s:%s: %s failed assign region: %ld\n",
- dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
- __func__, PTR_ERR(cxlr));
- return cxlr;
- }
-
- down_write(&cxl_region_rwsem);
+ guard(rwsem_write)(&cxl_rwsem.region);
p = &cxlr->params;
if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
dev_err(cxlmd->dev.parent,
"%s:%s: %s autodiscovery interrupted\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
__func__);
- rc = -EBUSY;
- goto err;
+ return -EBUSY;
}
set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
res = kmalloc(sizeof(*res), GFP_KERNEL);
- if (!res) {
- rc = -ENOMEM;
- goto err;
- }
+ if (!res)
+ return -ENOMEM;
*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
dev_name(&cxlr->dev));
+
+ rc = cxl_extended_linear_cache_resize(cxlr, res);
+ if (rc && rc != -EOPNOTSUPP) {
+ /*
+ * Failing to resize for the extended linear cache does not prevent
+ * the region from functioning; it only causes 'cxl list' to report
+ * an incorrect region size.
+ */
+ dev_warn(cxlmd->dev.parent,
+ "Extended linear cache calculation failed rc:%d\n", rc);
+ }
+
+ rc = sysfs_update_group(&cxlr->dev.kobj, &cxl_region_group);
+ if (rc)
+ return rc;
+
rc = insert_resource(cxlrd->res, res);
if (rc) {
/*
@@ -2774,7 +3620,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
if (rc)
- goto err;
+ return rc;
dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
@@ -2783,64 +3629,88 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
/* ...to match put_device() in cxl_add_to_region() */
get_device(&cxlr->dev);
- up_write(&cxl_region_rwsem);
+
+ return 0;
+}
+
+/* Establish an empty region covering the given HPA range */
+static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
+ struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *port = cxlrd_to_port(cxlrd);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ int rc, part = READ_ONCE(cxled->part);
+ struct cxl_region *cxlr;
+
+ do {
+ cxlr = __create_region(cxlrd, cxlds->part[part].mode,
+ atomic_read(&cxlrd->region_id));
+ } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
+
+ if (IS_ERR(cxlr)) {
+ dev_err(cxlmd->dev.parent,
+ "%s:%s: %s failed assign region: %ld\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ __func__, PTR_ERR(cxlr));
+ return cxlr;
+ }
+
+ rc = __construct_region(cxlr, cxlrd, cxled);
+ if (rc) {
+ devm_release_action(port->uport_dev, unregister_region, cxlr);
+ return ERR_PTR(rc);
+ }
return cxlr;
+}
-err:
- up_write(&cxl_region_rwsem);
- devm_release_action(port->uport_dev, unregister_region, cxlr);
- return ERR_PTR(rc);
+static struct cxl_region *
+cxl_find_region_by_range(struct cxl_root_decoder *cxlrd, struct range *hpa)
+{
+ struct device *region_dev;
+
+ region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
+ match_region_by_range);
+ if (!region_dev)
+ return NULL;
+
+ return to_cxl_region(region_dev);
}
-int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
+int cxl_add_to_region(struct cxl_endpoint_decoder *cxled)
{
- struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct range *hpa = &cxled->cxld.hpa_range;
- struct cxl_decoder *cxld = &cxled->cxld;
- struct device *cxlrd_dev, *region_dev;
- struct cxl_root_decoder *cxlrd;
struct cxl_region_params *p;
- struct cxl_region *cxlr;
bool attach = false;
int rc;
- cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range,
- match_decoder_by_range);
- if (!cxlrd_dev) {
- dev_err(cxlmd->dev.parent,
- "%s:%s no CXL window for range %#llx:%#llx\n",
- dev_name(&cxlmd->dev), dev_name(&cxld->dev),
- cxld->hpa_range.start, cxld->hpa_range.end);
+ struct cxl_root_decoder *cxlrd __free(put_cxl_root_decoder) =
+ cxl_find_root_decoder(cxled);
+ if (!cxlrd)
return -ENXIO;
- }
-
- cxlrd = to_cxl_root_decoder(cxlrd_dev);
/*
* Ensure that if multiple threads race to construct_region() for @hpa
* one does the construction and the others add to that.
*/
mutex_lock(&cxlrd->range_lock);
- region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
- match_region_by_range);
- if (!region_dev) {
+ struct cxl_region *cxlr __free(put_cxl_region) =
+ cxl_find_region_by_range(cxlrd, hpa);
+ if (!cxlr)
cxlr = construct_region(cxlrd, cxled);
- region_dev = &cxlr->dev;
- } else
- cxlr = to_cxl_region(region_dev);
mutex_unlock(&cxlrd->range_lock);
rc = PTR_ERR_OR_ZERO(cxlr);
if (rc)
- goto out;
+ return rc;
attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);
- down_read(&cxl_region_rwsem);
- p = &cxlr->params;
- attach = p->state == CXL_CONFIG_COMMIT;
- up_read(&cxl_region_rwsem);
+ scoped_guard(rwsem_read, &cxl_rwsem.region) {
+ p = &cxlr->params;
+ attach = p->state == CXL_CONFIG_COMMIT;
+ }
if (attach) {
/*
@@ -2853,12 +3723,37 @@ int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
p->res);
}
- put_device(region_dev);
-out:
- put_device(cxlrd_dev);
return rc;
}
-EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL);
+EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, "CXL");
+
+u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa)
+{
+ struct cxl_region_ref *iter;
+ unsigned long index;
+
+ if (!endpoint)
+ return ~0ULL;
+
+ guard(rwsem_write)(&cxl_rwsem.region);
+
+ xa_for_each(&endpoint->regions, index, iter) {
+ struct cxl_region_params *p = &iter->region->params;
+
+ if (cxl_resource_contains_addr(p->res, spa)) {
+ if (!p->cache_size)
+ return ~0ULL;
+
+ if (spa >= p->res->start + p->cache_size)
+ return spa - p->cache_size;
+
+ return spa + p->cache_size;
+ }
+ }
+
+ return ~0ULL;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_port_get_spa_cache_alias, "CXL");
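The alias arithmetic follows directly from that front-cache layout: an SPA in the CXL-backed upper half aliases to the address cache_size below it in the DRAM half, and vice versa. A small sketch of the rule with the same made-up 32 GiB split (region placed at 0 for simplicity):

#include <assert.h>
#include <stdint.h>

/* Userspace sketch of the rule in cxl_port_get_spa_cache_alias() */
static uint64_t alias_of(uint64_t spa, uint64_t res_start, uint64_t cache_size)
{
	if (spa >= res_start + cache_size)
		return spa - cache_size;	/* CXL half -> DRAM cache half */
	return spa + cache_size;		/* DRAM cache half -> CXL half */
}

int main(void)
{
	uint64_t gib = 1ULL << 30;

	assert(alias_of(40 * gib, 0, 32 * gib) == 8 * gib);
	assert(alias_of(8 * gib, 0, 32 * gib) == 40 * gib);
	return 0;
}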
static int is_system_ram(struct resource *res, void *arg)
{
@@ -2869,45 +3764,207 @@ static int is_system_ram(struct resource *res, void *arg)
return 1;
}
-static int cxl_region_probe(struct device *dev)
+static void shutdown_notifiers(void *_cxlr)
+{
+ struct cxl_region *cxlr = _cxlr;
+
+ unregister_node_notifier(&cxlr->node_notifier);
+ unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
+}
+
+static void remove_debugfs(void *dentry)
+{
+ debugfs_remove_recursive(dentry);
+}
+
+static int validate_region_offset(struct cxl_region *cxlr, u64 offset)
{
- struct cxl_region *cxlr = to_cxl_region(dev);
struct cxl_region_params *p = &cxlr->params;
+ resource_size_t region_size;
+ u64 hpa;
+
+ if (offset < p->cache_size) {
+ dev_err(&cxlr->dev,
+ "Offset %#llx is within extended linear cache %pa\n",
+ offset, &p->cache_size);
+ return -EINVAL;
+ }
+
+ region_size = resource_size(p->res);
+ if (offset >= region_size) {
+ dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pa\n",
+ offset, &region_size);
+ return -EINVAL;
+ }
+
+ hpa = p->res->start + offset;
+ if (hpa < p->res->start || hpa > p->res->end) {
+ dev_err(&cxlr->dev, "HPA %#llx not in region %pr\n", hpa,
+ p->res);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int cxl_region_debugfs_poison_inject(void *data, u64 offset)
+{
+ struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL };
+ struct cxl_region *cxlr = data;
int rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc) {
+ ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
+ return rc;
+
+ ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
+ return rc;
+
+ if (validate_region_offset(cxlr, offset))
+ return -EINVAL;
+
+ offset -= cxlr->params.cache_size;
+ rc = region_offset_to_dpa_result(cxlr, offset, &result);
+ if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
+ dev_dbg(&cxlr->dev,
+ "Failed to resolve DPA for region offset %#llx rc %d\n",
+ offset, rc);
+
+ return rc ? rc : -EINVAL;
+ }
+
+ return cxl_inject_poison_locked(result.cxlmd, result.dpa);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_inject_fops, NULL,
+ cxl_region_debugfs_poison_inject, "%llx\n");
+
+static int cxl_region_debugfs_poison_clear(void *data, u64 offset)
+{
+ struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL };
+ struct cxl_region *cxlr = data;
+ int rc;
+
+ ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
+ return rc;
+
+ ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
+ return rc;
+
+ if (validate_region_offset(cxlr, offset))
+ return -EINVAL;
+
+ offset -= cxlr->params.cache_size;
+ rc = region_offset_to_dpa_result(cxlr, offset, &result);
+ if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
+ dev_dbg(&cxlr->dev,
+ "Failed to resolve DPA for region offset %#llx rc %d\n",
+ offset, rc);
+
+ return rc ? rc : -EINVAL;
+ }
+
+ return cxl_clear_poison_locked(result.cxlmd, result.dpa);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL,
+ cxl_region_debugfs_poison_clear, "%llx\n");
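These two attributes back the inject_poison and clear_poison files that cxl_region_probe() creates below. A hedged userspace usage sketch; the debugfs mount point and the region0 directory name are assumptions (cxl_debugfs_create_dir() names the directory after the region device), and the value written is a region byte offset (hex values need a 0x prefix):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical path; inject poison at region offset 0x1300 */
	int fd = open("/sys/kernel/debug/cxl/region0/inject_poison", O_WRONLY);

	if (fd < 0)
		return 1;
	dprintf(fd, "0x1300\n");
	close(fd);
	return 0;
}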
+
+static int cxl_region_can_probe(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ int rc;
+
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) {
dev_dbg(&cxlr->dev, "probe interrupted\n");
return rc;
}
if (p->state < CXL_CONFIG_COMMIT) {
dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
- rc = -ENXIO;
- goto out;
+ return -ENXIO;
}
if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) {
dev_err(&cxlr->dev,
"failed to activate, re-commit region and retry\n");
- rc = -ENXIO;
- goto out;
+ return -ENXIO;
}
+ return 0;
+}
+
+static int cxl_region_probe(struct device *dev)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ bool poison_supported = true;
+ int rc;
+
+ rc = cxl_region_can_probe(cxlr);
+ if (rc)
+ return rc;
+
/*
* From this point on any path that changes the region's state away from
* CXL_CONFIG_COMMIT is also responsible for releasing the driver.
*/
-out:
- up_read(&cxl_region_rwsem);
+ cxlr->node_notifier.notifier_call = cxl_region_perf_attrs_callback;
+ cxlr->node_notifier.priority = CXL_CALLBACK_PRI;
+ register_node_notifier(&cxlr->node_notifier);
+
+ cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
+ cxlr->adist_notifier.priority = 100;
+ register_mt_adistance_algorithm(&cxlr->adist_notifier);
+
+ rc = devm_add_action_or_reset(&cxlr->dev, shutdown_notifiers, cxlr);
if (rc)
return rc;
+ /* Create poison debugfs files if all memdevs support the capabilities */
+ for (int i = 0; i < p->nr_targets; i++) {
+ struct cxl_endpoint_decoder *cxled = p->targets[i];
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+
+ if (!cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_INJECT) ||
+ !cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_CLEAR)) {
+ poison_supported = false;
+ break;
+ }
+ }
+
+ if (poison_supported) {
+ struct dentry *dentry;
+
+ dentry = cxl_debugfs_create_dir(dev_name(dev));
+ debugfs_create_file("inject_poison", 0200, dentry, cxlr,
+ &cxl_poison_inject_fops);
+ debugfs_create_file("clear_poison", 0200, dentry, cxlr,
+ &cxl_poison_clear_fops);
+ rc = devm_add_action_or_reset(dev, remove_debugfs, dentry);
+ if (rc)
+ return rc;
+ }
+
switch (cxlr->mode) {
- case CXL_DECODER_PMEM:
+ case CXL_PARTMODE_PMEM:
+ rc = devm_cxl_region_edac_register(cxlr);
+ if (rc)
+ dev_dbg(&cxlr->dev, "CXL EDAC registration for region_id=%d failed\n",
+ cxlr->id);
+
return devm_cxl_add_pmem_region(cxlr);
- case CXL_DECODER_RAM:
+ case CXL_PARTMODE_RAM:
+ rc = devm_cxl_region_edac_register(cxlr);
+ if (rc)
+ dev_dbg(&cxlr->dev, "CXL EDAC registration for region_id=%d failed\n",
+ cxlr->id);
+
/*
* The region cannot be managed by CXL if any portion of
* it is already online as 'System RAM'
@@ -2941,6 +3998,6 @@ void cxl_region_exit(void)
cxl_driver_unregister(&cxl_region_driver);
}
-MODULE_IMPORT_NS(CXL);
-MODULE_IMPORT_NS(DEVMEM);
+MODULE_IMPORT_NS("CXL");
+MODULE_IMPORT_NS("DEVMEM");
MODULE_ALIAS_CXL(CXL_DEVICE_REGION);