summaryrefslogtreecommitdiff
path: root/drivers/cxl/core
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cxl/core')
-rw-r--r--drivers/cxl/core/Makefile2
-rw-r--r--drivers/cxl/core/acpi.c11
-rw-r--r--drivers/cxl/core/cdat.c8
-rw-r--r--drivers/cxl/core/core.h40
-rw-r--r--drivers/cxl/core/edac.c2109
-rw-r--r--drivers/cxl/core/features.c43
-rw-r--r--drivers/cxl/core/hdm.c136
-rw-r--r--drivers/cxl/core/mbox.c48
-rw-r--r--drivers/cxl/core/mce.h2
-rw-r--r--drivers/cxl/core/memdev.c57
-rw-r--r--drivers/cxl/core/pci.c48
-rw-r--r--drivers/cxl/core/port.c52
-rw-r--r--drivers/cxl/core/ras.c47
-rw-r--r--drivers/cxl/core/region.c679
-rw-r--r--drivers/cxl/core/trace.h133
15 files changed, 2880 insertions, 535 deletions
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 086df97a0fcf..5ad8fef210b5 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -15,8 +15,8 @@ cxl_core-y += hdm.o
cxl_core-y += pmu.o
cxl_core-y += cdat.o
cxl_core-y += ras.o
-cxl_core-y += acpi.o
cxl_core-$(CONFIG_TRACING) += trace.o
cxl_core-$(CONFIG_CXL_REGION) += region.o
cxl_core-$(CONFIG_CXL_MCE) += mce.o
cxl_core-$(CONFIG_CXL_FEATURES) += features.o
+cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
diff --git a/drivers/cxl/core/acpi.c b/drivers/cxl/core/acpi.c
deleted file mode 100644
index f13b4dae6ac5..000000000000
--- a/drivers/cxl/core/acpi.c
+++ /dev/null
@@ -1,11 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright(c) 2024 Intel Corporation. All rights reserved. */
-#include <linux/acpi.h>
-#include "cxl.h"
-#include "core.h"
-
-int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res,
- int nid, resource_size_t *size)
-{
- return hmat_get_extended_linear_cache_size(backing_res, nid, size);
-}
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index edb4f41eeacc..c0af645425f4 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -28,7 +28,7 @@ static u32 cdat_normalize(u16 entry, u64 base, u8 type)
*/
if (entry == 0xffff || !entry)
return 0;
- else if (base > (UINT_MAX / (entry)))
+ if (base > (UINT_MAX / (entry)))
return 0;
/*
@@ -336,7 +336,7 @@ static int match_cxlrd_hb(struct device *dev, void *data)
cxlrd = to_cxl_root_decoder(dev);
cxlsd = &cxlrd->cxlsd;
- guard(rwsem_read)(&cxl_region_rwsem);
+ guard(rwsem_read)(&cxl_rwsem.region);
for (int i = 0; i < cxlsd->nr_targets; i++) {
if (host_bridge == cxlsd->target[i]->dport_dev)
return 1;
@@ -987,7 +987,7 @@ void cxl_region_shared_upstream_bandwidth_update(struct cxl_region *cxlr)
bool is_root;
int rc;
- lockdep_assert_held(&cxl_dpa_rwsem);
+ lockdep_assert_held(&cxl_rwsem.dpa);
struct xarray *usp_xa __free(free_perf_xa) =
kzalloc(sizeof(*usp_xa), GFP_KERNEL);
@@ -1057,7 +1057,7 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
{
struct cxl_dpa_perf *perf;
- lockdep_assert_held(&cxl_dpa_rwsem);
+ lockdep_assert_held(&cxl_rwsem.dpa);
perf = cxled_get_dpa_perf(cxled);
if (IS_ERR(perf))
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 17b692eb3257..2669f251d677 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -5,6 +5,7 @@
#define __CXL_CORE_H__
#include <cxl/mailbox.h>
+#include <linux/rwsem.h>
extern const struct device_type cxl_nvdimm_bridge_type;
extern const struct device_type cxl_nvdimm_type;
@@ -12,6 +13,11 @@ extern const struct device_type cxl_pmu_type;
extern struct attribute_group cxl_base_attribute_group;
+enum cxl_detach_mode {
+ DETACH_ONLY,
+ DETACH_INVALIDATE,
+};
+
#ifdef CONFIG_CXL_REGION
extern struct device_attribute dev_attr_create_pmem_region;
extern struct device_attribute dev_attr_create_ram_region;
@@ -20,7 +26,11 @@ extern struct device_attribute dev_attr_region;
extern const struct device_type cxl_pmem_region_type;
extern const struct device_type cxl_dax_region_type;
extern const struct device_type cxl_region_type;
-void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
+
+int cxl_decoder_detach(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos,
+ enum cxl_detach_mode mode);
+
#define CXL_REGION_ATTR(x) (&dev_attr_##x.attr)
#define CXL_REGION_TYPE(x) (&cxl_region_type)
#define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr),
@@ -48,8 +58,11 @@ static inline int cxl_get_poison_by_endpoint(struct cxl_port *port)
{
return 0;
}
-static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
+static inline int cxl_decoder_detach(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled,
+ int pos, enum cxl_detach_mode mode)
{
+ return 0;
}
static inline int cxl_region_init(void)
{
@@ -76,10 +89,11 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
struct dentry *cxl_debugfs_create_dir(const char *dir);
int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled,
enum cxl_partition_mode mode);
-int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size);
+int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size);
int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled);
resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled);
+bool cxl_resource_contains_addr(const struct resource *res, const resource_size_t addr);
enum cxl_rcrb {
CXL_RCRB_DOWNSTREAM,
@@ -96,8 +110,20 @@ u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb);
#define PCI_RCRB_CAP_HDR_NEXT_MASK GENMASK(15, 8)
#define PCI_CAP_EXP_SIZEOF 0x3c
-extern struct rw_semaphore cxl_dpa_rwsem;
-extern struct rw_semaphore cxl_region_rwsem;
+struct cxl_rwsem {
+ /*
+ * All changes to HPA (interleave configuration) occur with this
+ * lock held for write.
+ */
+ struct rw_semaphore region;
+ /*
+ * All changes to a device DPA space occur with this lock held
+ * for write.
+ */
+ struct rw_semaphore dpa;
+};
+
+extern struct cxl_rwsem cxl_rwsem;
int cxl_memdev_init(void);
void cxl_memdev_exit(void);
@@ -120,10 +146,10 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
int cxl_ras_init(void);
void cxl_ras_exit(void);
int cxl_gpf_port_setup(struct cxl_dport *dport);
-int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res,
- int nid, resource_size_t *size);
#ifdef CONFIG_CXL_FEATURES
+struct cxl_feat_entry *
+cxl_feature_info(struct cxl_features_state *cxlfs, const uuid_t *uuid);
size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
enum cxl_get_feat_selection selection,
void *feat_out, size_t feat_out_size, u16 offset,
diff --git a/drivers/cxl/core/edac.c b/drivers/cxl/core/edac.c
new file mode 100644
index 000000000000..79994ca9bc9f
--- /dev/null
+++ b/drivers/cxl/core/edac.c
@@ -0,0 +1,2109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CXL EDAC memory feature driver.
+ *
+ * Copyright (c) 2024-2025 HiSilicon Limited.
+ *
+ * - Supports functions to configure EDAC features of the
+ * CXL memory devices.
+ * - Registers with the EDAC device subsystem driver to expose
+ * the features sysfs attributes to the user for configuring
+ * CXL memory RAS feature.
+ */
+
+#include <linux/array_size.h>
+#include <linux/cleanup.h>
+#include <linux/edac.h>
+#include <linux/limits.h>
+#include <linux/unaligned.h>
+#include <linux/xarray.h>
+#include <cxl/features.h>
+#include <cxl.h>
+#include <cxlmem.h>
+#include "core.h"
+#include "trace.h"
+
+#define CXL_NR_EDAC_DEV_FEATURES 7
+
+#define CXL_SCRUB_NO_REGION -1
+
+/*
+ * Driver context attached to one patrol scrub EDAC feature instance.
+ *
+ * The size, version and effects fields are copied from the device's patrol
+ * scrub feature entry at init time. cxl_memdev_scrub_init() fills in @cxlmd
+ * and leaves @cxlr NULL; cxl_region_scrub_init() fills in @cxlr and leaves
+ * @cxlmd NULL. A non-NULL @cxlr selects the region-wide get/set paths.
+ */
+struct cxl_patrol_scrub_context {
+ u8 instance;
+ u16 get_feat_size;
+ u16 set_feat_size;
+ u8 get_version;
+ u8 set_version;
+ u16 effects;
+ struct cxl_memdev *cxlmd;
+ struct cxl_region *cxlr;
+};
+
+/*
+ * See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-222 Device Patrol Scrub Control
+ * Feature Readable Attributes.
+ *
+ * As decoded by the helpers in this file: bit 0 of @scrub_cycle_cap reports
+ * whether the scrub cycle is changeable, bits 7:0 of @scrub_cycle_hours hold
+ * the current cycle and bits 15:8 the minimum supported cycle (both in
+ * hours), and bit 0 of @scrub_flags reports whether scrubbing is enabled.
+ */
+struct cxl_scrub_rd_attrbs {
+ u8 scrub_cycle_cap;
+ __le16 scrub_cycle_hours;
+ u8 scrub_flags;
+} __packed;
+
+/*
+ * See CXL spec rev 3.2 @8.2.10.9.11.1 Table 8-223 Device Patrol Scrub Control
+ * Feature Writable Attributes.
+ *
+ * This is the Set Feature payload built by the cxl_scrub_set_attrbs_*()
+ * helpers: @scrub_cycle_hours is the requested cycle in hours and bit 0 of
+ * @scrub_flags is the enable request.
+ */
+struct cxl_scrub_wr_attrbs {
+ u8 scrub_cycle_hours;
+ u8 scrub_flags;
+} __packed;
+
+/* Bit layout of the patrol scrub capability, cycle and flags fields. */
+#define CXL_SCRUB_CONTROL_CHANGEABLE BIT(0)
+#define CXL_SCRUB_CONTROL_REALTIME BIT(1)
+#define CXL_SCRUB_CONTROL_CYCLE_MASK GENMASK(7, 0)
+#define CXL_SCRUB_CONTROL_MIN_CYCLE_MASK GENMASK(15, 8)
+#define CXL_SCRUB_CONTROL_ENABLE BIT(0)
+
+/* Field extractors for values read from the device (host byte order). */
+#define CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap) \
+ FIELD_GET(CXL_SCRUB_CONTROL_CHANGEABLE, cap)
+#define CXL_GET_SCRUB_CYCLE(cycle) \
+ FIELD_GET(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
+#define CXL_GET_SCRUB_MIN_CYCLE(cycle) \
+ FIELD_GET(CXL_SCRUB_CONTROL_MIN_CYCLE_MASK, cycle)
+#define CXL_GET_SCRUB_EN_STS(flags) FIELD_GET(CXL_SCRUB_CONTROL_ENABLE, flags)
+
+/* Field builders for values written to the device. */
+#define CXL_SET_SCRUB_CYCLE(cycle) \
+ FIELD_PREP(CXL_SCRUB_CONTROL_CYCLE_MASK, cycle)
+#define CXL_SET_SCRUB_EN(en) FIELD_PREP(CXL_SCRUB_CONTROL_ENABLE, en)
+
+/*
+ * Read the current patrol scrub attributes of a single device.
+ *
+ * @cap, @cycle and @flags receive the raw capability byte, the 16-bit cycle
+ * word (converted to host order) and the flags byte. @min_cycle is optional;
+ * when non-NULL it receives the minimum-cycle field extracted from @cycle.
+ *
+ * Returns 0 on success, -ENOMEM if the read buffer cannot be allocated, or
+ * -EIO if cxl_get_feature() returns no data.
+ */
+static int cxl_mem_scrub_get_attrbs(struct cxl_mailbox *cxl_mbox, u8 *cap,
+				    u16 *cycle, u8 *flags, u8 *min_cycle)
+{
+	const size_t attrbs_len = sizeof(struct cxl_scrub_rd_attrbs);
+	struct cxl_scrub_rd_attrbs *attrbs __free(kfree) =
+		kzalloc(attrbs_len, GFP_KERNEL);
+
+	if (!attrbs)
+		return -ENOMEM;
+
+	if (!cxl_get_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
+			     CXL_GET_FEAT_SEL_CURRENT_VALUE, attrbs,
+			     attrbs_len, 0, NULL))
+		return -EIO;
+
+	*cap = attrbs->scrub_cycle_cap;
+	*flags = attrbs->scrub_flags;
+	*cycle = le16_to_cpu(attrbs->scrub_cycle_hours);
+	if (min_cycle)
+		*min_cycle = CXL_GET_SCRUB_MIN_CYCLE(*cycle);
+
+	return 0;
+}
+
+/*
+ * Read patrol scrub attributes for the device or region behind @cxl_ps_ctx.
+ *
+ * Device context (no region set): a single read from that device's mailbox.
+ * Region context: every target of the region is read in turn while holding
+ * cxl_rwsem.region for read (interruptible). @cap, @cycle and @flags end up
+ * holding the values of the last target read; @min_cycle, when requested,
+ * is the largest per-device minimum seen.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx,
+				u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle)
+{
+	struct cxl_region_params *params;
+	u8 region_min = 0;
+	int rc;
+
+	if (!cxl_ps_ctx->cxlr)
+		return cxl_mem_scrub_get_attrbs(
+			&cxl_ps_ctx->cxlmd->cxlds->cxl_mbox, cap, cycle,
+			flags, min_cycle);
+
+	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+	rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem);
+	if (rc)
+		return rc;
+
+	params = &cxl_ps_ctx->cxlr->params;
+
+	for (int pos = 0; pos < params->nr_targets; pos++) {
+		struct cxl_memdev *cxlmd =
+			cxled_to_memdev(params->targets[pos]);
+
+		rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, cap,
+					      cycle, flags, min_cycle);
+		if (rc)
+			return rc;
+
+		/*
+		 * A region can only scrub as fast as its slowest member, so
+		 * its minimum cycle is the maximum of the device minimums.
+		 */
+		if (min_cycle)
+			region_min = max(*min_cycle, region_min);
+	}
+
+	if (min_cycle)
+		*min_cycle = region_min;
+
+	return 0;
+}
+
+/*
+ * Program the same scrub cycle and flags into every memdev backing the
+ * region in @cxl_ps_ctx, holding cxl_rwsem.region for read (interruptible).
+ *
+ * After each successful Set Feature the memdev's cached scrub_cycle and
+ * scrub_region_id are updated; if a different rate had been set through
+ * another region, the override is logged. Stops at, and returns, the first
+ * error; devices already programmed are not rolled back.
+ */
+static int cxl_scrub_set_attrbs_region(struct device *dev,
+				       struct cxl_patrol_scrub_context *cxl_ps_ctx,
+				       u8 cycle, u8 flags)
+{
+	struct cxl_scrub_wr_attrbs wr_attrbs = {
+		.scrub_cycle_hours = cycle,
+		.scrub_flags = flags,
+	};
+	struct cxl_region_params *params;
+	struct cxl_region *region;
+	int rc;
+
+	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+	rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem);
+	if (rc)
+		return rc;
+
+	region = cxl_ps_ctx->cxlr;
+	params = &region->params;
+
+	for (int pos = 0; pos < params->nr_targets; pos++) {
+		struct cxl_memdev *cxlmd =
+			cxled_to_memdev(params->targets[pos]);
+
+		rc = cxl_set_feature(&cxlmd->cxlds->cxl_mbox,
+				     &CXL_FEAT_PATROL_SCRUB_UUID,
+				     cxl_ps_ctx->set_version, &wr_attrbs,
+				     sizeof(wr_attrbs),
+				     CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
+				     0, NULL);
+		if (rc)
+			return rc;
+
+		/* Cached state already matches: nothing to record */
+		if (cycle == cxlmd->scrub_cycle)
+			continue;
+
+		if (cxlmd->scrub_region_id != CXL_SCRUB_NO_REGION)
+			dev_info(dev,
+				 "Device scrub rate(%d hours) set by region%d rate overwritten by region%d scrub rate(%d hours)\n",
+				 cxlmd->scrub_cycle, cxlmd->scrub_region_id,
+				 region->id, cycle);
+
+		cxlmd->scrub_cycle = cycle;
+		cxlmd->scrub_region_id = region->id;
+	}
+
+	return 0;
+}
+
+/*
+ * Program a scrub cycle and flags into the single memdev in @cxl_ps_ctx.
+ *
+ * On success the memdev's cached scrub_cycle is updated and its
+ * scrub_region_id reset to CXL_SCRUB_NO_REGION; if the previous rate had
+ * been set through a region, the override is logged.
+ */
+static int cxl_scrub_set_attrbs_device(struct device *dev,
+				       struct cxl_patrol_scrub_context *cxl_ps_ctx,
+				       u8 cycle, u8 flags)
+{
+	struct cxl_memdev *cxlmd = cxl_ps_ctx->cxlmd;
+	struct cxl_scrub_wr_attrbs wr_attrbs = {
+		.scrub_cycle_hours = cycle,
+		.scrub_flags = flags,
+	};
+	int rc;
+
+	rc = cxl_set_feature(&cxlmd->cxlds->cxl_mbox,
+			     &CXL_FEAT_PATROL_SCRUB_UUID,
+			     cxl_ps_ctx->set_version, &wr_attrbs,
+			     sizeof(wr_attrbs),
+			     CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET, 0,
+			     NULL);
+	if (rc)
+		return rc;
+
+	/* Cached state already matches: nothing to record */
+	if (cycle == cxlmd->scrub_cycle)
+		return 0;
+
+	if (cxlmd->scrub_region_id != CXL_SCRUB_NO_REGION)
+		dev_info(dev,
+			 "Device scrub rate(%d hours) set by region%d rate overwritten with device local scrub rate(%d hours)\n",
+			 cxlmd->scrub_cycle, cxlmd->scrub_region_id, cycle);
+
+	cxlmd->scrub_cycle = cycle;
+	cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
+
+	return 0;
+}
+
+/*
+ * Dispatch a scrub attribute write to the device or region implementation,
+ * depending on whether the context carries a region.
+ */
+static int cxl_scrub_set_attrbs(struct device *dev,
+				struct cxl_patrol_scrub_context *cxl_ps_ctx,
+				u8 cycle, u8 flags)
+{
+	if (!cxl_ps_ctx->cxlr)
+		return cxl_scrub_set_attrbs_device(dev, cxl_ps_ctx, cycle,
+						   flags);
+
+	return cxl_scrub_set_attrbs_region(dev, cxl_ps_ctx, cycle, flags);
+}
+
+/*
+ * EDAC scrub op: report whether background patrol scrub is enabled, taken
+ * from the enable bit of the flags read back from the device(s).
+ */
+static int cxl_patrol_scrub_get_enabled_bg(struct device *dev, void *drv_data,
+					   bool *enabled)
+{
+	struct cxl_patrol_scrub_context *ps_ctx = drv_data;
+	u8 scrub_cap, scrub_flags;
+	u16 scrub_cycle;
+	int rc;
+
+	rc = cxl_scrub_get_attrbs(ps_ctx, &scrub_cap, &scrub_cycle,
+				  &scrub_flags, NULL);
+	if (rc)
+		return rc;
+
+	*enabled = CXL_GET_SCRUB_EN_STS(scrub_flags);
+
+	return 0;
+}
+
+/*
+ * EDAC scrub op: enable or disable background patrol scrub.
+ *
+ * Requires CAP_SYS_RAWIO. The current cycle is read back first so that the
+ * write carries the existing cycle value together with the new enable bit.
+ */
+static int cxl_patrol_scrub_set_enabled_bg(struct device *dev, void *drv_data,
+					   bool enable)
+{
+	struct cxl_patrol_scrub_context *ps_ctx = drv_data;
+	u8 scrub_cap, scrub_flags;
+	u16 cur_cycle;
+	int rc;
+
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	rc = cxl_scrub_get_attrbs(ps_ctx, &scrub_cap, &cur_cycle,
+				  &scrub_flags, NULL);
+	if (rc)
+		return rc;
+
+	scrub_flags = CXL_SET_SCRUB_EN(enable);
+
+	return cxl_scrub_set_attrbs(dev, ps_ctx,
+				    CXL_GET_SCRUB_CYCLE(cur_cycle),
+				    scrub_flags);
+}
+
+/*
+ * EDAC scrub op: report the minimum supported scrub cycle in seconds
+ * (the device reports hours).
+ */
+static int cxl_patrol_scrub_get_min_scrub_cycle(struct device *dev,
+						void *drv_data, u32 *min)
+{
+	struct cxl_patrol_scrub_context *ps_ctx = drv_data;
+	u8 scrub_cap, scrub_flags, min_hours;
+	u16 scrub_cycle;
+	int rc;
+
+	rc = cxl_scrub_get_attrbs(ps_ctx, &scrub_cap, &scrub_cycle,
+				  &scrub_flags, &min_hours);
+	if (rc)
+		return rc;
+
+	*min = min_hours * 3600;
+
+	return 0;
+}
+
+/*
+ * EDAC scrub op: report the maximum scrub cycle in seconds. The cycle is
+ * written as an 8-bit hour count, so the ceiling is U8_MAX hours.
+ */
+static int cxl_patrol_scrub_get_max_scrub_cycle(struct device *dev,
+						void *drv_data, u32 *max)
+{
+	/* Max set by register size */
+	*max = U8_MAX * 3600;
+
+	return 0;
+}
+
+/*
+ * EDAC scrub op: report the current scrub cycle in seconds (the device
+ * reports hours in the low byte of the cycle word).
+ */
+static int cxl_patrol_scrub_get_scrub_cycle(struct device *dev, void *drv_data,
+					    u32 *scrub_cycle_secs)
+{
+	struct cxl_patrol_scrub_context *ps_ctx = drv_data;
+	u8 scrub_cap, scrub_flags;
+	u16 cycle_word;
+	int rc;
+
+	rc = cxl_scrub_get_attrbs(ps_ctx, &scrub_cap, &cycle_word,
+				  &scrub_flags, NULL);
+	if (rc)
+		return rc;
+
+	*scrub_cycle_secs = CXL_GET_SCRUB_CYCLE(cycle_word) * 3600;
+
+	return 0;
+}
+
+/*
+ * EDAC scrub op: set the patrol scrub cycle.
+ *
+ * @scrub_cycle_secs is converted to whole hours (rounding down). Requires
+ * CAP_SYS_RAWIO. The current attributes are read first so the existing flags
+ * are written back unchanged alongside the new cycle.
+ *
+ * Returns -EPERM without CAP_SYS_RAWIO, -EOPNOTSUPP if the device reports
+ * the cycle as not changeable, -EINVAL if the requested cycle is below the
+ * supported minimum or does not fit the 8-bit hour field, otherwise the
+ * result of the attribute read/write.
+ */
+static int cxl_patrol_scrub_set_scrub_cycle(struct device *dev, void *drv_data,
+					    u32 scrub_cycle_secs)
+{
+	struct cxl_patrol_scrub_context *ctx = drv_data;
+	/*
+	 * Keep the hour count in 32 bits until it has been range checked:
+	 * narrowing to u8 first would silently wrap (e.g. 256h -> 0h).
+	 */
+	u32 scrub_cycle_hours = scrub_cycle_secs / 3600;
+	u8 cap, wr_cycle, flags, min_cycle;
+	u16 rd_cycle;
+	int ret;
+
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	ret = cxl_scrub_get_attrbs(ctx, &cap, &rd_cycle, &flags, &min_cycle);
+	if (ret)
+		return ret;
+
+	if (!CXL_GET_SCRUB_CYCLE_CHANGEABLE(cap))
+		return -EOPNOTSUPP;
+
+	if (scrub_cycle_hours > U8_MAX) {
+		dev_dbg(dev, "Invalid CXL patrol scrub cycle(%u) to set\n",
+			scrub_cycle_hours);
+		dev_dbg(dev,
+			"Maximum supported CXL patrol scrub cycle in hour %u\n",
+			U8_MAX);
+		return -EINVAL;
+	}
+
+	if (scrub_cycle_hours < min_cycle) {
+		dev_dbg(dev, "Invalid CXL patrol scrub cycle(%u) to set\n",
+			scrub_cycle_hours);
+		dev_dbg(dev,
+			"Minimum supported CXL patrol scrub cycle in hour %d\n",
+			min_cycle);
+		return -EINVAL;
+	}
+	wr_cycle = CXL_SET_SCRUB_CYCLE(scrub_cycle_hours);
+
+	return cxl_scrub_set_attrbs(dev, ctx, wr_cycle, flags);
+}
+
+/*
+ * EDAC scrub callbacks for CXL patrol scrub. The same table is installed for
+ * both memdev and region instances; the context passed as drv_data decides
+ * which path is taken.
+ */
+static const struct edac_scrub_ops cxl_ps_scrub_ops = {
+ .get_enabled_bg = cxl_patrol_scrub_get_enabled_bg,
+ .set_enabled_bg = cxl_patrol_scrub_set_enabled_bg,
+ .get_min_cycle = cxl_patrol_scrub_get_min_scrub_cycle,
+ .get_max_cycle = cxl_patrol_scrub_get_max_scrub_cycle,
+ .get_cycle_duration = cxl_patrol_scrub_get_scrub_cycle,
+ .set_cycle_duration = cxl_patrol_scrub_set_scrub_cycle,
+};
+
+/*
+ * Set up the patrol scrub EDAC feature descriptor for one memdev.
+ *
+ * Fails with -EOPNOTSUPP if the device has no patrol scrub feature entry or
+ * the feature is not changeable. On success a device-managed context is
+ * allocated, the memdev's cached scrub cycle is seeded from the device's
+ * current value, and @ras_feature is filled in for EDAC registration.
+ */
+static int cxl_memdev_scrub_init(struct cxl_memdev *cxlmd,
+				 struct edac_dev_feature *ras_feature,
+				 u8 scrub_inst)
+{
+	struct cxl_patrol_scrub_context *ps_ctx;
+	struct cxl_feat_entry *entry;
+	u8 scrub_cap, scrub_flags;
+	u16 scrub_cycle;
+	int rc;
+
+	entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
+				 &CXL_FEAT_PATROL_SCRUB_UUID);
+	if (IS_ERR(entry))
+		return -EOPNOTSUPP;
+
+	if (!(le32_to_cpu(entry->flags) & CXL_FEATURE_F_CHANGEABLE))
+		return -EOPNOTSUPP;
+
+	ps_ctx = devm_kzalloc(&cxlmd->dev, sizeof(*ps_ctx), GFP_KERNEL);
+	if (!ps_ctx)
+		return -ENOMEM;
+
+	/* Remaining fields (notably ->cxlr) stay zero from the allocation */
+	ps_ctx->get_feat_size = le16_to_cpu(entry->get_feat_size);
+	ps_ctx->set_feat_size = le16_to_cpu(entry->set_feat_size);
+	ps_ctx->get_version = entry->get_feat_ver;
+	ps_ctx->set_version = entry->set_feat_ver;
+	ps_ctx->effects = le16_to_cpu(entry->effects);
+	ps_ctx->instance = scrub_inst;
+	ps_ctx->cxlmd = cxlmd;
+
+	rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &scrub_cap,
+				      &scrub_cycle, &scrub_flags, NULL);
+	if (rc)
+		return rc;
+
+	cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(scrub_cycle);
+	cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
+
+	ras_feature->ft_type = RAS_FEAT_SCRUB;
+	ras_feature->instance = ps_ctx->instance;
+	ras_feature->scrub_ops = &cxl_ps_scrub_ops;
+	ras_feature->ctx = ps_ctx;
+
+	return 0;
+}
+
+/*
+ * Set up the patrol scrub EDAC feature descriptor for a region.
+ *
+ * Every memdev backing the region must expose a changeable patrol scrub
+ * feature, otherwise -EOPNOTSUPP is returned. Each memdev's cached scrub
+ * cycle is seeded from its current device value. The feature sizes and
+ * versions stored in the region context are taken from the last target's
+ * feature entry.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the region has no targets or a target
+ * lacks the feature, -ENOMEM on allocation failure, or the error from
+ * reading a device's current attributes.
+ */
+static int cxl_region_scrub_init(struct cxl_region *cxlr,
+				 struct edac_dev_feature *ras_feature,
+				 u8 scrub_inst)
+{
+	struct cxl_patrol_scrub_context *cxl_ps_ctx;
+	struct cxl_region_params *p = &cxlr->params;
+	struct cxl_feat_entry *feat_entry = NULL;
+	struct cxl_memdev *cxlmd;
+	u8 cap, flags;
+	u16 cycle;
+	int i, rc;
+
+	/*
+	 * The cxl_rwsem.region must be held if the code below is used in a
+	 * context other than when the region is in the probe state, as shown
+	 * here.
+	 */
+	for (i = 0; i < p->nr_targets; i++) {
+		struct cxl_endpoint_decoder *cxled = p->targets[i];
+
+		cxlmd = cxled_to_memdev(cxled);
+		feat_entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds),
+					      &CXL_FEAT_PATROL_SCRUB_UUID);
+		if (IS_ERR(feat_entry))
+			return -EOPNOTSUPP;
+
+		if (!(le32_to_cpu(feat_entry->flags) &
+		      CXL_FEATURE_F_CHANGEABLE))
+			return -EOPNOTSUPP;
+
+		rc = cxl_mem_scrub_get_attrbs(&cxlmd->cxlds->cxl_mbox, &cap,
+					      &cycle, &flags, NULL);
+		if (rc)
+			return rc;
+
+		cxlmd->scrub_cycle = CXL_GET_SCRUB_CYCLE(cycle);
+		cxlmd->scrub_region_id = CXL_SCRUB_NO_REGION;
+	}
+
+	/*
+	 * With no targets the loop never ran and there is no feature entry
+	 * to describe the region; bail out instead of dereferencing NULL.
+	 */
+	if (!feat_entry)
+		return -EOPNOTSUPP;
+
+	cxl_ps_ctx = devm_kzalloc(&cxlr->dev, sizeof(*cxl_ps_ctx), GFP_KERNEL);
+	if (!cxl_ps_ctx)
+		return -ENOMEM;
+
+	*cxl_ps_ctx = (struct cxl_patrol_scrub_context){
+		.get_feat_size = le16_to_cpu(feat_entry->get_feat_size),
+		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
+		.get_version = feat_entry->get_feat_ver,
+		.set_version = feat_entry->set_feat_ver,
+		.effects = le16_to_cpu(feat_entry->effects),
+		.instance = scrub_inst,
+		.cxlr = cxlr,
+	};
+
+	ras_feature->ft_type = RAS_FEAT_SCRUB;
+	ras_feature->instance = cxl_ps_ctx->instance;
+	ras_feature->scrub_ops = &cxl_ps_scrub_ops;
+	ras_feature->ctx = cxl_ps_ctx;
+
+	return 0;
+}
+
+/*
+ * Driver context for the ECS (error check scrub) EDAC feature of one memdev.
+ *
+ * The size, version and effects fields are copied from the device's ECS
+ * feature entry by cxl_memdev_ecs_init(). @num_media_frus is derived there
+ * from @get_feat_size and bounds the per-FRU arrays in the Get/Set Feature
+ * payloads.
+ */
+struct cxl_ecs_context {
+ u16 num_media_frus;
+ u16 get_feat_size;
+ u16 set_feat_size;
+ u8 get_version;
+ u8 set_version;
+ u16 effects;
+ struct cxl_memdev *cxlmd;
+};
+
+/*
+ * See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-225 DDR5 ECS Control Feature
+ * Readable Attributes.
+ *
+ * The Get Feature payload is one log capability byte followed by one
+ * struct cxl_ecs_fru_rd_attrbs per media FRU. This file decodes bits 1:0 of
+ * @ecs_log_cap as the log entry type, and bits 2:0 / bit 3 of @ecs_config
+ * as the threshold index / count mode.
+ */
+struct cxl_ecs_fru_rd_attrbs {
+ u8 ecs_cap;
+ __le16 ecs_config;
+ u8 ecs_flags;
+} __packed;
+
+struct cxl_ecs_rd_attrbs {
+ u8 ecs_log_cap;
+ struct cxl_ecs_fru_rd_attrbs fru_attrbs[];
+} __packed;
+
+/*
+ * See CXL spec rev 3.2 @8.2.10.9.11.2 Table 8-226 DDR5 ECS Control Feature
+ * Writable Attributes.
+ *
+ * The Set Feature payload is one log capability byte followed by one
+ * struct cxl_ecs_fru_wr_attrbs per media FRU. Note that the per-FRU write
+ * entry is smaller than the per-FRU read entry: only @ecs_config is
+ * writable.
+ */
+struct cxl_ecs_fru_wr_attrbs {
+ __le16 ecs_config;
+} __packed;
+
+struct cxl_ecs_wr_attrbs {
+ u8 ecs_log_cap;
+ struct cxl_ecs_fru_wr_attrbs fru_attrbs[];
+} __packed;
+
+/*
+ * ECS bitfields. LOG_ENTRY_TYPE applies to the log capability byte; the
+ * THRESHOLD_COUNT, COUNT_MODE and RESET_COUNTER masks apply to the per-FRU
+ * ecs_config word.
+ */
+#define CXL_ECS_LOG_ENTRY_TYPE_MASK GENMASK(1, 0)
+#define CXL_ECS_REALTIME_REPORT_CAP_MASK BIT(0)
+#define CXL_ECS_THRESHOLD_COUNT_MASK GENMASK(2, 0)
+#define CXL_ECS_COUNT_MODE_MASK BIT(3)
+#define CXL_ECS_RESET_COUNTER_MASK BIT(4)
+#define CXL_ECS_RESET_COUNTER 1
+
+/* Threshold counts accepted from user space. */
+enum {
+ ECS_THRESHOLD_256 = 256,
+ ECS_THRESHOLD_1024 = 1024,
+ ECS_THRESHOLD_4096 = 4096,
+};
+
+/* Encodings of the above counts in the 3-bit threshold field. */
+enum {
+ ECS_THRESHOLD_IDX_256 = 3,
+ ECS_THRESHOLD_IDX_1024 = 4,
+ ECS_THRESHOLD_IDX_4096 = 5,
+};
+
+/*
+ * Threshold field encoding -> count. Only encodings 3..5 are populated;
+ * entries 0..2 are implicitly zero and the table has no entries for
+ * encodings 6 and 7.
+ */
+static const u16 ecs_supp_threshold[] = {
+ [ECS_THRESHOLD_IDX_256] = 256,
+ [ECS_THRESHOLD_IDX_1024] = 1024,
+ [ECS_THRESHOLD_IDX_4096] = 4096,
+};
+
+/* Values of the log entry type field. */
+enum {
+ ECS_LOG_ENTRY_TYPE_DRAM = 0x0,
+ ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU = 0x1,
+};
+
+/* Values of the count mode bit. */
+enum cxl_ecs_count_mode {
+ ECS_MODE_COUNTS_ROWS = 0,
+ ECS_MODE_COUNTS_CODEWORDS = 1,
+};
+
+/*
+ * Read the ECS attributes of a memdev and return the log capability byte
+ * plus the configuration word of media FRU @fru_id (host byte order).
+ *
+ * Returns 0 on success, -ENOMEM if the read buffer cannot be allocated, or
+ * -EIO if cxl_get_feature() returns no data.
+ */
+static int cxl_mem_ecs_get_attrbs(struct device *dev,
+				  struct cxl_ecs_context *cxl_ecs_ctx,
+				  int fru_id, u8 *log_cap, u16 *config)
+{
+	struct cxl_mailbox *cxl_mbox = &cxl_ecs_ctx->cxlmd->cxlds->cxl_mbox;
+	const size_t rd_len = cxl_ecs_ctx->get_feat_size;
+	struct cxl_ecs_rd_attrbs *attrbs __free(kvfree) =
+		kvzalloc(rd_len, GFP_KERNEL);
+
+	if (!attrbs)
+		return -ENOMEM;
+
+	if (!cxl_get_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
+			     CXL_GET_FEAT_SEL_CURRENT_VALUE, attrbs, rd_len,
+			     0, NULL))
+		return -EIO;
+
+	*log_cap = attrbs->ecs_log_cap;
+	*config = le16_to_cpu(attrbs->fru_attrbs[fru_id].ecs_config);
+
+	return 0;
+}
+
+/*
+ * Write a new ECS configuration for media FRU @fru_id.
+ *
+ * The Set Feature payload covers all FRUs, so the current configuration of
+ * every FRU is read back first and copied into the write buffer; only the
+ * entry for @fru_id and the shared log capability byte are replaced.
+ *
+ * Returns -EINVAL if @fru_id is out of range or the device-reported Set
+ * Feature size cannot hold one entry per FRU, -ENOMEM on allocation failure,
+ * -EIO if the read-back returns no data, otherwise the result of
+ * cxl_set_feature().
+ */
+static int cxl_mem_ecs_set_attrbs(struct device *dev,
+				  struct cxl_ecs_context *cxl_ecs_ctx,
+				  int fru_id, u8 log_cap, u16 config)
+{
+	struct cxl_memdev *cxlmd = cxl_ecs_ctx->cxlmd;
+	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
+	struct cxl_ecs_fru_rd_attrbs *fru_rd_attrbs;
+	struct cxl_ecs_fru_wr_attrbs *fru_wr_attrbs;
+	size_t rd_data_size, wr_data_size;
+	u16 num_media_frus, count;
+	size_t data_size;
+
+	num_media_frus = cxl_ecs_ctx->num_media_frus;
+	rd_data_size = cxl_ecs_ctx->get_feat_size;
+	wr_data_size = cxl_ecs_ctx->set_feat_size;
+
+	if (fru_id < 0 || fru_id >= num_media_frus)
+		return -EINVAL;
+
+	/*
+	 * The FRU count comes from the Get Feature size while the write
+	 * buffer is sized by the Set Feature size. Both are device supplied,
+	 * so make sure the write buffer really has room for every FRU entry
+	 * before filling it.
+	 */
+	if (wr_data_size < sizeof(struct cxl_ecs_wr_attrbs) +
+			   num_media_frus * sizeof(struct cxl_ecs_fru_wr_attrbs))
+		return -EINVAL;
+
+	struct cxl_ecs_rd_attrbs *rd_attrbs __free(kvfree) =
+		kvzalloc(rd_data_size, GFP_KERNEL);
+	if (!rd_attrbs)
+		return -ENOMEM;
+
+	data_size = cxl_get_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
+				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
+				    rd_data_size, 0, NULL);
+	if (!data_size)
+		return -EIO;
+
+	struct cxl_ecs_wr_attrbs *wr_attrbs __free(kvfree) =
+		kvzalloc(wr_data_size, GFP_KERNEL);
+	if (!wr_attrbs)
+		return -ENOMEM;
+
+	/*
+	 * Fill writable attributes from the current attributes read
+	 * for all the media FRUs.
+	 */
+	fru_rd_attrbs = rd_attrbs->fru_attrbs;
+	fru_wr_attrbs = wr_attrbs->fru_attrbs;
+	wr_attrbs->ecs_log_cap = log_cap;
+	for (count = 0; count < num_media_frus; count++)
+		fru_wr_attrbs[count].ecs_config =
+			fru_rd_attrbs[count].ecs_config;
+
+	fru_wr_attrbs[fru_id].ecs_config = cpu_to_le16(config);
+
+	return cxl_set_feature(cxl_mbox, &CXL_FEAT_ECS_UUID,
+			       cxl_ecs_ctx->set_version, wr_attrbs,
+			       wr_data_size,
+			       CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
+			       0, NULL);
+}
+
+/* Extract the log entry type from the log capability byte; @config unused. */
+static u8 cxl_get_ecs_log_entry_type(u8 log_cap, u16 config)
+{
+	u8 entry_type = FIELD_GET(CXL_ECS_LOG_ENTRY_TYPE_MASK, log_cap);
+
+	return entry_type;
+}
+
+/*
+ * Translate the threshold field of @config into a threshold count.
+ * @log_cap is unused.
+ *
+ * The field is 3 bits wide (0..7) but the lookup table only extends to the
+ * largest supported encoding. Encodings beyond the table return 0, the same
+ * value the table yields for the unpopulated low encodings, instead of
+ * reading past the end of the array.
+ */
+static u16 cxl_get_ecs_threshold(u8 log_cap, u16 config)
+{
+	u8 index = FIELD_GET(CXL_ECS_THRESHOLD_COUNT_MASK, config);
+
+	if (index >= ARRAY_SIZE(ecs_supp_threshold))
+		return 0;
+
+	return ecs_supp_threshold[index];
+}
+
+/* Extract the count mode bit from the FRU config word; @log_cap unused. */
+static u8 cxl_get_ecs_count_mode(u8 log_cap, u16 config)
+{
+	u8 count_mode = FIELD_GET(CXL_ECS_COUNT_MODE_MASK, config);
+
+	return count_mode;
+}
+
+/*
+ * Generate cxl_ecs_get_<attrb>(): an EDAC ECS getter that reads the current
+ * log capability and FRU config from the device and decodes one attribute
+ * with the matching cxl_get_ecs_<attrb>() helper.
+ */
+#define CXL_ECS_GET_ATTR(attrb) \
+ static int cxl_ecs_get_##attrb(struct device *dev, void *drv_data, \
+ int fru_id, u32 *val) \
+ { \
+ struct cxl_ecs_context *ctx = drv_data; \
+ u8 log_cap; \
+ u16 config; \
+ int ret; \
+ \
+ ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap, \
+ &config); \
+ if (ret) \
+ return ret; \
+ \
+ *val = cxl_get_ecs_##attrb(log_cap, config); \
+ \
+ return 0; \
+ }
+
+CXL_ECS_GET_ATTR(log_entry_type)
+CXL_ECS_GET_ATTR(count_mode)
+CXL_ECS_GET_ATTR(threshold)
+
+/*
+ * Validate @val as a log entry type and encode it into @log_cap, replacing
+ * the whole byte. @config is left untouched. Returns -EINVAL for any other
+ * value.
+ */
+static int cxl_set_ecs_log_entry_type(struct device *dev, u8 *log_cap,
+				      u16 *config, u32 val)
+{
+	switch (val) {
+	case ECS_LOG_ENTRY_TYPE_DRAM:
+	case ECS_LOG_ENTRY_TYPE_MEM_MEDIA_FRU:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	*log_cap = FIELD_PREP(CXL_ECS_LOG_ENTRY_TYPE_MASK, val);
+
+	return 0;
+}
+
+/*
+ * Encode threshold count @val (256, 1024 or 4096) into the threshold field
+ * of @config. The field is cleared before @val is validated, so @config is
+ * modified even when -EINVAL is returned for an unsupported count.
+ */
+static int cxl_set_ecs_threshold(struct device *dev, u8 *log_cap, u16 *config,
+				 u32 val)
+{
+	u16 threshold_idx;
+
+	*config &= ~CXL_ECS_THRESHOLD_COUNT_MASK;
+
+	if (val == ECS_THRESHOLD_256) {
+		threshold_idx = ECS_THRESHOLD_IDX_256;
+	} else if (val == ECS_THRESHOLD_1024) {
+		threshold_idx = ECS_THRESHOLD_IDX_1024;
+	} else if (val == ECS_THRESHOLD_4096) {
+		threshold_idx = ECS_THRESHOLD_IDX_4096;
+	} else {
+		dev_dbg(dev, "Invalid CXL ECS threshold count(%u) to set\n",
+			val);
+		dev_dbg(dev, "Supported ECS threshold counts: %u, %u, %u\n",
+			ECS_THRESHOLD_256, ECS_THRESHOLD_1024,
+			ECS_THRESHOLD_4096);
+		return -EINVAL;
+	}
+
+	*config |= FIELD_PREP(CXL_ECS_THRESHOLD_COUNT_MASK, threshold_idx);
+
+	return 0;
+}
+
+/*
+ * Validate @val as a count mode (rows or codewords) and store it in the
+ * count mode bit of @config. Returns -EINVAL, leaving @config untouched,
+ * for any other value.
+ */
+static int cxl_set_ecs_count_mode(struct device *dev, u8 *log_cap, u16 *config,
+				  u32 val)
+{
+	switch (val) {
+	case ECS_MODE_COUNTS_ROWS:
+	case ECS_MODE_COUNTS_CODEWORDS:
+		break;
+	default:
+		dev_dbg(dev, "Invalid CXL ECS scrub mode(%d) to set\n", val);
+		dev_dbg(dev,
+			"Supported ECS Modes: 0: ECS counts rows with errors,"
+			" 1: ECS counts codewords with errors\n");
+		return -EINVAL;
+	}
+
+	*config = (*config & ~CXL_ECS_COUNT_MODE_MASK) |
+		  FIELD_PREP(CXL_ECS_COUNT_MODE_MASK, val);
+
+	return 0;
+}
+
+/*
+ * Set the reset-counter bit of @config. The only accepted @val is
+ * CXL_ECS_RESET_COUNTER; anything else returns -EINVAL and leaves @config
+ * untouched.
+ */
+static int cxl_set_ecs_reset_counter(struct device *dev, u8 *log_cap,
+				     u16 *config, u32 val)
+{
+	if (val == CXL_ECS_RESET_COUNTER) {
+		*config = (*config & ~CXL_ECS_RESET_COUNTER_MASK) |
+			  FIELD_PREP(CXL_ECS_RESET_COUNTER_MASK, val);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Generate cxl_ecs_set_<attrb>(): an EDAC ECS setter that requires
+ * CAP_SYS_RAWIO, reads the current log capability and FRU config, lets the
+ * matching cxl_set_ecs_<attrb>() helper validate and merge @val into them,
+ * and writes the result back to the device.
+ */
+#define CXL_ECS_SET_ATTR(attrb) \
+ static int cxl_ecs_set_##attrb(struct device *dev, void *drv_data, \
+ int fru_id, u32 val) \
+ { \
+ struct cxl_ecs_context *ctx = drv_data; \
+ u8 log_cap; \
+ u16 config; \
+ int ret; \
+ \
+ if (!capable(CAP_SYS_RAWIO)) \
+ return -EPERM; \
+ \
+ ret = cxl_mem_ecs_get_attrbs(dev, ctx, fru_id, &log_cap, \
+ &config); \
+ if (ret) \
+ return ret; \
+ \
+ ret = cxl_set_ecs_##attrb(dev, &log_cap, &config, val); \
+ if (ret) \
+ return ret; \
+ \
+ return cxl_mem_ecs_set_attrbs(dev, ctx, fru_id, log_cap, \
+ config); \
+ }
+CXL_ECS_SET_ATTR(log_entry_type)
+CXL_ECS_SET_ATTR(count_mode)
+CXL_ECS_SET_ATTR(reset_counter)
+CXL_ECS_SET_ATTR(threshold)
+
+/*
+ * EDAC ECS callbacks, wired to the functions generated by CXL_ECS_GET_ATTR()
+ * and CXL_ECS_SET_ATTR(). The reset op has no getter counterpart.
+ */
+static const struct edac_ecs_ops cxl_ecs_ops = {
+ .get_log_entry_type = cxl_ecs_get_log_entry_type,
+ .set_log_entry_type = cxl_ecs_set_log_entry_type,
+ .get_mode = cxl_ecs_get_count_mode,
+ .set_mode = cxl_ecs_set_count_mode,
+ .reset = cxl_ecs_set_reset_counter,
+ .get_threshold = cxl_ecs_get_threshold,
+ .set_threshold = cxl_ecs_set_threshold,
+};
+
+/*
+ * Set up the ECS EDAC feature descriptor for one memdev.
+ *
+ * The number of media FRUs is derived from the device-reported Get Feature
+ * size: whatever remains after the fixed header, divided by the per-FRU
+ * entry size.
+ *
+ * Returns -EOPNOTSUPP if the device has no ECS feature entry, the feature is
+ * not changeable, or the reported size leaves no room for at least one FRU
+ * entry; -ENOMEM if the context cannot be allocated; 0 on success with
+ * @ras_feature filled in for EDAC registration.
+ */
+static int cxl_memdev_ecs_init(struct cxl_memdev *cxlmd,
+			       struct edac_dev_feature *ras_feature)
+{
+	struct cxl_ecs_context *cxl_ecs_ctx;
+	struct cxl_feat_entry *feat_entry;
+	u16 get_feat_size;
+	int num_media_frus;
+
+	feat_entry =
+		cxl_feature_info(to_cxlfs(cxlmd->cxlds), &CXL_FEAT_ECS_UUID);
+	if (IS_ERR(feat_entry))
+		return -EOPNOTSUPP;
+
+	if (!(le32_to_cpu(feat_entry->flags) & CXL_FEATURE_F_CHANGEABLE))
+		return -EOPNOTSUPP;
+
+	/*
+	 * Guard the subtraction below: a reported size smaller than the
+	 * fixed header would wrap around in size_t arithmetic and produce a
+	 * bogus, non-zero FRU count.
+	 */
+	get_feat_size = le16_to_cpu(feat_entry->get_feat_size);
+	if (get_feat_size < sizeof(struct cxl_ecs_rd_attrbs))
+		return -EOPNOTSUPP;
+
+	num_media_frus = (get_feat_size - sizeof(struct cxl_ecs_rd_attrbs)) /
+			 sizeof(struct cxl_ecs_fru_rd_attrbs);
+	if (!num_media_frus)
+		return -EOPNOTSUPP;
+
+	cxl_ecs_ctx =
+		devm_kzalloc(&cxlmd->dev, sizeof(*cxl_ecs_ctx), GFP_KERNEL);
+	if (!cxl_ecs_ctx)
+		return -ENOMEM;
+
+	*cxl_ecs_ctx = (struct cxl_ecs_context){
+		.get_feat_size = get_feat_size,
+		.set_feat_size = le16_to_cpu(feat_entry->set_feat_size),
+		.get_version = feat_entry->get_feat_ver,
+		.set_version = feat_entry->set_feat_ver,
+		.effects = le16_to_cpu(feat_entry->effects),
+		.num_media_frus = num_media_frus,
+		.cxlmd = cxlmd,
+	};
+
+	ras_feature->ft_type = RAS_FEAT_ECS;
+	ras_feature->ecs_ops = &cxl_ecs_ops;
+	ras_feature->ctx = cxl_ecs_ctx;
+	ras_feature->ecs_info.num_media_frus = num_media_frus;
+
+	return 0;
+}
+
+/*
+ * Perform Maintenance CXL 3.2 Spec 8.2.10.7.1
+ */
+
+/*
+ * Perform Maintenance input payload
+ * CXL rev 3.2 section 8.2.10.7.1 Table 8-117
+ *
+ * Fixed two-byte header selecting the maintenance operation class and
+ * subclass; cxl_perform_maintenance() places the operation-specific data
+ * directly after it.
+ */
+struct cxl_mbox_maintenance_hdr {
+ u8 op_class;
+ u8 op_subclass;
+} __packed;
+
+/*
+ * Issue a Perform Maintenance mailbox command.
+ *
+ * @class and @subclass select the operation; @data_in (@data_in_size bytes)
+ * is the operation-specific payload, copied in after the two-byte header.
+ *
+ * Returns -ENOMEM if header plus data do not fit in the mailbox payload or
+ * the payload buffer cannot be allocated, otherwise the result of
+ * cxl_internal_send_cmd().
+ */
+static int cxl_perform_maintenance(struct cxl_mailbox *cxl_mbox, u8 class,
+				   u8 subclass, void *data_in,
+				   size_t data_in_size)
+{
+	struct cxl_memdev_maintenance_pi {
+		struct cxl_mbox_maintenance_hdr hdr;
+		u8 data[];
+	} __packed;
+	const size_t hdr_size = sizeof(struct cxl_mbox_maintenance_hdr);
+	struct cxl_mbox_cmd mbox_cmd;
+
+	/*
+	 * Check minimum mbox payload size is available for the maintenance
+	 * data transfer. Done before allocating, and phrased as a
+	 * subtraction so a huge @data_in_size cannot wrap the sum and slip
+	 * past the check.
+	 */
+	if (cxl_mbox->payload_size < hdr_size ||
+	    data_in_size > cxl_mbox->payload_size - hdr_size)
+		return -ENOMEM;
+
+	struct cxl_memdev_maintenance_pi *pi __free(kvfree) =
+		kvzalloc(cxl_mbox->payload_size, GFP_KERNEL);
+	if (!pi)
+		return -ENOMEM;
+
+	pi->hdr.op_class = class;
+	pi->hdr.op_subclass = subclass;
+	memcpy(pi->data, data_in, data_in_size);
+
+	mbox_cmd = (struct cxl_mbox_cmd){
+		.opcode = CXL_MBOX_OP_DO_MAINTENANCE,
+		.size_in = hdr_size + data_in_size,
+		.payload_in = pi,
+	};
+
+	return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
+}
+
+/*
+ * Support for finding whether the attributes of a memory operation
+ * are from the current boot or not.
+ */
+
+/*
+ * Per-memdev cache of error event records, reached via cxlmd->err_rec_array.
+ * Both xarrays are looked up by DPA: @rec_gen_media yields
+ * struct cxl_event_gen_media entries and @rec_dram yields
+ * struct cxl_event_dram entries.
+ */
+struct cxl_mem_err_rec {
+ struct xarray rec_gen_media;
+ struct xarray rec_dram;
+};
+
+/*
+ * Kind of memory repair operation being requested. The type determines
+ * which fields of a cached DRAM event record must match the repair
+ * attributes (see cxl_find_rec_dram()).
+ */
+enum cxl_mem_repair_type {
+ CXL_PPR,
+ CXL_CACHELINE_SPARING,
+ CXL_ROW_SPARING,
+ CXL_BANK_SPARING,
+ CXL_RANK_SPARING,
+ CXL_REPAIR_MAX, /* presumably a count/sentinel, not a repair type - confirm */
+};
+
+/**
+ * struct cxl_mem_repair_attrbs - CXL memory repair attributes
+ * @dpa: DPA of memory to repair
+ * @nibble_mask: nibble mask, identifies one or more nibbles on the memory bus
+ * @row: row of memory to repair
+ * @column: column of memory to repair
+ * @channel: channel of memory to repair
+ * @sub_channel: sub channel of memory to repair
+ * @rank: rank of memory to repair
+ * @bank_group: bank group of memory to repair
+ * @bank: bank of memory to repair
+ * @repair_type: repair type. For eg. PPR, memory sparing etc.
+ *
+ * Used as the search key for cached event records: @dpa selects the record
+ * and the remaining fields are compared against it according to
+ * @repair_type.
+ */
+struct cxl_mem_repair_attrbs {
+ u64 dpa;
+ u32 nibble_mask;
+ u32 row;
+ u16 column;
+ u8 channel;
+ u8 sub_channel;
+ u8 rank;
+ u8 bank_group;
+ u8 bank;
+ enum cxl_mem_repair_type repair_type;
+};
+
+/*
+ * Look up the cached general media event record for attrbs->dpa.
+ *
+ * A record is only returned for PPR repairs; for every other repair type,
+ * or when the memdev has no record cache or no record at that DPA, NULL is
+ * returned.
+ */
+static struct cxl_event_gen_media *
+cxl_find_rec_gen_media(struct cxl_memdev *cxlmd,
+		       struct cxl_mem_repair_attrbs *attrbs)
+{
+	struct cxl_mem_err_rec *rec_cache = cxlmd->err_rec_array;
+	struct cxl_event_gen_media *found;
+
+	if (!rec_cache)
+		return NULL;
+
+	found = xa_load(&rec_cache->rec_gen_media, attrbs->dpa);
+	if (found && attrbs->repair_type == CXL_PPR)
+		return found;
+
+	return NULL;
+}
+
+/*
+ * Look up the cached DRAM event record for attrbs->dpa and return it only if
+ * it matches the repair attributes for the requested repair type.
+ *
+ * The record must flag channel and rank as valid for every repair type. Each
+ * sparing type additionally requires the record to flag as valid, and to
+ * match, the fields that define its granularity (bank group/bank/row/column
+ * for cacheline, bank group/bank/row for row, bank group/bank for bank).
+ * The nibble mask (and, for cacheline sparing, the sub channel) is only
+ * compared when the record flags it as valid.
+ *
+ * Returns the record on a match, NULL otherwise.
+ */
+static struct cxl_event_dram *
+cxl_find_rec_dram(struct cxl_memdev *cxlmd,
+ struct cxl_mem_repair_attrbs *attrbs)
+{
+ struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
+ struct cxl_event_dram *rec;
+ u16 validity_flags;
+
+ if (!array_rec)
+ return NULL;
+
+ rec = xa_load(&array_rec->rec_dram, attrbs->dpa);
+ if (!rec)
+ return NULL;
+
+ /* Channel and rank must be valid in the record for any repair type */
+ validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);
+ if (!(validity_flags & CXL_DER_VALID_CHANNEL) ||
+ !(validity_flags & CXL_DER_VALID_RANK))
+ return NULL;
+
+ switch (attrbs->repair_type) {
+ case CXL_PPR:
+ /* Only the nibble mask is compared, and only if the record has one */
+ if (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
+ get_unaligned_le24(rec->nibble_mask) == attrbs->nibble_mask)
+ return rec;
+ break;
+ case CXL_CACHELINE_SPARING:
+ if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
+ !(validity_flags & CXL_DER_VALID_BANK) ||
+ !(validity_flags & CXL_DER_VALID_ROW) ||
+ !(validity_flags & CXL_DER_VALID_COLUMN))
+ return NULL;
+
+ if (rec->media_hdr.channel == attrbs->channel &&
+ rec->media_hdr.rank == attrbs->rank &&
+ rec->bank_group == attrbs->bank_group &&
+ rec->bank == attrbs->bank &&
+ get_unaligned_le24(rec->row) == attrbs->row &&
+ get_unaligned_le16(rec->column) == attrbs->column &&
+ (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
+ get_unaligned_le24(rec->nibble_mask) ==
+ attrbs->nibble_mask) &&
+ (!(validity_flags & CXL_DER_VALID_SUB_CHANNEL) ||
+ rec->sub_channel == attrbs->sub_channel))
+ return rec;
+ break;
+ case CXL_ROW_SPARING:
+ if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
+ !(validity_flags & CXL_DER_VALID_BANK) ||
+ !(validity_flags & CXL_DER_VALID_ROW))
+ return NULL;
+
+ if (rec->media_hdr.channel == attrbs->channel &&
+ rec->media_hdr.rank == attrbs->rank &&
+ rec->bank_group == attrbs->bank_group &&
+ rec->bank == attrbs->bank &&
+ get_unaligned_le24(rec->row) == attrbs->row &&
+ (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
+ get_unaligned_le24(rec->nibble_mask) ==
+ attrbs->nibble_mask))
+ return rec;
+ break;
+ case CXL_BANK_SPARING:
+ if (!(validity_flags & CXL_DER_VALID_BANK_GROUP) ||
+ !(validity_flags & CXL_DER_VALID_BANK))
+ return NULL;
+
+ if (rec->media_hdr.channel == attrbs->channel &&
+ rec->media_hdr.rank == attrbs->rank &&
+ rec->bank_group == attrbs->bank_group &&
+ rec->bank == attrbs->bank &&
+ (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
+ get_unaligned_le24(rec->nibble_mask) ==
+ attrbs->nibble_mask))
+ return rec;
+ break;
+ case CXL_RANK_SPARING:
+ if (rec->media_hdr.channel == attrbs->channel &&
+ rec->media_hdr.rank == attrbs->rank &&
+ (!(validity_flags & CXL_DER_VALID_NIBBLE) ||
+ get_unaligned_le24(rec->nibble_mask) ==
+ attrbs->nibble_mask))
+ return rec;
+ break;
+ default:
+ return NULL;
+ }
+
+ /* Reached when a known repair type did not match the record */
+ return NULL;
+}
+
+#define CXL_MAX_STORAGE_DAYS 10
+#define CXL_MAX_STORAGE_TIME_SECS (CXL_MAX_STORAGE_DAYS * 24 * 60 * 60)
+
+/*
+ * Drop cached General Media records that are at least
+ * CXL_MAX_STORAGE_TIME_SECS older than @cur_rec.
+ *
+ * Ages are derived from the event timestamps, which this code treats as
+ * nanoseconds (the difference is divided by 10^9 to get seconds).
+ *
+ * @rec_xarray: cache of General Media records
+ * @cur_rec: record whose timestamp serves as "now"
+ */
+static void cxl_del_expired_gmedia_recs(struct xarray *rec_xarray,
+					struct cxl_event_gen_media *cur_rec)
+{
+	u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
+	struct cxl_event_gen_media *rec;
+	unsigned long index;
+	u64 rec_ts;
+
+	xa_for_each(rec_xarray, index, rec) {
+		rec_ts = le64_to_cpu(rec->media_hdr.hdr.timestamp);
+
+		/*
+		 * A record stamped later than @cur_rec is not expired. Without
+		 * this check the unsigned subtraction below would wrap to a
+		 * huge age and the newer record would be erased.
+		 */
+		if (rec_ts > cur_ts)
+			continue;
+
+		if ((cur_ts - rec_ts) / 1000000000ULL >=
+		    CXL_MAX_STORAGE_TIME_SECS) {
+			xa_erase(rec_xarray, index);
+			kfree(rec);
+		}
+	}
+}
+
+/*
+ * Drop cached DRAM records that are at least CXL_MAX_STORAGE_TIME_SECS
+ * older than @cur_rec.
+ *
+ * Ages are derived from the event timestamps, which this code treats as
+ * nanoseconds (the difference is divided by 10^9 to get seconds).
+ *
+ * @rec_xarray: cache of DRAM records
+ * @cur_rec: record whose timestamp serves as "now"
+ */
+static void cxl_del_expired_dram_recs(struct xarray *rec_xarray,
+				      struct cxl_event_dram *cur_rec)
+{
+	u64 cur_ts = le64_to_cpu(cur_rec->media_hdr.hdr.timestamp);
+	struct cxl_event_dram *rec;
+	unsigned long index;
+	u64 rec_ts;
+
+	xa_for_each(rec_xarray, index, rec) {
+		rec_ts = le64_to_cpu(rec->media_hdr.hdr.timestamp);
+
+		/*
+		 * A record stamped later than @cur_rec is not expired. Without
+		 * this check the unsigned subtraction below would wrap to a
+		 * huge age and the newer record would be erased.
+		 */
+		if (rec_ts > cur_ts)
+			continue;
+
+		if ((cur_ts - rec_ts) / 1000000000ULL >=
+		    CXL_MAX_STORAGE_TIME_SECS) {
+			xa_erase(rec_xarray, index);
+			kfree(rec);
+		}
+	}
+}
+
+#define CXL_MAX_REC_STORAGE_COUNT 200
+
+/*
+ * Cap a record cache at CXL_MAX_REC_STORAGE_COUNT entries.
+ *
+ * When the cache holds more than the limit, the surplus is erased and freed
+ * in xa_for_each() iteration order. Entries are therefore trimmed from the
+ * lowest index upward, not by timestamp.
+ */
+static void cxl_del_overflow_old_recs(struct xarray *rec_xarray)
+{
+	unsigned long index, nr_recs = 0, excess;
+	void *entry;
+
+	xa_for_each(rec_xarray, index, entry)
+		nr_recs++;
+
+	if (nr_recs <= CXL_MAX_REC_STORAGE_COUNT)
+		return;
+
+	excess = nr_recs - CXL_MAX_REC_STORAGE_COUNT;
+	xa_for_each(rec_xarray, index, entry) {
+		xa_erase(rec_xarray, index);
+		kfree(entry);
+		if (!--excess)
+			break;
+	}
+}
+
+/*
+ * Cache a copy of a General Media event so a later repair request can be
+ * checked against it.
+ *
+ * The copy is stored under the event's physical address field, replacing
+ * and freeing any record already at that index. Expired and surplus
+ * records are pruned afterwards.
+ * NOTE(review): the whole phys_addr field is used as the index, while
+ * lookups use the DPA supplied by the user - confirm the two always agree.
+ *
+ * Returns 0 on success or when record caching is not in use (config option
+ * disabled or no cache allocated), -ENOMEM if the copy cannot be allocated,
+ * or the xa_store() error.
+ */
+int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt)
+{
+	struct cxl_mem_err_rec *err_recs = cxlmd->err_rec_array;
+	struct cxl_event_gen_media *new_rec;
+	struct xarray *cache;
+	void *prev;
+
+	if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !err_recs)
+		return 0;
+
+	new_rec = kmemdup(&evt->gen_media, sizeof(*new_rec), GFP_KERNEL);
+	if (!new_rec)
+		return -ENOMEM;
+
+	cache = &err_recs->rec_gen_media;
+	prev = xa_store(cache, le64_to_cpu(new_rec->media_hdr.phys_addr),
+			new_rec, GFP_KERNEL);
+	if (xa_is_err(prev)) {
+		/* The store failed, so the cache never took the copy */
+		kfree(new_rec);
+		return xa_err(prev);
+	}
+
+	/* Release the record that was displaced, if any */
+	kfree(prev);
+
+	cxl_del_expired_gmedia_recs(cache, new_rec);
+	cxl_del_overflow_old_recs(cache);
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_store_rec_gen_media, "CXL");
+
+/*
+ * Cache a copy of a DRAM event so a later repair request can be checked
+ * against it.
+ *
+ * The copy is stored under the event's physical address field, replacing
+ * and freeing any record already at that index. Expired and surplus
+ * records are pruned afterwards.
+ * NOTE(review): the whole phys_addr field is used as the index, while
+ * lookups use the DPA supplied by the user - confirm the two always agree.
+ *
+ * Returns 0 on success or when record caching is not in use (config option
+ * disabled or no cache allocated), -ENOMEM if the copy cannot be allocated,
+ * or the xa_store() error.
+ */
+int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt)
+{
+	struct cxl_mem_err_rec *err_recs = cxlmd->err_rec_array;
+	struct cxl_event_dram *new_rec;
+	struct xarray *cache;
+	void *prev;
+
+	if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !err_recs)
+		return 0;
+
+	new_rec = kmemdup(&evt->dram, sizeof(*new_rec), GFP_KERNEL);
+	if (!new_rec)
+		return -ENOMEM;
+
+	cache = &err_recs->rec_dram;
+	prev = xa_store(cache, le64_to_cpu(new_rec->media_hdr.phys_addr),
+			new_rec, GFP_KERNEL);
+	if (xa_is_err(prev)) {
+		/* The store failed, so the cache never took the copy */
+		kfree(new_rec);
+		return xa_err(prev);
+	}
+
+	/* Release the record that was displaced, if any */
+	kfree(prev);
+
+	cxl_del_expired_dram_recs(cache, new_rec);
+	cxl_del_overflow_old_recs(cache);
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_store_rec_dram, "CXL");
+
+/*
+ * A memdev's memory counts as online when it has an endpoint port and that
+ * port reports at least one committed decoder.
+ */
+static bool cxl_is_memdev_memory_online(const struct cxl_memdev *cxlmd)
+{
+	struct cxl_port *endpoint = cxlmd->endpoint;
+
+	return endpoint && cxl_num_decoders_committed(endpoint);
+}
+
+/*
+ * CXL memory sparing control
+ *
+ * Sparing granularities, listed in the same order as the entries of
+ * mem_sparing_desc[]. CXL_MEM_SPARING_MAX is the number of granularities
+ * and bounds the loop that registers them.
+ */
+enum cxl_mem_sparing_granularity {
+ CXL_MEM_SPARING_CACHELINE,
+ CXL_MEM_SPARING_ROW,
+ CXL_MEM_SPARING_BANK,
+ CXL_MEM_SPARING_RANK,
+ CXL_MEM_SPARING_MAX
+};
+
+/*
+ * Per-feature state for one memory sparing instance.
+ *
+ * The size/version/effects fields are copied from the feature entry at init
+ * time. op_class, op_subclass and the cap_* flags come from the feature's
+ * readable attributes. The location fields (channel .. sub_channel, dpa,
+ * nibble_mask) hold the values written through the EDAC attribute setters
+ * and are used to build the sparing request.
+ */
+struct cxl_mem_sparing_context {
+ struct cxl_memdev *cxlmd;
+ uuid_t repair_uuid; /* UUID of the sparing feature driven by this context */
+ u16 get_feat_size;
+ u16 set_feat_size;
+ u16 effects;
+ u8 instance; /* EDAC mem_repair instance number */
+ u8 get_version;
+ u8 set_version;
+ u8 op_class; /* maintenance operation class sent with the request */
+ u8 op_subclass; /* maintenance operation subclass sent with the request */
+ bool cap_safe_when_in_use; /* true when restriction flag bit 0 is clear */
+ bool cap_hard_sparing;
+ bool cap_soft_sparing;
+ u8 channel;
+ u8 rank;
+ u8 bank_group;
+ u32 nibble_mask; /* written to the payload as a 24-bit value */
+ u64 dpa;
+ u32 row; /* written to the payload as a 24-bit value */
+ u16 column;
+ u8 bank;
+ u8 sub_channel;
+ enum edac_mem_repair_type repair_type;
+ bool persist_mode; /* true requests hard sparing, false soft sparing */
+};
+
+/* Bits of the restriction flags read from the sparing feature attributes */
+#define CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK BIT(0)
+#define CXL_SPARING_RD_CAP_HARD_SPARING_MASK BIT(1)
+#define CXL_SPARING_RD_CAP_SOFT_SPARING_MASK BIT(2)
+
+#define CXL_SPARING_WR_DEVICE_INITIATED_MASK BIT(0)
+
+/* Bits of the flags byte in the sparing maintenance input payload */
+#define CXL_SPARING_QUERY_RESOURCE_FLAG BIT(0)
+#define CXL_SET_HARD_SPARING_FLAG BIT(1)
+#define CXL_SPARING_SUB_CHNL_VALID_FLAG BIT(2)
+#define CXL_SPARING_NIB_MASK_VALID_FLAG BIT(3)
+
+/*
+ * The result is inverted with respect to the bit: it is 1 when bit 0 of
+ * the restriction flags is clear and 0 when it is set.
+ */
+#define CXL_GET_SPARING_SAFE_IN_USE(flags) \
+ (FIELD_GET(CXL_SPARING_RD_CAP_SAFE_IN_USE_MASK, \
+ flags) ^ 1)
+#define CXL_GET_CAP_HARD_SPARING(flags) \
+ FIELD_GET(CXL_SPARING_RD_CAP_HARD_SPARING_MASK, \
+ flags)
+#define CXL_GET_CAP_SOFT_SPARING(flags) \
+ FIELD_GET(CXL_SPARING_RD_CAP_SOFT_SPARING_MASK, \
+ flags)
+
+/* Helpers that place a 0/1 value at the matching payload flag bit */
+#define CXL_SET_SPARING_QUERY_RESOURCE(val) \
+ FIELD_PREP(CXL_SPARING_QUERY_RESOURCE_FLAG, val)
+#define CXL_SET_HARD_SPARING(val) \
+ FIELD_PREP(CXL_SET_HARD_SPARING_FLAG, val)
+#define CXL_SET_SPARING_SUB_CHNL_VALID(val) \
+ FIELD_PREP(CXL_SPARING_SUB_CHNL_VALID_FLAG, val)
+#define CXL_SET_SPARING_NIB_MASK_VALID(val) \
+ FIELD_PREP(CXL_SPARING_NIB_MASK_VALID_FLAG, val)
+
+/*
+ * See CXL spec rev 3.2 @8.2.10.7.2.3 Table 8-134 Memory Sparing Feature
+ * Readable Attributes.
+ *
+ * Header embedded at the start of both the sparing and the PPR readable
+ * attribute layouts in this file. Only op_class and op_subclass are
+ * consumed here; they are saved in the feature context and passed on with
+ * the maintenance request.
+ */
+struct cxl_memdev_repair_rd_attrbs_hdr {
+ u8 max_op_latency;
+ __le16 op_cap;
+ __le16 op_mode;
+ u8 op_class;
+ u8 op_subclass;
+ u8 rsvd[9];
+} __packed;
+
+/*
+ * Readable attributes of a memory sparing feature. restriction_flags is
+ * decoded with the CXL_GET_SPARING_SAFE_IN_USE(), CXL_GET_CAP_HARD_SPARING()
+ * and CXL_GET_CAP_SOFT_SPARING() helpers.
+ */
+struct cxl_memdev_sparing_rd_attrbs {
+ struct cxl_memdev_repair_rd_attrbs_hdr hdr;
+ u8 rsvd;
+ __le16 restriction_flags;
+} __packed;
+
+/*
+ * See CXL spec rev 3.2 @8.2.10.7.1.4 Table 8-120 Memory Sparing Input Payload.
+ *
+ * nibble_mask and row are 24-bit little-endian values, filled with
+ * put_unaligned_le24().
+ */
+struct cxl_memdev_sparing_in_payload {
+ u8 flags; /* CXL_SPARING_*_FLAG / CXL_SET_HARD_SPARING_FLAG bits */
+ u8 channel;
+ u8 rank;
+ u8 nibble_mask[3];
+ u8 bank_group;
+ u8 bank;
+ u8 row[3];
+ __le16 column;
+ u8 sub_channel;
+} __packed;
+
+/*
+ * Read the current readable attributes of the sparing feature identified
+ * by the context's UUID and cache the operation class/subclass and the
+ * capability flags in the context.
+ *
+ * Returns 0 on success, -ENOMEM if the read buffer cannot be allocated, or
+ * -EIO if cxl_get_feature() returns no data.
+ */
+static int
+cxl_mem_sparing_get_attrbs(struct cxl_mem_sparing_context *cxl_sparing_ctx)
+{
+	struct cxl_mailbox *cxl_mbox = &cxl_sparing_ctx->cxlmd->cxlds->cxl_mbox;
+	u16 return_code;
+	u16 restrictions;
+	size_t nr_read;
+	struct cxl_memdev_sparing_rd_attrbs *rd_attrbs __free(kfree) =
+		kzalloc(sizeof(*rd_attrbs), GFP_KERNEL);
+
+	if (!rd_attrbs)
+		return -ENOMEM;
+
+	nr_read = cxl_get_feature(cxl_mbox, &cxl_sparing_ctx->repair_uuid,
+				  CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
+				  sizeof(*rd_attrbs), 0, &return_code);
+	if (!nr_read)
+		return -EIO;
+
+	restrictions = le16_to_cpu(rd_attrbs->restriction_flags);
+
+	cxl_sparing_ctx->op_class = rd_attrbs->hdr.op_class;
+	cxl_sparing_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
+	cxl_sparing_ctx->cap_safe_when_in_use =
+		CXL_GET_SPARING_SAFE_IN_USE(restrictions);
+	cxl_sparing_ctx->cap_hard_sparing =
+		CXL_GET_CAP_HARD_SPARING(restrictions);
+	cxl_sparing_ctx->cap_soft_sparing =
+		CXL_GET_CAP_SOFT_SPARING(restrictions);
+
+	return 0;
+}
+
+/*
+ * Translate the sparing context into repair attributes and look up the
+ * cached DRAM event record that matches them.
+ *
+ * DPA, channel, rank and nibble mask are always passed on. Bank group,
+ * bank, row, column and sub-channel are added according to the sparing
+ * granularity. Returns NULL for a repair type that is not a sparing type
+ * or when no matching record is cached.
+ */
+static struct cxl_event_dram *
+cxl_mem_get_rec_dram(struct cxl_memdev *cxlmd,
+		     struct cxl_mem_sparing_context *ctx)
+{
+	struct cxl_mem_repair_attrbs attrbs = {
+		.dpa = ctx->dpa,
+		.channel = ctx->channel,
+		.rank = ctx->rank,
+		.nibble_mask = ctx->nibble_mask,
+	};
+
+	switch (ctx->repair_type) {
+	case EDAC_REPAIR_CACHELINE_SPARING:
+		attrbs.repair_type = CXL_CACHELINE_SPARING;
+		attrbs.sub_channel = ctx->sub_channel;
+		attrbs.column = ctx->column;
+		attrbs.row = ctx->row;
+		attrbs.bank = ctx->bank;
+		attrbs.bank_group = ctx->bank_group;
+		break;
+	case EDAC_REPAIR_ROW_SPARING:
+		attrbs.repair_type = CXL_ROW_SPARING;
+		attrbs.row = ctx->row;
+		attrbs.bank = ctx->bank;
+		attrbs.bank_group = ctx->bank_group;
+		break;
+	case EDAC_REPAIR_BANK_SPARING:
+		attrbs.repair_type = CXL_BANK_SPARING;
+		attrbs.bank = ctx->bank;
+		attrbs.bank_group = ctx->bank_group;
+		break;
+	case EDAC_REPAIR_RANK_SPARING:
+		attrbs.repair_type = CXL_RANK_SPARING;
+		break;
+	default:
+		return NULL;
+	}
+
+	return cxl_find_rec_dram(cxlmd, &attrbs);
+}
+
+/*
+ * Build and issue a memory sparing maintenance request from the values
+ * held in @cxl_sparing_ctx.
+ *
+ * Both the region and DPA rwsems are taken for reading (interruptibly) for
+ * the duration of the call. If the device does not report the operation as
+ * safe while memory is in use, the request is refused with -EBUSY when the
+ * memdev is online. If it is safe, an online memdev additionally requires a
+ * cached DRAM event record that matches the requested location, otherwise
+ * -EINVAL is returned.
+ *
+ * Returns the lock acquisition error, -EBUSY, -EINVAL, or the result of
+ * cxl_perform_maintenance().
+ */
+static int
+cxl_mem_perform_sparing(struct device *dev,
+ struct cxl_mem_sparing_context *cxl_sparing_ctx)
+{
+ struct cxl_memdev *cxlmd = cxl_sparing_ctx->cxlmd;
+ struct cxl_memdev_sparing_in_payload sparing_pi;
+ struct cxl_event_dram *rec = NULL;
+ u16 validity_flags = 0;
+ int ret;
+
+ ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
+ if ((ret = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
+ return ret;
+
+ ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
+ if ((ret = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
+ return ret;
+
+ if (!cxl_sparing_ctx->cap_safe_when_in_use) {
+ /* Memory to repair must be offline */
+ if (cxl_is_memdev_memory_online(cxlmd))
+ return -EBUSY;
+ } else {
+ if (cxl_is_memdev_memory_online(cxlmd)) {
+ /* Online repair is only allowed for a location with a cached event */
+ rec = cxl_mem_get_rec_dram(cxlmd, cxl_sparing_ctx);
+ if (!rec)
+ return -EINVAL;
+
+ if (!get_unaligned_le16(rec->media_hdr.validity_flags))
+ return -EINVAL;
+ }
+ }
+
+ memset(&sparing_pi, 0, sizeof(sparing_pi));
+ sparing_pi.flags = CXL_SET_SPARING_QUERY_RESOURCE(0);
+ if (cxl_sparing_ctx->persist_mode)
+ sparing_pi.flags |= CXL_SET_HARD_SPARING(1);
+
+ /* rec stays NULL when the memdev is offline; validity_flags is then 0 */
+ if (rec)
+ validity_flags = get_unaligned_le16(rec->media_hdr.validity_flags);
+
+ /*
+ * Each case adds the fields specific to its granularity and falls
+ * through to the coarser ones, so a cacheline request carries every
+ * field and a rank request only rank, channel and nibble mask.
+ */
+ switch (cxl_sparing_ctx->repair_type) {
+ case EDAC_REPAIR_CACHELINE_SPARING:
+ sparing_pi.column = cpu_to_le16(cxl_sparing_ctx->column);
+ if (!rec || (validity_flags & CXL_DER_VALID_SUB_CHANNEL)) {
+ sparing_pi.flags |= CXL_SET_SPARING_SUB_CHNL_VALID(1);
+ sparing_pi.sub_channel = cxl_sparing_ctx->sub_channel;
+ }
+ fallthrough;
+ case EDAC_REPAIR_ROW_SPARING:
+ put_unaligned_le24(cxl_sparing_ctx->row, sparing_pi.row);
+ fallthrough;
+ case EDAC_REPAIR_BANK_SPARING:
+ sparing_pi.bank_group = cxl_sparing_ctx->bank_group;
+ sparing_pi.bank = cxl_sparing_ctx->bank;
+ fallthrough;
+ case EDAC_REPAIR_RANK_SPARING:
+ sparing_pi.rank = cxl_sparing_ctx->rank;
+ fallthrough;
+ default:
+ sparing_pi.channel = cxl_sparing_ctx->channel;
+ /*
+ * With a record, the nibble mask is sent only if the record marks
+ * it valid. Without one, it is sent unless the user value is
+ * non-zero solely above bit 23.
+ * NOTE(review): confirm a zero user mask is meant to be sent as valid.
+ */
+ if ((rec && (validity_flags & CXL_DER_VALID_NIBBLE)) ||
+ (!rec && (!cxl_sparing_ctx->nibble_mask ||
+ (cxl_sparing_ctx->nibble_mask & 0xFFFFFF)))) {
+ sparing_pi.flags |= CXL_SET_SPARING_NIB_MASK_VALID(1);
+ put_unaligned_le24(cxl_sparing_ctx->nibble_mask,
+ sparing_pi.nibble_mask);
+ }
+ break;
+ }
+
+ return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
+ cxl_sparing_ctx->op_class,
+ cxl_sparing_ctx->op_subclass,
+ &sparing_pi, sizeof(sparing_pi));
+}
+
+/*
+ * Report the name of the sparing type handled by this context, taken from
+ * edac_repair_type[]. Returns -EINVAL if the context does not hold one of
+ * the four sparing types.
+ */
+static int cxl_mem_sparing_get_repair_type(struct device *dev, void *drv_data,
+					   const char **repair_type)
+{
+	struct cxl_mem_sparing_context *ctx = drv_data;
+
+	if (ctx->repair_type != EDAC_REPAIR_CACHELINE_SPARING &&
+	    ctx->repair_type != EDAC_REPAIR_ROW_SPARING &&
+	    ctx->repair_type != EDAC_REPAIR_BANK_SPARING &&
+	    ctx->repair_type != EDAC_REPAIR_RANK_SPARING)
+		return -EINVAL;
+
+	*repair_type = edac_repair_type[ctx->repair_type];
+
+	return 0;
+}
+
+/*
+ * Generate cxl_mem_sparing_get_<attrb>(): an accessor that copies the
+ * context field named @attrb into *val as @data_type and returns 0.
+ * Several of the fields are narrower than the u32 they are reported as.
+ */
+#define CXL_SPARING_GET_ATTR(attrb, data_type) \
+ static int cxl_mem_sparing_get_##attrb( \
+ struct device *dev, void *drv_data, data_type *val) \
+ { \
+ struct cxl_mem_sparing_context *ctx = drv_data; \
+ \
+ *val = ctx->attrb; \
+ \
+ return 0; \
+ }
+CXL_SPARING_GET_ATTR(persist_mode, bool)
+CXL_SPARING_GET_ATTR(dpa, u64)
+CXL_SPARING_GET_ATTR(nibble_mask, u32)
+CXL_SPARING_GET_ATTR(bank_group, u32)
+CXL_SPARING_GET_ATTR(bank, u32)
+CXL_SPARING_GET_ATTR(rank, u32)
+CXL_SPARING_GET_ATTR(row, u32)
+CXL_SPARING_GET_ATTR(column, u32)
+CXL_SPARING_GET_ATTR(channel, u32)
+CXL_SPARING_GET_ATTR(sub_channel, u32)
+
+/*
+ * Generate cxl_mem_sparing_set_<attrb>(): a setter that stores @val in the
+ * context field named @attrb.
+ *
+ * The setters take a u32 while several context fields are u8 or u16. A
+ * value that does not survive the conversion to the field's type is
+ * rejected with -EINVAL instead of being silently truncated, so that a
+ * repair can never be aimed at a different location than the one written.
+ * Returns 0 on success.
+ */
+#define CXL_SPARING_SET_ATTR(attrb, data_type) \
+	static int cxl_mem_sparing_set_##attrb(struct device *dev, \
+						void *drv_data, data_type val) \
+	{ \
+		struct cxl_mem_sparing_context *ctx = drv_data; \
+		\
+		/* Refuse values the context field cannot represent */ \
+		if ((typeof(ctx->attrb))val != val) \
+			return -EINVAL; \
+		\
+		ctx->attrb = val; \
+		\
+		return 0; \
+	}
+CXL_SPARING_SET_ATTR(nibble_mask, u32)
+CXL_SPARING_SET_ATTR(bank_group, u32)
+CXL_SPARING_SET_ATTR(bank, u32)
+CXL_SPARING_SET_ATTR(rank, u32)
+CXL_SPARING_SET_ATTR(row, u32)
+CXL_SPARING_SET_ATTR(column, u32)
+CXL_SPARING_SET_ATTR(channel, u32)
+CXL_SPARING_SET_ATTR(sub_channel, u32)
+
+/*
+ * Select hard (persist_mode true) or soft (persist_mode false) sparing.
+ * Returns -EOPNOTSUPP when the device lacks the capability for the
+ * requested mode, otherwise stores the mode and returns 0.
+ */
+static int cxl_mem_sparing_set_persist_mode(struct device *dev, void *drv_data,
+					    bool persist_mode)
+{
+	struct cxl_mem_sparing_context *ctx = drv_data;
+	bool supported;
+
+	if (persist_mode)
+		supported = ctx->cap_hard_sparing;
+	else
+		supported = ctx->cap_soft_sparing;
+
+	if (!supported)
+		return -EOPNOTSUPP;
+
+	ctx->persist_mode = persist_mode;
+
+	return 0;
+}
+
+/* Report the cached "safe when in use" capability of the sparing feature. */
+static int cxl_get_mem_sparing_safe_when_in_use(struct device *dev,
+						void *drv_data, bool *safe)
+{
+	const struct cxl_mem_sparing_context *sparing_ctx = drv_data;
+
+	*safe = sparing_ctx->cap_safe_when_in_use;
+	return 0;
+}
+
+/* Report the lowest DPA of the device: the start of its DPA resource. */
+static int cxl_mem_sparing_get_min_dpa(struct device *dev, void *drv_data,
+				       u64 *min_dpa)
+{
+	struct cxl_mem_sparing_context *ctx = drv_data;
+
+	*min_dpa = ctx->cxlmd->cxlds->dpa_res.start;
+
+	return 0;
+}
+
+/* Report the highest DPA of the device: the end of its DPA resource. */
+static int cxl_mem_sparing_get_max_dpa(struct device *dev, void *drv_data,
+				       u64 *max_dpa)
+{
+	struct cxl_mem_sparing_context *ctx = drv_data;
+
+	*max_dpa = ctx->cxlmd->cxlds->dpa_res.end;
+
+	return 0;
+}
+
+/*
+ * Store the DPA to repair. Returns -EINVAL, leaving the context unchanged,
+ * when the address lies outside the device's DPA resource.
+ */
+static int cxl_mem_sparing_set_dpa(struct device *dev, void *drv_data, u64 dpa)
+{
+	struct cxl_mem_sparing_context *ctx = drv_data;
+	struct cxl_dev_state *cxlds = ctx->cxlmd->cxlds;
+
+	if (cxl_resource_contains_addr(&cxlds->dpa_res, dpa)) {
+		ctx->dpa = dpa;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Trigger the sparing operation. Only EDAC_DO_MEM_REPAIR is accepted as
+ * @val; any other value returns -EINVAL.
+ */
+static int cxl_do_mem_sparing(struct device *dev, void *drv_data, u32 val)
+{
+	if (val == EDAC_DO_MEM_REPAIR)
+		return cxl_mem_perform_sparing(dev, drv_data);
+
+	return -EINVAL;
+}
+
+/*
+ * Initialiser lists for the sparing ops tables. Each list extends the
+ * coarser one: RANK_OPS holds the callbacks common to every granularity,
+ * BANK_OPS adds bank group and bank, ROW_OPS adds row, and CACHELINE_OPS
+ * adds column and sub-channel.
+ */
+#define RANK_OPS \
+ .get_repair_type = cxl_mem_sparing_get_repair_type, \
+ .get_persist_mode = cxl_mem_sparing_get_persist_mode, \
+ .set_persist_mode = cxl_mem_sparing_set_persist_mode, \
+ .get_repair_safe_when_in_use = cxl_get_mem_sparing_safe_when_in_use, \
+ .get_min_dpa = cxl_mem_sparing_get_min_dpa, \
+ .get_max_dpa = cxl_mem_sparing_get_max_dpa, \
+ .get_dpa = cxl_mem_sparing_get_dpa, \
+ .set_dpa = cxl_mem_sparing_set_dpa, \
+ .get_nibble_mask = cxl_mem_sparing_get_nibble_mask, \
+ .set_nibble_mask = cxl_mem_sparing_set_nibble_mask, \
+ .get_rank = cxl_mem_sparing_get_rank, \
+ .set_rank = cxl_mem_sparing_set_rank, \
+ .get_channel = cxl_mem_sparing_get_channel, \
+ .set_channel = cxl_mem_sparing_set_channel, \
+ .do_repair = cxl_do_mem_sparing
+
+#define BANK_OPS \
+ RANK_OPS, .get_bank_group = cxl_mem_sparing_get_bank_group, \
+ .set_bank_group = cxl_mem_sparing_set_bank_group, \
+ .get_bank = cxl_mem_sparing_get_bank, \
+ .set_bank = cxl_mem_sparing_set_bank
+
+#define ROW_OPS \
+ BANK_OPS, .get_row = cxl_mem_sparing_get_row, \
+ .set_row = cxl_mem_sparing_set_row
+
+#define CACHELINE_OPS \
+ ROW_OPS, .get_column = cxl_mem_sparing_get_column, \
+ .set_column = cxl_mem_sparing_set_column, \
+ .get_sub_channel = cxl_mem_sparing_get_sub_channel, \
+ .set_sub_channel = cxl_mem_sparing_set_sub_channel
+
+/* One ops table per sparing granularity, referenced from mem_sparing_desc[] */
+static const struct edac_mem_repair_ops cxl_rank_sparing_ops = {
+ RANK_OPS,
+};
+
+static const struct edac_mem_repair_ops cxl_bank_sparing_ops = {
+ BANK_OPS,
+};
+
+static const struct edac_mem_repair_ops cxl_row_sparing_ops = {
+ ROW_OPS,
+};
+
+static const struct edac_mem_repair_ops cxl_cacheline_sparing_ops = {
+ CACHELINE_OPS,
+};
+
+/*
+ * Static description of one sparing feature: the feature UUID to look up
+ * on the device, the EDAC repair type it maps to, and the ops table to
+ * register for it.
+ */
+struct cxl_mem_sparing_desc {
+ const uuid_t repair_uuid;
+ enum edac_mem_repair_type repair_type;
+ const struct edac_mem_repair_ops *repair_ops;
+};
+
+/*
+ * Sparing features probed at memdev registration, indexed from 0 up to
+ * CXL_MEM_SPARING_MAX - 1.
+ */
+static const struct cxl_mem_sparing_desc mem_sparing_desc[] = {
+ {
+ .repair_uuid = CXL_FEAT_CACHELINE_SPARING_UUID,
+ .repair_type = EDAC_REPAIR_CACHELINE_SPARING,
+ .repair_ops = &cxl_cacheline_sparing_ops,
+ },
+ {
+ .repair_uuid = CXL_FEAT_ROW_SPARING_UUID,
+ .repair_type = EDAC_REPAIR_ROW_SPARING,
+ .repair_ops = &cxl_row_sparing_ops,
+ },
+ {
+ .repair_uuid = CXL_FEAT_BANK_SPARING_UUID,
+ .repair_type = EDAC_REPAIR_BANK_SPARING,
+ .repair_ops = &cxl_bank_sparing_ops,
+ },
+ {
+ .repair_uuid = CXL_FEAT_RANK_SPARING_UUID,
+ .repair_type = EDAC_REPAIR_RANK_SPARING,
+ .repair_ops = &cxl_rank_sparing_ops,
+ },
+};
+
+/*
+ * Set up one memory sparing feature for EDAC registration.
+ *
+ * Looks up the feature described by @desc, allocates a device-managed
+ * context for it, reads the feature's attributes and fills @ras_feature.
+ * The default persist mode is soft sparing when the device supports it,
+ * otherwise hard sparing.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when the feature is absent, not
+ * changeable, or supports neither soft nor hard sparing, -ENOMEM on
+ * allocation failure, or the error from reading the attributes.
+ */
+static int cxl_memdev_sparing_init(struct cxl_memdev *cxlmd,
+				   struct edac_dev_feature *ras_feature,
+				   const struct cxl_mem_sparing_desc *desc,
+				   u8 repair_inst)
+{
+	struct cxl_mem_sparing_context *ctx;
+	struct cxl_feat_entry *entry;
+	int rc;
+
+	entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds), &desc->repair_uuid);
+	if (IS_ERR(entry))
+		return -EOPNOTSUPP;
+
+	if (!(le32_to_cpu(entry->flags) & CXL_FEATURE_F_CHANGEABLE))
+		return -EOPNOTSUPP;
+
+	ctx = devm_kzalloc(&cxlmd->dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	/* Every field not set here keeps the zero from devm_kzalloc() */
+	ctx->cxlmd = cxlmd;
+	ctx->instance = repair_inst;
+	ctx->repair_type = desc->repair_type;
+	ctx->get_feat_size = le16_to_cpu(entry->get_feat_size);
+	ctx->set_feat_size = le16_to_cpu(entry->set_feat_size);
+	ctx->get_version = entry->get_feat_ver;
+	ctx->set_version = entry->set_feat_ver;
+	ctx->effects = le16_to_cpu(entry->effects);
+	uuid_copy(&ctx->repair_uuid, &desc->repair_uuid);
+
+	rc = cxl_mem_sparing_get_attrbs(ctx);
+	if (rc)
+		return rc;
+
+	/* Prefer soft sparing whenever the device offers it */
+	if (ctx->cap_soft_sparing)
+		ctx->persist_mode = 0;
+	else if (ctx->cap_hard_sparing)
+		ctx->persist_mode = 1;
+	else
+		return -EOPNOTSUPP;
+
+	ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
+	ras_feature->instance = ctx->instance;
+	ras_feature->mem_repair_ops = desc->repair_ops;
+	ras_feature->ctx = ctx;
+
+	return 0;
+}
+
+/*
+ * CXL memory soft PPR & hard PPR control
+ *
+ * Per-feature state for one PPR instance. The size/version/effects fields
+ * are copied from the feature entry at init time; op_class, op_subclass and
+ * the cap/restriction booleans come from the feature's readable attributes;
+ * dpa and nibble_mask hold the values written through the EDAC setters.
+ */
+struct cxl_ppr_context {
+ uuid_t repair_uuid;
+ u8 instance;
+ u16 get_feat_size;
+ u16 set_feat_size;
+ u8 get_version;
+ u8 set_version;
+ u16 effects;
+ u8 op_class;
+ u8 op_subclass;
+ bool cap_dpa;
+ bool cap_nib_mask;
+ bool media_accessible; /* true when restriction flag bit 0 is clear */
+ bool data_retained; /* true when restriction flag bit 2 is clear */
+ struct cxl_memdev *cxlmd;
+ enum edac_mem_repair_type repair_type;
+ bool persist_mode;
+ u64 dpa;
+ u32 nibble_mask; /* written to the payload as a 24-bit value */
+};
+
+/*
+ * See CXL rev 3.2 @8.2.10.7.2.1 Table 8-128 sPPR Feature Readable Attributes
+ *
+ * See CXL rev 3.2 @8.2.10.7.2.2 Table 8-131 hPPR Feature Readable Attributes
+ */
+
+#define CXL_PPR_OP_CAP_DEVICE_INITIATED BIT(0)
+#define CXL_PPR_OP_MODE_DEV_INITIATED BIT(0)
+
+#define CXL_PPR_FLAG_DPA_SUPPORT_MASK BIT(0)
+#define CXL_PPR_FLAG_NIB_SUPPORT_MASK BIT(1)
+#define CXL_PPR_FLAG_MEM_SPARING_EV_REC_SUPPORT_MASK BIT(2)
+#define CXL_PPR_FLAG_DEV_INITED_PPR_AT_BOOT_CAP_MASK BIT(3)
+
+#define CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK BIT(0)
+#define CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK BIT(2)
+
+#define CXL_PPR_SPARING_EV_REC_EN_MASK BIT(0)
+#define CXL_PPR_DEV_INITED_PPR_AT_BOOT_EN_MASK BIT(1)
+
+#define CXL_PPR_GET_CAP_DPA(flags) \
+ FIELD_GET(CXL_PPR_FLAG_DPA_SUPPORT_MASK, flags)
+#define CXL_PPR_GET_CAP_NIB_MASK(flags) \
+ FIELD_GET(CXL_PPR_FLAG_NIB_SUPPORT_MASK, flags)
+/*
+ * The two restriction helpers below invert the bit they read: each yields
+ * 1 when its restriction flag is clear and 0 when it is set.
+ */
+#define CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags) \
+ (FIELD_GET(CXL_PPR_RESTRICTION_FLAG_MEDIA_ACCESSIBLE_MASK, \
+ restriction_flags) ^ 1)
+#define CXL_PPR_GET_DATA_RETAINED(restriction_flags) \
+ (FIELD_GET(CXL_PPR_RESTRICTION_FLAG_DATA_RETAINED_MASK, \
+ restriction_flags) ^ 1)
+
+/*
+ * Readable attributes of a PPR feature. ppr_flags is decoded with the
+ * CXL_PPR_GET_CAP_*() helpers and restriction_flags with
+ * CXL_PPR_GET_MEDIA_ACCESSIBLE() / CXL_PPR_GET_DATA_RETAINED().
+ */
+struct cxl_memdev_ppr_rd_attrbs {
+ struct cxl_memdev_repair_rd_attrbs_hdr hdr;
+ u8 ppr_flags;
+ __le16 restriction_flags;
+ u8 ppr_op_mode;
+} __packed;
+
+/*
+ * See CXL rev 3.2 @8.2.10.7.1.2 Table 8-118 sPPR Maintenance Input Payload
+ *
+ * See CXL rev 3.2 @8.2.10.7.1.3 Table 8-119 hPPR Maintenance Input Payload
+ *
+ * nibble_mask is a 24-bit little-endian value, filled with
+ * put_unaligned_le24().
+ */
+struct cxl_memdev_ppr_maintenance_attrbs {
+ u8 flags;
+ __le64 dpa;
+ u8 nibble_mask[3];
+} __packed;
+
+/*
+ * Read the current readable attributes of the PPR feature identified by
+ * the context's UUID and cache the operation class/subclass, the DPA and
+ * nibble-mask capabilities and the restriction-derived booleans in the
+ * context.
+ *
+ * Returns 0 on success, -ENOMEM if the read buffer cannot be allocated, or
+ * -EIO if cxl_get_feature() returns no data.
+ */
+static int cxl_mem_ppr_get_attrbs(struct cxl_ppr_context *cxl_ppr_ctx)
+{
+	size_t rd_data_size = sizeof(struct cxl_memdev_ppr_rd_attrbs);
+	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
+	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
+	u16 restriction_flags;
+	size_t data_size;
+	u16 return_code;
+
+	/*
+	 * Zero the buffer, as the sparing attribute reader does: only a
+	 * zero-length read is rejected below, so after a short read the
+	 * fields decoded here must not contain uninitialised heap bytes.
+	 */
+	struct cxl_memdev_ppr_rd_attrbs *rd_attrbs __free(kfree) =
+		kzalloc(rd_data_size, GFP_KERNEL);
+	if (!rd_attrbs)
+		return -ENOMEM;
+
+	data_size = cxl_get_feature(cxl_mbox, &cxl_ppr_ctx->repair_uuid,
+				    CXL_GET_FEAT_SEL_CURRENT_VALUE, rd_attrbs,
+				    rd_data_size, 0, &return_code);
+	if (!data_size)
+		return -EIO;
+
+	cxl_ppr_ctx->op_class = rd_attrbs->hdr.op_class;
+	cxl_ppr_ctx->op_subclass = rd_attrbs->hdr.op_subclass;
+	cxl_ppr_ctx->cap_dpa = CXL_PPR_GET_CAP_DPA(rd_attrbs->ppr_flags);
+	cxl_ppr_ctx->cap_nib_mask =
+		CXL_PPR_GET_CAP_NIB_MASK(rd_attrbs->ppr_flags);
+
+	restriction_flags = le16_to_cpu(rd_attrbs->restriction_flags);
+	cxl_ppr_ctx->media_accessible =
+		CXL_PPR_GET_MEDIA_ACCESSIBLE(restriction_flags);
+	cxl_ppr_ctx->data_retained =
+		CXL_PPR_GET_DATA_RETAINED(restriction_flags);
+
+	return 0;
+}
+
+/*
+ * Build and issue a PPR maintenance request for the DPA and nibble mask
+ * held in @cxl_ppr_ctx.
+ *
+ * Both the region and DPA rwsems are taken for reading (interruptibly) for
+ * the duration of the call. When the memdev is online the request is
+ * refused with -EBUSY unless the device keeps the media accessible and the
+ * data retained during PPR, and with -EINVAL unless a cached DRAM or
+ * General Media event record matches the requested location.
+ *
+ * Returns the lock acquisition error, -EBUSY, -EINVAL, or the result of
+ * cxl_perform_maintenance().
+ */
+static int cxl_mem_perform_ppr(struct cxl_ppr_context *cxl_ppr_ctx)
+{
+	struct cxl_memdev_ppr_maintenance_attrbs ppr_pi;
+	struct cxl_memdev *cxlmd = cxl_ppr_ctx->cxlmd;
+	struct cxl_mem_repair_attrbs attrbs = { 0 };
+	int ret;
+
+	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
+	ret = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem);
+	if (ret)
+		return ret;
+
+	ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
+	ret = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem);
+	if (ret)
+		return ret;
+
+	if (cxl_is_memdev_memory_online(cxlmd)) {
+		/* Memory to repair must be offline */
+		if (!cxl_ppr_ctx->media_accessible ||
+		    !cxl_ppr_ctx->data_retained)
+			return -EBUSY;
+
+		/* Check memory to repair is from the current boot */
+		attrbs.repair_type = CXL_PPR;
+		attrbs.dpa = cxl_ppr_ctx->dpa;
+		attrbs.nibble_mask = cxl_ppr_ctx->nibble_mask;
+		if (!cxl_find_rec_dram(cxlmd, &attrbs) &&
+		    !cxl_find_rec_gen_media(cxlmd, &attrbs))
+			return -EINVAL;
+	}
+
+	/* The flags byte stays zero from the memset */
+	memset(&ppr_pi, 0, sizeof(ppr_pi));
+	ppr_pi.dpa = cpu_to_le64(cxl_ppr_ctx->dpa);
+	put_unaligned_le24(cxl_ppr_ctx->nibble_mask, ppr_pi.nibble_mask);
+
+	return cxl_perform_maintenance(&cxlmd->cxlds->cxl_mbox,
+				       cxl_ppr_ctx->op_class,
+				       cxl_ppr_ctx->op_subclass,
+				       &ppr_pi, sizeof(ppr_pi));
+}
+
+/* Report the repair type name for PPR, taken from edac_repair_type[]. */
+static int cxl_ppr_get_repair_type(struct device *dev, void *drv_data,
+				   const char **repair_type)
+{
+	const char *ppr_name = edac_repair_type[EDAC_REPAIR_PPR];
+
+	*repair_type = ppr_name;
+	return 0;
+}
+
+/* Report the persist mode stored in the PPR context. */
+static int cxl_ppr_get_persist_mode(struct device *dev, void *drv_data,
+				    bool *persist_mode)
+{
+	const struct cxl_ppr_context *ppr_ctx = drv_data;
+
+	*persist_mode = ppr_ctx->persist_mode;
+	return 0;
+}
+
+/*
+ * PPR is reported as safe while memory is in use only when the device both
+ * keeps the media accessible and retains the data during the operation.
+ */
+static int cxl_get_ppr_safe_when_in_use(struct device *dev, void *drv_data,
+					bool *safe)
+{
+	const struct cxl_ppr_context *ppr_ctx = drv_data;
+
+	*safe = ppr_ctx->media_accessible && ppr_ctx->data_retained;
+	return 0;
+}
+
+/* Report the lowest DPA of the device: the start of its DPA resource. */
+static int cxl_ppr_get_min_dpa(struct device *dev, void *drv_data, u64 *min_dpa)
+{
+	struct cxl_ppr_context *ppr_ctx = drv_data;
+
+	*min_dpa = ppr_ctx->cxlmd->cxlds->dpa_res.start;
+
+	return 0;
+}
+
+/* Report the highest DPA of the device: the end of its DPA resource. */
+static int cxl_ppr_get_max_dpa(struct device *dev, void *drv_data, u64 *max_dpa)
+{
+	struct cxl_ppr_context *ppr_ctx = drv_data;
+
+	*max_dpa = ppr_ctx->cxlmd->cxlds->dpa_res.end;
+
+	return 0;
+}
+
+/* Report the DPA currently selected for PPR. */
+static int cxl_ppr_get_dpa(struct device *dev, void *drv_data, u64 *dpa)
+{
+	const struct cxl_ppr_context *ppr_ctx = drv_data;
+
+	*dpa = ppr_ctx->dpa;
+	return 0;
+}
+
+/*
+ * Store the DPA to repair. Returns -EINVAL, leaving the context unchanged,
+ * when the address lies outside the device's DPA resource.
+ */
+static int cxl_ppr_set_dpa(struct device *dev, void *drv_data, u64 dpa)
+{
+	struct cxl_ppr_context *ppr_ctx = drv_data;
+	struct cxl_dev_state *cxlds = ppr_ctx->cxlmd->cxlds;
+
+	if (cxl_resource_contains_addr(&cxlds->dpa_res, dpa)) {
+		ppr_ctx->dpa = dpa;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* Report the nibble mask currently selected for PPR. */
+static int cxl_ppr_get_nibble_mask(struct device *dev, void *drv_data,
+				   u32 *nibble_mask)
+{
+	const struct cxl_ppr_context *ppr_ctx = drv_data;
+
+	*nibble_mask = ppr_ctx->nibble_mask;
+	return 0;
+}
+
+/* Store the nibble mask to use for PPR; the value is not validated here. */
+static int cxl_ppr_set_nibble_mask(struct device *dev, void *drv_data,
+				   u32 nibble_mask)
+{
+	struct cxl_ppr_context *ppr_ctx = drv_data;
+
+	ppr_ctx->nibble_mask = nibble_mask;
+	return 0;
+}
+
+/*
+ * Trigger the PPR operation. Returns -EINVAL unless @val is
+ * EDAC_DO_MEM_REPAIR and the selected DPA lies inside the device's DPA
+ * resource; otherwise returns the result of the repair.
+ */
+static int cxl_do_ppr(struct device *dev, void *drv_data, u32 val)
+{
+	struct cxl_ppr_context *ppr_ctx = drv_data;
+	struct cxl_dev_state *cxlds = ppr_ctx->cxlmd->cxlds;
+
+	if (val != EDAC_DO_MEM_REPAIR)
+		return -EINVAL;
+
+	if (!cxl_resource_contains_addr(&cxlds->dpa_res, ppr_ctx->dpa))
+		return -EINVAL;
+
+	return cxl_mem_perform_ppr(ppr_ctx);
+}
+
+/*
+ * EDAC mem_repair callbacks for soft PPR. No set_persist_mode callback is
+ * provided; the context's persist_mode is fixed to 0 at init time.
+ */
+static const struct edac_mem_repair_ops cxl_sppr_ops = {
+ .get_repair_type = cxl_ppr_get_repair_type,
+ .get_persist_mode = cxl_ppr_get_persist_mode,
+ .get_repair_safe_when_in_use = cxl_get_ppr_safe_when_in_use,
+ .get_min_dpa = cxl_ppr_get_min_dpa,
+ .get_max_dpa = cxl_ppr_get_max_dpa,
+ .get_dpa = cxl_ppr_get_dpa,
+ .set_dpa = cxl_ppr_set_dpa,
+ .get_nibble_mask = cxl_ppr_get_nibble_mask,
+ .set_nibble_mask = cxl_ppr_set_nibble_mask,
+ .do_repair = cxl_do_ppr,
+};
+
+/*
+ * Set up the soft PPR feature for EDAC registration.
+ *
+ * Looks up the sPPR feature, allocates a device-managed context for it,
+ * reads the feature's attributes and fills @ras_feature.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when the feature is absent or not
+ * changeable, -ENOMEM on allocation failure, or the error from reading the
+ * attributes.
+ */
+static int cxl_memdev_soft_ppr_init(struct cxl_memdev *cxlmd,
+				    struct edac_dev_feature *ras_feature,
+				    u8 repair_inst)
+{
+	struct cxl_feat_entry *entry;
+	struct cxl_ppr_context *ctx;
+	int rc;
+
+	entry = cxl_feature_info(to_cxlfs(cxlmd->cxlds), &CXL_FEAT_SPPR_UUID);
+	if (IS_ERR(entry))
+		return -EOPNOTSUPP;
+
+	if (!(le32_to_cpu(entry->flags) & CXL_FEATURE_F_CHANGEABLE))
+		return -EOPNOTSUPP;
+
+	ctx = devm_kzalloc(&cxlmd->dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	/*
+	 * Every field not set here, persist_mode included, keeps the zero
+	 * from devm_kzalloc().
+	 */
+	ctx->cxlmd = cxlmd;
+	ctx->instance = repair_inst;
+	ctx->repair_type = EDAC_REPAIR_PPR;
+	ctx->get_feat_size = le16_to_cpu(entry->get_feat_size);
+	ctx->set_feat_size = le16_to_cpu(entry->set_feat_size);
+	ctx->get_version = entry->get_feat_ver;
+	ctx->set_version = entry->set_feat_ver;
+	ctx->effects = le16_to_cpu(entry->effects);
+	uuid_copy(&ctx->repair_uuid, &CXL_FEAT_SPPR_UUID);
+
+	rc = cxl_mem_ppr_get_attrbs(ctx);
+	if (rc)
+		return rc;
+
+	ras_feature->ft_type = RAS_FEAT_MEM_REPAIR;
+	ras_feature->instance = ctx->instance;
+	ras_feature->mem_repair_ops = &cxl_sppr_ops;
+	ras_feature->ctx = ctx;
+
+	return 0;
+}
+
+/*
+ * Register the RAS features of a memdev with the EDAC core.
+ *
+ * Scrub, ECS, the memory sparing features and soft PPR are probed in turn,
+ * each gated by its config option. A feature reporting -EOPNOTSUPP is
+ * skipped; any other negative result aborts registration. When at least
+ * one repair feature is present, the cache used to hold error event
+ * records is allocated and initialised.
+ *
+ * Returns -EINVAL when no feature is available, -ENOMEM on allocation
+ * failure, the failing feature's error, or the result of
+ * edac_dev_register().
+ */
+int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
+{
+	struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
+	int num_ras_features = 0;
+	u8 repair_inst = 0;
+	int rc;
+
+	if (IS_ENABLED(CONFIG_CXL_EDAC_SCRUB)) {
+		rc = cxl_memdev_scrub_init(cxlmd, &ras_features[num_ras_features], 0);
+		if (rc != -EOPNOTSUPP) {
+			if (rc < 0)
+				return rc;
+			num_ras_features++;
+		}
+	}
+
+	if (IS_ENABLED(CONFIG_CXL_EDAC_ECS)) {
+		rc = cxl_memdev_ecs_init(cxlmd, &ras_features[num_ras_features]);
+		if (rc != -EOPNOTSUPP) {
+			if (rc < 0)
+				return rc;
+			num_ras_features++;
+		}
+	}
+
+	if (IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR)) {
+		for (int i = 0; i < CXL_MEM_SPARING_MAX; i++) {
+			rc = cxl_memdev_sparing_init(cxlmd,
+						     &ras_features[num_ras_features],
+						     &mem_sparing_desc[i], repair_inst);
+			if (rc != -EOPNOTSUPP) {
+				if (rc < 0)
+					return rc;
+				repair_inst++;
+				num_ras_features++;
+			}
+		}
+
+		rc = cxl_memdev_soft_ppr_init(cxlmd, &ras_features[num_ras_features],
+					      repair_inst);
+		if (rc != -EOPNOTSUPP) {
+			if (rc < 0)
+				return rc;
+			repair_inst++;
+			num_ras_features++;
+		}
+
+		/* The record cache is only needed when a repair feature exists */
+		if (repair_inst) {
+			struct cxl_mem_err_rec *err_recs;
+
+			err_recs = devm_kzalloc(&cxlmd->dev, sizeof(*err_recs),
+						GFP_KERNEL);
+			if (!err_recs)
+				return -ENOMEM;
+
+			xa_init(&err_recs->rec_gen_media);
+			xa_init(&err_recs->rec_dram);
+			cxlmd->err_rec_array = err_recs;
+		}
+	}
+
+	if (!num_ras_features)
+		return -EINVAL;
+
+	char *cxl_dev_name __free(kfree) =
+		kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlmd->dev));
+	if (!cxl_dev_name)
+		return -ENOMEM;
+
+	return edac_dev_register(&cxlmd->dev, cxl_dev_name, NULL,
+				 num_ras_features, ras_features);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_register, "CXL");
+
+/*
+ * Register the scrub feature of a region with the EDAC core.
+ *
+ * Returns 0 without registering anything when CONFIG_CXL_EDAC_SCRUB is
+ * disabled, the negative error from the scrub setup, -ENOMEM if the device
+ * name cannot be allocated, or the result of edac_dev_register().
+ */
+int devm_cxl_region_edac_register(struct cxl_region *cxlr)
+{
+	struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
+	int rc;
+
+	if (!IS_ENABLED(CONFIG_CXL_EDAC_SCRUB))
+		return 0;
+
+	/* Scrub is the only region feature; it occupies slot 0 */
+	rc = cxl_region_scrub_init(cxlr, &ras_features[0], 0);
+	if (rc < 0)
+		return rc;
+
+	char *cxl_dev_name __free(kfree) =
+		kasprintf(GFP_KERNEL, "cxl_%s", dev_name(&cxlr->dev));
+	if (!cxl_dev_name)
+		return -ENOMEM;
+
+	return edac_dev_register(&cxlr->dev, cxl_dev_name, NULL, 1,
+				 ras_features);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_region_edac_register, "CXL");
+
+/*
+ * Free every cached DRAM and General Media event record of a memdev and
+ * destroy both record caches. Does nothing when CONFIG_CXL_EDAC_MEM_REPAIR
+ * is disabled or no cache was allocated.
+ */
+void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd)
+{
+	struct cxl_mem_err_rec *err_recs = cxlmd->err_rec_array;
+	unsigned long index;
+	void *entry;
+
+	if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !err_recs)
+		return;
+
+	xa_for_each(&err_recs->rec_dram, index, entry)
+		kfree(entry);
+	xa_destroy(&err_recs->rec_dram);
+
+	xa_for_each(&err_recs->rec_gen_media, index, entry)
+		kfree(entry);
+	xa_destroy(&err_recs->rec_gen_media);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_release, "CXL");
diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c
index 1498e2369c37..7c750599ea69 100644
--- a/drivers/cxl/core/features.c
+++ b/drivers/cxl/core/features.c
@@ -9,6 +9,16 @@
#include "core.h"
#include "cxlmem.h"
+/**
+ * DOC: cxl features
+ *
+ * CXL Features:
+ * A CXL device that includes a mailbox supports commands that allows
+ * listing, getting, and setting of optionally defined features such
+ * as memory sparing or post package sparing. Vendors may define custom
+ * features for the device.
+ */
+
/* All the features below are exclusive to the kernel */
static const uuid_t cxl_exclusive_feats[] = {
CXL_FEAT_PATROL_SCRUB_UUID,
@@ -36,7 +46,7 @@ static bool is_cxl_feature_exclusive(struct cxl_feat_entry *entry)
return is_cxl_feature_exclusive_by_uuid(&entry->uuid);
}
-inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds)
+struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds)
{
return cxlds->cxlfs;
}
@@ -355,17 +365,11 @@ static void cxlctl_close_uctx(struct fwctl_uctx *uctx)
{
}
-static struct cxl_feat_entry *
-get_support_feature_info(struct cxl_features_state *cxlfs,
- const struct fwctl_rpc_cxl *rpc_in)
+struct cxl_feat_entry *
+cxl_feature_info(struct cxl_features_state *cxlfs,
+ const uuid_t *uuid)
{
struct cxl_feat_entry *feat;
- const uuid_t *uuid;
-
- if (rpc_in->op_size < sizeof(uuid))
- return ERR_PTR(-EINVAL);
-
- uuid = &rpc_in->set_feat_in.uuid;
for (int i = 0; i < cxlfs->entries->num_features; i++) {
feat = &cxlfs->entries->ent[i];
@@ -416,14 +420,6 @@ static void *cxlctl_get_supported_features(struct cxl_features_state *cxlfs,
rpc_out->size = struct_size(feat_out, ents, requested);
feat_out = &rpc_out->get_sup_feats_out;
- if (requested == 0) {
- feat_out->num_entries = cpu_to_le16(requested);
- feat_out->supported_feats =
- cpu_to_le16(cxlfs->entries->num_features);
- rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
- *out_len = out_size;
- return no_free_ptr(rpc_out);
- }
for (i = start, pos = &feat_out->ents[0];
i < cxlfs->entries->num_features; i++, pos++) {
@@ -547,7 +543,10 @@ static bool cxlctl_validate_set_features(struct cxl_features_state *cxlfs,
struct cxl_feat_entry *feat;
u32 flags;
- feat = get_support_feature_info(cxlfs, rpc_in);
+ if (rpc_in->op_size < sizeof(uuid_t))
+ return false;
+
+ feat = cxl_feature_info(cxlfs, &rpc_in->set_feat_in.uuid);
if (IS_ERR(feat))
return false;
@@ -614,11 +613,7 @@ static bool cxlctl_validate_hw_command(struct cxl_features_state *cxlfs,
switch (opcode) {
case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
case CXL_MBOX_OP_GET_FEATURE:
- if (cxl_mbox->feat_cap < CXL_FEATURES_RO)
- return false;
- if (scope >= FWCTL_RPC_CONFIGURATION)
- return true;
- return false;
+ return cxl_mbox->feat_cap >= CXL_FEATURES_RO;
case CXL_MBOX_OP_SET_FEATURE:
if (cxl_mbox->feat_cap < CXL_FEATURES_RW)
return false;
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 70cae4ebf8a4..e9e1d555cec6 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -16,7 +16,10 @@
* for enumerating these registers and capabilities.
*/
-DECLARE_RWSEM(cxl_dpa_rwsem);
+struct cxl_rwsem cxl_rwsem = {
+ .region = __RWSEM_INITIALIZER(cxl_rwsem.region),
+ .dpa = __RWSEM_INITIALIZER(cxl_rwsem.dpa),
+};
static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
int *target_map)
@@ -34,7 +37,8 @@ static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
if (rc)
return rc;
- dev_dbg(&cxld->dev, "Added to port %s\n", dev_name(&port->dev));
+ dev_dbg(port->uport_dev, "%s added to %s\n",
+ dev_name(&cxld->dev), dev_name(&port->dev));
return 0;
}
@@ -213,7 +217,7 @@ void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds)
{
struct resource *p1, *p2;
- guard(rwsem_read)(&cxl_dpa_rwsem);
+ guard(rwsem_read)(&cxl_rwsem.dpa);
for (p1 = cxlds->dpa_res.child; p1; p1 = p1->sibling) {
__cxl_dpa_debug(file, p1, 0);
for (p2 = p1->child; p2; p2 = p2->sibling)
@@ -265,7 +269,7 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
struct resource *res = cxled->dpa_res;
resource_size_t skip_start;
- lockdep_assert_held_write(&cxl_dpa_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.dpa);
/* save @skip_start, before @res is released */
skip_start = res->start - cxled->skip;
@@ -280,7 +284,7 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
static void cxl_dpa_release(void *cxled)
{
- guard(rwsem_write)(&cxl_dpa_rwsem);
+ guard(rwsem_write)(&cxl_rwsem.dpa);
__cxl_dpa_release(cxled);
}
@@ -292,7 +296,7 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
{
struct cxl_port *port = cxled_to_port(cxled);
- lockdep_assert_held_write(&cxl_dpa_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.dpa);
devm_remove_action(&port->dev, cxl_dpa_release, cxled);
__cxl_dpa_release(cxled);
}
@@ -360,7 +364,7 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
struct resource *res;
int rc;
- lockdep_assert_held_write(&cxl_dpa_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.dpa);
if (!len) {
dev_warn(dev, "decoder%d.%d: empty reservation attempted\n",
@@ -469,7 +473,7 @@ int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info)
{
struct device *dev = cxlds->dev;
- guard(rwsem_write)(&cxl_dpa_rwsem);
+ guard(rwsem_write)(&cxl_rwsem.dpa);
if (cxlds->nr_partitions)
return -EBUSY;
@@ -515,9 +519,8 @@ int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
struct cxl_port *port = cxled_to_port(cxled);
int rc;
- down_write(&cxl_dpa_rwsem);
- rc = __cxl_dpa_reserve(cxled, base, len, skipped);
- up_write(&cxl_dpa_rwsem);
+ scoped_guard(rwsem_write, &cxl_rwsem.dpa)
+ rc = __cxl_dpa_reserve(cxled, base, len, skipped);
if (rc)
return rc;
@@ -528,7 +531,7 @@ EXPORT_SYMBOL_NS_GPL(devm_cxl_dpa_reserve, "CXL");
resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled)
{
- guard(rwsem_read)(&cxl_dpa_rwsem);
+ guard(rwsem_read)(&cxl_rwsem.dpa);
if (cxled->dpa_res)
return resource_size(cxled->dpa_res);
@@ -539,19 +542,26 @@ resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled)
{
resource_size_t base = -1;
- lockdep_assert_held(&cxl_dpa_rwsem);
+ lockdep_assert_held(&cxl_rwsem.dpa);
if (cxled->dpa_res)
base = cxled->dpa_res->start;
return base;
}
+bool cxl_resource_contains_addr(const struct resource *res, const resource_size_t addr)
+{
+ struct resource _addr = DEFINE_RES_MEM(addr, 1);
+
+ return resource_contains(res, &_addr);
+}
+
int cxl_dpa_free(struct cxl_endpoint_decoder *cxled)
{
struct cxl_port *port = cxled_to_port(cxled);
struct device *dev = &cxled->cxld.dev;
- guard(rwsem_write)(&cxl_dpa_rwsem);
+ guard(rwsem_write)(&cxl_rwsem.dpa);
if (!cxled->dpa_res)
return 0;
if (cxled->cxld.region) {
@@ -581,7 +591,7 @@ int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled,
struct device *dev = &cxled->cxld.dev;
int part;
- guard(rwsem_write)(&cxl_dpa_rwsem);
+ guard(rwsem_write)(&cxl_rwsem.dpa);
if (cxled->cxld.flags & CXL_DECODER_F_ENABLE)
return -EBUSY;
@@ -603,7 +613,7 @@ int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled,
return 0;
}
-static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size)
+static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
@@ -613,7 +623,7 @@ static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long lon
struct resource *p, *last;
int part;
- guard(rwsem_write)(&cxl_dpa_rwsem);
+ guard(rwsem_write)(&cxl_rwsem.dpa);
if (cxled->cxld.region) {
dev_dbg(dev, "decoder attached to %s\n",
dev_name(&cxled->cxld.region->dev));
@@ -666,15 +676,15 @@ static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long lon
skip = res->start - skip_start;
if (size > avail) {
- dev_dbg(dev, "%pa exceeds available %s capacity: %pa\n", &size,
- res->name, &avail);
+ dev_dbg(dev, "%llu exceeds available %s capacity: %llu\n", size,
+ res->name, (u64)avail);
return -ENOSPC;
}
return __cxl_dpa_reserve(cxled, start, size, skip);
}
-int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size)
+int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size)
{
struct cxl_port *port = cxled_to_port(cxled);
int rc;
@@ -763,46 +773,12 @@ static int cxld_await_commit(void __iomem *hdm, int id)
return -ETIMEDOUT;
}
-static int cxl_decoder_commit(struct cxl_decoder *cxld)
+static void setup_hw_decoder(struct cxl_decoder *cxld, void __iomem *hdm)
{
- struct cxl_port *port = to_cxl_port(cxld->dev.parent);
- struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
- void __iomem *hdm = cxlhdm->regs.hdm_decoder;
- int id = cxld->id, rc;
+ int id = cxld->id;
u64 base, size;
u32 ctrl;
- if (cxld->flags & CXL_DECODER_F_ENABLE)
- return 0;
-
- if (cxl_num_decoders_committed(port) != id) {
- dev_dbg(&port->dev,
- "%s: out of order commit, expected decoder%d.%d\n",
- dev_name(&cxld->dev), port->id,
- cxl_num_decoders_committed(port));
- return -EBUSY;
- }
-
- /*
- * For endpoint decoders hosted on CXL memory devices that
- * support the sanitize operation, make sure sanitize is not in-flight.
- */
- if (is_endpoint_decoder(&cxld->dev)) {
- struct cxl_endpoint_decoder *cxled =
- to_cxl_endpoint_decoder(&cxld->dev);
- struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- struct cxl_memdev_state *mds =
- to_cxl_memdev_state(cxlmd->cxlds);
-
- if (mds && mds->security.sanitize_active) {
- dev_dbg(&cxlmd->dev,
- "attempted to commit %s during sanitize\n",
- dev_name(&cxld->dev));
- return -EBUSY;
- }
- }
-
- down_read(&cxl_dpa_rwsem);
/* common decoder settings */
ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(cxld->id));
cxld_set_interleave(cxld, &ctrl);
@@ -836,7 +812,47 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld)
}
writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
- up_read(&cxl_dpa_rwsem);
+}
+
+static int cxl_decoder_commit(struct cxl_decoder *cxld)
+{
+ struct cxl_port *port = to_cxl_port(cxld->dev.parent);
+ struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
+ void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+ int id = cxld->id, rc;
+
+ if (cxld->flags & CXL_DECODER_F_ENABLE)
+ return 0;
+
+ if (cxl_num_decoders_committed(port) != id) {
+ dev_dbg(&port->dev,
+ "%s: out of order commit, expected decoder%d.%d\n",
+ dev_name(&cxld->dev), port->id,
+ cxl_num_decoders_committed(port));
+ return -EBUSY;
+ }
+
+ /*
+ * For endpoint decoders hosted on CXL memory devices that
+ * support the sanitize operation, make sure sanitize is not in-flight.
+ */
+ if (is_endpoint_decoder(&cxld->dev)) {
+ struct cxl_endpoint_decoder *cxled =
+ to_cxl_endpoint_decoder(&cxld->dev);
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_memdev_state *mds =
+ to_cxl_memdev_state(cxlmd->cxlds);
+
+ if (mds && mds->security.sanitize_active) {
+ dev_dbg(&cxlmd->dev,
+ "attempted to commit %s during sanitize\n",
+ dev_name(&cxld->dev));
+ return -EBUSY;
+ }
+ }
+
+ scoped_guard(rwsem_read, &cxl_rwsem.dpa)
+ setup_hw_decoder(cxld, hdm);
port->commit_end++;
rc = cxld_await_commit(hdm, cxld->id);
@@ -874,7 +890,7 @@ void cxl_port_commit_reap(struct cxl_decoder *cxld)
{
struct cxl_port *port = to_cxl_port(cxld->dev.parent);
- lockdep_assert_held_write(&cxl_region_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.region);
/*
* Once the highest committed decoder is disabled, free any other
@@ -906,7 +922,6 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld)
"%s: out of order reset, expected decoder%d.%d\n",
dev_name(&cxld->dev), port->id, port->commit_end);
- down_read(&cxl_dpa_rwsem);
ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
ctrl &= ~CXL_HDM_DECODER0_CTRL_COMMIT;
writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
@@ -915,7 +930,6 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld)
writel(0, hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(id));
writel(0, hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(id));
writel(0, hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(id));
- up_read(&cxl_dpa_rwsem);
cxld->flags &= ~CXL_DECODER_F_ENABLE;
@@ -1024,7 +1038,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
else
cxld->target_type = CXL_DECODER_DEVMEM;
- guard(rwsem_write)(&cxl_region_rwsem);
+ guard(rwsem_write)(&cxl_rwsem.region);
if (cxld->id != cxl_num_decoders_committed(port)) {
dev_warn(&port->dev,
"decoder%d.%d: Committed out of order\n",
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index d72764056ce6..fa6dd0c94656 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -899,6 +899,10 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
trace_cxl_generic_event(cxlmd, type, uuid, &evt->generic);
return;
}
+ if (event_type == CXL_CPER_EVENT_MEM_SPARING) {
+ trace_cxl_memory_sparing(cxlmd, type, &evt->mem_sparing);
+ return;
+ }
if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) {
u64 dpa, hpa = ULLONG_MAX, hpa_alias = ULLONG_MAX;
@@ -909,8 +913,8 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
* translations. Take topology mutation locks and lookup
* { HPA, REGION } from { DPA, MEMDEV } in the event record.
*/
- guard(rwsem_read)(&cxl_region_rwsem);
- guard(rwsem_read)(&cxl_dpa_rwsem);
+ guard(rwsem_read)(&cxl_rwsem.region);
+ guard(rwsem_read)(&cxl_rwsem.dpa);
dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK;
cxlr = cxl_dpa_to_region(cxlmd, dpa);
@@ -922,12 +926,37 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
hpa_alias = hpa - cache_size;
}
- if (event_type == CXL_CPER_EVENT_GEN_MEDIA)
+ if (event_type == CXL_CPER_EVENT_GEN_MEDIA) {
+ if (cxl_store_rec_gen_media((struct cxl_memdev *)cxlmd, evt))
+ dev_dbg(&cxlmd->dev, "CXL store rec_gen_media failed\n");
+
+ if (evt->gen_media.media_hdr.descriptor &
+ CXL_GMER_EVT_DESC_THRESHOLD_EVENT)
+ WARN_ON_ONCE((evt->gen_media.media_hdr.type &
+ CXL_GMER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE) &&
+ !get_unaligned_le24(evt->gen_media.cme_count));
+ else
+ WARN_ON_ONCE(evt->gen_media.media_hdr.type &
+ CXL_GMER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE);
+
trace_cxl_general_media(cxlmd, type, cxlr, hpa,
hpa_alias, &evt->gen_media);
- else if (event_type == CXL_CPER_EVENT_DRAM)
+ } else if (event_type == CXL_CPER_EVENT_DRAM) {
+ if (cxl_store_rec_dram((struct cxl_memdev *)cxlmd, evt))
+ dev_dbg(&cxlmd->dev, "CXL store rec_dram failed\n");
+
+ if (evt->dram.media_hdr.descriptor &
+ CXL_GMER_EVT_DESC_THRESHOLD_EVENT)
+ WARN_ON_ONCE((evt->dram.media_hdr.type &
+ CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE) &&
+ !get_unaligned_le24(evt->dram.cvme_count));
+ else
+ WARN_ON_ONCE(evt->dram.media_hdr.type &
+ CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE);
+
trace_cxl_dram(cxlmd, type, cxlr, hpa, hpa_alias,
&evt->dram);
+ }
}
}
EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, "CXL");
@@ -945,6 +974,8 @@ static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd,
ev_type = CXL_CPER_EVENT_DRAM;
else if (uuid_equal(uuid, &CXL_EVENT_MEM_MODULE_UUID))
ev_type = CXL_CPER_EVENT_MEM_MODULE;
+ else if (uuid_equal(uuid, &CXL_EVENT_MEM_SPARING_UUID))
+ ev_type = CXL_CPER_EVENT_MEM_SPARING;
cxl_event_trace_record(cxlmd, type, ev_type, uuid, &record->event);
}
@@ -1258,7 +1289,7 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd)
/* synchronize with cxl_mem_probe() and decoder write operations */
guard(device)(&cxlmd->dev);
endpoint = cxlmd->endpoint;
- guard(rwsem_read)(&cxl_region_rwsem);
+ guard(rwsem_read)(&cxl_rwsem.region);
/*
* Require an endpoint to be safe otherwise the driver can not
* be sure that the device is unmapped.
@@ -1394,8 +1425,8 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
int nr_records = 0;
int rc;
- rc = mutex_lock_interruptible(&mds->poison.lock);
- if (rc)
+ ACQUIRE(mutex_intr, lock)(&mds->poison.mutex);
+ if ((rc = ACQUIRE_ERR(mutex_intr, &lock)))
return rc;
po = mds->poison.list_out;
@@ -1430,7 +1461,6 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
}
} while (po->flags & CXL_POISON_FLAG_MORE);
- mutex_unlock(&mds->poison.lock);
return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison, "CXL");
@@ -1466,7 +1496,7 @@ int cxl_poison_state_init(struct cxl_memdev_state *mds)
return rc;
}
- mutex_init(&mds->poison.lock);
+ mutex_init(&mds->poison.mutex);
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, "CXL");
diff --git a/drivers/cxl/core/mce.h b/drivers/cxl/core/mce.h
index ace73424eeb6..ca272e8db6c7 100644
--- a/drivers/cxl/core/mce.h
+++ b/drivers/cxl/core/mce.h
@@ -7,7 +7,7 @@
#ifdef CONFIG_CXL_MCE
int devm_cxl_register_mce_notifier(struct device *dev,
- struct notifier_block *mce_notifer);
+ struct notifier_block *mce_notifier);
#else
static inline int
devm_cxl_register_mce_notifier(struct device *dev,
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index a16a5886d40a..c569e00a511f 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -27,6 +27,7 @@ static void cxl_memdev_release(struct device *dev)
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
ida_free(&cxl_memdev_ida, cxlmd->id);
+ devm_cxl_memdev_edac_release(cxlmd);
kfree(cxlmd);
}
@@ -153,8 +154,8 @@ static ssize_t security_state_show(struct device *dev,
return sysfs_emit(buf, "frozen\n");
if (state & CXL_PMEM_SEC_STATE_LOCKED)
return sysfs_emit(buf, "locked\n");
- else
- return sysfs_emit(buf, "unlocked\n");
+
+ return sysfs_emit(buf, "unlocked\n");
}
static struct device_attribute dev_attr_security_state =
__ATTR(state, 0444, security_state_show, NULL);
@@ -231,15 +232,13 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
if (!port || !is_cxl_endpoint(port))
return -EINVAL;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
return rc;
- rc = down_read_interruptible(&cxl_dpa_rwsem);
- if (rc) {
- up_read(&cxl_region_rwsem);
+ ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
return rc;
- }
if (cxl_num_decoders_committed(port) == 0) {
/* No regions mapped to this memdev */
@@ -248,8 +247,6 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
/* Regions mapped, collect poison by endpoint */
rc = cxl_get_poison_by_endpoint(port);
}
- up_read(&cxl_dpa_rwsem);
- up_read(&cxl_region_rwsem);
return rc;
}
@@ -266,7 +263,7 @@ static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
dev_dbg(cxlds->dev, "device has no dpa resource\n");
return -EINVAL;
}
- if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) {
+ if (!cxl_resource_contains_addr(&cxlds->dpa_res, dpa)) {
dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n",
dpa, &cxlds->dpa_res);
return -EINVAL;
@@ -291,19 +288,17 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
if (!IS_ENABLED(CONFIG_DEBUG_FS))
return 0;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
return rc;
- rc = down_read_interruptible(&cxl_dpa_rwsem);
- if (rc) {
- up_read(&cxl_region_rwsem);
+ ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
return rc;
- }
rc = cxl_validate_poison_dpa(cxlmd, dpa);
if (rc)
- goto out;
+ return rc;
inject.address = cpu_to_le64(dpa);
mbox_cmd = (struct cxl_mbox_cmd) {
@@ -313,7 +308,7 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
};
rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc)
- goto out;
+ return rc;
cxlr = cxl_dpa_to_region(cxlmd, dpa);
if (cxlr)
@@ -326,11 +321,8 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
.length = cpu_to_le32(1),
};
trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
-out:
- up_read(&cxl_dpa_rwsem);
- up_read(&cxl_region_rwsem);
- return rc;
+ return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, "CXL");
@@ -346,19 +338,17 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
if (!IS_ENABLED(CONFIG_DEBUG_FS))
return 0;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
return rc;
- rc = down_read_interruptible(&cxl_dpa_rwsem);
- if (rc) {
- up_read(&cxl_region_rwsem);
+ ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
return rc;
- }
rc = cxl_validate_poison_dpa(cxlmd, dpa);
if (rc)
- goto out;
+ return rc;
/*
* In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command
@@ -377,7 +367,7 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc)
- goto out;
+ return rc;
cxlr = cxl_dpa_to_region(cxlmd, dpa);
if (cxlr)
@@ -390,11 +380,8 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
.length = cpu_to_le32(1),
};
trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
-out:
- up_read(&cxl_dpa_rwsem);
- up_read(&cxl_region_rwsem);
- return rc;
+ return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, "CXL");
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 3b80e9a76ba8..b50551601c2e 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -415,17 +415,20 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
*/
if (global_ctrl & CXL_HDM_DECODER_ENABLE || (!hdm && info->mem_enabled))
return devm_cxl_enable_mem(&port->dev, cxlds);
- else if (!hdm)
- return -ENODEV;
- root = to_cxl_port(port->dev.parent);
- while (!is_cxl_root(root) && is_cxl_port(root->dev.parent))
- root = to_cxl_port(root->dev.parent);
- if (!is_cxl_root(root)) {
- dev_err(dev, "Failed to acquire root port for HDM enable\n");
+ /*
+ * If the HDM Decoder Capability does not exist and DVSEC was
+ * not set up, the DVSEC-based emulation cannot be used.
+ */
+ if (!hdm)
return -ENODEV;
- }
+ /* The HDM Decoder Capability exists but is globally disabled. */
+
+ /*
+ * If the DVSEC CXL Range registers are not enabled, just
+ * enable and use the HDM Decoder Capability registers.
+ */
if (!info->mem_enabled) {
rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
if (rc)
@@ -434,6 +437,26 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
return devm_cxl_enable_mem(&port->dev, cxlds);
}
+ /*
+ * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base
+ * [High,Low] when HDM operation is enabled the range register values
+ * are ignored by the device, but the spec also recommends matching the
+ * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges
+ * are expected even though Linux does not require or maintain that
+ * match. Check if at least one DVSEC range is enabled and allowed by
+ * the platform. That is, the DVSEC range must be covered by a locked
+ * platform window (CFMWS). Fail otherwise as the endpoint's decoders
+ * cannot be used.
+ */
+
+ root = to_cxl_port(port->dev.parent);
+ while (!is_cxl_root(root) && is_cxl_port(root->dev.parent))
+ root = to_cxl_port(root->dev.parent);
+ if (!is_cxl_root(root)) {
+ dev_err(dev, "Failed to acquire root port for HDM enable\n");
+ return -ENODEV;
+ }
+
for (i = 0, allowed = 0; i < info->ranges; i++) {
struct device *cxld_dev;
@@ -453,15 +476,6 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
return -ENXIO;
}
- /*
- * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base
- * [High,Low] when HDM operation is enabled the range register values
- * are ignored by the device, but the spec also recommends matching the
- * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges
- * are expected even though Linux does not require or maintain that
- * match. If at least one DVSEC range is enabled and allowed, skip HDM
- * Decoder Capability Enable.
- */
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, "CXL");
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 726bd4a7de27..29197376b18e 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -30,18 +30,12 @@
* instantiated by the core.
*/
-/*
- * All changes to the interleave configuration occur with this lock held
- * for write.
- */
-DECLARE_RWSEM(cxl_region_rwsem);
-
static DEFINE_IDA(cxl_port_ida);
static DEFINE_XARRAY(cxl_root_buses);
int cxl_num_decoders_committed(struct cxl_port *port)
{
- lockdep_assert_held(&cxl_region_rwsem);
+ lockdep_assert_held(&cxl_rwsem.region);
return port->commit_end + 1;
}
@@ -176,7 +170,7 @@ static ssize_t target_list_show(struct device *dev,
ssize_t offset;
int rc;
- guard(rwsem_read)(&cxl_region_rwsem);
+ guard(rwsem_read)(&cxl_rwsem.region);
rc = emit_target_list(cxlsd, buf);
if (rc < 0)
return rc;
@@ -196,7 +190,7 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- /* without @cxl_dpa_rwsem, make sure @part is not reloaded */
+ /* without @cxl_rwsem.dpa, make sure @part is not reloaded */
int part = READ_ONCE(cxled->part);
const char *desc;
@@ -235,7 +229,7 @@ static ssize_t dpa_resource_show(struct device *dev, struct device_attribute *at
{
struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
- guard(rwsem_read)(&cxl_dpa_rwsem);
+ guard(rwsem_read)(&cxl_rwsem.dpa);
return sysfs_emit(buf, "%#llx\n", (u64)cxl_dpa_resource_start(cxled));
}
static DEVICE_ATTR_RO(dpa_resource);
@@ -560,7 +554,7 @@ static ssize_t decoders_committed_show(struct device *dev,
{
struct cxl_port *port = to_cxl_port(dev);
- guard(rwsem_read)(&cxl_region_rwsem);
+ guard(rwsem_read)(&cxl_rwsem.region);
return sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port));
}
@@ -602,17 +596,19 @@ struct cxl_port *to_cxl_port(const struct device *dev)
}
EXPORT_SYMBOL_NS_GPL(to_cxl_port, "CXL");
+struct cxl_port *parent_port_of(struct cxl_port *port)
+{
+ if (!port || !port->parent_dport)
+ return NULL;
+ return port->parent_dport->port;
+}
+
static void unregister_port(void *_port)
{
struct cxl_port *port = _port;
- struct cxl_port *parent;
+ struct cxl_port *parent = parent_port_of(port);
struct device *lock_dev;
- if (is_cxl_root(port))
- parent = NULL;
- else
- parent = to_cxl_port(port->dev.parent);
-
/*
* CXL root port's and the first level of ports are unregistered
* under the platform firmware device lock, all other ports are
@@ -1035,15 +1031,6 @@ struct cxl_root *find_cxl_root(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(find_cxl_root, "CXL");
-void put_cxl_root(struct cxl_root *cxl_root)
-{
- if (!cxl_root)
- return;
-
- put_device(&cxl_root->port.dev);
-}
-EXPORT_SYMBOL_NS_GPL(put_cxl_root, "CXL");
-
static struct cxl_dport *find_dport(struct cxl_port *port, int id)
{
struct cxl_dport *dport;
@@ -1729,7 +1716,7 @@ static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd,
if (xa_empty(&port->dports))
return -EINVAL;
- guard(rwsem_write)(&cxl_region_rwsem);
+ guard(rwsem_write)(&cxl_rwsem.region);
for (i = 0; i < cxlsd->cxld.interleave_ways; i++) {
struct cxl_dport *dport = find_dport(port, target_map[i]);
@@ -2008,12 +1995,9 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, "CXL");
static void cxld_unregister(void *dev)
{
- struct cxl_endpoint_decoder *cxled;
-
- if (is_endpoint_decoder(dev)) {
- cxled = to_cxl_endpoint_decoder(dev);
- cxl_decoder_kill_region(cxled);
- }
+ if (is_endpoint_decoder(dev))
+ cxl_decoder_detach(NULL, to_cxl_endpoint_decoder(dev), -1,
+ DETACH_INVALIDATE);
device_unregister(dev);
}
@@ -2300,7 +2284,7 @@ static const struct attribute_group *cxl_bus_attribute_groups[] = {
NULL,
};
-struct bus_type cxl_bus_type = {
+const struct bus_type cxl_bus_type = {
.name = "cxl",
.uevent = cxl_bus_uevent,
.match = cxl_bus_match,
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index 485a831695c7..2731ba3a0799 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -31,40 +31,38 @@ static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev,
ras_cap.header_log);
}
-static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev,
- struct cxl_ras_capability_regs ras_cap)
+static void cxl_cper_trace_corr_prot_err(struct cxl_memdev *cxlmd,
+ struct cxl_ras_capability_regs ras_cap)
{
u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
- struct cxl_dev_state *cxlds;
- cxlds = pci_get_drvdata(pdev);
- if (!cxlds)
- return;
-
- trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
+ trace_cxl_aer_correctable_error(cxlmd, status);
}
-static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev,
- struct cxl_ras_capability_regs ras_cap)
+static void
+cxl_cper_trace_uncorr_prot_err(struct cxl_memdev *cxlmd,
+ struct cxl_ras_capability_regs ras_cap)
{
u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask;
- struct cxl_dev_state *cxlds;
u32 fe;
- cxlds = pci_get_drvdata(pdev);
- if (!cxlds)
- return;
-
if (hweight32(status) > 1)
fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
ras_cap.cap_control));
else
fe = status;
- trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe,
+ trace_cxl_aer_uncorrectable_error(cxlmd, status, fe,
ras_cap.header_log);
}
+static int match_memdev_by_parent(struct device *dev, const void *uport)
+{
+ if (is_cxl_memdev(dev) && dev->parent == uport)
+ return 1;
+ return 0;
+}
+
static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
{
unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device,
@@ -73,13 +71,12 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment,
data->prot_err.agent_addr.bus,
devfn);
+ struct cxl_memdev *cxlmd;
int port_type;
if (!pdev)
return;
- guard(device)(&pdev->dev);
-
port_type = pci_pcie_type(pdev);
if (port_type == PCI_EXP_TYPE_ROOT_PORT ||
port_type == PCI_EXP_TYPE_DOWNSTREAM ||
@@ -92,10 +89,20 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
return;
}
+ guard(device)(&pdev->dev);
+ if (!pdev->dev.driver)
+ return;
+
+ struct device *mem_dev __free(put_device) = bus_find_device(
+ &cxl_bus_type, NULL, pdev, match_memdev_by_parent);
+ if (!mem_dev)
+ return;
+
+ cxlmd = to_cxl_memdev(mem_dev);
if (data->severity == AER_CORRECTABLE)
- cxl_cper_trace_corr_prot_err(pdev, data->ras_cap);
+ cxl_cper_trace_corr_prot_err(cxlmd, data->ras_cap);
else
- cxl_cper_trace_uncorr_prot_err(pdev, data->ras_cap);
+ cxl_cper_trace_uncorr_prot_err(cxlmd, data->ras_cap);
}
static void cxl_cper_prot_err_work_fn(struct work_struct *work)
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index c3f4dc244df7..71cc42d05248 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -141,16 +141,12 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
struct cxl_region_params *p = &cxlr->params;
ssize_t rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
return rc;
if (cxlr->mode != CXL_PARTMODE_PMEM)
- rc = sysfs_emit(buf, "\n");
- else
- rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
- up_read(&cxl_region_rwsem);
-
- return rc;
+ return sysfs_emit(buf, "\n");
+ return sysfs_emit(buf, "%pUb\n", &p->uuid);
}
static int is_dup(struct device *match, void *data)
@@ -162,7 +158,7 @@ static int is_dup(struct device *match, void *data)
if (!is_cxl_region(match))
return 0;
- lockdep_assert_held(&cxl_region_rwsem);
+ lockdep_assert_held(&cxl_rwsem.region);
cxlr = to_cxl_region(match);
p = &cxlr->params;
@@ -192,27 +188,22 @@ static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
if (uuid_is_null(&temp))
return -EINVAL;
- rc = down_write_killable(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_write_kill, region_rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &region_rwsem)))
return rc;
if (uuid_equal(&p->uuid, &temp))
- goto out;
+ return len;
- rc = -EBUSY;
if (p->state >= CXL_CONFIG_ACTIVE)
- goto out;
+ return -EBUSY;
rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup);
if (rc < 0)
- goto out;
+ return rc;
uuid_copy(&p->uuid, &temp);
-out:
- up_write(&cxl_region_rwsem);
- if (rc)
- return rc;
return len;
}
static DEVICE_ATTR_RW(uuid);
@@ -231,11 +222,10 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
&cxlr->dev,
"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
return 0;
- } else {
- dev_WARN(&cxlr->dev,
- "Failed to synchronize CPU cache state\n");
- return -ENXIO;
}
+ dev_WARN(&cxlr->dev,
+ "Failed to synchronize CPU cache state\n");
+ return -ENXIO;
}
cpu_cache_invalidate_memregion(IORES_DESC_CXL);
@@ -350,33 +340,40 @@ err:
return rc;
}
-static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t len)
+static int queue_reset(struct cxl_region *cxlr)
{
- struct cxl_region *cxlr = to_cxl_region(dev);
struct cxl_region_params *p = &cxlr->params;
- bool commit;
- ssize_t rc;
+ int rc;
- rc = kstrtobool(buf, &commit);
- if (rc)
+ ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
return rc;
- rc = down_write_killable(&cxl_region_rwsem);
- if (rc)
+ /* Already in the requested state? */
+ if (p->state < CXL_CONFIG_COMMIT)
+ return 0;
+
+ p->state = CXL_CONFIG_RESET_PENDING;
+
+ return 0;
+}
+
+static int __commit(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ int rc;
+
+ ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
return rc;
/* Already in the requested state? */
- if (commit && p->state >= CXL_CONFIG_COMMIT)
- goto out;
- if (!commit && p->state < CXL_CONFIG_COMMIT)
- goto out;
+ if (p->state >= CXL_CONFIG_COMMIT)
+ return 0;
/* Not ready to commit? */
- if (commit && p->state < CXL_CONFIG_ACTIVE) {
- rc = -ENXIO;
- goto out;
- }
+ if (p->state < CXL_CONFIG_ACTIVE)
+ return -ENXIO;
/*
* Invalidate caches before region setup to drop any speculative
@@ -384,33 +381,61 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
*/
rc = cxl_region_invalidate_memregion(cxlr);
if (rc)
- goto out;
+ return rc;
+
+ rc = cxl_region_decode_commit(cxlr);
+ if (rc)
+ return rc;
- if (commit) {
- rc = cxl_region_decode_commit(cxlr);
- if (rc == 0)
- p->state = CXL_CONFIG_COMMIT;
- } else {
- p->state = CXL_CONFIG_RESET_PENDING;
- up_write(&cxl_region_rwsem);
- device_release_driver(&cxlr->dev);
- down_write(&cxl_region_rwsem);
+ p->state = CXL_CONFIG_COMMIT;
- /*
- * The lock was dropped, so need to revalidate that the reset is
- * still pending.
- */
- if (p->state == CXL_CONFIG_RESET_PENDING) {
- cxl_region_decode_reset(cxlr, p->interleave_ways);
- p->state = CXL_CONFIG_ACTIVE;
- }
- }
+ return 0;
+}
-out:
- up_write(&cxl_region_rwsem);
+static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ bool commit;
+ ssize_t rc;
+ rc = kstrtobool(buf, &commit);
+ if (rc)
+ return rc;
+
+ if (commit) {
+ rc = __commit(cxlr);
+ if (rc)
+ return rc;
+ return len;
+ }
+
+ rc = queue_reset(cxlr);
if (rc)
return rc;
+
+ /*
+ * Unmap the region and depend on the reset-pending state to ensure
+ * it does not go active again until post reset
+ */
+ device_release_driver(&cxlr->dev);
+
+ /*
+ * With the reset pending take cxl_rwsem.region unconditionally
+ * to ensure the reset gets handled before returning.
+ */
+ guard(rwsem_write)(&cxl_rwsem.region);
+
+ /*
+ * Revalidate that the reset is still pending in case another
+ * thread already handled this reset.
+ */
+ if (p->state == CXL_CONFIG_RESET_PENDING) {
+ cxl_region_decode_reset(cxlr, p->interleave_ways);
+ p->state = CXL_CONFIG_ACTIVE;
+ }
+
return len;
}
@@ -421,13 +446,10 @@ static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
struct cxl_region_params *p = &cxlr->params;
ssize_t rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
return rc;
- rc = sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
- up_read(&cxl_region_rwsem);
-
- return rc;
+ return sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
}
static DEVICE_ATTR_RW(commit);
@@ -451,15 +473,12 @@ static ssize_t interleave_ways_show(struct device *dev,
{
struct cxl_region *cxlr = to_cxl_region(dev);
struct cxl_region_params *p = &cxlr->params;
- ssize_t rc;
+ int rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
return rc;
- rc = sysfs_emit(buf, "%d\n", p->interleave_ways);
- up_read(&cxl_region_rwsem);
-
- return rc;
+ return sysfs_emit(buf, "%d\n", p->interleave_ways);
}
static const struct attribute_group *get_cxl_region_target_group(void);
@@ -494,23 +513,21 @@ static ssize_t interleave_ways_store(struct device *dev,
return -EINVAL;
}
- rc = down_write_killable(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
return rc;
- if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
- rc = -EBUSY;
- goto out;
- }
+
+ if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
+ return -EBUSY;
save = p->interleave_ways;
p->interleave_ways = val;
rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
- if (rc)
+ if (rc) {
p->interleave_ways = save;
-out:
- up_write(&cxl_region_rwsem);
- if (rc)
return rc;
+ }
+
return len;
}
static DEVICE_ATTR_RW(interleave_ways);
@@ -521,15 +538,12 @@ static ssize_t interleave_granularity_show(struct device *dev,
{
struct cxl_region *cxlr = to_cxl_region(dev);
struct cxl_region_params *p = &cxlr->params;
- ssize_t rc;
+ int rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
return rc;
- rc = sysfs_emit(buf, "%d\n", p->interleave_granularity);
- up_read(&cxl_region_rwsem);
-
- return rc;
+ return sysfs_emit(buf, "%d\n", p->interleave_granularity);
}
static ssize_t interleave_granularity_store(struct device *dev,
@@ -562,19 +576,15 @@ static ssize_t interleave_granularity_store(struct device *dev,
if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
return -EINVAL;
- rc = down_write_killable(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
return rc;
- if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
- rc = -EBUSY;
- goto out;
- }
+
+ if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
+ return -EBUSY;
p->interleave_granularity = val;
-out:
- up_write(&cxl_region_rwsem);
- if (rc)
- return rc;
+
return len;
}
static DEVICE_ATTR_RW(interleave_granularity);
@@ -585,17 +595,15 @@ static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
struct cxl_region *cxlr = to_cxl_region(dev);
struct cxl_region_params *p = &cxlr->params;
u64 resource = -1ULL;
- ssize_t rc;
+ int rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
return rc;
+
if (p->res)
resource = p->res->start;
- rc = sysfs_emit(buf, "%#llx\n", resource);
- up_read(&cxl_region_rwsem);
-
- return rc;
+ return sysfs_emit(buf, "%#llx\n", resource);
}
static DEVICE_ATTR_RO(resource);
@@ -623,7 +631,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
struct resource *res;
u64 remainder = 0;
- lockdep_assert_held_write(&cxl_region_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.region);
/* Nothing to do... */
if (p->res && resource_size(p->res) == size)
@@ -665,7 +673,7 @@ static void cxl_region_iomem_release(struct cxl_region *cxlr)
struct cxl_region_params *p = &cxlr->params;
if (device_is_registered(&cxlr->dev))
- lockdep_assert_held_write(&cxl_region_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.region);
if (p->res) {
/*
* Autodiscovered regions may not have been able to insert their
@@ -682,7 +690,7 @@ static int free_hpa(struct cxl_region *cxlr)
{
struct cxl_region_params *p = &cxlr->params;
- lockdep_assert_held_write(&cxl_region_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.region);
if (!p->res)
return 0;
@@ -706,15 +714,14 @@ static ssize_t size_store(struct device *dev, struct device_attribute *attr,
if (rc)
return rc;
- rc = down_write_killable(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
return rc;
if (val)
rc = alloc_hpa(cxlr, val);
else
rc = free_hpa(cxlr);
- up_write(&cxl_region_rwsem);
if (rc)
return rc;
@@ -730,15 +737,12 @@ static ssize_t size_show(struct device *dev, struct device_attribute *attr,
u64 size = 0;
ssize_t rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
return rc;
if (p->res)
size = resource_size(p->res);
- rc = sysfs_emit(buf, "%#llx\n", size);
- up_read(&cxl_region_rwsem);
-
- return rc;
+ return sysfs_emit(buf, "%#llx\n", size);
}
static DEVICE_ATTR_RW(size);
@@ -764,26 +768,20 @@ static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
struct cxl_endpoint_decoder *cxled;
int rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
return rc;
if (pos >= p->interleave_ways) {
dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
p->interleave_ways);
- rc = -ENXIO;
- goto out;
+ return -ENXIO;
}
cxled = p->targets[pos];
if (!cxled)
- rc = sysfs_emit(buf, "\n");
- else
- rc = sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
-out:
- up_read(&cxl_region_rwsem);
-
- return rc;
+ return sysfs_emit(buf, "\n");
+ return sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
}
static int check_commit_order(struct device *dev, void *data)
@@ -865,10 +863,23 @@ static int match_auto_decoder(struct device *dev, const void *data)
return 0;
}
+/**
+ * cxl_port_pick_region_decoder() - assign or lookup a decoder for a region
+ * @port: a port in the ancestry of the endpoint implied by @cxled
+ * @cxled: endpoint decoder to be, or currently, mapped by @port
+ * @cxlr: region to establish, or validate, decode @port
+ *
+ * In the region creation path cxl_port_pick_region_decoder() is an
+ * allocator to find a free decoder. In the region assembly path, it is
+ * recalling the decoder that platform firmware picked for validation
+ * purposes.
+ *
+ * The result is recorded in a 'struct cxl_region_ref' in @port.
+ */
static struct cxl_decoder *
-cxl_region_find_decoder(struct cxl_port *port,
- struct cxl_endpoint_decoder *cxled,
- struct cxl_region *cxlr)
+cxl_port_pick_region_decoder(struct cxl_port *port,
+ struct cxl_endpoint_decoder *cxled,
+ struct cxl_region *cxlr)
{
struct device *dev;
@@ -885,7 +896,7 @@ cxl_region_find_decoder(struct cxl_port *port,
/*
* This decoder is pinned registered as long as the endpoint decoder is
* registered, and endpoint decoder unregistration holds the
- * cxl_region_rwsem over unregister events, so no need to hold on to
+ * cxl_rwsem.region over unregister events, so no need to hold on to
* this extra reference.
*/
put_device(dev);
@@ -916,7 +927,8 @@ static bool auto_order_ok(struct cxl_port *port, struct cxl_region *cxlr_iter,
static struct cxl_region_ref *
alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr,
- struct cxl_endpoint_decoder *cxled)
+ struct cxl_endpoint_decoder *cxled,
+ struct cxl_decoder *cxld)
{
struct cxl_region_params *p = &cxlr->params;
struct cxl_region_ref *cxl_rr, *iter;
@@ -930,9 +942,6 @@ alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr,
continue;
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
- struct cxl_decoder *cxld;
-
- cxld = cxl_region_find_decoder(port, cxled, cxlr);
if (auto_order_ok(port, iter->region, cxld))
continue;
}
@@ -1014,19 +1023,11 @@ static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
return 0;
}
-static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
- struct cxl_endpoint_decoder *cxled,
- struct cxl_region_ref *cxl_rr)
+static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled,
+ struct cxl_region_ref *cxl_rr,
+ struct cxl_decoder *cxld)
{
- struct cxl_decoder *cxld;
-
- cxld = cxl_region_find_decoder(port, cxled, cxlr);
- if (!cxld) {
- dev_dbg(&cxlr->dev, "%s: no decoder available\n",
- dev_name(&port->dev));
- return -EBUSY;
- }
-
if (cxld->region) {
dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
dev_name(&port->dev), dev_name(&cxld->dev),
@@ -1086,7 +1087,7 @@ static int cxl_port_attach_region(struct cxl_port *port,
unsigned long index;
int rc = -EBUSY;
- lockdep_assert_held_write(&cxl_region_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.region);
cxl_rr = cxl_rr_load(port, cxlr);
if (cxl_rr) {
@@ -1117,7 +1118,16 @@ static int cxl_port_attach_region(struct cxl_port *port,
nr_targets_inc = true;
}
} else {
- cxl_rr = alloc_region_ref(port, cxlr, cxled);
+ struct cxl_decoder *cxld;
+
+ cxld = cxl_port_pick_region_decoder(port, cxled, cxlr);
+ if (!cxld) {
+ dev_dbg(&cxlr->dev, "%s: no decoder available\n",
+ dev_name(&port->dev));
+ return -EBUSY;
+ }
+
+ cxl_rr = alloc_region_ref(port, cxlr, cxled, cxld);
if (IS_ERR(cxl_rr)) {
dev_dbg(&cxlr->dev,
"%s: failed to allocate region reference\n",
@@ -1126,7 +1136,7 @@ static int cxl_port_attach_region(struct cxl_port *port,
}
nr_targets_inc = true;
- rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr);
+ rc = cxl_rr_assign_decoder(port, cxlr, cxled, cxl_rr, cxld);
if (rc)
goto out_erase;
}
@@ -1187,7 +1197,7 @@ static void cxl_port_detach_region(struct cxl_port *port,
struct cxl_region_ref *cxl_rr;
struct cxl_ep *ep = NULL;
- lockdep_assert_held_write(&cxl_region_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.region);
cxl_rr = cxl_rr_load(port, cxlr);
if (!cxl_rr)
@@ -1446,7 +1456,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
if (cxld->interleave_ways != iw ||
- cxld->interleave_granularity != ig ||
+ (iw > 1 && cxld->interleave_granularity != ig) ||
!region_res_match_cxl_range(p, &cxld->hpa_range) ||
((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
dev_err(&cxlr->dev,
@@ -1748,13 +1758,6 @@ static int cmp_interleave_pos(const void *a, const void *b)
return cxled_a->pos - cxled_b->pos;
}
-static struct cxl_port *next_port(struct cxl_port *port)
-{
- if (!port->parent_dport)
- return NULL;
- return port->parent_dport->port;
-}
-
static int match_switch_decoder_by_range(struct device *dev,
const void *data)
{
@@ -1781,7 +1784,7 @@ static int find_pos_and_ways(struct cxl_port *port, struct range *range,
struct device *dev;
int rc = -ENXIO;
- parent = next_port(port);
+ parent = parent_port_of(port);
if (!parent)
return rc;
@@ -1805,6 +1808,13 @@ static int find_pos_and_ways(struct cxl_port *port, struct range *range,
}
put_device(dev);
+ if (rc)
+ dev_err(port->uport_dev,
+ "failed to find %s:%s in target list of %s\n",
+ dev_name(&port->dev),
+ dev_name(port->parent_dport->dport_dev),
+ dev_name(&cxlsd->cxld.dev));
+
return rc;
}
@@ -1861,7 +1871,7 @@ static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled)
*/
/* Iterate from endpoint to root_port refining the position */
- for (iter = port; iter; iter = next_port(iter)) {
+ for (iter = port; iter; iter = parent_port_of(iter)) {
if (is_cxl_root(iter))
break;
@@ -1940,7 +1950,9 @@ static int cxl_region_attach(struct cxl_region *cxlr,
if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
dev_dbg(&cxlr->dev, "region already active\n");
return -EBUSY;
- } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ }
+
+ if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
dev_dbg(&cxlr->dev, "interleave config missing\n");
return -ENXIO;
}
@@ -2081,27 +2093,43 @@ static int cxl_region_attach(struct cxl_region *cxlr,
return 0;
}
-static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
+static struct cxl_region *
+__cxl_decoder_detach(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos,
+ enum cxl_detach_mode mode)
{
- struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
- struct cxl_region *cxlr = cxled->cxld.region;
struct cxl_region_params *p;
- int rc = 0;
- lockdep_assert_held_write(&cxl_region_rwsem);
+ lockdep_assert_held_write(&cxl_rwsem.region);
- if (!cxlr)
- return 0;
+ if (!cxled) {
+ p = &cxlr->params;
- p = &cxlr->params;
- get_device(&cxlr->dev);
+ if (pos >= p->interleave_ways) {
+ dev_dbg(&cxlr->dev, "position %d out of range %d\n",
+ pos, p->interleave_ways);
+ return NULL;
+ }
+
+ if (!p->targets[pos])
+ return NULL;
+ cxled = p->targets[pos];
+ } else {
+ cxlr = cxled->cxld.region;
+ if (!cxlr)
+ return NULL;
+ p = &cxlr->params;
+ }
+
+ if (mode == DETACH_INVALIDATE)
+ cxled->part = -1;
if (p->state > CXL_CONFIG_ACTIVE) {
cxl_region_decode_reset(cxlr, p->interleave_ways);
p->state = CXL_CONFIG_ACTIVE;
}
- for (iter = ep_port; !is_cxl_root(iter);
+ for (struct cxl_port *iter = cxled_to_port(cxled); !is_cxl_root(iter);
iter = to_cxl_port(iter->dev.parent))
cxl_port_detach_region(iter, cxlr, cxled);
@@ -2112,7 +2140,7 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
cxled->pos);
- goto out;
+ return NULL;
}
if (p->state == CXL_CONFIG_ACTIVE) {
@@ -2126,68 +2154,79 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
.end = -1,
};
- /* notify the region driver that one of its targets has departed */
- up_write(&cxl_region_rwsem);
- device_release_driver(&cxlr->dev);
- down_write(&cxl_region_rwsem);
-out:
- put_device(&cxlr->dev);
- return rc;
+ get_device(&cxlr->dev);
+ return cxlr;
+}
+
+/*
+ * Clean up a decoder's interest in a region. There are 2 cases to
+ * handle: removing an unknown @cxled from a known position in a region
+ * (detach_target()), or removing a known @cxled from an unknown @cxlr
+ * (cxld_unregister()).
+ *
+ * When the detachment finds a region, release the region driver.
+ */
+int cxl_decoder_detach(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos,
+ enum cxl_detach_mode mode)
+{
+ struct cxl_region *detach;
+
+ /* when the decoder is being destroyed lock unconditionally */
+ if (mode == DETACH_INVALIDATE) {
+ guard(rwsem_write)(&cxl_rwsem.region);
+ detach = __cxl_decoder_detach(cxlr, cxled, pos, mode);
+ } else {
+ int rc;
+
+ ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
+ return rc;
+ detach = __cxl_decoder_detach(cxlr, cxled, pos, mode);
+ }
+
+ if (detach) {
+ device_release_driver(&detach->dev);
+ put_device(&detach->dev);
+ }
+ return 0;
}
-void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
+static int __attach_target(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos,
+ unsigned int state)
{
- down_write(&cxl_region_rwsem);
- cxled->part = -1;
- cxl_region_detach(cxled);
- up_write(&cxl_region_rwsem);
+ int rc;
+
+ if (state == TASK_INTERRUPTIBLE) {
+ ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
+ return rc;
+ guard(rwsem_read)(&cxl_rwsem.dpa);
+ return cxl_region_attach(cxlr, cxled, pos);
+ }
+ guard(rwsem_write)(&cxl_rwsem.region);
+ guard(rwsem_read)(&cxl_rwsem.dpa);
+ return cxl_region_attach(cxlr, cxled, pos);
}
static int attach_target(struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled, int pos,
unsigned int state)
{
- int rc = 0;
+ int rc = __attach_target(cxlr, cxled, pos, state);
- if (state == TASK_INTERRUPTIBLE)
- rc = down_write_killable(&cxl_region_rwsem);
- else
- down_write(&cxl_region_rwsem);
- if (rc)
- return rc;
+ if (rc == 0)
+ return 0;
- down_read(&cxl_dpa_rwsem);
- rc = cxl_region_attach(cxlr, cxled, pos);
- up_read(&cxl_dpa_rwsem);
- up_write(&cxl_region_rwsem);
+ dev_warn(cxled->cxld.dev.parent, "failed to attach %s to %s: %d\n",
+ dev_name(&cxled->cxld.dev), dev_name(&cxlr->dev), rc);
return rc;
}
static int detach_target(struct cxl_region *cxlr, int pos)
{
- struct cxl_region_params *p = &cxlr->params;
- int rc;
-
- rc = down_write_killable(&cxl_region_rwsem);
- if (rc)
- return rc;
-
- if (pos >= p->interleave_ways) {
- dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
- p->interleave_ways);
- rc = -ENXIO;
- goto out;
- }
-
- if (!p->targets[pos]) {
- rc = 0;
- goto out;
- }
-
- rc = cxl_region_detach(p->targets[pos]);
-out:
- up_write(&cxl_region_rwsem);
- return rc;
+ return cxl_decoder_detach(cxlr, NULL, pos, DETACH_ONLY);
}
static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
@@ -2432,16 +2471,16 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
unsigned long action, void *arg)
{
struct cxl_region *cxlr = container_of(nb, struct cxl_region,
- memory_notifier);
- struct memory_notify *mnb = arg;
- int nid = mnb->status_change_nid;
+ node_notifier);
+ struct node_notify *nn = arg;
+ int nid = nn->nid;
int region_nid;
- if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
+ if (action != NODE_ADDED_FIRST_MEMORY)
return NOTIFY_DONE;
/*
- * No need to hold cxl_region_rwsem; region parameters are stable
+ * No need to hold cxl_rwsem.region; region parameters are stable
* within the cxl_region driver.
*/
region_nid = phys_to_target_node(cxlr->params.res->start);
@@ -2464,7 +2503,7 @@ static int cxl_region_calculate_adistance(struct notifier_block *nb,
int region_nid;
/*
- * No need to hold cxl_region_rwsem; region parameters are stable
+ * No need to hold cxl_rwsem.region; region parameters are stable
* within the cxl_region driver.
*/
region_nid = phys_to_target_node(cxlr->params.res->start);
@@ -2613,17 +2652,13 @@ static ssize_t region_show(struct device *dev, struct device_attribute *attr,
struct cxl_decoder *cxld = to_cxl_decoder(dev);
ssize_t rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
return rc;
if (cxld->region)
- rc = sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
- else
- rc = sysfs_emit(buf, "\n");
- up_read(&cxl_region_rwsem);
-
- return rc;
+ return sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
+ return sysfs_emit(buf, "\n");
}
DEVICE_ATTR_RO(region);
@@ -2828,7 +2863,7 @@ static int __cxl_dpa_to_region(struct device *dev, void *arg)
if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res))
return 0;
- if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
+ if (!cxl_resource_contains_addr(cxled->dpa_res, dpa))
return 0;
/*
@@ -2940,7 +2975,7 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
if (cxlrd->hpa_to_spa)
hpa = cxlrd->hpa_to_spa(cxlrd, hpa);
- if (hpa < p->res->start || hpa > p->res->end) {
+ if (!cxl_resource_contains_addr(p->res, hpa)) {
dev_dbg(&cxlr->dev,
"Addr trans fail: hpa 0x%llx not in region\n", hpa);
return ULLONG_MAX;
@@ -2962,7 +2997,7 @@ static int cxl_pmem_region_alloc(struct cxl_region *cxlr)
struct device *dev;
int i;
- guard(rwsem_read)(&cxl_region_rwsem);
+ guard(rwsem_read)(&cxl_rwsem.region);
if (p->state != CXL_CONFIG_COMMIT)
return -ENXIO;
@@ -2974,7 +3009,7 @@ static int cxl_pmem_region_alloc(struct cxl_region *cxlr)
cxlr_pmem->hpa_range.start = p->res->start;
cxlr_pmem->hpa_range.end = p->res->end;
- /* Snapshot the region configuration underneath the cxl_region_rwsem */
+ /* Snapshot the region configuration underneath the cxl_rwsem.region */
cxlr_pmem->nr_mappings = p->nr_targets;
for (i = 0; i < p->nr_targets; i++) {
struct cxl_endpoint_decoder *cxled = p->targets[i];
@@ -3051,7 +3086,7 @@ static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
struct cxl_dax_region *cxlr_dax;
struct device *dev;
- guard(rwsem_read)(&cxl_region_rwsem);
+ guard(rwsem_read)(&cxl_rwsem.region);
if (p->state != CXL_CONFIG_COMMIT)
return ERR_PTR(-ENXIO);
@@ -3196,20 +3231,49 @@ err:
return rc;
}
-static int match_root_decoder_by_range(struct device *dev,
- const void *data)
+static int match_decoder_by_range(struct device *dev, const void *data)
{
const struct range *r1, *r2 = data;
- struct cxl_root_decoder *cxlrd;
+ struct cxl_decoder *cxld;
- if (!is_root_decoder(dev))
+ if (!is_switch_decoder(dev))
return 0;
- cxlrd = to_cxl_root_decoder(dev);
- r1 = &cxlrd->cxlsd.cxld.hpa_range;
+ cxld = to_cxl_decoder(dev);
+ r1 = &cxld->hpa_range;
return range_contains(r1, r2);
}
+static struct cxl_decoder *
+cxl_port_find_switch_decoder(struct cxl_port *port, struct range *hpa)
+{
+ struct device *cxld_dev = device_find_child(&port->dev, hpa,
+ match_decoder_by_range);
+
+ return cxld_dev ? to_cxl_decoder(cxld_dev) : NULL;
+}
+
+static struct cxl_root_decoder *
+cxl_find_root_decoder(struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *port = cxled_to_port(cxled);
+ struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
+ struct cxl_decoder *root, *cxld = &cxled->cxld;
+ struct range *hpa = &cxld->hpa_range;
+
+ root = cxl_port_find_switch_decoder(&cxl_root->port, hpa);
+ if (!root) {
+ dev_err(cxlmd->dev.parent,
+ "%s:%s no CXL window for range %#llx:%#llx\n",
+ dev_name(&cxlmd->dev), dev_name(&cxld->dev),
+ cxld->hpa_range.start, cxld->hpa_range.end);
+ return NULL;
+ }
+
+ return to_cxl_root_decoder(&root->dev);
+}
+
static int match_region_by_range(struct device *dev, const void *data)
{
struct cxl_region_params *p;
@@ -3222,7 +3286,7 @@ static int match_region_by_range(struct device *dev, const void *data)
cxlr = to_cxl_region(dev);
p = &cxlr->params;
- guard(rwsem_read)(&cxl_region_rwsem);
+ guard(rwsem_read)(&cxl_rwsem.region);
if (p->res && p->res->start == r->start && p->res->end == r->end)
return 1;
@@ -3234,15 +3298,10 @@ static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr,
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
struct cxl_region_params *p = &cxlr->params;
- int nid = phys_to_target_node(res->start);
resource_size_t size = resource_size(res);
resource_size_t cache_size, start;
- int rc;
-
- rc = cxl_acpi_get_extended_linear_cache_size(res, nid, &cache_size);
- if (rc)
- return rc;
+ cache_size = cxlrd->cache_size;
if (!cache_size)
return 0;
@@ -3282,7 +3341,7 @@ static int __construct_region(struct cxl_region *cxlr,
struct resource *res;
int rc;
- guard(rwsem_write)(&cxl_region_rwsem);
+ guard(rwsem_write)(&cxl_rwsem.region);
p = &cxlr->params;
if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
dev_err(cxlmd->dev.parent,
@@ -3376,54 +3435,52 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
return cxlr;
}
-int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
+static struct cxl_region *
+cxl_find_region_by_range(struct cxl_root_decoder *cxlrd, struct range *hpa)
+{
+ struct device *region_dev;
+
+ region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
+ match_region_by_range);
+ if (!region_dev)
+ return NULL;
+
+ return to_cxl_region(region_dev);
+}
+
+int cxl_add_to_region(struct cxl_endpoint_decoder *cxled)
{
- struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct range *hpa = &cxled->cxld.hpa_range;
- struct cxl_decoder *cxld = &cxled->cxld;
- struct device *cxlrd_dev, *region_dev;
- struct cxl_root_decoder *cxlrd;
struct cxl_region_params *p;
- struct cxl_region *cxlr;
bool attach = false;
int rc;
- cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range,
- match_root_decoder_by_range);
- if (!cxlrd_dev) {
- dev_err(cxlmd->dev.parent,
- "%s:%s no CXL window for range %#llx:%#llx\n",
- dev_name(&cxlmd->dev), dev_name(&cxld->dev),
- cxld->hpa_range.start, cxld->hpa_range.end);
+ struct cxl_root_decoder *cxlrd __free(put_cxl_root_decoder) =
+ cxl_find_root_decoder(cxled);
+ if (!cxlrd)
return -ENXIO;
- }
-
- cxlrd = to_cxl_root_decoder(cxlrd_dev);
/*
* Ensure that if multiple threads race to construct_region() for @hpa
* one does the construction and the others add to that.
*/
mutex_lock(&cxlrd->range_lock);
- region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
- match_region_by_range);
- if (!region_dev) {
+ struct cxl_region *cxlr __free(put_cxl_region) =
+ cxl_find_region_by_range(cxlrd, hpa);
+ if (!cxlr)
cxlr = construct_region(cxlrd, cxled);
- region_dev = &cxlr->dev;
- } else
- cxlr = to_cxl_region(region_dev);
mutex_unlock(&cxlrd->range_lock);
rc = PTR_ERR_OR_ZERO(cxlr);
if (rc)
- goto out;
+ return rc;
attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);
- down_read(&cxl_region_rwsem);
- p = &cxlr->params;
- attach = p->state == CXL_CONFIG_COMMIT;
- up_read(&cxl_region_rwsem);
+ scoped_guard(rwsem_read, &cxl_rwsem.region) {
+ p = &cxlr->params;
+ attach = p->state == CXL_CONFIG_COMMIT;
+ }
if (attach) {
/*
@@ -3436,9 +3493,6 @@ int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
p->res);
}
- put_device(region_dev);
-out:
- put_device(cxlrd_dev);
return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, "CXL");
@@ -3451,12 +3505,12 @@ u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa)
if (!endpoint)
return ~0ULL;
- guard(rwsem_write)(&cxl_region_rwsem);
+ guard(rwsem_write)(&cxl_rwsem.region);
xa_for_each(&endpoint->regions, index, iter) {
struct cxl_region_params *p = &iter->region->params;
- if (p->res->start <= spa && spa <= p->res->end) {
+ if (cxl_resource_contains_addr(p->res, spa)) {
if (!p->cache_size)
return ~0ULL;
@@ -3484,48 +3538,53 @@ static void shutdown_notifiers(void *_cxlr)
{
struct cxl_region *cxlr = _cxlr;
- unregister_memory_notifier(&cxlr->memory_notifier);
+ unregister_node_notifier(&cxlr->node_notifier);
unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
}
-static int cxl_region_probe(struct device *dev)
+static int cxl_region_can_probe(struct cxl_region *cxlr)
{
- struct cxl_region *cxlr = to_cxl_region(dev);
struct cxl_region_params *p = &cxlr->params;
int rc;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc) {
+ ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) {
dev_dbg(&cxlr->dev, "probe interrupted\n");
return rc;
}
if (p->state < CXL_CONFIG_COMMIT) {
dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
- rc = -ENXIO;
- goto out;
+ return -ENXIO;
}
if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) {
dev_err(&cxlr->dev,
"failed to activate, re-commit region and retry\n");
- rc = -ENXIO;
- goto out;
+ return -ENXIO;
}
+ return 0;
+}
+
+static int cxl_region_probe(struct device *dev)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ int rc;
+
+ rc = cxl_region_can_probe(cxlr);
+ if (rc)
+ return rc;
+
/*
* From this point on any path that changes the region's state away from
* CXL_CONFIG_COMMIT is also responsible for releasing the driver.
*/
-out:
- up_read(&cxl_region_rwsem);
-
- if (rc)
- return rc;
- cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
- cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
- register_memory_notifier(&cxlr->memory_notifier);
+ cxlr->node_notifier.notifier_call = cxl_region_perf_attrs_callback;
+ cxlr->node_notifier.priority = CXL_CALLBACK_PRI;
+ register_node_notifier(&cxlr->node_notifier);
cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
cxlr->adist_notifier.priority = 100;
@@ -3537,8 +3596,18 @@ out:
switch (cxlr->mode) {
case CXL_PARTMODE_PMEM:
+ rc = devm_cxl_region_edac_register(cxlr);
+ if (rc)
+ dev_dbg(&cxlr->dev, "CXL EDAC registration for region_id=%d failed\n",
+ cxlr->id);
+
return devm_cxl_add_pmem_region(cxlr);
case CXL_PARTMODE_RAM:
+ rc = devm_cxl_region_edac_register(cxlr);
+ if (rc)
+ dev_dbg(&cxlr->dev, "CXL EDAC registration for region_id=%d failed\n",
+ cxlr->id);
+
/*
* The region can not be manged by CXL if any portion of
* it is already online as 'System RAM'
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
index 25ebfbc1616c..a53ec4798b12 100644
--- a/drivers/cxl/core/trace.h
+++ b/drivers/cxl/core/trace.h
@@ -214,12 +214,16 @@ TRACE_EVENT(cxl_overflow,
#define CXL_EVENT_RECORD_FLAG_PERF_DEGRADED BIT(4)
#define CXL_EVENT_RECORD_FLAG_HW_REPLACE BIT(5)
#define CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID BIT(6)
+#define CXL_EVENT_RECORD_FLAG_LD_ID_VALID BIT(7)
+#define CXL_EVENT_RECORD_FLAG_HEAD_ID_VALID BIT(8)
#define show_hdr_flags(flags) __print_flags(flags, " | ", \
{ CXL_EVENT_RECORD_FLAG_PERMANENT, "PERMANENT_CONDITION" }, \
{ CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, "MAINTENANCE_NEEDED" }, \
{ CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, "PERFORMANCE_DEGRADED" }, \
{ CXL_EVENT_RECORD_FLAG_HW_REPLACE, "HARDWARE_REPLACEMENT_NEEDED" }, \
- { CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID, "MAINT_OP_SUB_CLASS_VALID" } \
+ { CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID, "MAINT_OP_SUB_CLASS_VALID" }, \
+ { CXL_EVENT_RECORD_FLAG_LD_ID_VALID, "LD_ID_VALID" }, \
+ { CXL_EVENT_RECORD_FLAG_HEAD_ID_VALID, "HEAD_ID_VALID" } \
)
/*
@@ -247,7 +251,9 @@ TRACE_EVENT(cxl_overflow,
__field(u64, hdr_timestamp) \
__field(u8, hdr_length) \
__field(u8, hdr_maint_op_class) \
- __field(u8, hdr_maint_op_sub_class)
+ __field(u8, hdr_maint_op_sub_class) \
+ __field(u16, hdr_ld_id) \
+ __field(u8, hdr_head_id)
#define CXL_EVT_TP_fast_assign(cxlmd, l, hdr) \
__assign_str(memdev); \
@@ -260,18 +266,22 @@ TRACE_EVENT(cxl_overflow,
__entry->hdr_related_handle = le16_to_cpu((hdr).related_handle); \
__entry->hdr_timestamp = le64_to_cpu((hdr).timestamp); \
__entry->hdr_maint_op_class = (hdr).maint_op_class; \
- __entry->hdr_maint_op_sub_class = (hdr).maint_op_sub_class
+ __entry->hdr_maint_op_sub_class = (hdr).maint_op_sub_class; \
+ __entry->hdr_ld_id = le16_to_cpu((hdr).ld_id); \
+ __entry->hdr_head_id = (hdr).head_id
#define CXL_EVT_TP_printk(fmt, ...) \
TP_printk("memdev=%s host=%s serial=%lld log=%s : time=%llu uuid=%pUb " \
"len=%d flags='%s' handle=%x related_handle=%x " \
- "maint_op_class=%u maint_op_sub_class=%u : " fmt, \
+ "maint_op_class=%u maint_op_sub_class=%u " \
+ "ld_id=%x head_id=%x : " fmt, \
__get_str(memdev), __get_str(host), __entry->serial, \
cxl_event_log_type_str(__entry->log), \
__entry->hdr_timestamp, &__entry->hdr_uuid, __entry->hdr_length,\
show_hdr_flags(__entry->hdr_flags), __entry->hdr_handle, \
__entry->hdr_related_handle, __entry->hdr_maint_op_class, \
__entry->hdr_maint_op_sub_class, \
+ __entry->hdr_ld_id, __entry->hdr_head_id, \
##__VA_ARGS__)
TRACE_EVENT(cxl_generic_event,
@@ -496,7 +506,10 @@ TRACE_EVENT(cxl_general_media,
uuid_copy(&__entry->region_uuid, &uuid_null);
}
__entry->cme_threshold_ev_flags = rec->cme_threshold_ev_flags;
- __entry->cme_count = get_unaligned_le24(rec->cme_count);
+ if (rec->media_hdr.descriptor & CXL_GMER_EVT_DESC_THRESHOLD_EVENT)
+ __entry->cme_count = get_unaligned_le24(rec->cme_count);
+ else
+ __entry->cme_count = 0;
),
CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' " \
@@ -648,7 +661,10 @@ TRACE_EVENT(cxl_dram,
CXL_EVENT_GEN_MED_COMP_ID_SIZE);
__entry->sub_channel = rec->sub_channel;
__entry->cme_threshold_ev_flags = rec->cme_threshold_ev_flags;
- __entry->cvme_count = get_unaligned_le24(rec->cvme_count);
+ if (rec->media_hdr.descriptor & CXL_GMER_EVT_DESC_THRESHOLD_EVENT)
+ __entry->cvme_count = get_unaligned_le24(rec->cvme_count);
+ else
+ __entry->cvme_count = 0;
),
CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' sub_type='%s' " \
@@ -871,6 +887,111 @@ TRACE_EVENT(cxl_memory_module,
)
);
+/*
+ * Memory Sparing Event Record - MSER
+ * Bit definitions and __print_flags() decoders for its flags/validity_flags.
+ * CXL rev 3.2 section 8.2.10.2.1.4; Table 8-60
+ */
+#define CXL_MSER_QUERY_RESOURCE_FLAG BIT(0)
+#define CXL_MSER_HARD_SPARING_FLAG BIT(1)
+#define CXL_MSER_DEV_INITED_FLAG BIT(2)
+#define show_mem_sparing_flags(flags) __print_flags(flags, "|", \
+ { CXL_MSER_QUERY_RESOURCE_FLAG, "Query Resources" }, \
+ { CXL_MSER_HARD_SPARING_FLAG, "Hard Sparing" }, \
+ { CXL_MSER_DEV_INITED_FLAG, "Device Initiated Sparing" } \
+)
+ /* Bits of the record's validity_flags; show_mem_sparing_valid_flags() names them */
+#define CXL_MSER_VALID_CHANNEL BIT(0)
+#define CXL_MSER_VALID_RANK BIT(1)
+#define CXL_MSER_VALID_NIBBLE BIT(2)
+#define CXL_MSER_VALID_BANK_GROUP BIT(3)
+#define CXL_MSER_VALID_BANK BIT(4)
+#define CXL_MSER_VALID_ROW BIT(5)
+#define CXL_MSER_VALID_COLUMN BIT(6)
+#define CXL_MSER_VALID_COMPONENT_ID BIT(7)
+#define CXL_MSER_VALID_COMPONENT_ID_FORMAT BIT(8)
+#define CXL_MSER_VALID_SUB_CHANNEL BIT(9)
+#define show_mem_sparing_valid_flags(flags) __print_flags(flags, "|", \
+ { CXL_MSER_VALID_CHANNEL, "CHANNEL" }, \
+ { CXL_MSER_VALID_RANK, "RANK" }, \
+ { CXL_MSER_VALID_NIBBLE, "NIBBLE" }, \
+ { CXL_MSER_VALID_BANK_GROUP, "BANK GROUP" }, \
+ { CXL_MSER_VALID_BANK, "BANK" }, \
+ { CXL_MSER_VALID_ROW, "ROW" }, \
+ { CXL_MSER_VALID_COLUMN, "COLUMN" }, \
+ { CXL_MSER_VALID_COMPONENT_ID, "COMPONENT ID" }, \
+ { CXL_MSER_VALID_COMPONENT_ID_FORMAT, "COMPONENT ID PLDM FORMAT" }, \
+ { CXL_MSER_VALID_SUB_CHANNEL, "SUB CHANNEL" } \
+)
+
+TRACE_EVENT(cxl_memory_sparing,
+ /* Records one Memory Sparing Event Record (@rec) seen on @cxlmd in event log @log */
+ TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
+ struct cxl_event_mem_sparing *rec),
+
+ TP_ARGS(cxlmd, log, rec),
+
+ TP_STRUCT__entry(
+ CXL_EVT_TP_entry
+
+ /* Memory Sparing Event fields, stored after the CXL_EVT_TP_entry fields */
+ __field(u8, flags)
+ __field(u8, result)
+ __field(u16, validity_flags)
+ __field(u16, res_avail)
+ __field(u8, channel)
+ __field(u8, rank)
+ __field(u32, nibble_mask)
+ __field(u8, bank_group)
+ __field(u8, bank)
+ __field(u32, row)
+ __field(u16, column)
+ __field(u8, sub_channel)
+ __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
+ ),
+
+ TP_fast_assign(
+ CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr);
+ __entry->hdr_uuid = CXL_EVENT_MEM_SPARING_UUID; /* tag entry with the MSER UUID constant */
+
+ /* Memory Sparing Event: copy fields, converting le16/le24 values via the helpers below */
+ __entry->flags = rec->flags;
+ __entry->result = rec->result;
+ __entry->validity_flags = le16_to_cpu(rec->validity_flags);
+ __entry->res_avail = le16_to_cpu(rec->res_avail);
+ __entry->channel = rec->channel;
+ __entry->rank = rec->rank;
+ __entry->nibble_mask = get_unaligned_le24(rec->nibble_mask); /* le24 value held in a u32 */
+ __entry->bank_group = rec->bank_group;
+ __entry->bank = rec->bank;
+ __entry->row = get_unaligned_le24(rec->row); /* le24 value held in a u32 */
+ __entry->column = le16_to_cpu(rec->column);
+ __entry->sub_channel = rec->sub_channel;
+ memcpy(__entry->comp_id, &rec->component_id,
+ CXL_EVENT_GEN_MED_COMP_ID_SIZE);
+ ),
+ /* comp_id is printed as raw hex and also passed, with validity_flags, to the PLDM helpers */
+ CXL_EVT_TP_printk("flags='%s' result=%u validity_flags='%s' " \
+ "spare resource avail=%u channel=%u rank=%u " \
+ "nibble_mask=%x bank_group=%u bank=%u " \
+ "row=%u column=%u sub_channel=%u " \
+ "comp_id=%s comp_id_pldm_valid_flags='%s' " \
+ "pldm_entity_id=%s pldm_resource_id=%s",
+ show_mem_sparing_flags(__entry->flags),
+ __entry->result,
+ show_mem_sparing_valid_flags(__entry->validity_flags),
+ __entry->res_avail, __entry->channel, __entry->rank,
+ __entry->nibble_mask, __entry->bank_group, __entry->bank,
+ __entry->row, __entry->column, __entry->sub_channel,
+ __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
+ show_comp_id_pldm_flags(__entry->comp_id[0]),
+ show_pldm_entity_id(__entry->validity_flags, CXL_MSER_VALID_COMPONENT_ID,
+ CXL_MSER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id),
+ show_pldm_resource_id(__entry->validity_flags, CXL_MSER_VALID_COMPONENT_ID,
+ CXL_MSER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id)
+ )
+);
+
#define show_poison_trace_type(type) \
__print_symbolic(type, \
{ CXL_POISON_TRACE_LIST, "List" }, \