diff options
Diffstat (limited to 'drivers/cxl/core/region.c')
-rw-r--r-- | drivers/cxl/core/region.c | 710 |
1 files changed, 577 insertions, 133 deletions
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 4c7fd2d5cccb..e8d11a988fd9 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -4,10 +4,12 @@ #include <linux/genalloc.h> #include <linux/device.h> #include <linux/module.h> +#include <linux/memory.h> #include <linux/slab.h> #include <linux/uuid.h> #include <linux/sort.h> #include <linux/idr.h> +#include <linux/memory-tiers.h> #include <cxlmem.h> #include <cxl.h> #include "core.h" @@ -30,6 +32,108 @@ static struct cxl_region *to_cxl_region(struct device *dev); +#define __ACCESS_ATTR_RO(_level, _name) { \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .show = _name##_access##_level##_show, \ +} + +#define ACCESS_DEVICE_ATTR_RO(level, name) \ + struct device_attribute dev_attr_access##level##_##name = __ACCESS_ATTR_RO(level, name) + +#define ACCESS_ATTR_RO(level, attrib) \ +static ssize_t attrib##_access##level##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct cxl_region *cxlr = to_cxl_region(dev); \ + \ + if (cxlr->coord[level].attrib == 0) \ + return -ENOENT; \ + \ + return sysfs_emit(buf, "%u\n", cxlr->coord[level].attrib); \ +} \ +static ACCESS_DEVICE_ATTR_RO(level, attrib) + +ACCESS_ATTR_RO(0, read_bandwidth); +ACCESS_ATTR_RO(0, read_latency); +ACCESS_ATTR_RO(0, write_bandwidth); +ACCESS_ATTR_RO(0, write_latency); + +#define ACCESS_ATTR_DECLARE(level, attrib) \ + (&dev_attr_access##level##_##attrib.attr) + +static struct attribute *access0_coordinate_attrs[] = { + ACCESS_ATTR_DECLARE(0, read_bandwidth), + ACCESS_ATTR_DECLARE(0, write_bandwidth), + ACCESS_ATTR_DECLARE(0, read_latency), + ACCESS_ATTR_DECLARE(0, write_latency), + NULL +}; + +ACCESS_ATTR_RO(1, read_bandwidth); +ACCESS_ATTR_RO(1, read_latency); +ACCESS_ATTR_RO(1, write_bandwidth); +ACCESS_ATTR_RO(1, write_latency); + +static struct attribute *access1_coordinate_attrs[] = { + ACCESS_ATTR_DECLARE(1, read_bandwidth), + ACCESS_ATTR_DECLARE(1, write_bandwidth), + ACCESS_ATTR_DECLARE(1, read_latency), + ACCESS_ATTR_DECLARE(1, write_latency), + NULL +}; + +#define ACCESS_VISIBLE(level) \ +static umode_t cxl_region_access##level##_coordinate_visible( \ + struct kobject *kobj, struct attribute *a, int n) \ +{ \ + struct device *dev = kobj_to_dev(kobj); \ + struct cxl_region *cxlr = to_cxl_region(dev); \ + \ + if (a == &dev_attr_access##level##_read_latency.attr && \ + cxlr->coord[level].read_latency == 0) \ + return 0; \ + \ + if (a == &dev_attr_access##level##_write_latency.attr && \ + cxlr->coord[level].write_latency == 0) \ + return 0; \ + \ + if (a == &dev_attr_access##level##_read_bandwidth.attr && \ + cxlr->coord[level].read_bandwidth == 0) \ + return 0; \ + \ + if (a == &dev_attr_access##level##_write_bandwidth.attr && \ + cxlr->coord[level].write_bandwidth == 0) \ + return 0; \ + \ + return a->mode; \ +} + +ACCESS_VISIBLE(0); +ACCESS_VISIBLE(1); + +static const struct attribute_group cxl_region_access0_coordinate_group = { + .name = "access0", + .attrs = access0_coordinate_attrs, + .is_visible = cxl_region_access0_coordinate_visible, +}; + +static const struct attribute_group *get_cxl_region_access0_group(void) +{ + return &cxl_region_access0_coordinate_group; +} + +static const struct attribute_group cxl_region_access1_coordinate_group = { + .name = "access1", + .attrs = access1_coordinate_attrs, + .is_visible = cxl_region_access1_coordinate_visible, +}; + +static const struct attribute_group *get_cxl_region_access1_group(void) +{ + return &cxl_region_access1_coordinate_group; +} + static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -128,8 +232,8 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) "Bypassing cpu_cache_invalidate_memregion() for testing!\n"); return 0; } else { - dev_err(&cxlr->dev, - "Failed to synchronize CPU cache state\n"); + dev_WARN(&cxlr->dev, + "Failed to synchronize CPU cache state\n"); return -ENXIO; } } @@ -138,19 +242,17 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) return 0; } -static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) +static void cxl_region_decode_reset(struct cxl_region *cxlr, int count) { struct cxl_region_params *p = &cxlr->params; - int i, rc = 0; + int i; /* - * Before region teardown attempt to flush, and if the flush - * fails cancel the region teardown for data consistency - * concerns + * Before region teardown attempt to flush, evict any data cached for + * this region, or scream loudly about missing arch / platform support + * for CXL teardown. */ - rc = cxl_region_invalidate_memregion(cxlr); - if (rc) - return rc; + cxl_region_invalidate_memregion(cxlr); for (i = count - 1; i >= 0; i--) { struct cxl_endpoint_decoder *cxled = p->targets[i]; @@ -173,23 +275,17 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count) cxl_rr = cxl_rr_load(iter, cxlr); cxld = cxl_rr->decoder; if (cxld->reset) - rc = cxld->reset(cxld); - if (rc) - return rc; + cxld->reset(cxld); set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); } endpoint_reset: - rc = cxled->cxld.reset(&cxled->cxld); - if (rc) - return rc; + cxled->cxld.reset(&cxled->cxld); set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); } /* all decoders associated with this region have been torn down */ clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); - - return 0; } static int commit_decoder(struct cxl_decoder *cxld) @@ -305,16 +401,8 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr, * still pending. */ if (p->state == CXL_CONFIG_RESET_PENDING) { - rc = cxl_region_decode_reset(cxlr, p->interleave_ways); - /* - * Revert to committed since there may still be active - * decoders associated with this region, or move forward - * to active to mark the reset successful - */ - if (rc) - p->state = CXL_CONFIG_COMMIT; - else - p->state = CXL_CONFIG_ACTIVE; + cxl_region_decode_reset(cxlr, p->interleave_ways); + p->state = CXL_CONFIG_ACTIVE; } } @@ -690,31 +778,55 @@ out: return rc; } -static int match_free_decoder(struct device *dev, void *data) +static int check_commit_order(struct device *dev, void *data) { + struct cxl_decoder *cxld = to_cxl_decoder(dev); + + /* + * if port->commit_end is not the only free decoder, then out of + * order shutdown has occurred, block further allocations until + * that is resolved + */ + if (((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) + return -EBUSY; + return 0; +} + +static int match_free_decoder(struct device *dev, const void *data) +{ + struct cxl_port *port = to_cxl_port(dev->parent); struct cxl_decoder *cxld; - int *id = data; + int rc; if (!is_switch_decoder(dev)) return 0; cxld = to_cxl_decoder(dev); - /* enforce ordered allocation */ - if (cxld->id != *id) + if (cxld->id != port->commit_end + 1) return 0; - if (!cxld->region) - return 1; - - (*id)++; + if (cxld->region) { + dev_dbg(dev->parent, + "next decoder to commit (%s) is already reserved (%s)\n", + dev_name(dev), dev_name(&cxld->region->dev)); + return 0; + } - return 0; + rc = device_for_each_child_reverse_from(dev->parent, dev, NULL, + check_commit_order); + if (rc) { + dev_dbg(dev->parent, + "unable to allocate %s due to out of order shutdown\n", + dev_name(dev)); + return 0; + } + return 1; } -static int match_auto_decoder(struct device *dev, void *data) +static int match_auto_decoder(struct device *dev, const void *data) { - struct cxl_region_params *p = data; + const struct cxl_region_params *p = data; struct cxl_decoder *cxld; struct range *r; @@ -736,7 +848,6 @@ cxl_region_find_decoder(struct cxl_port *port, struct cxl_region *cxlr) { struct device *dev; - int id = 0; if (port == cxled_to_port(cxled)) return &cxled->cxld; @@ -745,7 +856,7 @@ cxl_region_find_decoder(struct cxl_port *port, dev = device_find_child(&port->dev, &cxlr->params, match_auto_decoder); else - dev = device_find_child(&port->dev, &id, match_free_decoder); + dev = device_find_child(&port->dev, NULL, match_free_decoder); if (!dev) return NULL; /* @@ -998,6 +1109,26 @@ static int cxl_port_attach_region(struct cxl_port *port, } cxld = cxl_rr->decoder; + /* + * the number of targets should not exceed the target_count + * of the decoder + */ + if (is_switch_decoder(&cxld->dev)) { + struct cxl_switch_decoder *cxlsd; + + cxlsd = to_cxl_switch_decoder(&cxld->dev); + if (cxl_rr->nr_targets > cxlsd->nr_targets) { + dev_dbg(&cxlr->dev, + "%s:%s %s add: %s:%s @ %d overflows targets: %d\n", + dev_name(port->uport_dev), dev_name(&port->dev), + dev_name(&cxld->dev), dev_name(&cxlmd->dev), + dev_name(&cxled->cxld.dev), pos, + cxlsd->nr_targets); + rc = -ENXIO; + goto out_erase; + } + } + rc = cxl_rr_ep_add(cxl_rr, cxled); if (rc) { dev_dbg(&cxlr->dev, @@ -1107,6 +1238,50 @@ static int check_last_peer(struct cxl_endpoint_decoder *cxled, return 0; } +static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig) +{ + struct cxl_port *port = to_cxl_port(cxld->dev.parent); + struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); + unsigned int interleave_mask; + u8 eiw; + u16 eig; + int high_pos, low_pos; + + if (!test_bit(iw, &cxlhdm->iw_cap_mask)) + return -ENXIO; + /* + * Per CXL specification r3.1(8.2.4.20.13 Decoder Protection), + * if eiw < 8: + * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + 8 + eiw] + * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0] + * + * when the eiw is 0, all the bits of HPAOFFSET[51: 0] are used, the + * interleave bits are none. + * + * if eiw >= 8: + * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + eiw] / 3 + * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0] + * + * when the eiw is 8, all the bits of HPAOFFSET[51: 0] are used, the + * interleave bits are none. + */ + ways_to_eiw(iw, &eiw); + if (eiw == 0 || eiw == 8) + return 0; + + granularity_to_eig(ig, &eig); + if (eiw > 8) + high_pos = eiw + eig - 1; + else + high_pos = eiw + eig + 7; + low_pos = eig + 8; + interleave_mask = GENMASK(high_pos, low_pos); + if (interleave_mask & ~cxlhdm->interleave_mask) + return -ENXIO; + + return 0; +} + static int cxl_port_setup_targets(struct cxl_port *port, struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled) @@ -1120,6 +1295,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, struct cxl_region_params *p = &cxlr->params; struct cxl_decoder *cxld = cxl_rr->decoder; struct cxl_switch_decoder *cxlsd; + struct cxl_port *iter = port; u16 eig, peig; u8 eiw, peiw; @@ -1136,16 +1312,26 @@ static int cxl_port_setup_targets(struct cxl_port *port, cxlsd = to_cxl_switch_decoder(&cxld->dev); if (cxl_rr->nr_targets_set) { - int i, distance; + int i, distance = 1; + struct cxl_region_ref *cxl_rr_iter; /* - * Passthrough decoders impose no distance requirements between - * peers + * The "distance" between peer downstream ports represents which + * endpoint positions in the region interleave a given port can + * host. + * + * For example, at the root of a hierarchy the distance is + * always 1 as every index targets a different host-bridge. At + * each subsequent switch level those ports map every Nth region + * position where N is the width of the switch == distance. */ - if (cxl_rr->nr_targets == 1) - distance = 0; - else - distance = p->nr_targets / cxl_rr->nr_targets; + do { + cxl_rr_iter = cxl_rr_load(iter, cxlr); + distance *= cxl_rr_iter->nr_targets; + iter = to_cxl_port(iter->dev.parent); + } while (!is_cxl_root(iter)); + distance *= cxlrd->cxlsd.cxld.interleave_ways; + for (i = 0; i < cxl_rr->nr_targets_set; i++) if (ep->dport == cxlsd->target[i]) { rc = check_last_peer(cxled, ep, cxl_rr, @@ -1257,6 +1443,15 @@ static int cxl_port_setup_targets(struct cxl_port *port, return -ENXIO; } } else { + rc = check_interleave_cap(cxld, iw, ig); + if (rc) { + dev_dbg(&cxlr->dev, + "%s:%s iw: %d ig: %d is not supported\n", + dev_name(port->uport_dev), + dev_name(&port->dev), iw, ig); + return rc; + } + cxld->interleave_ways = iw; cxld->interleave_granularity = ig; cxld->hpa_range = (struct range) { @@ -1456,10 +1651,13 @@ static int cxl_region_attach_position(struct cxl_region *cxlr, const struct cxl_dport *dport, int pos) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; + struct cxl_decoder *cxld = &cxlsd->cxld; + int iw = cxld->interleave_ways; struct cxl_port *iter; int rc; - if (cxlrd->calc_hb(cxlrd, pos) != dport) { + if (dport != cxlrd->cxlsd.target[pos % iw]) { dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), dev_name(&cxlrd->cxlsd.cxld.dev)); @@ -1535,10 +1733,12 @@ static struct cxl_port *next_port(struct cxl_port *port) return port->parent_dport->port; } -static int match_switch_decoder_by_range(struct device *dev, void *data) +static int match_switch_decoder_by_range(struct device *dev, + const void *data) { struct cxl_switch_decoder *cxlsd; - struct range *r1, *r2 = data; + const struct range *r1, *r2 = data; + if (!is_switch_decoder(dev)) return 0; @@ -1693,6 +1893,15 @@ static int cxl_region_attach(struct cxl_region *cxlr, struct cxl_dport *dport; int rc = -ENXIO; + rc = check_interleave_cap(&cxled->cxld, p->interleave_ways, + p->interleave_granularity); + if (rc) { + dev_dbg(&cxlr->dev, "%s iw: %d ig: %d is not supported\n", + dev_name(&cxled->cxld.dev), p->interleave_ways, + p->interleave_granularity); + return rc; + } + if (cxled->mode != cxlr->mode) { dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n", dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode); @@ -1752,6 +1961,8 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -EINVAL; } + cxl_region_perf_data_calculate(cxlr, cxled); + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { int i; @@ -1792,6 +2003,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, * then the region is already committed. */ p->state = CXL_CONFIG_COMMIT; + cxl_region_shared_upstream_bandwidth_update(cxlr); return 0; } @@ -1813,6 +2025,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, if (rc) return rc; p->state = CXL_CONFIG_ACTIVE; + cxl_region_shared_upstream_bandwidth_update(cxlr); } cxled->cxld.interleave_ways = p->interleave_ways; @@ -1861,13 +2074,7 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) get_device(&cxlr->dev); if (p->state > CXL_CONFIG_ACTIVE) { - /* - * TODO: tear down all impacted regions if a device is - * removed out of order - */ - rc = cxl_region_decode_reset(cxlr, p->interleave_ways); - if (rc) - goto out; + cxl_region_decode_reset(cxlr, p->interleave_ways); p->state = CXL_CONFIG_ACTIVE; } @@ -2067,6 +2274,8 @@ static const struct attribute_group *region_groups[] = { &cxl_base_attribute_group, &cxl_region_group, &cxl_region_target_group, + &cxl_region_access0_coordinate_group, + &cxl_region_access1_coordinate_group, NULL, }; @@ -2103,7 +2312,7 @@ bool is_cxl_region(struct device *dev) { return dev->type == &cxl_region_type; } -EXPORT_SYMBOL_NS_GPL(is_cxl_region, CXL); +EXPORT_SYMBOL_NS_GPL(is_cxl_region, "CXL"); static struct cxl_region *to_cxl_region(struct device *dev) { @@ -2164,6 +2373,89 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i return cxlr; } +static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid) +{ + int cset = 0; + int rc; + + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { + if (cxlr->coord[i].read_bandwidth) { + rc = 0; + if (cxl_need_node_perf_attrs_update(nid)) + node_set_perf_attrs(nid, &cxlr->coord[i], i); + else + rc = cxl_update_hmat_access_coordinates(nid, cxlr, i); + + if (rc == 0) + cset++; + } + } + + if (!cset) + return false; + + rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access0_group()); + if (rc) + dev_dbg(&cxlr->dev, "Failed to update access0 group\n"); + + rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access1_group()); + if (rc) + dev_dbg(&cxlr->dev, "Failed to update access1 group\n"); + + return true; +} + +static int cxl_region_perf_attrs_callback(struct notifier_block *nb, + unsigned long action, void *arg) +{ + struct cxl_region *cxlr = container_of(nb, struct cxl_region, + memory_notifier); + struct memory_notify *mnb = arg; + int nid = mnb->status_change_nid; + int region_nid; + + if (nid == NUMA_NO_NODE || action != MEM_ONLINE) + return NOTIFY_DONE; + + /* + * No need to hold cxl_region_rwsem; region parameters are stable + * within the cxl_region driver. + */ + region_nid = phys_to_target_node(cxlr->params.res->start); + if (nid != region_nid) + return NOTIFY_DONE; + + if (!cxl_region_update_coordinates(cxlr, nid)) + return NOTIFY_DONE; + + return NOTIFY_OK; +} + +static int cxl_region_calculate_adistance(struct notifier_block *nb, + unsigned long nid, void *data) +{ + struct cxl_region *cxlr = container_of(nb, struct cxl_region, + adist_notifier); + struct access_coordinate *perf; + int *adist = data; + int region_nid; + + /* + * No need to hold cxl_region_rwsem; region parameters are stable + * within the cxl_region driver. + */ + region_nid = phys_to_target_node(cxlr->params.res->start); + if (nid != region_nid) + return NOTIFY_OK; + + perf = &cxlr->coord[ACCESS_COORDINATE_CPU]; + + if (mt_perf_to_adistance(perf, adist)) + return NOTIFY_OK; + + return NOTIFY_STOP; +} + /** * devm_cxl_add_region - Adds a region to a decoder * @cxlrd: root decoder @@ -2187,15 +2479,6 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, struct device *dev; int rc; - switch (mode) { - case CXL_DECODER_RAM: - case CXL_DECODER_PMEM: - break; - default: - dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode); - return ERR_PTR(-EINVAL); - } - cxlr = cxl_region_alloc(cxlrd, id); if (IS_ERR(cxlr)) return cxlr; @@ -2246,6 +2529,15 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, { int rc; + switch (mode) { + case CXL_DECODER_RAM: + case CXL_DECODER_PMEM: + break; + default: + dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode); + return ERR_PTR(-EINVAL); + } + rc = memregion_alloc(GFP_KERNEL); if (rc < 0) return ERR_PTR(rc); @@ -2258,9 +2550,8 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM); } -static ssize_t create_pmem_region_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) +static ssize_t create_region_store(struct device *dev, const char *buf, + size_t len, enum cxl_decoder_mode mode) { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); struct cxl_region *cxlr; @@ -2270,31 +2561,26 @@ static ssize_t create_pmem_region_store(struct device *dev, if (rc != 1) return -EINVAL; - cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id); + cxlr = __create_region(cxlrd, mode, id); if (IS_ERR(cxlr)) return PTR_ERR(cxlr); return len; } + +static ssize_t create_pmem_region_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return create_region_store(dev, buf, len, CXL_DECODER_PMEM); +} DEVICE_ATTR_RW(create_pmem_region); static ssize_t create_ram_region_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); - struct cxl_region *cxlr; - int rc, id; - - rc = sscanf(buf, "region%d\n", &id); - if (rc != 1) - return -EINVAL; - - cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id); - if (IS_ERR(cxlr)) - return PTR_ERR(cxlr); - - return len; + return create_region_store(dev, buf, len, CXL_DECODER_RAM); } DEVICE_ATTR_RW(create_ram_region); @@ -2379,7 +2665,7 @@ bool is_cxl_pmem_region(struct device *dev) { return dev->type == &cxl_pmem_region_type; } -EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, CXL); +EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, "CXL"); struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev) { @@ -2388,7 +2674,7 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev) return NULL; return container_of(dev, struct cxl_pmem_region, dev); } -EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL); +EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, "CXL"); struct cxl_poison_context { struct cxl_port *port; @@ -2510,28 +2796,167 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port) return rc; } +struct cxl_dpa_to_region_context { + struct cxl_region *cxlr; + u64 dpa; +}; + +static int __cxl_dpa_to_region(struct device *dev, void *arg) +{ + struct cxl_dpa_to_region_context *ctx = arg; + struct cxl_endpoint_decoder *cxled; + struct cxl_region *cxlr; + u64 dpa = ctx->dpa; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res)) + return 0; + + if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start) + return 0; + + /* + * Stop the region search (return 1) when an endpoint mapping is + * found. The region may not be fully constructed so offering + * the cxlr in the context structure is not guaranteed. + */ + cxlr = cxled->cxld.region; + if (cxlr) + dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa, + dev_name(&cxlr->dev)); + else + dev_dbg(dev, "dpa:0x%llx mapped in endpoint:%s\n", dpa, + dev_name(dev)); + + ctx->cxlr = cxlr; + + return 1; +} + +struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa) +{ + struct cxl_dpa_to_region_context ctx; + struct cxl_port *port; + + ctx = (struct cxl_dpa_to_region_context) { + .dpa = dpa, + }; + port = cxlmd->endpoint; + if (port && is_cxl_endpoint(port) && cxl_num_decoders_committed(port)) + device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region); + + return ctx.cxlr; +} + +static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos) +{ + struct cxl_region_params *p = &cxlr->params; + int gran = p->interleave_granularity; + int ways = p->interleave_ways; + u64 offset; + + /* Is the hpa in an expected chunk for its pos(-ition) */ + offset = hpa - p->res->start; + offset = do_div(offset, gran * ways); + if ((offset >= pos * gran) && (offset < (pos + 1) * gran)) + return true; + + dev_dbg(&cxlr->dev, + "Addr trans fail: hpa 0x%llx not in expected chunk\n", hpa); + + return false; +} + +u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, + u64 dpa) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa; + struct cxl_region_params *p = &cxlr->params; + struct cxl_endpoint_decoder *cxled = NULL; + u16 eig = 0; + u8 eiw = 0; + int pos; + + for (int i = 0; i < p->nr_targets; i++) { + cxled = p->targets[i]; + if (cxlmd == cxled_to_memdev(cxled)) + break; + } + if (!cxled || cxlmd != cxled_to_memdev(cxled)) + return ULLONG_MAX; + + pos = cxled->pos; + ways_to_eiw(p->interleave_ways, &eiw); + granularity_to_eig(p->interleave_granularity, &eig); + + /* + * The device position in the region interleave set was removed + * from the offset at HPA->DPA translation. To reconstruct the + * HPA, place the 'pos' in the offset. + * + * The placement of 'pos' in the HPA is determined by interleave + * ways and granularity and is defined in the CXL Spec 3.0 Section + * 8.2.4.19.13 Implementation Note: Device Decode Logic + */ + + /* Remove the dpa base */ + dpa_offset = dpa - cxl_dpa_resource_start(cxled); + + mask_upper = GENMASK_ULL(51, eig + 8); + + if (eiw < 8) { + hpa_offset = (dpa_offset & mask_upper) << eiw; + hpa_offset |= pos << (eig + 8); + } else { + bits_upper = (dpa_offset & mask_upper) >> (eig + 8); + bits_upper = bits_upper * 3; + hpa_offset = ((bits_upper << (eiw - 8)) + pos) << (eig + 8); + } + + /* The lower bits remain unchanged */ + hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0); + + /* Apply the hpa_offset to the region base address */ + hpa = hpa_offset + p->res->start; + + /* Root decoder translation overrides typical modulo decode */ + if (cxlrd->hpa_to_spa) + hpa = cxlrd->hpa_to_spa(cxlrd, hpa); + + if (hpa < p->res->start || hpa > p->res->end) { + dev_dbg(&cxlr->dev, + "Addr trans fail: hpa 0x%llx not in region\n", hpa); + return ULLONG_MAX; + } + + /* Simple chunk check, by pos & gran, only applies to modulo decodes */ + if (!cxlrd->hpa_to_spa && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos))) + return ULLONG_MAX; + + return hpa; +} + static struct lock_class_key cxl_pmem_region_key; -static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr) +static int cxl_pmem_region_alloc(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; struct cxl_nvdimm_bridge *cxl_nvb; - struct cxl_pmem_region *cxlr_pmem; struct device *dev; int i; - down_read(&cxl_region_rwsem); - if (p->state != CXL_CONFIG_COMMIT) { - cxlr_pmem = ERR_PTR(-ENXIO); - goto out; - } + guard(rwsem_read)(&cxl_region_rwsem); + if (p->state != CXL_CONFIG_COMMIT) + return -ENXIO; - cxlr_pmem = kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets), - GFP_KERNEL); - if (!cxlr_pmem) { - cxlr_pmem = ERR_PTR(-ENOMEM); - goto out; - } + struct cxl_pmem_region *cxlr_pmem __free(kfree) = + kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets), GFP_KERNEL); + if (!cxlr_pmem) + return -ENOMEM; cxlr_pmem->hpa_range.start = p->res->start; cxlr_pmem->hpa_range.end = p->res->end; @@ -2548,11 +2973,9 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr) * bridge for one device is the same for all. */ if (i == 0) { - cxl_nvb = cxl_find_nvdimm_bridge(cxlmd); - if (!cxl_nvb) { - cxlr_pmem = ERR_PTR(-ENODEV); - goto out; - } + cxl_nvb = cxl_find_nvdimm_bridge(cxlmd->endpoint); + if (!cxl_nvb) + return -ENODEV; cxlr->cxl_nvb = cxl_nvb; } m->cxlmd = cxlmd; @@ -2563,18 +2986,16 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr) } dev = &cxlr_pmem->dev; - cxlr_pmem->cxlr = cxlr; - cxlr->cxlr_pmem = cxlr_pmem; device_initialize(dev); lockdep_set_class(&dev->mutex, &cxl_pmem_region_key); device_set_pm_not_required(dev); dev->parent = &cxlr->dev; dev->bus = &cxl_bus_type; dev->type = &cxl_pmem_region_type; -out: - up_read(&cxl_region_rwsem); + cxlr_pmem->cxlr = cxlr; + cxlr->cxlr_pmem = no_free_ptr(cxlr_pmem); - return cxlr_pmem; + return 0; } static void cxl_dax_region_release(struct device *dev) @@ -2607,7 +3028,7 @@ struct cxl_dax_region *to_cxl_dax_region(struct device *dev) return NULL; return container_of(dev, struct cxl_dax_region, dev); } -EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL); +EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, "CXL"); static struct lock_class_key cxl_dax_region_key; @@ -2669,11 +3090,11 @@ static void cxlr_release_nvdimm(void *_cxlr) struct cxl_region *cxlr = _cxlr; struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb; - device_lock(&cxl_nvb->dev); - if (cxlr->cxlr_pmem) - devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister, - cxlr->cxlr_pmem); - device_unlock(&cxl_nvb->dev); + scoped_guard(device, &cxl_nvb->dev) { + if (cxlr->cxlr_pmem) + devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister, + cxlr->cxlr_pmem); + } cxlr->cxl_nvb = NULL; put_device(&cxl_nvb->dev); } @@ -2691,9 +3112,10 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr) struct device *dev; int rc; - cxlr_pmem = cxl_pmem_region_alloc(cxlr); - if (IS_ERR(cxlr_pmem)) - return PTR_ERR(cxlr_pmem); + rc = cxl_pmem_region_alloc(cxlr); + if (rc) + return rc; + cxlr_pmem = cxlr->cxlr_pmem; cxl_nvb = cxlr->cxl_nvb; dev = &cxlr_pmem->dev; @@ -2708,13 +3130,14 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr) dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent), dev_name(dev)); - device_lock(&cxl_nvb->dev); - if (cxl_nvb->dev.driver) - rc = devm_add_action_or_reset(&cxl_nvb->dev, - cxlr_pmem_unregister, cxlr_pmem); - else - rc = -ENXIO; - device_unlock(&cxl_nvb->dev); + scoped_guard(device, &cxl_nvb->dev) { + if (cxl_nvb->dev.driver) + rc = devm_add_action_or_reset(&cxl_nvb->dev, + cxlr_pmem_unregister, + cxlr_pmem); + else + rc = -ENXIO; + } if (rc) goto err_bridge; @@ -2766,9 +3189,10 @@ err: return rc; } -static int match_root_decoder_by_range(struct device *dev, void *data) +static int match_root_decoder_by_range(struct device *dev, + const void *data) { - struct range *r1, *r2 = data; + const struct range *r1, *r2 = data; struct cxl_root_decoder *cxlrd; if (!is_root_decoder(dev)) @@ -2779,11 +3203,11 @@ static int match_root_decoder_by_range(struct device *dev, void *data) return range_contains(r1, r2); } -static int match_region_by_range(struct device *dev, void *data) +static int match_region_by_range(struct device *dev, const void *data) { struct cxl_region_params *p; struct cxl_region *cxlr; - struct range *r = data; + const struct range *r = data; int rc = 0; if (!is_cxl_region(dev)) @@ -2949,7 +3373,7 @@ out: put_device(cxlrd_dev); return rc; } -EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL); +EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, "CXL"); static int is_system_ram(struct resource *res, void *arg) { @@ -2960,6 +3384,14 @@ static int is_system_ram(struct resource *res, void *arg) return 1; } +static void shutdown_notifiers(void *_cxlr) +{ + struct cxl_region *cxlr = _cxlr; + + unregister_memory_notifier(&cxlr->memory_notifier); + unregister_mt_adistance_algorithm(&cxlr->adist_notifier); +} + static int cxl_region_probe(struct device *dev) { struct cxl_region *cxlr = to_cxl_region(dev); @@ -2995,6 +3427,18 @@ out: if (rc) return rc; + cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback; + cxlr->memory_notifier.priority = CXL_CALLBACK_PRI; + register_memory_notifier(&cxlr->memory_notifier); + + cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance; + cxlr->adist_notifier.priority = 100; + register_mt_adistance_algorithm(&cxlr->adist_notifier); + + rc = devm_add_action_or_reset(&cxlr->dev, shutdown_notifiers, cxlr); + if (rc) + return rc; + switch (cxlr->mode) { case CXL_DECODER_PMEM: return devm_cxl_add_pmem_region(cxlr); @@ -3032,6 +3476,6 @@ void cxl_region_exit(void) cxl_driver_unregister(&cxl_region_driver); } -MODULE_IMPORT_NS(CXL); -MODULE_IMPORT_NS(DEVMEM); +MODULE_IMPORT_NS("CXL"); +MODULE_IMPORT_NS("DEVMEM"); MODULE_ALIAS_CXL(CXL_DEVICE_REGION); |