summaryrefslogtreecommitdiff
path: root/drivers/dax/kmem.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/dax/kmem.c')
-rw-r--r--drivers/dax/kmem.c118
1 files changed, 103 insertions, 15 deletions
diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
index ac231cc36359..c036e4d0b610 100644
--- a/drivers/dax/kmem.c
+++ b/drivers/dax/kmem.c
@@ -5,15 +5,24 @@
#include <linux/memory.h>
#include <linux/module.h>
#include <linux/device.h>
-#include <linux/pfn_t.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
+#include <linux/memory-tiers.h>
+#include <linux/memory_hotplug.h>
+#include <linux/string_helpers.h>
#include "dax-private.h"
#include "bus.h"
+/*
+ * Default abstract distance assigned to the NUMA node onlined
+ * by DAX/kmem if the low level platform driver didn't initialize
+ * one for this NUMA node.
+ */
+#define MEMTIER_DEFAULT_DAX_ADISTANCE (MEMTIER_ADISTANCE_DRAM * 5)
+
/* Memory resource name used for add_memory_driver_managed(). */
static const char *kmem_name;
/* Set if any memory will remain added when the driver will be unloaded. */
@@ -37,16 +46,35 @@ static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r)
struct dax_kmem_data {
const char *res_name;
+ int mgid;
struct resource *res[];
};
+static DEFINE_MUTEX(kmem_memory_type_lock);
+static LIST_HEAD(kmem_memory_types);
+
+static struct memory_dev_type *kmem_find_alloc_memory_type(int adist)
+{
+ guard(mutex)(&kmem_memory_type_lock);
+ return mt_find_alloc_memory_type(adist, &kmem_memory_types);
+}
+
+static void kmem_put_memory_types(void)
+{
+ guard(mutex)(&kmem_memory_type_lock);
+ mt_put_memory_types(&kmem_memory_types);
+}
+
static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
{
struct device *dev = &dev_dax->dev;
+ unsigned long total_len = 0, orig_len = 0;
struct dax_kmem_data *data;
- int rc = -ENOMEM;
- int i, mapped = 0;
+ struct memory_dev_type *mtype;
+ int i, rc, mapped = 0;
+ mhp_t mhp_flags;
int numa_node;
+ int adist = MEMTIER_DEFAULT_DAX_ADISTANCE;
/*
* Ensure good NUMA information for the persistent memory.
@@ -61,24 +89,58 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
return -EINVAL;
}
+ mt_calc_adistance(numa_node, &adist);
+ mtype = kmem_find_alloc_memory_type(adist);
+ if (IS_ERR(mtype))
+ return PTR_ERR(mtype);
+
+ for (i = 0; i < dev_dax->nr_range; i++) {
+ struct range range;
+
+ orig_len += range_len(&dev_dax->ranges[i].range);
+ rc = dax_kmem_range(dev_dax, i, &range);
+ if (rc) {
+ dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n",
+ i, range.start, range.end);
+ continue;
+ }
+ total_len += range_len(&range);
+ }
+
+ if (!total_len) {
+ dev_warn(dev, "rejecting DAX region without any memory after alignment\n");
+ return -EINVAL;
+ } else if (total_len != orig_len) {
+ char buf[16];
+
+ string_get_size(orig_len - total_len, 1, STRING_UNITS_2,
+ buf, sizeof(buf));
+ dev_warn(dev, "DAX region truncated by %s due to alignment\n", buf);
+ }
+
+ init_node_memory_type(numa_node, mtype);
+
+ rc = -ENOMEM;
data = kzalloc(struct_size(data, res, dev_dax->nr_range), GFP_KERNEL);
if (!data)
- return -ENOMEM;
+ goto err_dax_kmem_data;
data->res_name = kstrdup(dev_name(dev), GFP_KERNEL);
if (!data->res_name)
goto err_res_name;
+ rc = memory_group_register_static(numa_node, PFN_UP(total_len));
+ if (rc < 0)
+ goto err_reg_mgid;
+ data->mgid = rc;
+
for (i = 0; i < dev_dax->nr_range; i++) {
struct resource *res;
struct range range;
rc = dax_kmem_range(dev_dax, i, &range);
- if (rc) {
- dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n",
- i, range.start, range.end);
+ if (rc)
continue;
- }
/* Region is permanently reserved if hotremove fails. */
res = request_mem_region(range.start, range_len(&range), data->res_name);
@@ -104,17 +166,21 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
*/
res->flags = IORESOURCE_SYSTEM_RAM;
+ mhp_flags = MHP_NID_IS_MGID;
+ if (dev_dax->memmap_on_memory)
+ mhp_flags |= MHP_MEMMAP_ON_MEMORY;
+
/*
* Ensure that future kexec'd kernels will not treat
* this as RAM automatically.
*/
- rc = add_memory_driver_managed(numa_node, range.start,
- range_len(&range), kmem_name, MHP_NONE);
+ rc = add_memory_driver_managed(data->mgid, range.start,
+ range_len(&range), kmem_name, mhp_flags);
if (rc) {
dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
i, range.start, range.end);
- release_resource(res);
+ remove_resource(res);
kfree(res);
data->res[i] = NULL;
if (mapped)
@@ -129,9 +195,13 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
return 0;
err_request_mem:
+ memory_group_unregister(data->mgid);
+err_reg_mgid:
kfree(data->res_name);
err_res_name:
kfree(data);
+err_dax_kmem_data:
+ clear_node_memory_type(numa_node, mtype);
return rc;
}
@@ -139,6 +209,7 @@ err_res_name:
static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
{
int i, success = 0;
+ int node = dev_dax->target_node;
struct device *dev = &dev_dax->dev;
struct dax_kmem_data *data = dev_get_drvdata(dev);
@@ -156,10 +227,9 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
if (rc)
continue;
- rc = remove_memory(dev_dax->target_node, range.start,
- range_len(&range));
+ rc = remove_memory(range.start, range_len(&range));
if (rc == 0) {
- release_resource(data->res[i]);
+ remove_resource(data->res[i]);
kfree(data->res[i]);
data->res[i] = NULL;
success++;
@@ -172,9 +242,18 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
}
if (success >= dev_dax->nr_range) {
+ memory_group_unregister(data->mgid);
kfree(data->res_name);
kfree(data);
dev_set_drvdata(dev, NULL);
+ /*
+ * Clear the memtype association on successful unplug.
+ * If not, we have memory blocks left which can be
+ * offlined/onlined later. We need to keep memory_dev_type
+ * for that. This implies this reference will be around
+ * till next reboot.
+ */
+ clear_node_memory_type(node, NULL);
}
}
#else
@@ -194,6 +273,7 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
static struct dax_device_driver device_dax_kmem_driver = {
.probe = dev_dax_kmem_probe,
.remove = dev_dax_kmem_remove,
+ .type = DAXDRV_KMEM_TYPE,
};
static int __init dax_kmem_init(void)
@@ -207,7 +287,13 @@ static int __init dax_kmem_init(void)
rc = dax_driver_register(&device_dax_kmem_driver);
if (rc)
- kfree_const(kmem_name);
+ goto error_dax_driver;
+
+ return rc;
+
+error_dax_driver:
+ kmem_put_memory_types();
+ kfree_const(kmem_name);
return rc;
}
@@ -216,9 +302,11 @@ static void __exit dax_kmem_exit(void)
dax_driver_unregister(&device_dax_kmem_driver);
if (!any_hotremove_failed)
kfree_const(kmem_name);
+ kmem_put_memory_types();
}
MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("KMEM DAX: map dax-devices as System-RAM");
MODULE_LICENSE("GPL v2");
module_init(dax_kmem_init);
module_exit(dax_kmem_exit);