summary | refs | log | tree | commit | diff
path: root/drivers/dax
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/dax')
-rw-r--r-- drivers/dax/Kconfig 2
-rw-r--r-- drivers/dax/bus.c 280
-rw-r--r-- drivers/dax/bus.h 1
-rw-r--r-- drivers/dax/cxl.c 4
-rw-r--r-- drivers/dax/dax-private.h 27
-rw-r--r-- drivers/dax/device.c 81
-rw-r--r-- drivers/dax/hmem/hmem.c 3
-rw-r--r-- drivers/dax/kmem.c 60
-rw-r--r-- drivers/dax/pmem.c 3
-rw-r--r-- drivers/dax/pmem/Makefile 7
-rw-r--r-- drivers/dax/pmem/pmem.c 10
-rw-r--r-- drivers/dax/super.c 28
12 files changed, 334 insertions, 172 deletions
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index a88744244149..d656e4c0eb84 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -30,7 +30,7 @@ config DEV_DAX_PMEM
config DEV_DAX_HMEM
tristate "HMEM DAX: direct access to 'specific purpose' memory"
depends on EFI_SOFT_RESERVE
- select NUMA_KEEP_MEMINFO if (NUMA && X86)
+ select NUMA_KEEP_MEMINFO if NUMA_MEMBLKS
default DEV_DAX
help
EFI 2.8 platforms, and others, may advertise 'specific purpose'
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 0ee96e6fc426..fde29e0ad68b 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -12,6 +12,18 @@
static DEFINE_MUTEX(dax_bus_lock);
+/*
+ * All changes to the dax region configuration occur with this lock held
+ * for write.
+ */
+DECLARE_RWSEM(dax_region_rwsem);
+
+/*
+ * All changes to the dax device configuration occur with this lock held
+ * for write.
+ */
+DECLARE_RWSEM(dax_dev_rwsem);
+
#define DAX_NAME_LEN 30
struct dax_id {
struct list_head list;
@@ -27,12 +39,9 @@ static int dax_bus_uevent(const struct device *dev, struct kobj_uevent_env *env)
return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
}
-static struct dax_device_driver *to_dax_drv(struct device_driver *drv)
-{
- return container_of(drv, struct dax_device_driver, drv);
-}
+#define to_dax_drv(__drv) container_of_const(__drv, struct dax_device_driver, drv)
-static struct dax_id *__dax_match_id(struct dax_device_driver *dax_drv,
+static struct dax_id *__dax_match_id(const struct dax_device_driver *dax_drv,
const char *dev_name)
{
struct dax_id *dax_id;
@@ -45,7 +54,7 @@ static struct dax_id *__dax_match_id(struct dax_device_driver *dax_drv,
return NULL;
}
-static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev)
+static int dax_match_id(const struct dax_device_driver *dax_drv, struct device *dev)
{
int match;
@@ -56,7 +65,7 @@ static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev)
return match;
}
-static int dax_match_type(struct dax_device_driver *dax_drv, struct device *dev)
+static int dax_match_type(const struct dax_device_driver *dax_drv, struct device *dev)
{
enum dax_driver_type type = DAXDRV_DEVICE_TYPE;
struct dev_dax *dev_dax = to_dev_dax(dev);
@@ -103,7 +112,7 @@ static ssize_t do_id_store(struct device_driver *drv, const char *buf,
if (action == ID_ADD) {
dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
if (dax_id) {
- strncpy(dax_id->dev_name, buf, DAX_NAME_LEN);
+ strscpy(dax_id->dev_name, buf, DAX_NAME_LEN);
list_add(&dax_id->list, &dax_drv->ids);
} else
rc = -ENOMEM;
@@ -144,7 +153,7 @@ static struct attribute *dax_drv_attrs[] = {
};
ATTRIBUTE_GROUPS(dax_drv);
-static int dax_bus_match(struct device *dev, struct device_driver *drv);
+static int dax_bus_match(struct device *dev, const struct device_driver *drv);
/*
* Static dax regions are regions created by an external subsystem
@@ -180,7 +189,7 @@ static u64 dev_dax_size(struct dev_dax *dev_dax)
u64 size = 0;
int i;
- device_lock_assert(&dev_dax->dev);
+ lockdep_assert_held(&dax_dev_rwsem);
for (i = 0; i < dev_dax->nr_range; i++)
size += range_len(&dev_dax->ranges[i].range);
@@ -194,8 +203,15 @@ static int dax_bus_probe(struct device *dev)
struct dev_dax *dev_dax = to_dev_dax(dev);
struct dax_region *dax_region = dev_dax->region;
int rc;
+ u64 size;
- if (dev_dax_size(dev_dax) == 0 || dev_dax->id < 0)
+ rc = down_read_interruptible(&dax_dev_rwsem);
+ if (rc)
+ return rc;
+ size = dev_dax_size(dev_dax);
+ up_read(&dax_dev_rwsem);
+
+ if (size == 0 || dev_dax->id < 0)
return -ENXIO;
rc = dax_drv->probe(dev_dax);
@@ -222,7 +238,7 @@ static void dax_bus_remove(struct device *dev)
dax_drv->remove(dev_dax);
}
-static struct bus_type dax_bus_type = {
+static const struct bus_type dax_bus_type = {
.name = "dax",
.uevent = dax_bus_uevent,
.match = dax_bus_match,
@@ -231,9 +247,9 @@ static struct bus_type dax_bus_type = {
.drv_groups = dax_drv_groups,
};
-static int dax_bus_match(struct device *dev, struct device_driver *drv)
+static int dax_bus_match(struct device *dev, const struct device_driver *drv)
{
- struct dax_device_driver *dax_drv = to_dax_drv(drv);
+ const struct dax_device_driver *dax_drv = to_dax_drv(drv);
if (dax_match_id(dax_drv, dev))
return 1;
@@ -250,7 +266,7 @@ static ssize_t id_show(struct device *dev,
{
struct dax_region *dax_region = dev_get_drvdata(dev);
- return sprintf(buf, "%d\n", dax_region->id);
+ return sysfs_emit(buf, "%d\n", dax_region->id);
}
static DEVICE_ATTR_RO(id);
@@ -259,8 +275,8 @@ static ssize_t region_size_show(struct device *dev,
{
struct dax_region *dax_region = dev_get_drvdata(dev);
- return sprintf(buf, "%llu\n", (unsigned long long)
- resource_size(&dax_region->res));
+ return sysfs_emit(buf, "%llu\n",
+ (unsigned long long)resource_size(&dax_region->res));
}
static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
region_size_show, NULL);
@@ -270,7 +286,7 @@ static ssize_t region_align_show(struct device *dev,
{
struct dax_region *dax_region = dev_get_drvdata(dev);
- return sprintf(buf, "%u\n", dax_region->align);
+ return sysfs_emit(buf, "%u\n", dax_region->align);
}
static struct device_attribute dev_attr_region_align =
__ATTR(align, 0400, region_align_show, NULL);
@@ -283,7 +299,7 @@ static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
resource_size_t size = resource_size(&dax_region->res);
struct resource *res;
- device_lock_assert(dax_region->dev);
+ lockdep_assert_held(&dax_region_rwsem);
for_each_dax_region_resource(dax_region, res)
size -= resource_size(res);
@@ -295,12 +311,15 @@ static ssize_t available_size_show(struct device *dev,
{
struct dax_region *dax_region = dev_get_drvdata(dev);
unsigned long long size;
+ int rc;
- device_lock(dev);
+ rc = down_read_interruptible(&dax_region_rwsem);
+ if (rc)
+ return rc;
size = dax_region_avail_size(dax_region);
- device_unlock(dev);
+ up_read(&dax_region_rwsem);
- return sprintf(buf, "%llu\n", size);
+ return sysfs_emit(buf, "%llu\n", size);
}
static DEVICE_ATTR_RO(available_size);
@@ -314,10 +333,12 @@ static ssize_t seed_show(struct device *dev,
if (is_static(dax_region))
return -EINVAL;
- device_lock(dev);
+ rc = down_read_interruptible(&dax_region_rwsem);
+ if (rc)
+ return rc;
seed = dax_region->seed;
- rc = sprintf(buf, "%s\n", seed ? dev_name(seed) : "");
- device_unlock(dev);
+ rc = sysfs_emit(buf, "%s\n", seed ? dev_name(seed) : "");
+ up_read(&dax_region_rwsem);
return rc;
}
@@ -333,14 +354,18 @@ static ssize_t create_show(struct device *dev,
if (is_static(dax_region))
return -EINVAL;
- device_lock(dev);
+ rc = down_read_interruptible(&dax_region_rwsem);
+ if (rc)
+ return rc;
youngest = dax_region->youngest;
- rc = sprintf(buf, "%s\n", youngest ? dev_name(youngest) : "");
- device_unlock(dev);
+ rc = sysfs_emit(buf, "%s\n", youngest ? dev_name(youngest) : "");
+ up_read(&dax_region_rwsem);
return rc;
}
+static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data);
+
static ssize_t create_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
@@ -358,7 +383,9 @@ static ssize_t create_store(struct device *dev, struct device_attribute *attr,
if (val != 1)
return -EINVAL;
- device_lock(dev);
+ rc = down_write_killable(&dax_region_rwsem);
+ if (rc)
+ return rc;
avail = dax_region_avail_size(dax_region);
if (avail == 0)
rc = -ENOSPC;
@@ -367,8 +394,9 @@ static ssize_t create_store(struct device *dev, struct device_attribute *attr,
.dax_region = dax_region,
.size = 0,
.id = -1,
+ .memmap_on_memory = false,
};
- struct dev_dax *dev_dax = devm_create_dev_dax(&data);
+ struct dev_dax *dev_dax = __devm_create_dev_dax(&data);
if (IS_ERR(dev_dax))
rc = PTR_ERR(dev_dax);
@@ -386,7 +414,7 @@ static ssize_t create_store(struct device *dev, struct device_attribute *attr,
rc = len;
}
}
- device_unlock(dev);
+ up_write(&dax_region_rwsem);
return rc;
}
@@ -416,7 +444,7 @@ static void trim_dev_dax_range(struct dev_dax *dev_dax)
struct range *range = &dev_dax->ranges[i].range;
struct dax_region *dax_region = dev_dax->region;
- device_lock_assert(dax_region->dev);
+ lockdep_assert_held_write(&dax_region_rwsem);
dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
(unsigned long long)range->start,
(unsigned long long)range->end);
@@ -440,10 +468,12 @@ static void unregister_dev_dax(void *dev)
dev_dbg(dev, "%s\n", __func__);
+ down_write(&dax_region_rwsem);
kill_dev_dax(dev_dax);
device_del(dev);
free_dev_dax_ranges(dev_dax);
put_device(dev);
+ up_write(&dax_region_rwsem);
}
static void dax_region_free(struct kref *kref)
@@ -462,11 +492,10 @@ static void dax_region_put(struct dax_region *dax_region)
/* a return value >= 0 indicates this invocation invalidated the id */
static int __free_dev_dax_id(struct dev_dax *dev_dax)
{
- struct device *dev = &dev_dax->dev;
struct dax_region *dax_region;
int rc = dev_dax->id;
- device_lock_assert(dev);
+ lockdep_assert_held_write(&dax_dev_rwsem);
if (!dev_dax->dyn_id || dev_dax->id < 0)
return -1;
@@ -479,12 +508,13 @@ static int __free_dev_dax_id(struct dev_dax *dev_dax)
static int free_dev_dax_id(struct dev_dax *dev_dax)
{
- struct device *dev = &dev_dax->dev;
int rc;
- device_lock(dev);
+ rc = down_write_killable(&dax_dev_rwsem);
+ if (rc)
+ return rc;
rc = __free_dev_dax_id(dev_dax);
- device_unlock(dev);
+ up_write(&dax_dev_rwsem);
return rc;
}
@@ -521,6 +551,7 @@ static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
device_lock(dev);
device_lock(victim);
dev_dax = to_dev_dax(victim);
+ down_write(&dax_dev_rwsem);
if (victim->driver || dev_dax_size(dev_dax))
rc = -EBUSY;
else {
@@ -540,6 +571,7 @@ static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
} else
rc = -EBUSY;
}
+ up_write(&dax_dev_rwsem);
device_unlock(victim);
/* won the race to invalidate the device, clean it up */
@@ -662,12 +694,9 @@ static void unregister_dax_mapping(void *data)
struct device *dev = data;
struct dax_mapping *mapping = to_dax_mapping(dev);
struct dev_dax *dev_dax = to_dev_dax(dev->parent);
- struct dax_region *dax_region = dev_dax->region;
dev_dbg(dev, "%s\n", __func__);
- device_lock_assert(dax_region->dev);
-
dev_dax->ranges[mapping->range_id].mapping = NULL;
mapping->range_id = -1;
@@ -678,24 +707,22 @@ static struct dev_dax_range *get_dax_range(struct device *dev)
{
struct dax_mapping *mapping = to_dax_mapping(dev);
struct dev_dax *dev_dax = to_dev_dax(dev->parent);
- struct dax_region *dax_region = dev_dax->region;
+ int rc;
- device_lock(dax_region->dev);
+ rc = down_read_interruptible(&dax_region_rwsem);
+ if (rc)
+ return NULL;
if (mapping->range_id < 0) {
- device_unlock(dax_region->dev);
+ up_read(&dax_region_rwsem);
return NULL;
}
return &dev_dax->ranges[mapping->range_id];
}
-static void put_dax_range(struct dev_dax_range *dax_range)
+static void put_dax_range(void)
{
- struct dax_mapping *mapping = dax_range->mapping;
- struct dev_dax *dev_dax = to_dev_dax(mapping->dev.parent);
- struct dax_region *dax_region = dev_dax->region;
-
- device_unlock(dax_region->dev);
+ up_read(&dax_region_rwsem);
}
static ssize_t start_show(struct device *dev,
static ssize_t start_show(struct device *dev,
@@ -707,8 +734,8 @@ static ssize_t start_show(struct device *dev,
dax_range = get_dax_range(dev);
if (!dax_range)
return -ENXIO;
- rc = sprintf(buf, "%#llx\n", dax_range->range.start);
- put_dax_range(dax_range);
+ rc = sysfs_emit(buf, "%#llx\n", dax_range->range.start);
+ put_dax_range();
return rc;
}
@@ -723,8 +750,8 @@ static ssize_t end_show(struct device *dev,
dax_range = get_dax_range(dev);
if (!dax_range)
return -ENXIO;
- rc = sprintf(buf, "%#llx\n", dax_range->range.end);
- put_dax_range(dax_range);
+ rc = sysfs_emit(buf, "%#llx\n", dax_range->range.end);
+ put_dax_range();
return rc;
}
@@ -739,8 +766,8 @@ static ssize_t pgoff_show(struct device *dev,
dax_range = get_dax_range(dev);
if (!dax_range)
return -ENXIO;
- rc = sprintf(buf, "%#lx\n", dax_range->pgoff);
- put_dax_range(dax_range);
+ rc = sysfs_emit(buf, "%#lx\n", dax_range->pgoff);
+ put_dax_range();
return rc;
}
@@ -762,7 +789,7 @@ static const struct attribute_group *dax_mapping_attribute_groups[] = {
NULL,
};
-static struct device_type dax_mapping_type = {
+static const struct device_type dax_mapping_type = {
.release = dax_mapping_release,
.groups = dax_mapping_attribute_groups,
};
@@ -774,7 +801,7 @@ static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
struct device *dev;
int rc;
- device_lock_assert(dax_region->dev);
+ lockdep_assert_held_write(&dax_region_rwsem);
if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
"region disabled\n"))
@@ -820,7 +847,7 @@ static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
struct resource *alloc;
int i, rc;
- device_lock_assert(dax_region->dev);
+ lockdep_assert_held_write(&dax_region_rwsem);
/* handle the seed alloc special case */
if (!size) {
@@ -874,13 +901,12 @@ static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, r
{
int last_range = dev_dax->nr_range - 1;
struct dev_dax_range *dax_range = &dev_dax->ranges[last_range];
- struct dax_region *dax_region = dev_dax->region;
bool is_shrink = resource_size(res) > size;
struct range *range = &dax_range->range;
struct device *dev = &dev_dax->dev;
int rc;
- device_lock_assert(dax_region->dev);
+ lockdep_assert_held_write(&dax_region_rwsem);
if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
return -EINVAL;
@@ -906,12 +932,15 @@ static ssize_t size_show(struct device *dev,
{
struct dev_dax *dev_dax = to_dev_dax(dev);
unsigned long long size;
+ int rc;
- device_lock(dev);
+ rc = down_read_interruptible(&dax_dev_rwsem);
+ if (rc)
+ return rc;
size = dev_dax_size(dev_dax);
- device_unlock(dev);
+ up_read(&dax_dev_rwsem);
- return sprintf(buf, "%llu\n", size);
+ return sysfs_emit(buf, "%llu\n", size);
}
static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size)
@@ -1079,17 +1108,27 @@ static ssize_t size_store(struct device *dev, struct device_attribute *attr,
return -EINVAL;
}
- device_lock(dax_region->dev);
+ rc = down_write_killable(&dax_region_rwsem);
+ if (rc)
+ return rc;
if (!dax_region->dev->driver) {
- device_unlock(dax_region->dev);
- return -ENXIO;
+ rc = -ENXIO;
+ goto err_region;
}
- device_lock(dev);
+ rc = down_write_killable(&dax_dev_rwsem);
+ if (rc)
+ goto err_region;
+
rc = dev_dax_resize(dax_region, dev_dax, val);
- device_unlock(dev);
- device_unlock(dax_region->dev);
- return rc == 0 ? len : rc;
+ /* only reached with dax_dev_rwsem held; a failed acquire skips this */
+ up_write(&dax_dev_rwsem);
+err_region:
+ up_write(&dax_region_rwsem);
+
+ if (rc == 0)
+ return len;
+ return rc;
}
static DEVICE_ATTR_RW(size);
@@ -1136,19 +1175,25 @@ static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
if (rc)
return rc;
- rc = -ENXIO;
- device_lock(dax_region->dev);
+ rc = down_write_killable(&dax_region_rwsem);
+ if (rc)
+ return rc;
if (!dax_region->dev->driver) {
- device_unlock(dax_region->dev);
+ up_write(&dax_region_rwsem);
+ return -ENXIO;
+ }
+ rc = down_write_killable(&dax_dev_rwsem);
+ if (rc) {
+ up_write(&dax_region_rwsem);
return rc;
}
- device_lock(dev);
+ rc = -ENXIO; /* reported as-is when the requested range is misaligned */
to_alloc = range_len(&r);
if (alloc_is_aligned(dev_dax, to_alloc))
rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
- device_unlock(dev);
- device_unlock(dax_region->dev);
+ up_write(&dax_dev_rwsem);
+ up_write(&dax_region_rwsem);
return rc == 0 ? len : rc;
}
@@ -1159,7 +1203,7 @@ static ssize_t align_show(struct device *dev,
{
struct dev_dax *dev_dax = to_dev_dax(dev);
- return sprintf(buf, "%d\n", dev_dax->align);
+ return sysfs_emit(buf, "%d\n", dev_dax->align);
}
static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax)
@@ -1195,13 +1239,19 @@ static ssize_t align_store(struct device *dev, struct device_attribute *attr,
if (!dax_align_valid(val))
return -EINVAL;
- device_lock(dax_region->dev);
+ rc = down_write_killable(&dax_region_rwsem);
+ if (rc)
+ return rc;
if (!dax_region->dev->driver) {
- device_unlock(dax_region->dev);
+ up_write(&dax_region_rwsem);
return -ENXIO;
}
- device_lock(dev);
+ rc = down_write_killable(&dax_dev_rwsem);
+ if (rc) {
+ up_write(&dax_region_rwsem);
+ return rc;
+ }
if (dev->driver) {
rc = -EBUSY;
goto out_unlock;
@@ -1213,8 +1263,8 @@ static ssize_t align_store(struct device *dev, struct device_attribute *attr,
if (rc)
dev_dax->align = align_save;
out_unlock:
- device_unlock(dev);
- device_unlock(dax_region->dev);
+ up_write(&dax_dev_rwsem);
+ up_write(&dax_region_rwsem);
return rc == 0 ? len : rc;
}
static DEVICE_ATTR_RW(align);
@@ -1231,7 +1281,7 @@ static ssize_t target_node_show(struct device *dev,
{
struct dev_dax *dev_dax = to_dev_dax(dev);
- return sprintf(buf, "%d\n", dev_dax_target_node(dev_dax));
+ return sysfs_emit(buf, "%d\n", dev_dax_target_node(dev_dax));
}
static DEVICE_ATTR_RO(target_node);
@@ -1247,7 +1297,7 @@ static ssize_t resource_show(struct device *dev,
else
start = dev_dax->ranges[0].range.start;
- return sprintf(buf, "%#llx\n", start);
+ return sysfs_emit(buf, "%#llx\n", start);
}
static DEVICE_ATTR(resource, 0400, resource_show, NULL);
@@ -1258,17 +1308,59 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
* We only ever expect to handle device-dax instances, i.e. the
* @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
*/
- return sprintf(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
+ return sysfs_emit(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
}
static DEVICE_ATTR_RO(modalias);
static ssize_t numa_node_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", dev_to_node(dev));
+ return sysfs_emit(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);
+static ssize_t memmap_on_memory_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dev_dax *dev_dax = to_dev_dax(dev);
+
+ return sysfs_emit(buf, "%d\n", dev_dax->memmap_on_memory);
+}
+
+static ssize_t memmap_on_memory_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct dev_dax *dev_dax = to_dev_dax(dev);
+ bool val;
+ int rc;
+
+ rc = kstrtobool(buf, &val);
+ if (rc)
+ return rc;
+
+ if (val == true && !mhp_supports_memmap_on_memory()) {
+ dev_dbg(dev, "memmap_on_memory is not available\n");
+ return -EOPNOTSUPP;
+ }
+
+ rc = down_write_killable(&dax_dev_rwsem);
+ if (rc)
+ return rc;
+
+ if (dev_dax->memmap_on_memory != val && dev->driver &&
+ to_dax_drv(dev->driver)->type == DAXDRV_KMEM_TYPE) {
+ up_write(&dax_dev_rwsem);
+ return -EBUSY;
+ }
+
+ dev_dax->memmap_on_memory = val;
+ up_write(&dax_dev_rwsem);
+
+ return len;
+}
+static DEVICE_ATTR_RW(memmap_on_memory);
+
static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
{
struct device *dev = container_of(kobj, struct device, kobj);
@@ -1295,6 +1387,7 @@ static struct attribute *dev_dax_attributes[] = {
&dev_attr_align.attr,
&dev_attr_resource.attr,
&dev_attr_numa_node.attr,
+ &dev_attr_memmap_on_memory.attr,
NULL,
};
@@ -1324,7 +1417,7 @@ static const struct device_type dev_dax_type = {
.groups = dax_attribute_groups,
};
-struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
+static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data)
{
struct dax_region *dax_region = data->dax_region;
struct device *parent = dax_region->dev;
@@ -1400,6 +1493,8 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
dev_dax->align = dax_region->align;
ida_init(&dev_dax->ida);
+ dev_dax->memmap_on_memory = data->memmap_on_memory;
+
inode = dax_inode(dax_dev);
dev->devt = inode->i_rdev;
dev->bus = &dax_bus_type;
@@ -1437,6 +1532,17 @@ err_id:
return ERR_PTR(rc);
}
+
+struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
+{
+ struct dev_dax *dev_dax;
+
+ down_write(&dax_region_rwsem);
+ dev_dax = __devm_create_dev_dax(data);
+ up_write(&dax_region_rwsem);
+
+ return dev_dax;
+}
EXPORT_SYMBOL_GPL(devm_create_dev_dax);
int __dax_driver_register(struct dax_device_driver *dax_drv,
diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
index 1ccd23360124..cbbf64443098 100644
--- a/drivers/dax/bus.h
+++ b/drivers/dax/bus.h
@@ -23,6 +23,7 @@ struct dev_dax_data {
struct dev_pagemap *pgmap;
resource_size_t size;
int id;
+ bool memmap_on_memory;
};
struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data);
diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c
index 8bc9d04034d6..13cd94d32ff7 100644
--- a/drivers/dax/cxl.c
+++ b/drivers/dax/cxl.c
@@ -26,6 +26,7 @@ static int cxl_dax_region_probe(struct device *dev)
.dax_region = dax_region,
.id = -1,
.size = range_len(&cxlr_dax->hpa_range),
+ .memmap_on_memory = true,
};
return PTR_ERR_OR_ZERO(devm_create_dev_dax(&data));
@@ -42,6 +43,7 @@ static struct cxl_driver cxl_dax_region_driver = {
module_cxl_driver(cxl_dax_region_driver);
MODULE_ALIAS_CXL(CXL_DEVICE_DAX_REGION);
+MODULE_DESCRIPTION("CXL DAX: direct access to CXL regions");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Intel Corporation");
-MODULE_IMPORT_NS(CXL);
+MODULE_IMPORT_NS("CXL");
diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
index 27cf2daaaa79..0867115aeef2 100644
--- a/drivers/dax/dax-private.h
+++ b/drivers/dax/dax-private.h
@@ -40,6 +40,12 @@ struct dax_region {
struct device *youngest;
};
+/**
+ * struct dax_mapping - device to display mapping range attributes
+ * @dev: device representing this range
+ * @range_id: index within dev_dax ranges array
+ * @id: ida of this mapping
+ */
struct dax_mapping {
struct device dev;
int range_id;
@@ -47,6 +53,18 @@ struct dax_mapping {
};
/**
+ * struct dev_dax_range - tuple representing a range of memory used by dev_dax
+ * @pgoff: page offset
+ * @range: resource-span
+ * @mapping: reference to the dax_mapping for this range
+ */
+struct dev_dax_range {
+ unsigned long pgoff;
+ struct range range;
+ struct dax_mapping *mapping;
+};
+
+/**
* struct dev_dax - instance data for a subdivision of a dax region, and
* data while the device is activated in the driver.
* @region - parent region
@@ -58,7 +76,7 @@ struct dax_mapping {
* @dev - device core
* @pgmap - pgmap for memmap setup / lifetime (driver owned)
* @nr_range: size of @ranges
- * @ranges: resource-span + pgoff tuples for the instance
+ * @ranges: range tuples of memory used
*/
struct dev_dax {
struct dax_region *region;
@@ -70,12 +88,9 @@ struct dev_dax {
struct ida ida;
struct device dev;
struct dev_pagemap *pgmap;
+ bool memmap_on_memory;
int nr_range;
- struct dev_dax_range {
- unsigned long pgoff;
- struct range range;
- struct dax_mapping *mapping;
- } *ranges;
+ struct dev_dax_range *ranges;
};
/*
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 93ebedc5ec8c..22999a402e02 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -4,7 +4,6 @@
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/device.h>
-#include <linux/pfn_t.h>
#include <linux/cdev.h>
#include <linux/slab.h>
#include <linux/dax.h>
@@ -14,8 +13,9 @@
#include "dax-private.h"
#include "bus.h"
-static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
- const char *func)
+static int __check_vma(struct dev_dax *dev_dax, vm_flags_t vm_flags,
+ unsigned long start, unsigned long end, struct file *file,
+ const char *func)
{
struct device *dev = &dev_dax->dev;
unsigned long mask;
@@ -24,7 +24,7 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
return -ENXIO;
/* prevent private mappings from being established */
- if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
+ if ((vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
dev_info_ratelimited(dev,
"%s: %s: fail, attempted private mapping\n",
current->comm, func);
@@ -32,15 +32,15 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
}
mask = dev_dax->align - 1;
- if (vma->vm_start & mask || vma->vm_end & mask) {
+ if (start & mask || end & mask) {
dev_info_ratelimited(dev,
"%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n",
- current->comm, func, vma->vm_start, vma->vm_end,
+ current->comm, func, start, end,
mask);
return -EINVAL;
}
- if (!vma_is_dax(vma)) {
+ if (!file_is_dax(file)) {
dev_info_ratelimited(dev,
"%s: %s: fail, vma is not DAX capable\n",
current->comm, func);
@@ -50,6 +50,13 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
return 0;
}
+static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
+ const char *func)
+{
+ return __check_vma(dev_dax, vma->vm_flags, vma->vm_start, vma->vm_end,
+ vma->vm_file, func);
+}
+
/* see "strong" declaration in tools/testing/nvdimm/dax-dev.c */
__weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
unsigned long size)
@@ -73,7 +80,7 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
return -1;
}
-static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn,
+static void dax_set_mapping(struct vm_fault *vmf, unsigned long pfn,
unsigned long fault_size)
{
unsigned long i, nr_pages = fault_size / PAGE_SIZE;
@@ -86,17 +93,16 @@ static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn,
nr_pages = 1;
pgoff = linear_page_index(vmf->vma,
- ALIGN(vmf->address, fault_size));
+ ALIGN_DOWN(vmf->address, fault_size));
for (i = 0; i < nr_pages; i++) {
- struct page *page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
+ struct folio *folio = pfn_folio(pfn + i);
- page = compound_head(page);
- if (page->mapping)
+ if (folio->mapping)
continue;
- page->mapping = filp->f_mapping;
- page->index = pgoff + i;
+ folio->mapping = filp->f_mapping;
+ folio->index = pgoff + i;
}
}
@@ -105,7 +111,7 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
{
struct device *dev = &dev_dax->dev;
phys_addr_t phys;
- pfn_t pfn;
+ unsigned long pfn;
unsigned int fault_size = PAGE_SIZE;
if (check_vma(dev_dax, vmf->vma, __func__))
@@ -126,11 +132,12 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS;
}
- pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+ pfn = PHYS_PFN(phys);
dax_set_mapping(vmf, pfn, fault_size);
- return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
+ return vmf_insert_page_mkwrite(vmf, pfn_to_page(pfn),
+ vmf->flags & FAULT_FLAG_WRITE);
}
static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
@@ -140,7 +147,7 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
struct device *dev = &dev_dax->dev;
phys_addr_t phys;
pgoff_t pgoff;
- pfn_t pfn;
+ unsigned long pfn;
unsigned int fault_size = PMD_SIZE;
if (check_vma(dev_dax, vmf->vma, __func__))
@@ -169,11 +176,12 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS;
}
- pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+ pfn = PHYS_PFN(phys);
dax_set_mapping(vmf, pfn, fault_size);
- return vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
+ return vmf_insert_folio_pmd(vmf, page_folio(pfn_to_page(pfn)),
+ vmf->flags & FAULT_FLAG_WRITE);
}
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
@@ -184,7 +192,7 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
struct device *dev = &dev_dax->dev;
phys_addr_t phys;
pgoff_t pgoff;
- pfn_t pfn;
+ unsigned long pfn;
unsigned int fault_size = PUD_SIZE;
@@ -214,11 +222,12 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS;
}
- pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+ pfn = PHYS_PFN(phys);
dax_set_mapping(vmf, pfn, fault_size);
- return vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
+ return vmf_insert_folio_pud(vmf, page_folio(pfn_to_page(pfn)),
+ vmf->flags & FAULT_FLAG_WRITE);
}
#else
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
@@ -235,9 +244,9 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
int id;
struct dev_dax *dev_dax = filp->private_data;
- dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) order:%d\n", current->comm,
- (vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
- vmf->vma->vm_start, vmf->vma->vm_end, order);
+ dev_dbg(&dev_dax->dev, "%s: op=%s addr=%#lx order=%d\n", current->comm,
+ (vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
+ vmf->address & ~((1UL << (order + PAGE_SHIFT)) - 1), order);
id = dax_read_lock();
if (order == 0)
@@ -284,8 +293,9 @@ static const struct vm_operations_struct dax_vm_ops = {
.pagesize = dev_dax_pagesize,
};
-static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
+static int dax_mmap_prepare(struct vm_area_desc *desc)
{
+ struct file *filp = desc->file;
struct dev_dax *dev_dax = filp->private_data;
int rc, id;
@@ -296,13 +306,14 @@ static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
* fault time.
*/
id = dax_read_lock();
- rc = check_vma(dev_dax, vma, __func__);
+ rc = __check_vma(dev_dax, desc->vm_flags, desc->start, desc->end, filp,
+ __func__);
dax_read_unlock(id);
if (rc)
return rc;
- vma->vm_ops = &dax_vm_ops;
- vm_flags_set(vma, VM_HUGEPAGE);
+ desc->vm_ops = &dax_vm_ops;
+ desc->vm_flags |= VM_HUGEPAGE;
return 0;
}
@@ -329,14 +340,13 @@ static unsigned long dax_get_unmapped_area(struct file *filp,
if ((off + len_align) < off)
goto out;
- addr_align = current->mm->get_unmapped_area(filp, addr, len_align,
- pgoff, flags);
+ addr_align = mm_get_unmapped_area(filp, addr, len_align, pgoff, flags);
if (!IS_ERR_VALUE(addr_align)) {
addr_align += (off - addr_align) & (align - 1);
return addr_align;
}
out:
- return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+ return mm_get_unmapped_area(filp, addr, len, pgoff, flags);
}
static const struct address_space_operations dev_dax_aops = {
@@ -376,8 +386,8 @@ static const struct file_operations dax_fops = {
.open = dax_open,
.release = dax_release,
.get_unmapped_area = dax_get_unmapped_area,
- .mmap = dax_mmap,
- .mmap_supported_flags = MAP_SYNC,
+ .mmap_prepare = dax_mmap_prepare,
+ .fop_flags = FOP_MMAP_SYNC,
};
static void dev_dax_cdev_del(void *cdev)
@@ -482,6 +492,7 @@ static void __exit dax_exit(void)
}
MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Device DAX: direct access device driver");
MODULE_LICENSE("GPL v2");
module_init(dax_init);
module_exit(dax_exit);
diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
index 5d2ddef0f8f5..c18451a37e4f 100644
--- a/drivers/dax/hmem/hmem.c
+++ b/drivers/dax/hmem/hmem.c
@@ -2,7 +2,6 @@
#include <linux/platform_device.h>
#include <linux/memregion.h>
#include <linux/module.h>
-#include <linux/pfn_t.h>
#include <linux/dax.h>
#include "../bus.h"
@@ -36,6 +35,7 @@ static int dax_hmem_probe(struct platform_device *pdev)
.dax_region = dax_region,
.id = -1,
.size = region_idle ? 0 : range_len(&mri->range),
+ .memmap_on_memory = false,
};
return PTR_ERR_OR_ZERO(devm_create_dev_dax(&data));
@@ -167,5 +167,6 @@ MODULE_SOFTDEP("pre: cxl_acpi");
MODULE_ALIAS("platform:hmem*");
MODULE_ALIAS("platform:hmem_platform*");
+MODULE_DESCRIPTION("HMEM DAX: direct access to 'specific purpose' memory");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
index c57acb73e3db..c036e4d0b610 100644
--- a/drivers/dax/kmem.c
+++ b/drivers/dax/kmem.c
@@ -5,13 +5,14 @@
#include <linux/memory.h>
#include <linux/module.h>
#include <linux/device.h>
-#include <linux/pfn_t.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/memory-tiers.h>
+#include <linux/memory_hotplug.h>
+#include <linux/string_helpers.h>
#include "dax-private.h"
#include "bus.h"
@@ -49,14 +50,31 @@ struct dax_kmem_data {
struct resource *res[];
};
-static struct memory_dev_type *dax_slowmem_type;
+static DEFINE_MUTEX(kmem_memory_type_lock);
+static LIST_HEAD(kmem_memory_types);
+
+static struct memory_dev_type *kmem_find_alloc_memory_type(int adist)
+{
+ guard(mutex)(&kmem_memory_type_lock);
+ return mt_find_alloc_memory_type(adist, &kmem_memory_types);
+}
+
+static void kmem_put_memory_types(void)
+{
+ guard(mutex)(&kmem_memory_type_lock);
+ mt_put_memory_types(&kmem_memory_types);
+}
+
static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
{
struct device *dev = &dev_dax->dev;
- unsigned long total_len = 0;
+ unsigned long total_len = 0, orig_len = 0;
struct dax_kmem_data *data;
+ struct memory_dev_type *mtype;
int i, rc, mapped = 0;
+ mhp_t mhp_flags;
int numa_node;
+ int adist = MEMTIER_DEFAULT_DAX_ADISTANCE;
/*
* Ensure good NUMA information for the persistent memory.
@@ -71,9 +89,15 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
return -EINVAL;
}
+ mt_calc_adistance(numa_node, &adist);
+ mtype = kmem_find_alloc_memory_type(adist);
+ if (IS_ERR(mtype))
+ return PTR_ERR(mtype);
+
for (i = 0; i < dev_dax->nr_range; i++) {
struct range range;
+ orig_len += range_len(&dev_dax->ranges[i].range);
rc = dax_kmem_range(dev_dax, i, &range);
if (rc) {
dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n",
@@ -86,9 +110,15 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
if (!total_len) {
dev_warn(dev, "rejecting DAX region without any memory after alignment\n");
return -EINVAL;
+ } else if (total_len != orig_len) {
+ char buf[16];
+
+ string_get_size(orig_len - total_len, 1, STRING_UNITS_2,
+ buf, sizeof(buf));
+ dev_warn(dev, "DAX region truncated by %s due to alignment\n", buf);
}
- init_node_memory_type(numa_node, dax_slowmem_type);
+ init_node_memory_type(numa_node, mtype);
rc = -ENOMEM;
data = kzalloc(struct_size(data, res, dev_dax->nr_range), GFP_KERNEL);
@@ -136,12 +166,16 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
*/
res->flags = IORESOURCE_SYSTEM_RAM;
+ mhp_flags = MHP_NID_IS_MGID;
+ if (dev_dax->memmap_on_memory)
+ mhp_flags |= MHP_MEMMAP_ON_MEMORY;
+
/*
* Ensure that future kexec'd kernels will not treat
* this as RAM automatically.
*/
rc = add_memory_driver_managed(data->mgid, range.start,
- range_len(&range), kmem_name, MHP_NID_IS_MGID);
+ range_len(&range), kmem_name, mhp_flags);
if (rc) {
dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
@@ -167,7 +201,7 @@ err_reg_mgid:
err_res_name:
kfree(data);
err_dax_kmem_data:
- clear_node_memory_type(numa_node, dax_slowmem_type);
+ clear_node_memory_type(numa_node, mtype);
return rc;
}
@@ -219,7 +253,7 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
* for that. This implies this reference will be around
* till next reboot.
*/
- clear_node_memory_type(node, dax_slowmem_type);
+ clear_node_memory_type(node, NULL);
}
}
#else
@@ -251,12 +285,6 @@ static int __init dax_kmem_init(void)
if (!kmem_name)
return -ENOMEM;
- dax_slowmem_type = alloc_memory_type(MEMTIER_DEFAULT_DAX_ADISTANCE);
- if (IS_ERR(dax_slowmem_type)) {
- rc = PTR_ERR(dax_slowmem_type);
- goto err_dax_slowmem_type;
- }
-
rc = dax_driver_register(&device_dax_kmem_driver);
if (rc)
goto error_dax_driver;
@@ -264,8 +292,7 @@ static int __init dax_kmem_init(void)
return rc;
error_dax_driver:
- put_memory_type(dax_slowmem_type);
-err_dax_slowmem_type:
+ kmem_put_memory_types();
kfree_const(kmem_name);
return rc;
}
@@ -275,10 +302,11 @@ static void __exit dax_kmem_exit(void)
dax_driver_unregister(&device_dax_kmem_driver);
if (!any_hotremove_failed)
kfree_const(kmem_name);
- put_memory_type(dax_slowmem_type);
+ kmem_put_memory_types();
}
MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("KMEM DAX: map dax-devices as System-RAM");
MODULE_LICENSE("GPL v2");
module_init(dax_kmem_init);
module_exit(dax_kmem_exit);
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index ae0cb113a5d3..bee93066a849 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -2,7 +2,6 @@
/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */
#include <linux/memremap.h>
#include <linux/module.h>
-#include <linux/pfn_t.h>
#include "../nvdimm/pfn.h"
#include "../nvdimm/nd.h"
#include "bus.h"
@@ -63,6 +62,7 @@ static struct dev_dax *__dax_pmem_probe(struct device *dev)
.id = id,
.pgmap = &pgmap,
.size = range_len(&range),
+ .memmap_on_memory = false,
};
return devm_create_dev_dax(&data);
@@ -93,6 +93,7 @@ static void __exit dax_pmem_exit(void)
}
module_exit(dax_pmem_exit);
+MODULE_DESCRIPTION("PMEM DAX: direct access to persistent memory");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
diff --git a/drivers/dax/pmem/Makefile b/drivers/dax/pmem/Makefile
deleted file mode 100644
index 191c31f0d4f0..000000000000
--- a/drivers/dax/pmem/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
-obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o
-
-dax_pmem-y := pmem.o
-dax_pmem_core-y := core.o
-dax_pmem_compat-y := compat.o
diff --git a/drivers/dax/pmem/pmem.c b/drivers/dax/pmem/pmem.c
deleted file mode 100644
index dfe91a2990fe..000000000000
--- a/drivers/dax/pmem/pmem.c
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2016 - 2018 Intel Corporation. All rights reserved. */
-#include <linux/percpu-refcount.h>
-#include <linux/memremap.h>
-#include <linux/module.h>
-#include <linux/pfn_t.h>
-#include <linux/nd.h>
-#include "../bus.h"
-
-
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 0da9232ea175..c00b9dff4a06 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -7,12 +7,12 @@
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/magic.h>
-#include <linux/pfn_t.h>
#include <linux/cdev.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/dax.h>
#include <linux/fs.h>
+#include <linux/cacheinfo.h>
#include "dax-private.h"
/**
@@ -147,7 +147,7 @@ enum dax_device_flags {
* pages accessible at the device relative @pgoff.
*/
long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
- enum dax_access_mode mode, void **kaddr, pfn_t *pfn)
+ enum dax_access_mode mode, void **kaddr, unsigned long *pfn)
{
long avail;
@@ -319,6 +319,11 @@ EXPORT_SYMBOL_GPL(dax_alive);
* that any fault handlers or operations that might have seen
* dax_alive(), have completed. Any operations that start after
* synchronize_srcu() has run will abort upon seeing !dax_alive().
+ *
+ * Note, because alloc_dax() returns an ERR_PTR() on error, callers
+ * typically store its result into a local variable in order to check
+ * the result. Therefore, care must be taken to populate the struct
+ * device dax_dev field to make sure the dax_dev is not leaked.
*/
void kill_dax(struct dax_device *dax_dev)
{
@@ -326,7 +331,8 @@ void kill_dax(struct dax_device *dax_dev)
return;
if (dax_dev->holder_data != NULL)
- dax_holder_notify_failure(dax_dev, 0, U64_MAX, 0);
+ dax_holder_notify_failure(dax_dev, 0, U64_MAX,
+ MF_MEM_PRE_REMOVE);
clear_bit(DAXDEV_ALIVE, &dax_dev->flags);
synchronize_srcu(&dax_srcu);
@@ -382,7 +388,7 @@ static const struct super_operations dax_sops = {
.alloc_inode = dax_alloc_inode,
.destroy_inode = dax_destroy_inode,
.free_inode = dax_free_inode,
- .drop_inode = generic_delete_inode,
+ .drop_inode = inode_just_drop,
};
static int dax_init_fs_context(struct fs_context *fc)
@@ -427,7 +433,7 @@ static struct dax_device *dax_dev_get(dev_t devt)
return NULL;
dax_dev = to_dax_dev(inode);
- if (inode->i_state & I_NEW) {
+ if (inode_state_read_once(inode) & I_NEW) {
set_bit(DAXDEV_ALIVE, &dax_dev->flags);
inode->i_cdev = &dax_dev->cdev;
inode->i_mode = S_IFCHR;
@@ -445,6 +451,14 @@ struct dax_device *alloc_dax(void *private, const struct dax_operations *ops)
dev_t devt;
int minor;
+ /*
+ * Unavailable on architectures with virtually aliased data caches,
+ * except for device-dax (NULL operations pointer), which does
+ * not use aliased mappings from the kernel.
+ */
+ if (ops && cpu_dcache_is_aliasing())
+ return ERR_PTR(-EOPNOTSUPP);
+
if (WARN_ON_ONCE(ops && !ops->zero_page_range))
return ERR_PTR(-EINVAL);
@@ -531,8 +545,7 @@ static int dax_fs_init(void)
int rc;
dax_cache = kmem_cache_create("dax_cache", sizeof(struct dax_device), 0,
- (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
init_once);
if (!dax_cache)
return -ENOMEM;
@@ -592,6 +605,7 @@ static void __exit dax_core_exit(void)
}
MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("DAX: direct access to differentiated memory");
MODULE_LICENSE("GPL v2");
subsys_initcall(dax_core_init);
module_exit(dax_core_exit);