summary | refs | log | tree | commit | diff
path: root/drivers/dax/device.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/dax/device.c')
-rw-r--r-- drivers/dax/device.c 208
1 files changed, 116 insertions, 92 deletions
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index dd8222a42808..22999a402e02 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -4,7 +4,6 @@
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/device.h>
-#include <linux/pfn_t.h>
#include <linux/cdev.h>
#include <linux/slab.h>
#include <linux/dax.h>
@@ -14,8 +13,9 @@
#include "dax-private.h"
#include "bus.h"
-static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
- const char *func)
+static int __check_vma(struct dev_dax *dev_dax, vm_flags_t vm_flags,
+ unsigned long start, unsigned long end, struct file *file,
+ const char *func)
{
struct device *dev = &dev_dax->dev;
unsigned long mask;
@@ -24,7 +24,7 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
return -ENXIO;
/* prevent private mappings from being established */
- if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
+ if ((vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
dev_info_ratelimited(dev,
"%s: %s: fail, attempted private mapping\n",
current->comm, func);
@@ -32,15 +32,15 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
}
mask = dev_dax->align - 1;
- if (vma->vm_start & mask || vma->vm_end & mask) {
+ if (start & mask || end & mask) {
dev_info_ratelimited(dev,
"%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n",
- current->comm, func, vma->vm_start, vma->vm_end,
+ current->comm, func, start, end,
mask);
return -EINVAL;
}
- if (!vma_is_dax(vma)) {
+ if (!file_is_dax(file)) {
dev_info_ratelimited(dev,
"%s: %s: fail, vma is not DAX capable\n",
current->comm, func);
@@ -50,6 +50,13 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
return 0;
}
+static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
+ const char *func)
+{
+ return __check_vma(dev_dax, vma->vm_flags, vma->vm_start, vma->vm_end,
+ vma->vm_file, func);
+}
+
/* see "strong" declaration in tools/testing/nvdimm/dax-dev.c */
__weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
unsigned long size)
@@ -73,11 +80,38 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
return -1;
}
+static void dax_set_mapping(struct vm_fault *vmf, unsigned long pfn,
+ unsigned long fault_size)
+{
+ unsigned long i, nr_pages = fault_size / PAGE_SIZE;
+ struct file *filp = vmf->vma->vm_file;
+ struct dev_dax *dev_dax = filp->private_data;
+ pgoff_t pgoff;
+
+ /* mapping is only set on the head */
+ if (dev_dax->pgmap->vmemmap_shift)
+ nr_pages = 1;
+
+ pgoff = linear_page_index(vmf->vma,
+ ALIGN_DOWN(vmf->address, fault_size));
+
+ for (i = 0; i < nr_pages; i++) {
+ struct folio *folio = pfn_folio(pfn + i);
+
+ if (folio->mapping)
+ continue;
+
+ folio->mapping = filp->f_mapping;
+ folio->index = pgoff + i;
+ }
+}
+
static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
- struct vm_fault *vmf, pfn_t *pfn)
+ struct vm_fault *vmf)
{
struct device *dev = &dev_dax->dev;
phys_addr_t phys;
+ unsigned long pfn;
unsigned int fault_size = PAGE_SIZE;
if (check_vma(dev_dax, vmf->vma, __func__))
@@ -98,18 +132,22 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS;
}
- *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+ pfn = PHYS_PFN(phys);
- return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
+ dax_set_mapping(vmf, pfn, fault_size);
+
+ return vmf_insert_page_mkwrite(vmf, pfn_to_page(pfn),
+ vmf->flags & FAULT_FLAG_WRITE);
}
static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
- struct vm_fault *vmf, pfn_t *pfn)
+ struct vm_fault *vmf)
{
unsigned long pmd_addr = vmf->address & PMD_MASK;
struct device *dev = &dev_dax->dev;
phys_addr_t phys;
pgoff_t pgoff;
+ unsigned long pfn;
unsigned int fault_size = PMD_SIZE;
if (check_vma(dev_dax, vmf->vma, __func__))
@@ -138,19 +176,23 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS;
}
- *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+ pfn = PHYS_PFN(phys);
+
+ dax_set_mapping(vmf, pfn, fault_size);
- return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
+ return vmf_insert_folio_pmd(vmf, page_folio(pfn_to_page(pfn)),
+ vmf->flags & FAULT_FLAG_WRITE);
}
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
- struct vm_fault *vmf, pfn_t *pfn)
+ struct vm_fault *vmf)
{
unsigned long pud_addr = vmf->address & PUD_MASK;
struct device *dev = &dev_dax->dev;
phys_addr_t phys;
pgoff_t pgoff;
+ unsigned long pfn;
unsigned int fault_size = PUD_SIZE;
@@ -180,72 +222,42 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS;
}
- *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+ pfn = PHYS_PFN(phys);
+
+ dax_set_mapping(vmf, pfn, fault_size);
- return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
+ return vmf_insert_folio_pud(vmf, page_folio(pfn_to_page(pfn)),
+ vmf->flags & FAULT_FLAG_WRITE);
}
#else
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
- struct vm_fault *vmf, pfn_t *pfn)
+ struct vm_fault *vmf)
{
return VM_FAULT_FALLBACK;
}
#endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
-static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
- enum page_entry_size pe_size)
+static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
{
struct file *filp = vmf->vma->vm_file;
- unsigned long fault_size;
vm_fault_t rc = VM_FAULT_SIGBUS;
int id;
- pfn_t pfn;
struct dev_dax *dev_dax = filp->private_data;
- dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
- (vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
- vmf->vma->vm_start, vmf->vma->vm_end, pe_size);
+ dev_dbg(&dev_dax->dev, "%s: op=%s addr=%#lx order=%d\n", current->comm,
+ (vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
+ vmf->address & ~((1UL << (order + PAGE_SHIFT)) - 1), order);
id = dax_read_lock();
- switch (pe_size) {
- case PE_SIZE_PTE:
- fault_size = PAGE_SIZE;
- rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
- break;
- case PE_SIZE_PMD:
- fault_size = PMD_SIZE;
- rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
- break;
- case PE_SIZE_PUD:
- fault_size = PUD_SIZE;
- rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
- break;
- default:
+ if (order == 0)
+ rc = __dev_dax_pte_fault(dev_dax, vmf);
+ else if (order == PMD_ORDER)
+ rc = __dev_dax_pmd_fault(dev_dax, vmf);
+ else if (order == PUD_ORDER)
+ rc = __dev_dax_pud_fault(dev_dax, vmf);
+ else
rc = VM_FAULT_SIGBUS;
- }
- if (rc == VM_FAULT_NOPAGE) {
- unsigned long i;
- pgoff_t pgoff;
-
- /*
- * In the device-dax case the only possibility for a
- * VM_FAULT_NOPAGE result is when device-dax capacity is
- * mapped. No need to consider the zero page, or racing
- * conflicting mappings.
- */
- pgoff = linear_page_index(vmf->vma, vmf->address
- & ~(fault_size - 1));
- for (i = 0; i < fault_size / PAGE_SIZE; i++) {
- struct page *page;
-
- page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
- if (page->mapping)
- continue;
- page->mapping = filp->f_mapping;
- page->index = pgoff + i;
- }
- }
dax_read_unlock(id);
return rc;
@@ -253,7 +265,7 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
static vm_fault_t dev_dax_fault(struct vm_fault *vmf)
{
- return dev_dax_huge_fault(vmf, PE_SIZE_PTE);
+ return dev_dax_huge_fault(vmf, 0);
}
static int dev_dax_may_split(struct vm_area_struct *vma, unsigned long addr)
@@ -281,8 +293,9 @@ static const struct vm_operations_struct dax_vm_ops = {
.pagesize = dev_dax_pagesize,
};
-static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
+static int dax_mmap_prepare(struct vm_area_desc *desc)
{
+ struct file *filp = desc->file;
struct dev_dax *dev_dax = filp->private_data;
int rc, id;
@@ -293,13 +306,14 @@ static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
* fault time.
*/
id = dax_read_lock();
- rc = check_vma(dev_dax, vma, __func__);
+ rc = __check_vma(dev_dax, desc->vm_flags, desc->start, desc->end, filp,
+ __func__);
dax_read_unlock(id);
if (rc)
return rc;
- vma->vm_ops = &dax_vm_ops;
- vma->vm_flags |= VM_HUGEPAGE;
+ desc->vm_ops = &dax_vm_ops;
+ desc->vm_flags |= VM_HUGEPAGE;
return 0;
}
@@ -326,19 +340,17 @@ static unsigned long dax_get_unmapped_area(struct file *filp,
if ((off + len_align) < off)
goto out;
- addr_align = current->mm->get_unmapped_area(filp, addr, len_align,
- pgoff, flags);
+ addr_align = mm_get_unmapped_area(filp, addr, len_align, pgoff, flags);
if (!IS_ERR_VALUE(addr_align)) {
addr_align += (off - addr_align) & (align - 1);
return addr_align;
}
out:
- return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+ return mm_get_unmapped_area(filp, addr, len, pgoff, flags);
}
static const struct address_space_operations dev_dax_aops = {
- .set_page_dirty = __set_page_dirty_no_writeback,
- .invalidatepage = noop_invalidatepage,
+ .dirty_folio = noop_dirty_folio,
};
static int dax_open(struct inode *inode, struct file *filp)
@@ -374,8 +386,8 @@ static const struct file_operations dax_fops = {
.open = dax_open,
.release = dax_release,
.get_unmapped_area = dax_get_unmapped_area,
- .mmap = dax_mmap,
- .mmap_supported_flags = MAP_SYNC,
+ .mmap_prepare = dax_mmap_prepare,
+ .fop_flags = FOP_MMAP_SYNC,
};
static void dev_dax_cdev_del(void *cdev)
@@ -388,7 +400,7 @@ static void dev_dax_kill(void *dev_dax)
kill_dev_dax(dev_dax);
}
-int dev_dax_probe(struct dev_dax *dev_dax)
+static int dev_dax_probe(struct dev_dax *dev_dax)
{
struct dax_device *dax_dev = dev_dax->dax_dev;
struct device *dev = &dev_dax->dev;
@@ -398,17 +410,34 @@ int dev_dax_probe(struct dev_dax *dev_dax)
void *addr;
int rc, i;
- pgmap = dev_dax->pgmap;
- if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1,
- "static pgmap / multi-range device conflict\n"))
- return -EINVAL;
+ if (static_dev_dax(dev_dax)) {
+ if (dev_dax->nr_range > 1) {
+ dev_warn(dev,
+ "static pgmap / multi-range device conflict\n");
+ return -EINVAL;
+ }
- if (!pgmap) {
- pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range)
- * (dev_dax->nr_range - 1), GFP_KERNEL);
+ pgmap = dev_dax->pgmap;
+ } else {
+ if (dev_dax->pgmap) {
+ dev_warn(dev,
+ "dynamic-dax with pre-populated page map\n");
+ return -EINVAL;
+ }
+
+ pgmap = devm_kzalloc(dev,
+ struct_size(pgmap, ranges, dev_dax->nr_range - 1),
+ GFP_KERNEL);
if (!pgmap)
return -ENOMEM;
+
pgmap->nr_range = dev_dax->nr_range;
+ dev_dax->pgmap = pgmap;
+
+ for (i = 0; i < dev_dax->nr_range; i++) {
+ struct range *range = &dev_dax->ranges[i].range;
+ pgmap->ranges[i] = *range;
+ }
}
for (i = 0; i < dev_dax->nr_range; i++) {
@@ -420,12 +449,12 @@ int dev_dax_probe(struct dev_dax *dev_dax)
i, range->start, range->end);
return -EBUSY;
}
- /* don't update the range for static pgmap */
- if (!dev_dax->pgmap)
- pgmap->ranges[i] = *range;
}
pgmap->type = MEMORY_DEVICE_GENERIC;
+ if (dev_dax->align > PAGE_SIZE)
+ pgmap->vmemmap_shift =
+ order_base_2(dev_dax->align >> PAGE_SHIFT);
addr = devm_memremap_pages(dev, pgmap);
if (IS_ERR(addr))
return PTR_ERR(addr);
@@ -433,11 +462,7 @@ int dev_dax_probe(struct dev_dax *dev_dax)
inode = dax_inode(dax_dev);
cdev = inode->i_cdev;
cdev_init(cdev, &dax_fops);
- if (dev->class) {
- /* for the CONFIG_DEV_DAX_PMEM_COMPAT case */
- cdev->owner = dev->parent->driver->owner;
- } else
- cdev->owner = dev->driver->owner;
+ cdev->owner = dev->driver->owner;
cdev_set_parent(cdev, &dev->kobj);
rc = cdev_add(cdev, dev->devt, 1);
if (rc)
@@ -450,12 +475,10 @@ int dev_dax_probe(struct dev_dax *dev_dax)
run_dax(dax_dev);
return devm_add_action_or_reset(dev, dev_dax_kill, dev_dax);
}
-EXPORT_SYMBOL_GPL(dev_dax_probe);
static struct dax_device_driver device_dax_driver = {
.probe = dev_dax_probe,
- /* all probe actions are unwound by devm, so .remove isn't necessary */
- .match_always = 1,
+ .type = DAXDRV_DEVICE_TYPE,
};
static int __init dax_init(void)
@@ -469,6 +492,7 @@ static void __exit dax_exit(void)
}
MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Device DAX: direct access device driver");
MODULE_LICENSE("GPL v2");
module_init(dax_init);
module_exit(dax_exit);