diff options
Diffstat (limited to 'drivers/dax/device.c')
| -rw-r--r-- | drivers/dax/device.c | 208 |
1 files changed, 116 insertions, 92 deletions
diff --git a/drivers/dax/device.c b/drivers/dax/device.c index dd8222a42808..22999a402e02 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -4,7 +4,6 @@ #include <linux/pagemap.h> #include <linux/module.h> #include <linux/device.h> -#include <linux/pfn_t.h> #include <linux/cdev.h> #include <linux/slab.h> #include <linux/dax.h> @@ -14,8 +13,9 @@ #include "dax-private.h" #include "bus.h" -static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, - const char *func) +static int __check_vma(struct dev_dax *dev_dax, vm_flags_t vm_flags, + unsigned long start, unsigned long end, struct file *file, + const char *func) { struct device *dev = &dev_dax->dev; unsigned long mask; @@ -24,7 +24,7 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, return -ENXIO; /* prevent private mappings from being established */ - if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) { + if ((vm_flags & VM_MAYSHARE) != VM_MAYSHARE) { dev_info_ratelimited(dev, "%s: %s: fail, attempted private mapping\n", current->comm, func); @@ -32,15 +32,15 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, } mask = dev_dax->align - 1; - if (vma->vm_start & mask || vma->vm_end & mask) { + if (start & mask || end & mask) { dev_info_ratelimited(dev, "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n", - current->comm, func, vma->vm_start, vma->vm_end, + current->comm, func, start, end, mask); return -EINVAL; } - if (!vma_is_dax(vma)) { + if (!file_is_dax(file)) { dev_info_ratelimited(dev, "%s: %s: fail, vma is not DAX capable\n", current->comm, func); @@ -50,6 +50,13 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, return 0; } +static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, + const char *func) +{ + return __check_vma(dev_dax, vma->vm_flags, vma->vm_start, vma->vm_end, + vma->vm_file, func); +} + /* see "strong" declaration in tools/testing/nvdimm/dax-dev.c */ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, unsigned long size) @@ -73,11 +80,38 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, return -1; } +static void dax_set_mapping(struct vm_fault *vmf, unsigned long pfn, + unsigned long fault_size) +{ + unsigned long i, nr_pages = fault_size / PAGE_SIZE; + struct file *filp = vmf->vma->vm_file; + struct dev_dax *dev_dax = filp->private_data; + pgoff_t pgoff; + + /* mapping is only set on the head */ + if (dev_dax->pgmap->vmemmap_shift) + nr_pages = 1; + + pgoff = linear_page_index(vmf->vma, + ALIGN_DOWN(vmf->address, fault_size)); + + for (i = 0; i < nr_pages; i++) { + struct folio *folio = pfn_folio(pfn + i); + + if (folio->mapping) + continue; + + folio->mapping = filp->f_mapping; + folio->index = pgoff + i; + } +} + static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf, pfn_t *pfn) + struct vm_fault *vmf) { struct device *dev = &dev_dax->dev; phys_addr_t phys; + unsigned long pfn; unsigned int fault_size = PAGE_SIZE; if (check_vma(dev_dax, vmf->vma, __func__)) @@ -98,18 +132,22 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax, return VM_FAULT_SIGBUS; } - *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); + pfn = PHYS_PFN(phys); - return vmf_insert_mixed(vmf->vma, vmf->address, *pfn); + dax_set_mapping(vmf, pfn, fault_size); + + return vmf_insert_page_mkwrite(vmf, pfn_to_page(pfn), + vmf->flags & FAULT_FLAG_WRITE); } static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf, pfn_t *pfn) + struct vm_fault *vmf) { unsigned long pmd_addr = vmf->address & PMD_MASK; struct device *dev = &dev_dax->dev; phys_addr_t phys; pgoff_t pgoff; + unsigned long pfn; unsigned int fault_size = PMD_SIZE; if (check_vma(dev_dax, vmf->vma, __func__)) @@ -138,19 +176,23 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, return VM_FAULT_SIGBUS; } - *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); + pfn = PHYS_PFN(phys); + + dax_set_mapping(vmf, pfn, fault_size); - return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); + return vmf_insert_folio_pmd(vmf, page_folio(pfn_to_page(pfn)), + vmf->flags & FAULT_FLAG_WRITE); } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf, pfn_t *pfn) + struct vm_fault *vmf) { unsigned long pud_addr = vmf->address & PUD_MASK; struct device *dev = &dev_dax->dev; phys_addr_t phys; pgoff_t pgoff; + unsigned long pfn; unsigned int fault_size = PUD_SIZE; @@ -180,72 +222,42 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, return VM_FAULT_SIGBUS; } - *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP); + pfn = PHYS_PFN(phys); + + dax_set_mapping(vmf, pfn, fault_size); - return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); + return vmf_insert_folio_pud(vmf, page_folio(pfn_to_page(pfn)), + vmf->flags & FAULT_FLAG_WRITE); } #else static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, - struct vm_fault *vmf, pfn_t *pfn) + struct vm_fault *vmf) { return VM_FAULT_FALLBACK; } #endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ -static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, - enum page_entry_size pe_size) +static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, unsigned int order) { struct file *filp = vmf->vma->vm_file; - unsigned long fault_size; vm_fault_t rc = VM_FAULT_SIGBUS; int id; - pfn_t pfn; struct dev_dax *dev_dax = filp->private_data; - dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm, - (vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read", - vmf->vma->vm_start, vmf->vma->vm_end, pe_size); + dev_dbg(&dev_dax->dev, "%s: op=%s addr=%#lx order=%d\n", current->comm, + (vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read", + vmf->address & ~((1UL << (order + PAGE_SHIFT)) - 1), order); id = dax_read_lock(); - switch (pe_size) { - case PE_SIZE_PTE: - fault_size = PAGE_SIZE; - rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn); - break; - case PE_SIZE_PMD: - fault_size = PMD_SIZE; - rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn); - break; - case PE_SIZE_PUD: - fault_size = PUD_SIZE; - rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn); - break; - default: + if (order == 0) + rc = __dev_dax_pte_fault(dev_dax, vmf); + else if (order == PMD_ORDER) + rc = __dev_dax_pmd_fault(dev_dax, vmf); + else if (order == PUD_ORDER) + rc = __dev_dax_pud_fault(dev_dax, vmf); + else rc = VM_FAULT_SIGBUS; - } - if (rc == VM_FAULT_NOPAGE) { - unsigned long i; - pgoff_t pgoff; - - /* - * In the device-dax case the only possibility for a - * VM_FAULT_NOPAGE result is when device-dax capacity is - * mapped. No need to consider the zero page, or racing - * conflicting mappings. - */ - pgoff = linear_page_index(vmf->vma, vmf->address - & ~(fault_size - 1)); - for (i = 0; i < fault_size / PAGE_SIZE; i++) { - struct page *page; - - page = pfn_to_page(pfn_t_to_pfn(pfn) + i); - if (page->mapping) - continue; - page->mapping = filp->f_mapping; - page->index = pgoff + i; - } - } dax_read_unlock(id); return rc; @@ -253,7 +265,7 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, static vm_fault_t dev_dax_fault(struct vm_fault *vmf) { - return dev_dax_huge_fault(vmf, PE_SIZE_PTE); + return dev_dax_huge_fault(vmf, 0); } static int dev_dax_may_split(struct vm_area_struct *vma, unsigned long addr) @@ -281,8 +293,9 @@ static const struct vm_operations_struct dax_vm_ops = { .pagesize = dev_dax_pagesize, }; -static int dax_mmap(struct file *filp, struct vm_area_struct *vma) +static int dax_mmap_prepare(struct vm_area_desc *desc) { + struct file *filp = desc->file; struct dev_dax *dev_dax = filp->private_data; int rc, id; @@ -293,13 +306,14 @@ static int dax_mmap(struct file *filp, struct vm_area_struct *vma) * fault time. */ id = dax_read_lock(); - rc = check_vma(dev_dax, vma, __func__); + rc = __check_vma(dev_dax, desc->vm_flags, desc->start, desc->end, filp, + __func__); dax_read_unlock(id); if (rc) return rc; - vma->vm_ops = &dax_vm_ops; - vma->vm_flags |= VM_HUGEPAGE; + desc->vm_ops = &dax_vm_ops; + desc->vm_flags |= VM_HUGEPAGE; return 0; } @@ -326,19 +340,17 @@ static unsigned long dax_get_unmapped_area(struct file *filp, if ((off + len_align) < off) goto out; - addr_align = current->mm->get_unmapped_area(filp, addr, len_align, - pgoff, flags); + addr_align = mm_get_unmapped_area(filp, addr, len_align, pgoff, flags); if (!IS_ERR_VALUE(addr_align)) { addr_align += (off - addr_align) & (align - 1); return addr_align; } out: - return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); + return mm_get_unmapped_area(filp, addr, len, pgoff, flags); } static const struct address_space_operations dev_dax_aops = { - .set_page_dirty = __set_page_dirty_no_writeback, - .invalidatepage = noop_invalidatepage, + .dirty_folio = noop_dirty_folio, }; static int dax_open(struct inode *inode, struct file *filp) @@ -374,8 +386,8 @@ static const struct file_operations dax_fops = { .open = dax_open, .release = dax_release, .get_unmapped_area = dax_get_unmapped_area, - .mmap = dax_mmap, - .mmap_supported_flags = MAP_SYNC, + .mmap_prepare = dax_mmap_prepare, + .fop_flags = FOP_MMAP_SYNC, }; static void dev_dax_cdev_del(void *cdev) @@ -388,7 +400,7 @@ static void dev_dax_kill(void *dev_dax) kill_dev_dax(dev_dax); } -int dev_dax_probe(struct dev_dax *dev_dax) +static int dev_dax_probe(struct dev_dax *dev_dax) { struct dax_device *dax_dev = dev_dax->dax_dev; struct device *dev = &dev_dax->dev; @@ -398,17 +410,34 @@ int dev_dax_probe(struct dev_dax *dev_dax) void *addr; int rc, i; - pgmap = dev_dax->pgmap; - if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1, - "static pgmap / multi-range device conflict\n")) - return -EINVAL; + if (static_dev_dax(dev_dax)) { + if (dev_dax->nr_range > 1) { + dev_warn(dev, + "static pgmap / multi-range device conflict\n"); + return -EINVAL; + } - if (!pgmap) { - pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range) - * (dev_dax->nr_range - 1), GFP_KERNEL); + pgmap = dev_dax->pgmap; + } else { + if (dev_dax->pgmap) { + dev_warn(dev, + "dynamic-dax with pre-populated page map\n"); + return -EINVAL; + } + + pgmap = devm_kzalloc(dev, + struct_size(pgmap, ranges, dev_dax->nr_range - 1), + GFP_KERNEL); if (!pgmap) return -ENOMEM; + pgmap->nr_range = dev_dax->nr_range; + dev_dax->pgmap = pgmap; + + for (i = 0; i < dev_dax->nr_range; i++) { + struct range *range = &dev_dax->ranges[i].range; + pgmap->ranges[i] = *range; + } } for (i = 0; i < dev_dax->nr_range; i++) { @@ -420,12 +449,12 @@ int dev_dax_probe(struct dev_dax *dev_dax) i, range->start, range->end); return -EBUSY; } - /* don't update the range for static pgmap */ - if (!dev_dax->pgmap) - pgmap->ranges[i] = *range; } pgmap->type = MEMORY_DEVICE_GENERIC; + if (dev_dax->align > PAGE_SIZE) + pgmap->vmemmap_shift = + order_base_2(dev_dax->align >> PAGE_SHIFT); addr = devm_memremap_pages(dev, pgmap); if (IS_ERR(addr)) return PTR_ERR(addr); @@ -433,11 +462,7 @@ int dev_dax_probe(struct dev_dax *dev_dax) inode = dax_inode(dax_dev); cdev = inode->i_cdev; cdev_init(cdev, &dax_fops); - if (dev->class) { - /* for the CONFIG_DEV_DAX_PMEM_COMPAT case */ - cdev->owner = dev->parent->driver->owner; - } else - cdev->owner = dev->driver->owner; + cdev->owner = dev->driver->owner; cdev_set_parent(cdev, &dev->kobj); rc = cdev_add(cdev, dev->devt, 1); if (rc) @@ -450,12 +475,10 @@ int dev_dax_probe(struct dev_dax *dev_dax) run_dax(dax_dev); return devm_add_action_or_reset(dev, dev_dax_kill, dev_dax); } -EXPORT_SYMBOL_GPL(dev_dax_probe); static struct dax_device_driver device_dax_driver = { .probe = dev_dax_probe, - /* all probe actions are unwound by devm, so .remove isn't necessary */ - .match_always = 1, + .type = DAXDRV_DEVICE_TYPE, }; static int __init dax_init(void) @@ -469,6 +492,7 @@ static void __exit dax_exit(void) } MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Device DAX: direct access device driver"); MODULE_LICENSE("GPL v2"); module_init(dax_init); module_exit(dax_exit); |
