summary | refs | log | tree | commit | diff
path: root/drivers/pci/p2pdma.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2022-12-13 10:43:59 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2022-12-13 10:43:59 -0800
commit: ce8a79d5601aab94c02ed4539c48e8605422ac94 (patch)
tree: 7830a97a475d57284640c8e2d3516521722708b6 /drivers/pci/p2pdma.c
parent: 96f7e448b9f4546ffd0356ffceb2b9586777f316 (diff)
parent: f596da3efaf4130ff61cd029558845808df9bf99 (diff)
Merge tag 'for-6.2/block-2022-12-08' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: - NVMe pull requests via Christoph: - Support some passthrough commands without CAP_SYS_ADMIN (Kanchan Joshi) - Refactor PCIe probing and reset (Christoph Hellwig) - Various fabrics authentication fixes and improvements (Sagi Grimberg) - Avoid fallback to sequential scan due to transient issues (Uday Shankar) - Implement support for the DEAC bit in Write Zeroes (Christoph Hellwig) - Allow overriding the IEEE OUI and firmware revision in configfs for nvmet (Aleksandr Miloserdov) - Force reconnect when number of queue changes in nvmet (Daniel Wagner) - Minor fixes and improvements (Uros Bizjak, Joel Granados, Sagi Grimberg, Christoph Hellwig, Christophe JAILLET) - Fix and cleanup nvme-fc req allocation (Chaitanya Kulkarni) - Use the common tagset helpers in nvme-pci driver (Christoph Hellwig) - Cleanup the nvme-pci removal path (Christoph Hellwig) - Use kstrtobool() instead of strtobool (Christophe JAILLET) - Allow unprivileged passthrough of Identify Controller (Joel Granados) - Support io stats on the mpath device (Sagi Grimberg) - Minor nvmet cleanup (Sagi Grimberg) - MD pull requests via Song: - Code cleanups (Christoph) - Various fixes - Floppy pull request from Denis: - Fix a memory leak in the init error path (Yuan) - Series fixing some batch wakeup issues with sbitmap (Gabriel) - Removal of the pktcdvd driver that was deprecated more than 5 years ago, and subsequent removal of the devnode callback in struct block_device_operations as no users are now left (Greg) - Fix for partition read on an exclusively opened bdev (Jan) - Series of elevator API cleanups (Jinlong, Christoph) - Series of fixes and cleanups for blk-iocost (Kemeng) - Series of fixes and cleanups for blk-throttle (Kemeng) - Series adding concurrent support for sync queues in BFQ (Yu) - Series bringing drbd a bit closer to the out-of-tree maintained version (Christian, Joel, Lars, Philipp) - Misc drbd fixes (Wang) - blk-wbt fixes and tweaks for enable/disable 
(Yu) - Fixes for mq-deadline for zoned devices (Damien) - Add support for read-only and offline zones for null_blk (Shin'ichiro) - Series fixing the delayed holder tracking, as used by DM (Yu, Christoph) - Series enabling bio alloc caching for IRQ based IO (Pavel) - Series enabling userspace peer-to-peer DMA (Logan) - BFQ waker fixes (Khazhismel) - Series fixing elevator refcount issues (Christoph, Jinlong) - Series cleaning up references around queue destruction (Christoph) - Series doing quiesce by tagset, enabling cleanups in drivers (Christoph, Chao) - Series untangling the queue kobject and queue references (Christoph) - Misc fixes and cleanups (Bart, David, Dawei, Jinlong, Kemeng, Ye, Yang, Waiman, Shin'ichiro, Randy, Pankaj, Christoph) * tag 'for-6.2/block-2022-12-08' of git://git.kernel.dk/linux: (247 commits) blktrace: Fix output non-blktrace event when blk_classic option enabled block: sed-opal: Don't include <linux/kernel.h> sed-opal: allow using IOC_OPAL_SAVE for locking too blk-cgroup: Fix typo in comment block: remove bio_set_op_attrs nvmet: don't open-code NVME_NS_ATTR_RO enumeration nvme-pci: use the tagset alloc/free helpers nvme: add the Apple shared tag workaround to nvme_alloc_io_tag_set nvme: only set reserved_tags in nvme_alloc_io_tag_set for fabrics controllers nvme: consolidate setting the tagset flags nvme: pass nr_maps explicitly to nvme_alloc_io_tag_set block: bio_copy_data_iter nvme-pci: split out a nvme_pci_ctrl_is_dead helper nvme-pci: return early on ctrl state mismatch in nvme_reset_work nvme-pci: rename nvme_disable_io_queues nvme-pci: cleanup nvme_suspend_queue nvme-pci: remove nvme_pci_disable nvme-pci: remove nvme_disable_admin_queue nvme: merge nvme_shutdown_ctrl into nvme_disable_ctrl nvme: use nvme_wait_ready in nvme_shutdown_ctrl ...
Diffstat (limited to 'drivers/pci/p2pdma.c')
-rw-r--r-- drivers/pci/p2pdma.c | 124
1 files changed, 124 insertions, 0 deletions
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 5565f67d6537..86812d2073ea 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -89,6 +89,90 @@ static ssize_t published_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RO(published);
+/*
+ * p2pmem_alloc_mmap - mmap handler for the p2pmem "allocate" sysfs file
+ * @filp: file being mapped (unused)
+ * @kobj: kobject of the PCI device that provides the p2p memory
+ * @attr: the binary attribute being mapped (unused)
+ * @vma:  userspace mapping to populate
+ *
+ * Allocates (vm_end - vm_start) bytes from the device's p2pdma pool and
+ * inserts the backing pages into @vma one page at a time. Only shared
+ * mappings at offset zero are accepted.
+ *
+ * Every successfully inserted page takes its own reference on the pool
+ * owner's percpu_ref; p2pdma_page_free() drops that reference when the
+ * page is returned to the pool.
+ *
+ * Returns 0 on success, -EINVAL for a private mapping or a non-zero
+ * offset, -ENODEV if the device has no p2pdma state or the owner
+ * reference can no longer be taken, -ENOMEM if the pool allocation
+ * fails, or the error returned by vm_insert_page().
+ */
+static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr, struct vm_area_struct *vma)
+{
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+	size_t len = vma->vm_end - vma->vm_start;
+	struct pci_p2pdma *p2pdma;
+	struct percpu_ref *ref;
+	unsigned long vaddr;
+	void *kaddr;
+	int ret;
+
+	/* prevent private mappings from being established */
+	if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
+		pci_info_ratelimited(pdev,
+				     "%s: fail, attempted private mapping\n",
+				     current->comm);
+		return -EINVAL;
+	}
+
+	if (vma->vm_pgoff) {
+		pci_info_ratelimited(pdev,
+				     "%s: fail, attempted mapping with non-zero offset\n",
+				     current->comm);
+		return -EINVAL;
+	}
+
+	rcu_read_lock();
+	p2pdma = rcu_dereference(pdev->p2pdma);
+	if (!p2pdma) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	kaddr = (void *)gen_pool_alloc_owner(p2pdma->pool, len, (void **)&ref);
+	if (!kaddr) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * vm_insert_page() can sleep, so a reference is taken to mapping
+	 * such that rcu_read_unlock() can be done before inserting the
+	 * pages
+	 */
+	if (unlikely(!percpu_ref_tryget_live_rcu(ref))) {
+		ret = -ENODEV;
+		goto out_free_mem;
+	}
+	rcu_read_unlock();
+
+	for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
+		ret = vm_insert_page(vma, vaddr, virt_to_page(kaddr));
+		if (ret) {
+			/*
+			 * Give back only the part that was not inserted
+			 * (kaddr/len track the remainder); pages already
+			 * inserted keep their own references.
+			 */
+			gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
+			/*
+			 * Drop the reference taken by
+			 * percpu_ref_tryget_live_rcu() above, otherwise it
+			 * is leaked on this error path.
+			 */
+			percpu_ref_put(ref);
+			return ret;
+		}
+		/* per-page reference, released in p2pdma_page_free() */
+		percpu_ref_get(ref);
+		/*
+		 * Drop this function's page reference.
+		 * NOTE(review): presumably the mapping holds its own
+		 * reference after vm_insert_page() - confirm.
+		 */
+		put_page(virt_to_page(kaddr));
+		kaddr += PAGE_SIZE;
+		len -= PAGE_SIZE;
+	}
+
+	/* drop the temporary reference that covered the insertion loop */
+	percpu_ref_put(ref);
+
+	return 0;
+out_free_mem:
+	gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+/*
+ * Binary sysfs attribute "allocate": its mmap operation is
+ * p2pmem_alloc_mmap(), which maps memory taken from the p2pdma pool.
+ */
+static struct bin_attribute p2pmem_alloc_attr = {
+	/*
+	 * Some mmap callers (python, for one) refuse to map more than the
+	 * reported file size. Advertise a very large size so that such
+	 * callers do not reject the request up front.
+	 */
+	.size = SZ_1T,
+	.mmap = p2pmem_alloc_mmap,
+	.attr = {
+		.name = "allocate",
+		.mode = 0660,
+	},
+};
+
static struct attribute *p2pmem_attrs[] = {
&dev_attr_size.attr,
&dev_attr_available.attr,
@@ -96,11 +180,32 @@ static struct attribute *p2pmem_attrs[] = {
NULL,
};
+static struct bin_attribute *p2pmem_bin_attrs[] = { /* binary attributes referenced by p2pmem_group.bin_attrs */
+ &p2pmem_alloc_attr, /* the "allocate" file, whose mmap op is p2pmem_alloc_mmap() */
+ NULL, /* array is NULL-terminated, like p2pmem_attrs */
+};
+
static const struct attribute_group p2pmem_group = {
.attrs = p2pmem_attrs,
+ .bin_attrs = p2pmem_bin_attrs, /* adds the mmap-able "allocate" binary attribute to the group */
.name = "p2pmem",
};
+/*
+ * p2pdma_page_free - dev_pagemap page_free callback for p2pdma pages
+ * @page: the page being released
+ *
+ * Returns one PAGE_SIZE chunk to the provider's gen_pool and drops the
+ * reference held on the chunk's owner percpu_ref.
+ */
+static void p2pdma_page_free(struct page *page)
+{
+	struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page->pgmap);
+	/*
+	 * provider->p2pdma is an RCU-managed pointer (it is read with
+	 * rcu_dereference*() elsewhere in this file), so use the accessor
+	 * here too instead of a bare dereference.
+	 * NOTE(review): presumably safe without rcu_read_lock() because the
+	 * page still holds a reference on the owner percpu_ref until the
+	 * percpu_ref_put() below - confirm.
+	 */
+	struct pci_p2pdma *p2pdma =
+		rcu_dereference_protected(pgmap->provider->p2pdma, 1);
+	struct percpu_ref *ref;
+
+	gen_pool_free_owner(p2pdma->pool, (uintptr_t)page_to_virt(page),
+			    PAGE_SIZE, (void **)&ref);
+	percpu_ref_put(ref);
+}
+
+static const struct dev_pagemap_ops p2pdma_pgmap_ops = {
+ .page_free = p2pdma_page_free, /* gives the page back to the provider's gen_pool and puts the owner percpu_ref */
+};
+
static void pci_p2pdma_release(void *data)
{
struct pci_dev *pdev = data;
@@ -152,6 +257,19 @@ out:
return error;
}
+/*
+ * Callback taking the provider PCI device as @data: removes the
+ * "allocate" file from the device's "p2pmem" sysfs group.
+ */
+static void pci_p2pdma_unmap_mappings(void *data)
+{
+	struct pci_dev *provider = data;
+	struct kobject *kobj = &provider->dev.kobj;
+
+	/*
+	 * Dropping the alloc attribute from sysfs ends up calling
+	 * unmap_mapping_range() on the inode: existing userspace mappings
+	 * are torn down and no new ones can be established.
+	 */
+	sysfs_remove_file_from_group(kobj, &p2pmem_alloc_attr.attr,
+				     p2pmem_group.name);
+}
+
/**
* pci_p2pdma_add_resource - add memory for use as p2p memory
* @pdev: the device to add the memory to
@@ -198,6 +316,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
pgmap->range.end = pgmap->range.start + size - 1;
pgmap->nr_range = 1;
pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
+ pgmap->ops = &p2pdma_pgmap_ops;
p2p_pgmap->provider = pdev;
p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) -
@@ -209,6 +328,11 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
goto pgmap_free;
}
+ error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_unmap_mappings,
+ pdev);
+ if (error)
+ goto pages_free;
+
p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);
error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr,
pci_bus_address(pdev, bar) + offset,