Diffstat (limited to 'drivers/pci/p2pdma.c')
 -rw-r--r--  drivers/pci/p2pdma.c | 425
 1 file changed, 303 insertions(+), 122 deletions(-)
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 1015274bd2fe..4a2fc7ab42c3 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -10,6 +10,7 @@
 #define pr_fmt(fmt) "pci-p2pdma: " fmt
 
 #include <linux/ctype.h>
+#include <linux/dma-map-ops.h>
 #include <linux/pci-p2pdma.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -20,23 +21,16 @@
 #include <linux/seq_buf.h>
 #include <linux/xarray.h>
 
-enum pci_p2pdma_map_type {
-	PCI_P2PDMA_MAP_UNKNOWN = 0,
-	PCI_P2PDMA_MAP_NOT_SUPPORTED,
-	PCI_P2PDMA_MAP_BUS_ADDR,
-	PCI_P2PDMA_MAP_THRU_HOST_BRIDGE,
-};
-
 struct pci_p2pdma {
 	struct gen_pool *pool;
 	bool p2pmem_published;
 	struct xarray map_types;
+	struct p2pdma_provider mem[PCI_STD_NUM_BARS];
 };
 
 struct pci_p2pdma_pagemap {
 	struct dev_pagemap pgmap;
-	struct pci_dev *provider;
-	u64 bus_offset;
+	struct p2pdma_provider *mem;
 };
 
 static struct pci_p2pdma_pagemap *to_p2p_pgmap(struct dev_pagemap *pgmap)
@@ -95,6 +89,99 @@ static ssize_t published_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(published);
 
+static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
+		const struct bin_attribute *attr, struct vm_area_struct *vma)
+{
+	struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+	size_t len = vma->vm_end - vma->vm_start;
+	struct pci_p2pdma *p2pdma;
+	struct percpu_ref *ref;
+	unsigned long vaddr;
+	void *kaddr;
+	int ret;
+
+	/* prevent private mappings from being established */
+	if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
+		pci_info_ratelimited(pdev,
+			"%s: fail, attempted private mapping\n",
+			current->comm);
+		return -EINVAL;
+	}
+
+	if (vma->vm_pgoff) {
+		pci_info_ratelimited(pdev,
+			"%s: fail, attempted mapping with non-zero offset\n",
+			current->comm);
+		return -EINVAL;
+	}
+
+	rcu_read_lock();
+	p2pdma = rcu_dereference(pdev->p2pdma);
+	if (!p2pdma) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	kaddr = (void *)gen_pool_alloc_owner(p2pdma->pool, len, (void **)&ref);
+	if (!kaddr) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * vm_insert_page() can sleep, so a reference is taken to mapping
+	 * such that rcu_read_unlock() can be done before inserting the
+	 * pages
+	 */
+	if (unlikely(!percpu_ref_tryget_live_rcu(ref))) {
+		ret = -ENODEV;
+		goto out_free_mem;
+	}
+	rcu_read_unlock();
+
+	for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
+		struct page *page = virt_to_page(kaddr);
+
+		/*
+		 * Initialise the refcount for the freshly allocated page. As
+		 * we have just allocated the page no one else should be
+		 * using it.
+		 */
+		VM_WARN_ON_ONCE_PAGE(!page_ref_count(page), page);
+		set_page_count(page, 1);
+		ret = vm_insert_page(vma, vaddr, page);
+		if (ret) {
+			gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
+			return ret;
+		}
+		percpu_ref_get(ref);
+		put_page(page);
+		kaddr += PAGE_SIZE;
+		len -= PAGE_SIZE;
+	}
+
+	percpu_ref_put(ref);
+
+	return 0;
+out_free_mem:
+	gen_pool_free(p2pdma->pool, (uintptr_t)kaddr, len);
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+static const struct bin_attribute p2pmem_alloc_attr = {
+	.attr = { .name = "allocate", .mode = 0660 },
+	.mmap = p2pmem_alloc_mmap,
+	/*
+	 * Some places where we want to call mmap (ie. python) will check
+	 * that the file size is greater than the mmap size before allowing
+	 * the mmap to continue. To work around this, just set the size
+	 * to be very large.
+	 */
+	.size = SZ_1T,
+};
+
 static struct attribute *p2pmem_attrs[] = {
 	&dev_attr_size.attr,
 	&dev_attr_available.attr,
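The handler above rejects private mappings and non-zero offsets, so the only valid way to consume the new attribute is a shared, zero-offset mmap(). A minimal userspace sketch, under the assumption that the sysfs path follows from the "p2pmem" group and "allocate" attribute names above (the device address 0000:01:00.0 and the 64 KiB length are illustrative):

#include <fcntl.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	const size_t len = 65536;	/* page-aligned allocation size */
	void *p;
	int fd;

	fd = open("/sys/bus/pci/devices/0000:01:00.0/p2pmem/allocate",
		  O_RDWR);
	if (fd < 0)
		return EXIT_FAILURE;

	/* MAP_SHARED at offset 0 is mandatory, see p2pmem_alloc_mmap() */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		close(fd);
		return EXIT_FAILURE;
	}

	/* ... hand the buffer to, e.g., an O_DIRECT read ... */

	munmap(p, len);		/* returns the p2pmem to the pool */
	close(fd);
	return EXIT_SUCCESS;
}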
@@ -102,11 +189,35 @@ static struct attribute *p2pmem_attrs[] = {
 	NULL,
 };
 
+static const struct bin_attribute *const p2pmem_bin_attrs[] = {
+	&p2pmem_alloc_attr,
+	NULL,
+};
+
 static const struct attribute_group p2pmem_group = {
 	.attrs = p2pmem_attrs,
+	.bin_attrs = p2pmem_bin_attrs,
 	.name = "p2pmem",
 };
 
+static void p2pdma_folio_free(struct folio *folio)
+{
+	struct page *page = &folio->page;
+	struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page_pgmap(page));
+	/* safe to dereference while a reference is held to the percpu ref */
+	struct pci_p2pdma *p2pdma = rcu_dereference_protected(
+		to_pci_dev(pgmap->mem->owner)->p2pdma, 1);
+	struct percpu_ref *ref;
+
+	gen_pool_free_owner(p2pdma->pool, (uintptr_t)page_to_virt(page),
+			    PAGE_SIZE, (void **)&ref);
+	percpu_ref_put(ref);
+}
+
+static const struct dev_pagemap_ops p2pdma_pgmap_ops = {
+	.folio_free = p2pdma_folio_free,
+};
+
 static void pci_p2pdma_release(void *data)
 {
 	struct pci_dev *pdev = data;
@@ -118,44 +229,137 @@ static void pci_p2pdma_release(void *data)
 
 	/* Flush and disable pci_alloc_p2p_mem() */
 	pdev->p2pdma = NULL;
-	synchronize_rcu();
+	if (p2pdma->pool)
+		synchronize_rcu();
+	xa_destroy(&p2pdma->map_types);
+
+	if (!p2pdma->pool)
+		return;
 
 	gen_pool_destroy(p2pdma->pool);
 	sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
-	xa_destroy(&p2pdma->map_types);
 }
 
-static int pci_p2pdma_setup(struct pci_dev *pdev)
+/**
+ * pcim_p2pdma_init - Initialise peer-to-peer DMA providers
+ * @pdev: The PCI device to enable P2PDMA for
+ *
+ * This function initializes the peer-to-peer DMA infrastructure
+ * for a PCI device. It allocates and sets up the necessary data
+ * structures to support P2PDMA operations, including mapping type
+ * tracking.
+ */
+int pcim_p2pdma_init(struct pci_dev *pdev)
 {
-	int error = -ENOMEM;
 	struct pci_p2pdma *p2p;
+	int i, ret;
+
+	p2p = rcu_dereference_protected(pdev->p2pdma, 1);
+	if (p2p)
+		return 0;
 
 	p2p = devm_kzalloc(&pdev->dev, sizeof(*p2p), GFP_KERNEL);
 	if (!p2p)
 		return -ENOMEM;
 
 	xa_init(&p2p->map_types);
+	/*
+	 * Iterate over all standard PCI BARs and record only those that
+	 * correspond to MMIO regions. Skip non-memory resources (e.g. I/O
+	 * port BARs) since they cannot be used for peer-to-peer (P2P)
+	 * transactions.
+	 */
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+			continue;
 
-	p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev));
-	if (!p2p->pool)
-		goto out;
+		p2p->mem[i].owner = &pdev->dev;
+		p2p->mem[i].bus_offset =
+			pci_bus_address(pdev, i) - pci_resource_start(pdev, i);
+	}
 
-	error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
-	if (error)
-		goto out_pool_destroy;
+	ret = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
+	if (ret)
+		goto out_p2p;
 
-	error = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group);
-	if (error)
+	rcu_assign_pointer(pdev->p2pdma, p2p);
+	return 0;
+
+out_p2p:
+	devm_kfree(&pdev->dev, p2p);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pcim_p2pdma_init);
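pcim_p2pdma_init() is device-managed and returns early once pdev->p2pdma exists, so a provider driver can call it unconditionally from probe() and pair it with the provider lookup defined next. A hypothetical sketch, not part of this patch (the foo_ name and the choice of BAR 0 are assumptions):

static int foo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct p2pdma_provider *provider;
	int ret;

	ret = pcim_enable_device(pdev);
	if (ret)
		return ret;

	/* idempotent; teardown is devm-managed via pci_p2pdma_release() */
	ret = pcim_p2pdma_init(pdev);
	if (ret)
		return ret;

	/* BAR 0 assumed to be the MMIO region shared with peers */
	provider = pcim_p2pdma_provider(pdev, 0);
	if (!provider)
		return -ENODEV;

	return 0;
}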
+
+/**
+ * pcim_p2pdma_provider - Get peer-to-peer DMA provider
+ * @pdev: The PCI device to enable P2PDMA for
+ * @bar: BAR index to get provider
+ *
+ * This function gets peer-to-peer DMA provider for a PCI device. The lifetime
+ * of the provider (and of course the MMIO) is bound to the lifetime of the
+ * driver. A driver calling this function must ensure that all references to the
+ * provider, and any DMA mappings created for any MMIO, are all cleaned up
+ * before the driver remove() completes.
+ *
+ * Since P2P is almost always shared with a second driver this means some system
+ * to notify, invalidate and revoke the MMIO's DMA must be in place to use this
+ * function. For example a revoke can be built using DMABUF.
+ */
+struct p2pdma_provider *pcim_p2pdma_provider(struct pci_dev *pdev, int bar)
+{
+	struct pci_p2pdma *p2p;
+
+	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM))
+		return NULL;
+
+	p2p = rcu_dereference_protected(pdev->p2pdma, 1);
+	if (WARN_ON(!p2p))
+		/* Someone forgot to call to pcim_p2pdma_init() before */
+		return NULL;
+
+	return &p2p->mem[bar];
+}
+EXPORT_SYMBOL_GPL(pcim_p2pdma_provider);
+
+static int pci_p2pdma_setup_pool(struct pci_dev *pdev)
+{
+	struct pci_p2pdma *p2pdma;
+	int ret;
+
+	p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);
+	if (p2pdma->pool)
+		/* We already setup pools, do nothing, */
+		return 0;
+
+	p2pdma->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev));
+	if (!p2pdma->pool)
+		return -ENOMEM;
+
+	ret = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group);
+	if (ret)
 		goto out_pool_destroy;
 
-	rcu_assign_pointer(pdev->p2pdma, p2p);
 	return 0;
 
 out_pool_destroy:
-	gen_pool_destroy(p2p->pool);
-out:
-	devm_kfree(&pdev->dev, p2p);
-	return error;
+	gen_pool_destroy(p2pdma->pool);
+	p2pdma->pool = NULL;
+	return ret;
+}
+
+static void pci_p2pdma_unmap_mappings(void *data)
+{
+	struct pci_p2pdma_pagemap *p2p_pgmap = data;
+
+	/*
+	 * Removing the alloc attribute from sysfs will call
+	 * unmap_mapping_range() on the inode, teardown any existing userspace
+	 * mappings and prevent new ones from being created.
+	 */
+	sysfs_remove_file_from_group(&p2p_pgmap->mem->owner->kobj,
+				     &p2pmem_alloc_attr.attr,
+				     p2pmem_group.name);
 }
 
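For peers that resolve to PCI_P2PDMA_MAP_BUS_ADDR, the bus_offset recorded in pcim_p2pdma_init() is all that is needed to translate a CPU physical address inside the BAR into the bus address a peer must target, mirroring what the old __pci_p2pdma_map_sg() did with sg_phys(). A hypothetical helper, not part of this patch:

/* Valid only when the pair of devices maps as PCI_P2PDMA_MAP_BUS_ADDR. */
static u64 foo_peer_bus_addr(struct p2pdma_provider *mem, phys_addr_t phys)
{
	/* bus_offset == pci_bus_address(pdev, bar) - pci_resource_start(pdev, bar) */
	return phys + mem->bus_offset;
}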
 /**
@@ -172,6 +376,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 		u64 offset)
 {
 	struct pci_p2pdma_pagemap *p2p_pgmap;
+	struct p2pdma_provider *mem;
 	struct dev_pagemap *pgmap;
 	struct pci_p2pdma *p2pdma;
 	void *addr;
@@ -189,11 +394,21 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 	if (size + offset > pci_resource_len(pdev, bar))
 		return -EINVAL;
 
-	if (!pdev->p2pdma) {
-		error = pci_p2pdma_setup(pdev);
-		if (error)
-			return error;
-	}
+	error = pcim_p2pdma_init(pdev);
+	if (error)
+		return error;
+
+	error = pci_p2pdma_setup_pool(pdev);
+	if (error)
+		return error;
+
+	mem = pcim_p2pdma_provider(pdev, bar);
+	/*
+	 * We checked validity of BAR prior to call
+	 * to pcim_p2pdma_provider. It should never return NULL.
+	 */
+	if (WARN_ON(!mem))
+		return -EINVAL;
 
 	p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
 	if (!p2p_pgmap)
@@ -204,10 +419,8 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 	pgmap->range.end = pgmap->range.start + size - 1;
 	pgmap->nr_range = 1;
 	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
-
-	p2p_pgmap->provider = pdev;
-	p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) -
-		pci_resource_start(pdev, bar);
+	pgmap->ops = &p2pdma_pgmap_ops;
+	p2p_pgmap->mem = mem;
 
 	addr = devm_memremap_pages(&pdev->dev, pgmap);
 	if (IS_ERR(addr)) {
@@ -215,6 +428,11 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 		goto pgmap_free;
 	}
 
+	error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_unmap_mappings,
+					 p2p_pgmap);
+	if (error)
+		goto pages_free;
+
 	p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);
 	error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr,
 			pci_bus_address(pdev, bar) + offset,
@@ -231,7 +449,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 pages_free:
 	devm_memunmap_pages(&pdev->dev, pgmap);
 pgmap_free:
-	devm_kfree(&pdev->dev, pgmap);
+	devm_kfree(&pdev->dev, p2p_pgmap);
 	return error;
 }
 EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource);
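pci_p2pdma_add_resource() keeps the classic struct-page-backed flow on top of the new provider core, so a provider driver still publishes and allocates as before. A sketch under assumptions: the foo_ name, BAR 4 and SZ_4K are illustrative, and size == 0 requests the rest of the BAR:

static int foo_setup_p2pmem(struct pci_dev *pdev)
{
	void *buf;
	int ret;

	/* expose all of BAR 4 (offset 0, size 0 == rest of the BAR) */
	ret = pci_p2pdma_add_resource(pdev, 4, 0, 0);
	if (ret)
		return ret;

	pci_p2pmem_publish(pdev, true);

	/* allocations now come from the gen_pool filled above */
	buf = pci_alloc_p2pmem(pdev, SZ_4K);
	if (!buf)
		return -ENOMEM;
	pci_free_p2pmem(pdev, buf, SZ_4K);

	return 0;
}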
@@ -315,25 +533,27 @@ static const struct pci_p2pdma_whitelist_entry {
 	/* Intel Xeon E7 v3/Xeon E5 v3/Core i7 */
 	{PCI_VENDOR_ID_INTEL,	0x2f00, REQ_SAME_HOST_BRIDGE},
 	{PCI_VENDOR_ID_INTEL,	0x2f01, REQ_SAME_HOST_BRIDGE},
-	/* Intel SkyLake-E */
+	/* Intel Skylake-E */
 	{PCI_VENDOR_ID_INTEL,	0x2030, 0},
 	{PCI_VENDOR_ID_INTEL,	0x2031, 0},
 	{PCI_VENDOR_ID_INTEL,	0x2032, 0},
 	{PCI_VENDOR_ID_INTEL,	0x2033, 0},
 	{PCI_VENDOR_ID_INTEL,	0x2020, 0},
+	{PCI_VENDOR_ID_INTEL,	0x09a2, 0},
 	{}
 };
 
 /*
- * This lookup function tries to find the PCI device corresponding to a given
- * host bridge.
+ * If the first device on host's root bus is either devfn 00.0 or a PCIe
+ * Root Port, return it. Otherwise return NULL.
  *
- * It assumes the host bridge device is the first PCI device in the
- * bus->devices list and that the devfn is 00.0. These assumptions should hold
- * for all the devices in the whitelist above.
+ * We often use a devfn 00.0 "host bridge" in the pci_p2pdma_whitelist[]
+ * (though there is no PCI/PCIe requirement for such a device). On some
+ * platforms, e.g., Intel Skylake, there is no such host bridge device, and
+ * pci_p2pdma_whitelist[] may contain a Root Port at any devfn.
  *
- * This function is equivalent to pci_get_slot(host->bus, 0), however it does
- * not take the pci_bus_sem lock seeing __host_bridge_whitelist() must not
+ * This function is similar to pci_get_slot(host->bus, 0), but it does
+ * not take the pci_bus_sem lock since __host_bridge_whitelist() must not
  * sleep.
  *
 * For this to be safe, the caller should hold a reference to a device on the
@@ -349,10 +569,14 @@ static struct pci_dev *pci_host_bridge_dev(struct pci_host_bridge *host)
 	if (!root)
 		return NULL;
 
-	if (root->devfn != PCI_DEVFN(0, 0))
-		return NULL;
 
-	return root;
+	if (root->devfn == PCI_DEVFN(0, 0))
+		return root;
+
+	if (pci_pcie_type(root) == PCI_EXP_TYPE_ROOT_PORT)
+		return root;
+
+	return NULL;
 }
 
 static bool __host_bridge_whitelist(struct pci_host_bridge *host,
@@ -406,8 +630,7 @@ static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b,
 
 static unsigned long map_types_idx(struct pci_dev *client)
 {
-	return (pci_domain_nr(client->bus) << 16) |
-		(client->bus->number << 8) | client->devfn;
+	return (pci_domain_nr(client->bus) << 16) | pci_dev_id(client);
 }
 
 /*
@@ -536,7 +759,7 @@ done:
 	p2pdma = rcu_dereference(provider->p2pdma);
 	if (p2pdma)
 		xa_store(&p2pdma->map_types, map_types_idx(client),
-			 xa_mk_value(map_type), GFP_KERNEL);
+			 xa_mk_value(map_type), GFP_ATOMIC);
 	rcu_read_unlock();
 	return map_type;
 }
@@ -604,7 +827,7 @@ EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many);
  * pci_has_p2pmem - check if a given PCI device has published any p2pmem
  * @pdev: PCI device to check
  */
-bool pci_has_p2pmem(struct pci_dev *pdev)
+static bool pci_has_p2pmem(struct pci_dev *pdev)
 {
 	struct pci_p2pdma *p2pdma;
 	bool res;
@@ -616,12 +839,10 @@ bool pci_has_p2pmem(struct pci_dev *pdev)
 
 	return res;
 }
-EXPORT_SYMBOL_GPL(pci_has_p2pmem);
 
 /**
  * pci_p2pmem_find_many - find a peer-to-peer DMA memory device compatible with
- *	the specified list of clients and shortest distance (as determined
- *	by pci_p2pmem_dma())
+ *	the specified list of clients and shortest distance
  * @clients: array of devices to check (NULL-terminated)
  * @num_clients: number of client devices in the list
 *
@@ -649,7 +870,7 @@ struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients)
 	if (!closest_pdevs)
 		return NULL;
 
-	while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) {
+	for_each_pci_dev(pdev) {
 		if (!pci_has_p2pmem(pdev))
 			continue;
 
@@ -673,7 +894,7 @@ struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients)
 	}
 
 	if (dev_cnt)
-		pdev = pci_dev_get(closest_pdevs[prandom_u32_max(dev_cnt)]);
+		pdev = pci_dev_get(closest_pdevs[get_random_u32_below(dev_cnt)]);
 
 	for (i = 0; i < dev_cnt; i++)
 		pci_dev_put(closest_pdevs[i]);
@@ -713,7 +934,6 @@ void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
 	if (unlikely(!percpu_ref_tryget_live_rcu(ref))) {
 		gen_pool_free(p2pdma->pool, (unsigned long) ret, size);
 		ret = NULL;
-		goto out;
 	}
 out:
 	rcu_read_unlock();
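pci_p2pmem_find_many() still returns a referenced provider at the shortest distance from every client, now iterating with for_each_pci_dev() and picking among equally close providers with get_random_u32_below(). Typical use, with a hypothetical caller and two client devices:

static struct pci_dev *foo_pick_provider(struct pci_dev *a, struct pci_dev *b)
{
	struct device *clients[] = { &a->dev, &b->dev, NULL };

	/* returns a referenced pci_dev or NULL; caller must pci_dev_put() */
	return pci_p2pmem_find_many(clients, 2);
}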
@@ -841,15 +1061,26 @@ void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
 }
 EXPORT_SYMBOL_GPL(pci_p2pmem_publish);
 
-static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap,
-						    struct device *dev)
+/**
+ * pci_p2pdma_map_type - Determine the mapping type for P2PDMA transfers
+ * @provider: P2PDMA provider structure
+ * @dev: Target device for the transfer
+ *
+ * Determines how peer-to-peer DMA transfers should be mapped between
+ * the provider and the target device. The mapping type indicates whether
+ * the transfer can be done directly through PCI switches or must go
+ * through the host bridge.
+ */
+enum pci_p2pdma_map_type pci_p2pdma_map_type(struct p2pdma_provider *provider,
+					     struct device *dev)
 {
 	enum pci_p2pdma_map_type type = PCI_P2PDMA_MAP_NOT_SUPPORTED;
-	struct pci_dev *provider = to_p2p_pgmap(pgmap)->provider;
+	struct pci_dev *pdev = to_pci_dev(provider->owner);
 	struct pci_dev *client;
 	struct pci_p2pdma *p2pdma;
+	int dist;
 
-	if (!provider->p2pdma)
+	if (!pdev->p2pdma)
 		return PCI_P2PDMA_MAP_NOT_SUPPORTED;
 
 	if (!dev_is_pci(dev))
@@ -858,80 +1089,30 @@ static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap,
 	client = to_pci_dev(dev);
 
 	rcu_read_lock();
-	p2pdma = rcu_dereference(provider->p2pdma);
+	p2pdma = rcu_dereference(pdev->p2pdma);
 
 	if (p2pdma)
 		type = xa_to_value(xa_load(&p2pdma->map_types,
 					   map_types_idx(client)));
 	rcu_read_unlock();
 
-	return type;
-}
-
-static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap,
-		struct device *dev, struct scatterlist *sg, int nents)
-{
-	struct scatterlist *s;
-	int i;
 
-	for_each_sg(sg, s, nents, i) {
-		s->dma_address = sg_phys(s) + p2p_pgmap->bus_offset;
-		sg_dma_len(s) = s->length;
-	}
+	if (type == PCI_P2PDMA_MAP_UNKNOWN)
+		return calc_map_type_and_dist(pdev, client, &dist, true);
 
-	return nents;
-}
-
-/**
- * pci_p2pdma_map_sg_attrs - map a PCI peer-to-peer scatterlist for DMA
- * @dev: device doing the DMA request
- * @sg: scatter list to map
- * @nents: elements in the scatterlist
- * @dir: DMA direction
- * @attrs: DMA attributes passed to dma_map_sg() (if called)
- *
- * Scatterlists mapped with this function should be unmapped using
- * pci_p2pdma_unmap_sg_attrs().
- *
- * Returns the number of SG entries mapped or 0 on error.
- */
-int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction dir, unsigned long attrs)
-{
-	struct pci_p2pdma_pagemap *p2p_pgmap =
-		to_p2p_pgmap(sg_page(sg)->pgmap);
-
-	switch (pci_p2pdma_map_type(sg_page(sg)->pgmap, dev)) {
-	case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
-		return dma_map_sg_attrs(dev, sg, nents, dir, attrs);
-	case PCI_P2PDMA_MAP_BUS_ADDR:
-		return __pci_p2pdma_map_sg(p2p_pgmap, dev, sg, nents);
-	default:
-		WARN_ON_ONCE(1);
-		return 0;
-	}
+	return type;
 }
-EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg_attrs);
 
-/**
- * pci_p2pdma_unmap_sg_attrs - unmap a PCI peer-to-peer scatterlist that was
- *	mapped with pci_p2pdma_map_sg()
- * @dev: device doing the DMA request
- * @sg: scatter list to map
- * @nents: number of elements returned by pci_p2pdma_map_sg()
- * @dir: DMA direction
- * @attrs: DMA attributes passed to dma_unmap_sg() (if called)
- */
-void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction dir, unsigned long attrs)
+void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state,
+		struct device *dev, struct page *page)
 {
-	enum pci_p2pdma_map_type map_type;
+	struct pci_p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(page_pgmap(page));
 
-	map_type = pci_p2pdma_map_type(sg_page(sg)->pgmap, dev);
+	if (state->mem == p2p_pgmap->mem)
+		return;
 
-	if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
-		dma_unmap_sg_attrs(dev, sg, nents, dir, attrs);
+	state->mem = p2p_pgmap->mem;
+	state->map = pci_p2pdma_map_type(p2p_pgmap->mem, dev);
 }
-EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs);
 
 /**
  * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store
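With pci_p2pdma_map_type() public and keyed on the provider rather than the pagemap, a consumer caches the verdict per provider, as __pci_p2pdma_update_state() does above, and then branches the same way the removed pci_p2pdma_map_sg_attrs() did. A condensed, hypothetical sketch of that consumer side (the foo_ helper is not part of this patch):

static dma_addr_t foo_map_p2p_page(struct device *dev,
				   struct p2pdma_provider *provider,
				   struct page *page)
{
	switch (pci_p2pdma_map_type(provider, dev)) {
	case PCI_P2PDMA_MAP_BUS_ADDR:
		/* direct peer traffic: bus address, bypasses the IOMMU */
		return page_to_phys(page) + provider->bus_offset;
	case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
		/* routed through the host bridge: ordinary DMA mapping */
		return dma_map_page(dev, page, 0, PAGE_SIZE,
				    DMA_BIDIRECTIONAL);
	default:
		return DMA_MAPPING_ERROR;
	}
}

As with any dma_map_page() result, the caller is expected to check the returned address with dma_mapping_error() before use.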
