Diffstat (limited to 'drivers/dma-buf')
-rw-r--r--  drivers/dma-buf/Kconfig               |   2
-rw-r--r--  drivers/dma-buf/dma-buf.c             | 127
-rw-r--r--  drivers/dma-buf/dma-fence-array.c     | 112
-rw-r--r--  drivers/dma-buf/dma-fence-unwrap.c    | 126
-rw-r--r--  drivers/dma-buf/dma-fence.c           |  18
-rw-r--r--  drivers/dma-buf/dma-heap.c            |  31
-rw-r--r--  drivers/dma-buf/dma-resv.c            |  11
-rw-r--r--  drivers/dma-buf/heaps/cma_heap.c      |  16
-rw-r--r--  drivers/dma-buf/heaps/system_heap.c   |   6
-rw-r--r--  drivers/dma-buf/st-dma-fence-chain.c  |   6
-rw-r--r--  drivers/dma-buf/st-dma-fence.c        |   8
-rw-r--r--  drivers/dma-buf/sw_sync.c             |   6
-rw-r--r--  drivers/dma-buf/sync_debug.c          |   4
-rw-r--r--  drivers/dma-buf/sync_trace.h          |   2
-rw-r--r--  drivers/dma-buf/udmabuf.c             | 338
15 files changed, 542 insertions, 271 deletions
diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig index e4dc53a36428..fee04fdb0822 100644 --- a/drivers/dma-buf/Kconfig +++ b/drivers/dma-buf/Kconfig @@ -35,6 +35,8 @@ config UDMABUF default n depends on DMA_SHARED_BUFFER depends on MEMFD_CREATE || COMPILE_TEST + depends on MMU + select VMAP_PFN help A driver to let userspace turn memfd regions into dma-bufs. Qemu can use this to create host dmabufs for guest framebuffers. diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 8fe5aa67b167..5baa83b85515 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -35,12 +35,35 @@ static inline int is_dma_buf_file(struct file *); -struct dma_buf_list { - struct list_head head; - struct mutex lock; -}; +#if IS_ENABLED(CONFIG_DEBUG_FS) +static DEFINE_MUTEX(debugfs_list_mutex); +static LIST_HEAD(debugfs_list); + +static void __dma_buf_debugfs_list_add(struct dma_buf *dmabuf) +{ + mutex_lock(&debugfs_list_mutex); + list_add(&dmabuf->list_node, &debugfs_list); + mutex_unlock(&debugfs_list_mutex); +} + +static void __dma_buf_debugfs_list_del(struct dma_buf *dmabuf) +{ + if (!dmabuf) + return; -static struct dma_buf_list db_list; + mutex_lock(&debugfs_list_mutex); + list_del(&dmabuf->list_node); + mutex_unlock(&debugfs_list_mutex); +} +#else +static void __dma_buf_debugfs_list_add(struct dma_buf *dmabuf) +{ +} + +static void __dma_buf_debugfs_list_del(struct dma_buf *dmabuf) +{ +} +#endif static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) { @@ -89,17 +112,10 @@ static void dma_buf_release(struct dentry *dentry) static int dma_buf_file_release(struct inode *inode, struct file *file) { - struct dma_buf *dmabuf; - if (!is_dma_buf_file(file)) return -EINVAL; - dmabuf = file->private_data; - if (dmabuf) { - mutex_lock(&db_list.lock); - list_del(&dmabuf->list_node); - mutex_unlock(&db_list.lock); - } + __dma_buf_debugfs_list_del(file->private_data); return 0; } @@ -160,8 +176,9 @@ static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence) dmabuf = file->private_data; /* only support discovering the end of the buffer, - but also allow SEEK_SET to maintain the idiomatic - SEEK_END(0), SEEK_CUR(0) pattern */ + * but also allow SEEK_SET to maintain the idiomatic + * SEEK_END(0), SEEK_CUR(0) pattern. + */ if (whence == SEEK_END) base = dmabuf->size; else if (whence == SEEK_SET) @@ -542,7 +559,7 @@ static struct file *dma_buf_getfile(size_t size, int flags) * Override ->i_ino with the unique and dmabuffs specific * value. 
*/ - inode->i_ino = atomic64_add_return(1, &dmabuf_inode); + inode->i_ino = atomic64_inc_return(&dmabuf_inode); flags &= O_ACCMODE | O_NONBLOCK; file = alloc_file_pseudo(inode, dma_buf_mnt, "dmabuf", flags, &dma_buf_fops); @@ -672,9 +689,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) file->f_path.dentry->d_fsdata = dmabuf; dmabuf->file = file; - mutex_lock(&db_list.lock); - list_add(&dmabuf->list_node, &db_list.head); - mutex_unlock(&db_list.lock); + __dma_buf_debugfs_list_add(dmabuf); return dmabuf; @@ -688,7 +703,7 @@ err_module: module_put(exp_info->owner); return ERR_PTR(ret); } -EXPORT_SYMBOL_NS_GPL(dma_buf_export, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_export, "DMA_BUF"); /** * dma_buf_fd - returns a file descriptor for the given struct dma_buf @@ -712,7 +727,7 @@ int dma_buf_fd(struct dma_buf *dmabuf, int flags) return fd; } -EXPORT_SYMBOL_NS_GPL(dma_buf_fd, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_fd, "DMA_BUF"); /** * dma_buf_get - returns the struct dma_buf related to an fd @@ -738,7 +753,7 @@ struct dma_buf *dma_buf_get(int fd) return file->private_data; } -EXPORT_SYMBOL_NS_GPL(dma_buf_get, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_get, "DMA_BUF"); /** * dma_buf_put - decreases refcount of the buffer @@ -757,7 +772,7 @@ void dma_buf_put(struct dma_buf *dmabuf) fput(dmabuf->file); } -EXPORT_SYMBOL_NS_GPL(dma_buf_put, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_put, "DMA_BUF"); static void mangle_sg_table(struct sg_table *sg_table) { @@ -768,13 +783,14 @@ static void mangle_sg_table(struct sg_table *sg_table) /* To catch abuse of the underlying struct page by importers mix * up the bits, but take care to preserve the low SG_ bits to * not corrupt the sgt. The mixing is undone in __unmap_dma_buf - * before passing the sgt back to the exporter. */ + * before passing the sgt back to the exporter. 
+ */ for_each_sgtable_sg(sg_table, sg, i) sg->page_link ^= ~0xffUL; #endif } -static struct sg_table * __map_dma_buf(struct dma_buf_attachment *attach, +static struct sg_table *__map_dma_buf(struct dma_buf_attachment *attach, enum dma_data_direction direction) { struct sg_table *sg_table; @@ -962,7 +978,7 @@ err_unlock: dma_buf_detach(dmabuf, attach); return ERR_PTR(ret); } -EXPORT_SYMBOL_NS_GPL(dma_buf_dynamic_attach, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_dynamic_attach, "DMA_BUF"); /** * dma_buf_attach - Wrapper for dma_buf_dynamic_attach @@ -977,7 +993,7 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, { return dma_buf_dynamic_attach(dmabuf, dev, NULL, NULL); } -EXPORT_SYMBOL_NS_GPL(dma_buf_attach, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_attach, "DMA_BUF"); static void __unmap_dma_buf(struct dma_buf_attachment *attach, struct sg_table *sg_table, @@ -1021,7 +1037,7 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) kfree(attach); } -EXPORT_SYMBOL_NS_GPL(dma_buf_detach, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_detach, "DMA_BUF"); /** * dma_buf_pin - Lock down the DMA-buf @@ -1051,7 +1067,7 @@ int dma_buf_pin(struct dma_buf_attachment *attach) return ret; } -EXPORT_SYMBOL_NS_GPL(dma_buf_pin, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_pin, "DMA_BUF"); /** * dma_buf_unpin - Unpin a DMA-buf @@ -1072,7 +1088,7 @@ void dma_buf_unpin(struct dma_buf_attachment *attach) if (dmabuf->ops->unpin) dmabuf->ops->unpin(attach); } -EXPORT_SYMBOL_NS_GPL(dma_buf_unpin, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_unpin, "DMA_BUF"); /** * dma_buf_map_attachment - Returns the scatterlist table of the attachment; @@ -1160,7 +1176,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, #endif /* CONFIG_DMA_API_DEBUG */ return sg_table; } -EXPORT_SYMBOL_NS_GPL(dma_buf_map_attachment, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_map_attachment, "DMA_BUF"); /** * dma_buf_map_attachment_unlocked - Returns the scatterlist table of the attachment; @@ -1188,7 +1204,7 @@ dma_buf_map_attachment_unlocked(struct dma_buf_attachment *attach, return sg_table; } -EXPORT_SYMBOL_NS_GPL(dma_buf_map_attachment_unlocked, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_map_attachment_unlocked, "DMA_BUF"); /** * dma_buf_unmap_attachment - unmaps and decreases usecount of the buffer;might @@ -1220,7 +1236,7 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach, !IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) dma_buf_unpin(attach); } -EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment, "DMA_BUF"); /** * dma_buf_unmap_attachment_unlocked - unmaps and decreases usecount of the buffer;might @@ -1245,7 +1261,7 @@ void dma_buf_unmap_attachment_unlocked(struct dma_buf_attachment *attach, dma_buf_unmap_attachment(attach, sg_table, direction); dma_resv_unlock(attach->dmabuf->resv); } -EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment_unlocked, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment_unlocked, "DMA_BUF"); /** * dma_buf_move_notify - notify attachments that DMA-buf is moving @@ -1265,7 +1281,7 @@ void dma_buf_move_notify(struct dma_buf *dmabuf) if (attach->importer_ops) attach->importer_ops->move_notify(attach); } -EXPORT_SYMBOL_NS_GPL(dma_buf_move_notify, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_move_notify, "DMA_BUF"); /** * DOC: cpu access @@ -1282,10 +1298,12 @@ EXPORT_SYMBOL_NS_GPL(dma_buf_move_notify, DMA_BUF); * vmap interface is introduced. 
Note that on very old 32-bit architectures * vmalloc space might be limited and result in vmap calls failing. * - * Interfaces:: + * Interfaces: * - * void \*dma_buf_vmap(struct dma_buf \*dmabuf, struct iosys_map \*map) - * void dma_buf_vunmap(struct dma_buf \*dmabuf, struct iosys_map \*map) + * .. code-block:: c + * + * void *dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map) + * void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map) * * The vmap call can fail if there is no vmap support in the exporter, or if * it runs out of vmalloc space. Note that the dma-buf layer keeps a reference @@ -1342,10 +1360,11 @@ EXPORT_SYMBOL_NS_GPL(dma_buf_move_notify, DMA_BUF); * enough, since adding interfaces to intercept pagefaults and allow pte * shootdowns would increase the complexity quite a bit. * - * Interface:: + * Interface: + * + * .. code-block:: c * - * int dma_buf_mmap(struct dma_buf \*, struct vm_area_struct \*, - * unsigned long); + * int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, unsigned long); * * If the importing subsystem simply provides a special-purpose mmap call to * set up a mapping in userspace, calling do_mmap with &dma_buf.file will @@ -1410,7 +1429,7 @@ int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, return ret; } -EXPORT_SYMBOL_NS_GPL(dma_buf_begin_cpu_access, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_begin_cpu_access, "DMA_BUF"); /** * dma_buf_end_cpu_access - Must be called after accessing a dma_buf from the @@ -1438,7 +1457,7 @@ int dma_buf_end_cpu_access(struct dma_buf *dmabuf, return ret; } -EXPORT_SYMBOL_NS_GPL(dma_buf_end_cpu_access, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_end_cpu_access, "DMA_BUF"); /** @@ -1480,7 +1499,7 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma, return dmabuf->ops->mmap(dmabuf, vma); } -EXPORT_SYMBOL_NS_GPL(dma_buf_mmap, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_mmap, "DMA_BUF"); /** * dma_buf_vmap - Create virtual mapping for the buffer object into kernel @@ -1533,7 +1552,7 @@ int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map) return 0; } -EXPORT_SYMBOL_NS_GPL(dma_buf_vmap, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_vmap, "DMA_BUF"); /** * dma_buf_vmap_unlocked - Create virtual mapping for the buffer object into kernel @@ -1560,7 +1579,7 @@ int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map) return ret; } -EXPORT_SYMBOL_NS_GPL(dma_buf_vmap_unlocked, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_vmap_unlocked, "DMA_BUF"); /** * dma_buf_vunmap - Unmap a vmap obtained by dma_buf_vmap. @@ -1584,7 +1603,7 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map) iosys_map_clear(&dmabuf->vmap_ptr); } } -EXPORT_SYMBOL_NS_GPL(dma_buf_vunmap, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_vunmap, "DMA_BUF"); /** * dma_buf_vunmap_unlocked - Unmap a vmap obtained by dma_buf_vmap. 
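The EXPORT_SYMBOL_NS_GPL() hunks above move the DMA_BUF symbol namespace from a bare identifier to a string literal. As a rough consumer-side sketch (module and function names are invented, and the matching string-literal MODULE_IMPORT_NS() form is assumed), an importer would pair with this as follows:

#include <linux/dma-buf.h>
#include <linux/err.h>
#include <linux/module.h>

/* Without this import, modpost rejects references to the namespaced
 * dma-buf exports (dma_buf_get(), dma_buf_put(), ...). */
MODULE_IMPORT_NS("DMA_BUF");

static int demo_hold_dmabuf(int fd)
{
	struct dma_buf *buf = dma_buf_get(fd);	/* namespaced export */

	if (IS_ERR(buf))
		return PTR_ERR(buf);

	/* ... use the buffer ... */
	dma_buf_put(buf);			/* also namespaced */
	return 0;
}

MODULE_LICENSE("GPL");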
@@ -1600,7 +1619,7 @@ void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map) dma_buf_vunmap(dmabuf, map); dma_resv_unlock(dmabuf->resv); } -EXPORT_SYMBOL_NS_GPL(dma_buf_vunmap_unlocked, DMA_BUF); +EXPORT_SYMBOL_NS_GPL(dma_buf_vunmap_unlocked, "DMA_BUF"); #ifdef CONFIG_DEBUG_FS static int dma_buf_debug_show(struct seq_file *s, void *unused) @@ -1611,7 +1630,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) size_t size = 0; int ret; - ret = mutex_lock_interruptible(&db_list.lock); + ret = mutex_lock_interruptible(&debugfs_list_mutex); if (ret) return ret; @@ -1620,7 +1639,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\t%-8s\tname\n", "size", "flags", "mode", "count", "ino"); - list_for_each_entry(buf_obj, &db_list.head, list_node) { + list_for_each_entry(buf_obj, &debugfs_list, list_node) { ret = dma_resv_lock_interruptible(buf_obj->resv, NULL); if (ret) @@ -1657,11 +1676,11 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) seq_printf(s, "\nTotal %d objects, %zu bytes\n", count, size); - mutex_unlock(&db_list.lock); + mutex_unlock(&debugfs_list_mutex); return 0; error_unlock: - mutex_unlock(&db_list.lock); + mutex_unlock(&debugfs_list_mutex); return ret; } @@ -1680,7 +1699,7 @@ static int dma_buf_init_debugfs(void) dma_buf_debugfs_dir = d; - d = debugfs_create_file("bufinfo", S_IRUGO, dma_buf_debugfs_dir, + d = debugfs_create_file("bufinfo", 0444, dma_buf_debugfs_dir, NULL, &dma_buf_debug_fops); if (IS_ERR(d)) { pr_debug("dma_buf: debugfs: failed to create node bufinfo\n"); @@ -1718,8 +1737,6 @@ static int __init dma_buf_init(void) if (IS_ERR(dma_buf_mnt)) return PTR_ERR(dma_buf_mnt); - mutex_init(&db_list.lock); - INIT_LIST_HEAD(&db_list.head); dma_buf_init_debugfs(); return 0; } diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 9b3ce8948351..6657d4b30af9 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -70,7 +70,7 @@ static void dma_fence_array_cb_func(struct dma_fence *f, static bool dma_fence_array_enable_signaling(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - struct dma_fence_array_cb *cb = (void *)(&array[1]); + struct dma_fence_array_cb *cb = array->callbacks; unsigned i; for (i = 0; i < array->num_fences; ++i) { @@ -103,10 +103,36 @@ static bool dma_fence_array_enable_signaling(struct dma_fence *fence) static bool dma_fence_array_signaled(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); + int num_pending; + unsigned int i; - if (atomic_read(&array->num_pending) > 0) + /* + * We need to read num_pending before checking the enable_signal bit + * to avoid racing with the enable_signaling() implementation, which + * might decrement the counter, and cause a partial check. + * atomic_read_acquire() pairs with atomic_dec_and_test() in + * dma_fence_array_enable_signaling() + * + * The !--num_pending check is here to account for the any_signaled case + * if we race with enable_signaling(), that means the !num_pending check + * in the is_signalling_enabled branch might be outdated (num_pending + * might have been decremented), but that's fine. The user will get the + * right value when testing again later. 
+ */ + num_pending = atomic_read_acquire(&array->num_pending); + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &array->base.flags)) { + if (num_pending <= 0) + goto signal; return false; + } + for (i = 0; i < array->num_fences; ++i) { + if (dma_fence_is_signaled(array->fences[i]) && !--num_pending) + goto signal; + } + return false; + +signal: dma_fence_array_clear_pending_error(array); return true; } @@ -144,46 +170,45 @@ const struct dma_fence_ops dma_fence_array_ops = { EXPORT_SYMBOL(dma_fence_array_ops); /** - * dma_fence_array_create - Create a custom fence array + * dma_fence_array_alloc - Allocate a custom fence array + * @num_fences: [in] number of fences to add in the array + * + * Return dma fence array on success, NULL on failure + */ +struct dma_fence_array *dma_fence_array_alloc(int num_fences) +{ + struct dma_fence_array *array; + + return kzalloc(struct_size(array, callbacks, num_fences), GFP_KERNEL); +} +EXPORT_SYMBOL(dma_fence_array_alloc); + +/** + * dma_fence_array_init - Init a custom fence array + * @array: [in] dma fence array to arm * @num_fences: [in] number of fences to add in the array * @fences: [in] array containing the fences * @context: [in] fence context to use * @seqno: [in] sequence number to use * @signal_on_any: [in] signal on any fence in the array * - * Allocate a dma_fence_array object and initialize the base fence with - * dma_fence_init(). - * In case of error it returns NULL. - * - * The caller should allocate the fences array with num_fences size - * and fill it with the fences it wants to add to the object. Ownership of this - * array is taken and dma_fence_put() is used on each fence on release. - * - * If @signal_on_any is true the fence array signals if any fence in the array - * signals, otherwise it signals when all fences in the array signal. + * Implementation of @dma_fence_array_create without allocation. Useful to init + * a preallocated dma fence array in the path of reclaim or dma fence signaling. */ -struct dma_fence_array *dma_fence_array_create(int num_fences, - struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any) +void dma_fence_array_init(struct dma_fence_array *array, + int num_fences, struct dma_fence **fences, + u64 context, unsigned seqno, + bool signal_on_any) { - struct dma_fence_array *array; - size_t size = sizeof(*array); - WARN_ON(!num_fences || !fences); - /* Allocate the callback structures behind the array. */ - size += num_fences * sizeof(struct dma_fence_array_cb); - array = kzalloc(size, GFP_KERNEL); - if (!array) - return NULL; + array->num_fences = num_fences; spin_lock_init(&array->lock); dma_fence_init(&array->base, &dma_fence_array_ops, &array->lock, context, seqno); init_irq_work(&array->work, irq_dma_fence_array_work); - array->num_fences = num_fences; atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences); array->fences = fences; @@ -202,6 +227,41 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, */ while (num_fences--) WARN_ON(dma_fence_is_container(fences[num_fences])); +} +EXPORT_SYMBOL(dma_fence_array_init); + +/** + * dma_fence_array_create - Create a custom fence array + * @num_fences: [in] number of fences to add in the array + * @fences: [in] array containing the fences + * @context: [in] fence context to use + * @seqno: [in] sequence number to use + * @signal_on_any: [in] signal on any fence in the array + * + * Allocate a dma_fence_array object and initialize the base fence with + * dma_fence_init(). + * In case of error it returns NULL. 
+ * + * The caller should allocate the fences array with num_fences size + * and fill it with the fences it wants to add to the object. Ownership of this + * array is taken and dma_fence_put() is used on each fence on release. + * + * If @signal_on_any is true the fence array signals if any fence in the array + * signals, otherwise it signals when all fences in the array signal. + */ +struct dma_fence_array *dma_fence_array_create(int num_fences, + struct dma_fence **fences, + u64 context, unsigned seqno, + bool signal_on_any) +{ + struct dma_fence_array *array; + + array = dma_fence_array_alloc(num_fences); + if (!array) + return NULL; + + dma_fence_array_init(array, num_fences, fences, + context, seqno, signal_on_any); return array; } diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index 628af51c81af..6345062731f1 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -12,6 +12,7 @@ #include <linux/dma-fence-chain.h> #include <linux/dma-fence-unwrap.h> #include <linux/slab.h> +#include <linux/sort.h> /* Internal helper to start new array iteration, don't use directly */ static struct dma_fence * @@ -59,6 +60,25 @@ struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor) } EXPORT_SYMBOL_GPL(dma_fence_unwrap_next); + +static int fence_cmp(const void *_a, const void *_b) +{ + struct dma_fence *a = *(struct dma_fence **)_a; + struct dma_fence *b = *(struct dma_fence **)_b; + + if (a->context < b->context) + return -1; + else if (a->context > b->context) + return 1; + + if (dma_fence_is_later(b, a)) + return 1; + else if (dma_fence_is_later(a, b)) + return -1; + + return 0; +} + /* Implementation for the dma_fence_merge() marco, don't use directly */ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence **fences, @@ -67,8 +87,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence_array *result; struct dma_fence *tmp, **array; ktime_t timestamp; - unsigned int i; - size_t count; + int i, j, count; count = 0; timestamp = ns_to_ktime(0); @@ -96,78 +115,55 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, if (!array) return NULL; - /* - * This trashes the input fence array and uses it as position for the - * following merge loop. This works because the dma_fence_merge() - * wrapper macro is creating this temporary array on the stack together - * with the iterators. - */ - for (i = 0; i < num_fences; ++i) - fences[i] = dma_fence_unwrap_first(fences[i], &iter[i]); - count = 0; - do { - unsigned int sel; - -restart: - tmp = NULL; - for (i = 0; i < num_fences; ++i) { - struct dma_fence *next; - - while (fences[i] && dma_fence_is_signaled(fences[i])) - fences[i] = dma_fence_unwrap_next(&iter[i]); - - next = fences[i]; - if (!next) - continue; - - /* - * We can't guarantee that inpute fences are ordered by - * context, but it is still quite likely when this - * function is used multiple times. So attempt to order - * the fences by context as we pass over them and merge - * fences with the same context. 
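Splitting dma_fence_array_create() into dma_fence_array_alloc() and dma_fence_array_init() lets a driver allocate up front and only arm the array later, e.g. from a path where allocating memory is not allowed. A minimal sketch of that two-phase use, with invented helper names:

#include <linux/dma-fence-array.h>

/* Phase 1: allocate while GFP_KERNEL allocations are still allowed. */
static struct dma_fence_array *demo_prepare(int num_fences)
{
	return dma_fence_array_alloc(num_fences);
}

/* Phase 2: arm the preallocated array, e.g. from the signalling path,
 * where no memory may be allocated. Ownership of @fences moves to the
 * array and each fence is dropped with dma_fence_put() on release. */
static struct dma_fence *demo_arm(struct dma_fence_array *array,
				  struct dma_fence **fences, int num_fences)
{
	dma_fence_array_init(array, num_fences, fences,
			     dma_fence_context_alloc(1), 1,
			     false /* signal once all fences signal */);
	return &array->base;
}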
- */ - if (!tmp || tmp->context > next->context) { - tmp = next; - sel = i; - - } else if (tmp->context < next->context) { - continue; - - } else if (dma_fence_is_later(tmp, next)) { - fences[i] = dma_fence_unwrap_next(&iter[i]); - goto restart; + for (i = 0; i < num_fences; ++i) { + dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) { + if (!dma_fence_is_signaled(tmp)) { + array[count++] = dma_fence_get(tmp); } else { - fences[sel] = dma_fence_unwrap_next(&iter[sel]); - goto restart; + ktime_t t = dma_fence_timestamp(tmp); + + if (ktime_after(t, timestamp)) + timestamp = t; } } + } - if (tmp) { - array[count++] = dma_fence_get(tmp); - fences[sel] = dma_fence_unwrap_next(&iter[sel]); - } - } while (tmp); + if (count == 0 || count == 1) + goto return_fastpath; - if (count == 0) { - tmp = dma_fence_allocate_private_stub(ktime_get()); - goto return_tmp; - } + sort(array, count, sizeof(*array), fence_cmp, NULL); - if (count == 1) { - tmp = array[0]; - goto return_tmp; + /* + * Only keep the most recent fence for each context. + */ + j = 0; + for (i = 1; i < count; i++) { + if (array[i]->context == array[j]->context) + dma_fence_put(array[i]); + else + array[++j] = array[i]; } - - result = dma_fence_array_create(count, array, - dma_fence_context_alloc(1), - 1, false); - if (!result) { - tmp = NULL; - goto return_tmp; + count = ++j; + + if (count > 1) { + result = dma_fence_array_create(count, array, + dma_fence_context_alloc(1), + 1, false); + if (!result) { + for (i = 0; i < count; i++) + dma_fence_put(array[i]); + tmp = NULL; + goto return_tmp; + } + return &result->base; } - return &result->base; + +return_fastpath: + if (count == 0) + tmp = dma_fence_allocate_private_stub(timestamp); + else + tmp = array[0]; return_tmp: kfree(array); diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index e0fd99e61a2d..f0cdd3e99d36 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -102,7 +102,7 @@ static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1); * * * Drivers are allowed to call dma_fence_wait() from their &mmu_notifier * respectively &mmu_interval_notifier callbacks. This means any code required - * for fence completeion cannot allocate memory with GFP_NOFS or GFP_NOIO. + * for fence completion cannot allocate memory with GFP_NOFS or GFP_NOIO. * Only GFP_ATOMIC is permissible, which might fail. * * Note that only GPU drivers have a reasonable excuse for both requiring @@ -309,8 +309,8 @@ bool dma_fence_begin_signalling(void) if (in_atomic()) return true; - /* ... and non-recursive readlock */ - lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _RET_IP_); + /* ... 
and non-recursive successful read_trylock */ + lock_acquire(&dma_fence_lockdep_map, 0, 1, 1, 1, NULL, _RET_IP_); return false; } @@ -341,7 +341,7 @@ void __dma_fence_might_wait(void) lock_map_acquire(&dma_fence_lockdep_map); lock_map_release(&dma_fence_lockdep_map); if (tmp) - lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _THIS_IP_); + lock_acquire(&dma_fence_lockdep_map, 0, 1, 1, 1, NULL, _THIS_IP_); } #endif @@ -412,7 +412,7 @@ int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp) unsigned long flags; int ret; - if (!fence) + if (WARN_ON(!fence)) return -EINVAL; spin_lock_irqsave(fence->lock, flags); @@ -464,7 +464,7 @@ int dma_fence_signal(struct dma_fence *fence) int ret; bool tmp; - if (!fence) + if (WARN_ON(!fence)) return -EINVAL; tmp = dma_fence_begin_signalling(); @@ -522,7 +522,7 @@ dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout) EXPORT_SYMBOL(dma_fence_wait_timeout); /** - * dma_fence_release - default relese function for fences + * dma_fence_release - default release function for fences * @kref: &dma_fence.recfount * * This is the default release functions for &dma_fence. Drivers shouldn't call @@ -974,8 +974,8 @@ void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline) EXPORT_SYMBOL(dma_fence_set_deadline); /** - * dma_fence_describe - Dump fence describtion into seq_file - * @fence: the 6fence to describe + * dma_fence_describe - Dump fence description into seq_file + * @fence: the fence to describe * @seq: the seq_file to put the textual description into * * Dump a textual description of the fence and it's state into the seq_file. diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c index 84ae708fafe7..3cbe87d4a464 100644 --- a/drivers/dma-buf/dma-heap.c +++ b/drivers/dma-buf/dma-heap.c @@ -7,17 +7,15 @@ */ #include <linux/cdev.h> -#include <linux/debugfs.h> #include <linux/device.h> #include <linux/dma-buf.h> +#include <linux/dma-heap.h> #include <linux/err.h> -#include <linux/xarray.h> #include <linux/list.h> -#include <linux/slab.h> #include <linux/nospec.h> -#include <linux/uaccess.h> #include <linux/syscalls.h> -#include <linux/dma-heap.h> +#include <linux/uaccess.h> +#include <linux/xarray.h> #include <uapi/linux/dma-heap.h> #define DEVNAME "dma_heap" @@ -28,9 +26,10 @@ * struct dma_heap - represents a dmabuf heap in the system * @name: used for debugging/device-node name * @ops: ops struct for this heap - * @heap_devt heap device node - * @list list head connecting to list of heaps - * @heap_cdev heap char device + * @priv: private data for this heap + * @heap_devt: heap device node + * @list: list head connecting to list of heaps + * @heap_cdev: heap char device * * Represents a heap of memory from which buffers can be made. */ @@ -50,8 +49,8 @@ static struct class *dma_heap_class; static DEFINE_XARRAY_ALLOC(dma_heap_minors); static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len, - unsigned int fd_flags, - unsigned int heap_flags) + u32 fd_flags, + u64 heap_flags) { struct dma_buf *dmabuf; int fd; @@ -193,11 +192,11 @@ static const struct file_operations dma_heap_fops = { }; /** - * dma_heap_get_drvdata() - get per-subdriver data for the heap + * dma_heap_get_drvdata - get per-heap driver data * @heap: DMA-Heap to retrieve private data for * * Returns: - * The per-subdriver data for the heap. + * The per-heap data for the heap. 
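dma_heap_buffer_alloc() now takes u32 fd_flags and u64 heap_flags, and the heap drivers further down are adjusted to match. A bare-bones sketch of the callback shape a hypothetical heap driver would provide against the updated prototype (the "demo" heap is made up):

#include <linux/dma-buf.h>
#include <linux/dma-heap.h>
#include <linux/err.h>
#include <linux/module.h>

static struct dma_buf *demo_heap_allocate(struct dma_heap *heap,
					  unsigned long len,
					  u32 fd_flags,
					  u64 heap_flags)
{
	/* Allocate backing memory, fill in a dma_buf_export_info, ... */
	return ERR_PTR(-ENOMEM);	/* placeholder */
}

static const struct dma_heap_ops demo_heap_ops = {
	.allocate = demo_heap_allocate,
};

static int __init demo_heap_init(void)
{
	struct dma_heap_export_info exp_info = {
		.name = "demo",
		.ops  = &demo_heap_ops,
	};

	return PTR_ERR_OR_ZERO(dma_heap_add(&exp_info));
}
module_init(demo_heap_init);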
*/ void *dma_heap_get_drvdata(struct dma_heap *heap) { @@ -205,8 +204,8 @@ void *dma_heap_get_drvdata(struct dma_heap *heap) } /** - * dma_heap_get_name() - get heap name - * @heap: DMA-Heap to retrieve private data for + * dma_heap_get_name - get heap name + * @heap: DMA-Heap to retrieve the name of * * Returns: * The char* for the heap name. @@ -216,6 +215,10 @@ const char *dma_heap_get_name(struct dma_heap *heap) return heap->name; } +/** + * dma_heap_add - adds a heap to dmabuf heaps + * @exp_info: information needed to register this heap + */ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) { struct dma_heap *heap, *h, *err_ret; diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index eb8b733065b2..5f8d010516f0 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -186,6 +186,13 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences) dma_resv_assert_held(obj); + /* Driver and component code should never call this function with + * num_fences=0. If they do it usually points to bugs when calculating + * the number of needed fences dynamically. + */ + if (WARN_ON(!num_fences)) + return -EINVAL; + old = dma_resv_fences_list(obj); if (old && old->max_fences) { if ((old->num_fences + num_fences) <= old->max_fences) @@ -405,7 +412,7 @@ static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor) * * Beware that the iterator can be restarted. Code which accumulates statistics * or similar needs to check for this with dma_resv_iter_is_restarted(). For - * this reason prefer the locked dma_resv_iter_first() whenver possible. + * this reason prefer the locked dma_resv_iter_first() whenever possible. * * Returns the first fence from an unlocked dma_resv obj. */ @@ -428,7 +435,7 @@ EXPORT_SYMBOL(dma_resv_iter_first_unlocked); * * Beware that the iterator can be restarted. Code which accumulates statistics * or similar needs to check for this with dma_resv_iter_is_restarted(). For - * this reason prefer the locked dma_resv_iter_next() whenver possible. + * this reason prefer the locked dma_resv_iter_next() whenever possible. * * Returns the next fence from an unlocked dma_resv obj. */ diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index 4a63567e93ba..9512d050563a 100644 --- a/drivers/dma-buf/heaps/cma_heap.c +++ b/drivers/dma-buf/heaps/cma_heap.c @@ -165,7 +165,7 @@ static vm_fault_t cma_heap_vm_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; struct cma_heap_buffer *buffer = vma->vm_private_data; - if (vmf->pgoff > buffer->pagecount) + if (vmf->pgoff >= buffer->pagecount) return VM_FAULT_SIGBUS; return vmf_insert_pfn(vma, vmf->address, page_to_pfn(buffer->pages[vmf->pgoff])); @@ -274,8 +274,8 @@ static const struct dma_buf_ops cma_heap_buf_ops = { static struct dma_buf *cma_heap_allocate(struct dma_heap *heap, unsigned long len, - unsigned long fd_flags, - unsigned long heap_flags) + u32 fd_flags, + u64 heap_flags) { struct cma_heap *cma_heap = dma_heap_get_drvdata(heap); struct cma_heap_buffer *buffer; @@ -309,13 +309,13 @@ static struct dma_buf *cma_heap_allocate(struct dma_heap *heap, struct page *page = cma_pages; while (nr_clear_pages > 0) { - void *vaddr = kmap_atomic(page); + void *vaddr = kmap_local_page(page); memset(vaddr, 0, PAGE_SIZE); - kunmap_atomic(vaddr); + kunmap_local(vaddr); /* * Avoid wasting time zeroing memory if the process - * has been killed by by SIGKILL + * has been killed by SIGKILL. 
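The new WARN_ON(!num_fences) in dma_resv_reserve_fences() makes explicit that callers must reserve at least one slot before adding a fence. The usual reserve-then-add pattern, sketched with an invented helper and trimmed error handling:

#include <linux/dma-resv.h>

static int demo_attach_fence(struct dma_resv *resv, struct dma_fence *fence)
{
	int ret;

	ret = dma_resv_lock(resv, NULL);
	if (ret)
		return ret;

	/* Reserve a slot first; passing 0 here now triggers a WARN. */
	ret = dma_resv_reserve_fences(resv, 1);
	if (!ret)
		dma_resv_add_fence(resv, fence, DMA_RESV_USAGE_WRITE);

	dma_resv_unlock(resv);
	return ret;
}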
*/ if (fatal_signal_pending(current)) goto free_cma; @@ -366,7 +366,7 @@ static const struct dma_heap_ops cma_heap_ops = { .allocate = cma_heap_allocate, }; -static int __add_cma_heap(struct cma *cma, void *data) +static int __init __add_cma_heap(struct cma *cma, void *data) { struct cma_heap *cma_heap; struct dma_heap_export_info exp_info; @@ -391,7 +391,7 @@ static int __add_cma_heap(struct cma *cma, void *data) return 0; } -static int add_default_cma_heap(void) +static int __init add_default_cma_heap(void) { struct cma *default_cma = dev_get_cma_area(NULL); int ret = 0; diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c index 9076d47ed2ef..26d5dc89ea16 100644 --- a/drivers/dma-buf/heaps/system_heap.c +++ b/drivers/dma-buf/heaps/system_heap.c @@ -333,8 +333,8 @@ static struct page *alloc_largest_available(unsigned long size, static struct dma_buf *system_heap_allocate(struct dma_heap *heap, unsigned long len, - unsigned long fd_flags, - unsigned long heap_flags) + u32 fd_flags, + u64 heap_flags) { struct system_heap_buffer *buffer; DEFINE_DMA_BUF_EXPORT_INFO(exp_info); @@ -421,7 +421,7 @@ static const struct dma_heap_ops system_heap_ops = { .allocate = system_heap_allocate, }; -static int system_heap_create(void) +static int __init system_heap_create(void) { struct dma_heap_export_info exp_info; diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c index 9c2a0c082a76..ed4b323886e4 100644 --- a/drivers/dma-buf/st-dma-fence-chain.c +++ b/drivers/dma-buf/st-dma-fence-chain.c @@ -84,11 +84,11 @@ static int sanitycheck(void *arg) return -ENOMEM; chain = mock_chain(NULL, f, 1); - if (!chain) + if (chain) + dma_fence_enable_sw_signaling(chain); + else err = -ENOMEM; - dma_fence_enable_sw_signaling(chain); - dma_fence_signal(f); dma_fence_put(f); diff --git a/drivers/dma-buf/st-dma-fence.c b/drivers/dma-buf/st-dma-fence.c index b7c6f7ea9e0c..cf2ce3744ce6 100644 --- a/drivers/dma-buf/st-dma-fence.c +++ b/drivers/dma-buf/st-dma-fence.c @@ -402,7 +402,7 @@ static int test_wait_timeout(void *arg) if (dma_fence_wait_timeout(wt.f, false, 2) == -ETIME) { if (timer_pending(&wt.timer)) { - pr_notice("Timer did not fire within the jiffie!\n"); + pr_notice("Timer did not fire within the jiffy!\n"); err = 0; /* not our fault! 
*/ } else { pr_err("Wait reported incomplete after timeout\n"); @@ -540,6 +540,12 @@ static int race_signal_callback(void *arg) t[i].before = pass; t[i].task = kthread_run(thread_signal_callback, &t[i], "dma-fence:%d", i); + if (IS_ERR(t[i].task)) { + ret = PTR_ERR(t[i].task); + while (--i >= 0) + kthread_stop_put(t[i].task); + return ret; + } get_task_struct(t[i].task); } diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index c353029789cf..f5905d67dedb 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -173,11 +173,6 @@ static bool timeline_fence_signaled(struct dma_fence *fence) return !__dma_fence_is_later(fence->seqno, parent->value, fence->ops); } -static bool timeline_fence_enable_signaling(struct dma_fence *fence) -{ - return true; -} - static void timeline_fence_value_str(struct dma_fence *fence, char *str, int size) { @@ -211,7 +206,6 @@ static void timeline_fence_set_deadline(struct dma_fence *fence, ktime_t deadlin static const struct dma_fence_ops timeline_fence_ops = { .get_driver_name = timeline_fence_get_driver_name, .get_timeline_name = timeline_fence_get_timeline_name, - .enable_signaling = timeline_fence_enable_signaling, .signaled = timeline_fence_signaled, .release = timeline_fence_release, .fence_value_str = timeline_fence_value_str, diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c index 101394f16930..237bce21d1e7 100644 --- a/drivers/dma-buf/sync_debug.c +++ b/drivers/dma-buf/sync_debug.c @@ -110,12 +110,12 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj) seq_printf(s, "%s: %d\n", obj->name, obj->value); - spin_lock_irq(&obj->lock); + spin_lock(&obj->lock); /* Caller already disabled IRQ. */ list_for_each(pos, &obj->pt_list) { struct sync_pt *pt = container_of(pos, struct sync_pt, link); sync_print_fence(s, &pt->base, false); } - spin_unlock_irq(&obj->lock); + spin_unlock(&obj->lock); } static void sync_print_sync_file(struct seq_file *s, diff --git a/drivers/dma-buf/sync_trace.h b/drivers/dma-buf/sync_trace.h index 06e468a218ff..d71dcf954b8d 100644 --- a/drivers/dma-buf/sync_trace.h +++ b/drivers/dma-buf/sync_trace.h @@ -20,7 +20,7 @@ TRACE_EVENT(sync_timeline, ), TP_fast_assign( - __assign_str(name, timeline->name); + __assign_str(name); __entry->value = timeline->value; ), diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index c40645999648..cc7398cc17d6 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -10,6 +10,7 @@ #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/shmem_fs.h> +#include <linux/hugetlb.h> #include <linux/slab.h> #include <linux/udmabuf.h> #include <linux/vmalloc.h> @@ -25,9 +26,22 @@ MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is struct udmabuf { pgoff_t pagecount; - struct page **pages; + struct folio **folios; + + /** + * Unlike folios, pinned_folios is only used for unpin. + * So, nr_pinned is not the same to pagecount, the pinned_folios + * only set each folio which already pinned when udmabuf_create. + * Note that, since a folio may be pinned multiple times, each folio + * can be added to pinned_folios multiple times, depending on how many + * times the folio has been pinned when create. 
+ */ + pgoff_t nr_pinned; + struct folio **pinned_folios; + struct sg_table *sg; struct miscdevice *device; + pgoff_t *offsets; }; static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf) @@ -35,12 +49,44 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; struct udmabuf *ubuf = vma->vm_private_data; pgoff_t pgoff = vmf->pgoff; + unsigned long addr, pfn; + vm_fault_t ret; if (pgoff >= ubuf->pagecount) return VM_FAULT_SIGBUS; - vmf->page = ubuf->pages[pgoff]; - get_page(vmf->page); - return 0; + + pfn = folio_pfn(ubuf->folios[pgoff]); + pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT; + + ret = vmf_insert_pfn(vma, vmf->address, pfn); + if (ret & VM_FAULT_ERROR) + return ret; + + /* pre fault */ + pgoff = vma->vm_pgoff; + addr = vma->vm_start; + + for (; addr < vma->vm_end; pgoff++, addr += PAGE_SIZE) { + if (addr == vmf->address) + continue; + + if (WARN_ON(pgoff >= ubuf->pagecount)) + break; + + pfn = folio_pfn(ubuf->folios[pgoff]); + pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT; + + /** + * If the below vmf_insert_pfn() fails, we do not return an + * error here during this pre-fault step. However, an error + * will be returned if the failure occurs when the addr is + * truly accessed. + */ + if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR) + break; + } + + return ret; } static const struct vm_operations_struct udmabuf_vm_ops = { @@ -56,17 +102,36 @@ static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma) vma->vm_ops = &udmabuf_vm_ops; vma->vm_private_data = ubuf; + vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); return 0; } static int vmap_udmabuf(struct dma_buf *buf, struct iosys_map *map) { struct udmabuf *ubuf = buf->priv; + unsigned long *pfns; void *vaddr; + pgoff_t pg; dma_resv_assert_held(buf->resv); - vaddr = vm_map_ram(ubuf->pages, ubuf->pagecount, -1); + /** + * HVO may free tail pages, so just use pfn to map each folio + * into vmalloc area. 
+ */ + pfns = kvmalloc_array(ubuf->pagecount, sizeof(*pfns), GFP_KERNEL); + if (!pfns) + return -ENOMEM; + + for (pg = 0; pg < ubuf->pagecount; pg++) { + unsigned long pfn = folio_pfn(ubuf->folios[pg]); + + pfn += ubuf->offsets[pg] >> PAGE_SHIFT; + pfns[pg] = pfn; + } + + vaddr = vmap_pfn(pfns, ubuf->pagecount, PAGE_KERNEL); + kvfree(pfns); if (!vaddr) return -EINVAL; @@ -88,23 +153,30 @@ static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf, { struct udmabuf *ubuf = buf->priv; struct sg_table *sg; + struct scatterlist *sgl; + unsigned int i = 0; int ret; sg = kzalloc(sizeof(*sg), GFP_KERNEL); if (!sg) return ERR_PTR(-ENOMEM); - ret = sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->pagecount, - 0, ubuf->pagecount << PAGE_SHIFT, - GFP_KERNEL); + + ret = sg_alloc_table(sg, ubuf->pagecount, GFP_KERNEL); if (ret < 0) - goto err; + goto err_alloc; + + for_each_sg(sg->sgl, sgl, ubuf->pagecount, i) + sg_set_folio(sgl, ubuf->folios[i], PAGE_SIZE, + ubuf->offsets[i]); + ret = dma_map_sgtable(dev, sg, direction, 0); if (ret < 0) - goto err; + goto err_map; return sg; -err: +err_map: sg_free_table(sg); +err_alloc: kfree(sg); return ERR_PTR(ret); } @@ -130,18 +202,51 @@ static void unmap_udmabuf(struct dma_buf_attachment *at, return put_sg_table(at->dev, sg, direction); } +static void unpin_all_folios(struct udmabuf *ubuf) +{ + pgoff_t i; + + for (i = 0; i < ubuf->nr_pinned; ++i) + unpin_folio(ubuf->pinned_folios[i]); + + kvfree(ubuf->pinned_folios); +} + +static __always_inline int init_udmabuf(struct udmabuf *ubuf, pgoff_t pgcnt) +{ + ubuf->folios = kvmalloc_array(pgcnt, sizeof(*ubuf->folios), GFP_KERNEL); + if (!ubuf->folios) + return -ENOMEM; + + ubuf->offsets = kvcalloc(pgcnt, sizeof(*ubuf->offsets), GFP_KERNEL); + if (!ubuf->offsets) + return -ENOMEM; + + ubuf->pinned_folios = kvmalloc_array(pgcnt, + sizeof(*ubuf->pinned_folios), + GFP_KERNEL); + if (!ubuf->pinned_folios) + return -ENOMEM; + + return 0; +} + +static __always_inline void deinit_udmabuf(struct udmabuf *ubuf) +{ + unpin_all_folios(ubuf); + kvfree(ubuf->offsets); + kvfree(ubuf->folios); +} + static void release_udmabuf(struct dma_buf *buf) { struct udmabuf *ubuf = buf->priv; struct device *dev = ubuf->device->this_device; - pgoff_t pg; if (ubuf->sg) put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL); - for (pg = 0; pg < ubuf->pagecount; pg++) - put_page(ubuf->pages[pg]); - kfree(ubuf->pages); + deinit_udmabuf(ubuf); kfree(ubuf); } @@ -192,20 +297,96 @@ static const struct dma_buf_ops udmabuf_ops = { }; #define SEALS_WANTED (F_SEAL_SHRINK) -#define SEALS_DENIED (F_SEAL_WRITE) +#define SEALS_DENIED (F_SEAL_WRITE|F_SEAL_FUTURE_WRITE) + +static int check_memfd_seals(struct file *memfd) +{ + int seals; + + if (!shmem_file(memfd) && !is_file_hugepages(memfd)) + return -EBADFD; + + seals = memfd_fcntl(memfd, F_GET_SEALS, 0); + if (seals == -EINVAL) + return -EBADFD; + + if ((seals & SEALS_WANTED) != SEALS_WANTED || + (seals & SEALS_DENIED) != 0) + return -EINVAL; + + return 0; +} + +static struct dma_buf *export_udmabuf(struct udmabuf *ubuf, + struct miscdevice *device) +{ + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + + ubuf->device = device; + exp_info.ops = &udmabuf_ops; + exp_info.size = ubuf->pagecount << PAGE_SHIFT; + exp_info.priv = ubuf; + exp_info.flags = O_RDWR; + + return dma_buf_export(&exp_info); +} + +static long udmabuf_pin_folios(struct udmabuf *ubuf, struct file *memfd, + loff_t start, loff_t size, struct folio **folios) +{ + pgoff_t nr_pinned = ubuf->nr_pinned; + pgoff_t upgcnt = ubuf->pagecount; + u32 
cur_folio, cur_pgcnt; + pgoff_t pgoff, pgcnt; + long nr_folios; + loff_t end; + + pgcnt = size >> PAGE_SHIFT; + end = start + (pgcnt << PAGE_SHIFT) - 1; + nr_folios = memfd_pin_folios(memfd, start, end, folios, pgcnt, &pgoff); + if (nr_folios <= 0) + return nr_folios ? nr_folios : -EINVAL; + + cur_pgcnt = 0; + for (cur_folio = 0; cur_folio < nr_folios; ++cur_folio) { + pgoff_t subpgoff = pgoff; + size_t fsize = folio_size(folios[cur_folio]); + + ubuf->pinned_folios[nr_pinned++] = folios[cur_folio]; + + for (; subpgoff < fsize; subpgoff += PAGE_SIZE) { + ubuf->folios[upgcnt] = folios[cur_folio]; + ubuf->offsets[upgcnt] = subpgoff; + ++upgcnt; + + if (++cur_pgcnt >= pgcnt) + goto end; + } + + /** + * In a given range, only the first subpage of the first folio + * has an offset, that is returned by memfd_pin_folios(). + * The first subpages of other folios (in the range) have an + * offset of 0. + */ + pgoff = 0; + } +end: + ubuf->pagecount = upgcnt; + ubuf->nr_pinned = nr_pinned; + return 0; +} static long udmabuf_create(struct miscdevice *device, struct udmabuf_create_list *head, struct udmabuf_create_item *list) { - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - struct file *memfd = NULL; - struct address_space *mapping = NULL; + unsigned long max_nr_folios = 0; + struct folio **folios = NULL; + pgoff_t pgcnt = 0, pglimit; struct udmabuf *ubuf; - struct dma_buf *buf; - pgoff_t pgoff, pgcnt, pgidx, pgbuf = 0, pglimit; - struct page *page; - int seals, ret = -EINVAL; + struct dma_buf *dmabuf; + long ret = -EINVAL; u32 i, flags; ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL); @@ -214,79 +395,84 @@ static long udmabuf_create(struct miscdevice *device, pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT; for (i = 0; i < head->count; i++) { - if (!IS_ALIGNED(list[i].offset, PAGE_SIZE)) - goto err; - if (!IS_ALIGNED(list[i].size, PAGE_SIZE)) - goto err; - ubuf->pagecount += list[i].size >> PAGE_SHIFT; - if (ubuf->pagecount > pglimit) - goto err; + pgoff_t subpgcnt; + + if (!PAGE_ALIGNED(list[i].offset)) + goto err_noinit; + if (!PAGE_ALIGNED(list[i].size)) + goto err_noinit; + + subpgcnt = list[i].size >> PAGE_SHIFT; + pgcnt += subpgcnt; + if (pgcnt > pglimit) + goto err_noinit; + + max_nr_folios = max_t(unsigned long, subpgcnt, max_nr_folios); } - if (!ubuf->pagecount) + if (!pgcnt) + goto err_noinit; + + ret = init_udmabuf(ubuf, pgcnt); + if (ret) goto err; - ubuf->pages = kmalloc_array(ubuf->pagecount, sizeof(*ubuf->pages), - GFP_KERNEL); - if (!ubuf->pages) { + folios = kvmalloc_array(max_nr_folios, sizeof(*folios), GFP_KERNEL); + if (!folios) { ret = -ENOMEM; goto err; } - pgbuf = 0; for (i = 0; i < head->count; i++) { - ret = -EBADFD; - memfd = fget(list[i].memfd); - if (!memfd) - goto err; - mapping = memfd->f_mapping; - if (!shmem_mapping(mapping)) - goto err; - seals = memfd_fcntl(memfd, F_GET_SEALS, 0); - if (seals == -EINVAL) - goto err; - ret = -EINVAL; - if ((seals & SEALS_WANTED) != SEALS_WANTED || - (seals & SEALS_DENIED) != 0) + struct file *memfd = fget(list[i].memfd); + + if (!memfd) { + ret = -EBADFD; goto err; - pgoff = list[i].offset >> PAGE_SHIFT; - pgcnt = list[i].size >> PAGE_SHIFT; - for (pgidx = 0; pgidx < pgcnt; pgidx++) { - page = shmem_read_mapping_page(mapping, pgoff + pgidx); - if (IS_ERR(page)) { - ret = PTR_ERR(page); - goto err; - } - ubuf->pages[pgbuf++] = page; } + + /* + * Take the inode lock to protect against concurrent + * memfd_add_seals(), which takes this lock in write mode. 
+ */ + inode_lock_shared(file_inode(memfd)); + ret = check_memfd_seals(memfd); + if (ret) + goto out_unlock; + + ret = udmabuf_pin_folios(ubuf, memfd, list[i].offset, + list[i].size, folios); +out_unlock: + inode_unlock_shared(file_inode(memfd)); fput(memfd); - memfd = NULL; + if (ret) + goto err; } - exp_info.ops = &udmabuf_ops; - exp_info.size = ubuf->pagecount << PAGE_SHIFT; - exp_info.priv = ubuf; - exp_info.flags = O_RDWR; - - ubuf->device = device; - buf = dma_buf_export(&exp_info); - if (IS_ERR(buf)) { - ret = PTR_ERR(buf); + flags = head->flags & UDMABUF_FLAGS_CLOEXEC ? O_CLOEXEC : 0; + dmabuf = export_udmabuf(ubuf, device); + if (IS_ERR(dmabuf)) { + ret = PTR_ERR(dmabuf); goto err; } + /* + * Ownership of ubuf is held by the dmabuf from here. + * If the following dma_buf_fd() fails, dma_buf_put() cleans up both the + * dmabuf and the ubuf (through udmabuf_ops.release). + */ - flags = 0; - if (head->flags & UDMABUF_FLAGS_CLOEXEC) - flags |= O_CLOEXEC; - return dma_buf_fd(buf, flags); + ret = dma_buf_fd(dmabuf, flags); + if (ret < 0) + dma_buf_put(dmabuf); + + kvfree(folios); + return ret; err: - while (pgbuf > 0) - put_page(ubuf->pages[--pgbuf]); - if (memfd) - fput(memfd); - kfree(ubuf->pages); + deinit_udmabuf(ubuf); +err_noinit: kfree(ubuf); + kvfree(folios); return ret; } |
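For reference, the userspace contract of the reworked udmabuf_create() path is unchanged: a memfd sealed with F_SEAL_SHRINK (and no write seals) is handed to the UDMABUF_CREATE ioctl, which returns a dma-buf fd. A rough userspace sketch, with error checks mostly omitted:

#define _GNU_SOURCE
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/udmabuf.h>

int create_udmabuf(size_t size)
{
	struct udmabuf_create create;
	int memfd, devfd, buffd;

	memfd = memfd_create("demo", MFD_ALLOW_SEALING);
	ftruncate(memfd, size);
	/* udmabuf requires F_SEAL_SHRINK and rejects write seals. */
	fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);

	devfd = open("/dev/udmabuf", O_RDWR);

	memset(&create, 0, sizeof(create));
	create.memfd  = memfd;
	create.offset = 0;
	create.size   = size;

	buffd = ioctl(devfd, UDMABUF_CREATE, &create);

	close(devfd);
	close(memfd);
	return buffd;	/* a dma-buf fd on success, -1 on error */
}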