summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2019-10-03 13:59:56 -0600
committerJens Axboe <axboe@kernel.dk>2019-10-29 10:22:44 -0600
commitc3a31e605620c279163c14068a60869ea3fda203 (patch)
tree3d1f39bf1ab88bc2377c2da7682504b55f686c83
parent08a451739a9b5783f67de51e84cb6d9559bb9dc4 (diff)
io_uring: add support for IORING_REGISTER_FILES_UPDATE
Allows the application to remove/replace/add files to/from a file set. Passes in a struct: struct io_uring_files_update { __u32 offset; __s32 *fds; }; that holds an array of fds, size of array passed in through the usual nr_args part of the io_uring_register() system call. The logic is as follows: 1) If ->fds[i] is -1, the existing file at i + ->offset is removed from the set. 2) If ->fds[i] is a valid fd, the existing file at i + ->offset is replaced with ->fds[i]. For case #2, is the existing file is currently empty (fd == -1), the new fd is simply added to the array. Reviewed-by: Jeff Moyer <jmoyer@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--fs/io_uring.c175
-rw-r--r--include/uapi/linux/io_uring.h6
2 files changed, 181 insertions, 0 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b85e5feb774a..77774abb1074 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3224,6 +3224,178 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return ret;
}
+static void io_sqe_file_unregister(struct io_ring_ctx *ctx, int index)
+{
+#if defined(CONFIG_UNIX)
+ struct file *file = ctx->user_files[index];
+ struct sock *sock = ctx->ring_sock->sk;
+ struct sk_buff_head list, *head = &sock->sk_receive_queue;
+ struct sk_buff *skb;
+ int i;
+
+ __skb_queue_head_init(&list);
+
+ /*
+ * Find the skb that holds this file in its SCM_RIGHTS. When found,
+ * remove this entry and rearrange the file array.
+ */
+ skb = skb_dequeue(head);
+ while (skb) {
+ struct scm_fp_list *fp;
+
+ fp = UNIXCB(skb).fp;
+ for (i = 0; i < fp->count; i++) {
+ int left;
+
+ if (fp->fp[i] != file)
+ continue;
+
+ unix_notinflight(fp->user, fp->fp[i]);
+ left = fp->count - 1 - i;
+ if (left) {
+ memmove(&fp->fp[i], &fp->fp[i + 1],
+ left * sizeof(struct file *));
+ }
+ fp->count--;
+ if (!fp->count) {
+ kfree_skb(skb);
+ skb = NULL;
+ } else {
+ __skb_queue_tail(&list, skb);
+ }
+ fput(file);
+ file = NULL;
+ break;
+ }
+
+ if (!file)
+ break;
+
+ __skb_queue_tail(&list, skb);
+
+ skb = skb_dequeue(head);
+ }
+
+ if (skb_peek(&list)) {
+ spin_lock_irq(&head->lock);
+ while ((skb = __skb_dequeue(&list)) != NULL)
+ __skb_queue_tail(head, skb);
+ spin_unlock_irq(&head->lock);
+ }
+#else
+ fput(ctx->user_files[index]);
+#endif
+}
+
+static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
+ int index)
+{
+#if defined(CONFIG_UNIX)
+ struct sock *sock = ctx->ring_sock->sk;
+ struct sk_buff_head *head = &sock->sk_receive_queue;
+ struct sk_buff *skb;
+
+ /*
+ * See if we can merge this file into an existing skb SCM_RIGHTS
+ * file set. If there's no room, fall back to allocating a new skb
+ * and filling it in.
+ */
+ spin_lock_irq(&head->lock);
+ skb = skb_peek(head);
+ if (skb) {
+ struct scm_fp_list *fpl = UNIXCB(skb).fp;
+
+ if (fpl->count < SCM_MAX_FD) {
+ __skb_unlink(skb, head);
+ spin_unlock_irq(&head->lock);
+ fpl->fp[fpl->count] = get_file(file);
+ unix_inflight(fpl->user, fpl->fp[fpl->count]);
+ fpl->count++;
+ spin_lock_irq(&head->lock);
+ __skb_queue_head(head, skb);
+ } else {
+ skb = NULL;
+ }
+ }
+ spin_unlock_irq(&head->lock);
+
+ if (skb) {
+ fput(file);
+ return 0;
+ }
+
+ return __io_sqe_files_scm(ctx, 1, index);
+#else
+ return 0;
+#endif
+}
+
+static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
+ unsigned nr_args)
+{
+ struct io_uring_files_update up;
+ __s32 __user *fds;
+ int fd, i, err;
+ __u32 done;
+
+ if (!ctx->user_files)
+ return -ENXIO;
+ if (!nr_args)
+ return -EINVAL;
+ if (copy_from_user(&up, arg, sizeof(up)))
+ return -EFAULT;
+ if (check_add_overflow(up.offset, nr_args, &done))
+ return -EOVERFLOW;
+ if (done > ctx->nr_user_files)
+ return -EINVAL;
+
+ done = 0;
+ fds = (__s32 __user *) up.fds;
+ while (nr_args) {
+ err = 0;
+ if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
+ err = -EFAULT;
+ break;
+ }
+ i = array_index_nospec(up.offset, ctx->nr_user_files);
+ if (ctx->user_files[i]) {
+ io_sqe_file_unregister(ctx, i);
+ ctx->user_files[i] = NULL;
+ }
+ if (fd != -1) {
+ struct file *file;
+
+ file = fget(fd);
+ if (!file) {
+ err = -EBADF;
+ break;
+ }
+ /*
+ * Don't allow io_uring instances to be registered. If
+ * UNIX isn't enabled, then this causes a reference
+ * cycle and this instance can never get freed. If UNIX
+ * is enabled we'll handle it just fine, but there's
+ * still no point in allowing a ring fd as it doesn't
+ * support regular read/write anyway.
+ */
+ if (file->f_op == &io_uring_fops) {
+ fput(file);
+ err = -EBADF;
+ break;
+ }
+ ctx->user_files[i] = file;
+ err = io_sqe_file_register(ctx, file, i);
+ if (err)
+ break;
+ }
+ nr_args--;
+ done++;
+ up.offset++;
+ }
+
+ return done ? done : err;
+}
+
static int io_sq_offload_start(struct io_ring_ctx *ctx,
struct io_uring_params *p)
{
@@ -4031,6 +4203,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_sqe_files_unregister(ctx);
break;
+ case IORING_REGISTER_FILES_UPDATE:
+ ret = io_sqe_files_update(ctx, arg, nr_args);
+ break;
case IORING_REGISTER_EVENTFD:
ret = -EINVAL;
if (nr_args != 1)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ea57526a5b89..4f532d9c0554 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -150,5 +150,11 @@ struct io_uring_params {
#define IORING_UNREGISTER_FILES 3
#define IORING_REGISTER_EVENTFD 4
#define IORING_UNREGISTER_EVENTFD 5
+#define IORING_REGISTER_FILES_UPDATE 6
+
+struct io_uring_files_update {
+ __u32 offset;
+ __s32 *fds;
+};
#endif