diff options
Diffstat (limited to 'fs/aio.c')
-rw-r--r-- | fs/aio.c | 96 |
1 files changed, 54 insertions, 42 deletions
@@ -68,9 +68,9 @@ struct aio_ring { #define AIO_RING_PAGES 8 struct kioctx_table { - struct rcu_head rcu; - unsigned nr; - struct kioctx *table[]; + struct rcu_head rcu; + unsigned nr; + struct kioctx __rcu *table[]; }; struct kioctx_cpu { @@ -115,7 +115,7 @@ struct kioctx { struct page **ring_pages; long nr_pages; - struct work_struct free_work; + struct rcu_work free_rwork; /* see free_ioctx() */ /* * signals when all in-flight requests are done @@ -329,7 +329,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma) for (i = 0; i < table->nr; i++) { struct kioctx *ctx; - ctx = table->table[i]; + ctx = rcu_dereference(table->table[i]); if (ctx && ctx->aio_ring_file == file) { if (!atomic_read(&ctx->dead)) { ctx->user_id = ctx->mmap_base = vma->vm_start; @@ -576,7 +576,7 @@ static int kiocb_cancel(struct aio_kiocb *kiocb) * actually has a cancel function, hence the cmpxchg() */ - cancel = ACCESS_ONCE(kiocb->ki_cancel); + cancel = READ_ONCE(kiocb->ki_cancel); do { if (!cancel || cancel == KIOCB_CANCELLED) return -EINVAL; @@ -588,10 +588,15 @@ static int kiocb_cancel(struct aio_kiocb *kiocb) return cancel(&kiocb->common); } +/* + * free_ioctx() should be RCU delayed to synchronize against the RCU + * protected lookup_ioctx() and also needs process context to call + * aio_free_ring(). Use rcu_work. + */ static void free_ioctx(struct work_struct *work) { - struct kioctx *ctx = container_of(work, struct kioctx, free_work); - + struct kioctx *ctx = container_of(to_rcu_work(work), struct kioctx, + free_rwork); pr_debug("freeing %p\n", ctx); aio_free_ring(ctx); @@ -609,8 +614,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref) if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) complete(&ctx->rq_wait->comp); - INIT_WORK(&ctx->free_work, free_ioctx); - schedule_work(&ctx->free_work); + /* Synchronize against RCU protected table->table[] dereferences */ + INIT_RCU_WORK(&ctx->free_rwork, free_ioctx); + queue_rcu_work(system_wq, &ctx->free_rwork); } /* @@ -651,9 +657,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) while (1) { if (table) for (i = 0; i < table->nr; i++) - if (!table->table[i]) { + if (!rcu_access_pointer(table->table[i])) { ctx->id = i; - table->table[i] = ctx; + rcu_assign_pointer(table->table[i], ctx); spin_unlock(&mm->ioctx_lock); /* While kioctx setup is in progress, @@ -834,11 +840,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, } table = rcu_dereference_raw(mm->ioctx_table); - WARN_ON(ctx != table->table[ctx->id]); - table->table[ctx->id] = NULL; + WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id])); + RCU_INIT_POINTER(table->table[ctx->id], NULL); spin_unlock(&mm->ioctx_lock); - /* percpu_ref_kill() will do the necessary call_rcu() */ + /* free_ioctx_reqs() will do the necessary RCU synchronization */ wake_up_all(&ctx->wait); /* @@ -880,7 +886,8 @@ void exit_aio(struct mm_struct *mm) skipped = 0; for (i = 0; i < table->nr; ++i) { - struct kioctx *ctx = table->table[i]; + struct kioctx *ctx = + rcu_dereference_protected(table->table[i], true); if (!ctx) { skipped++; @@ -1069,7 +1076,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) if (!table || id >= table->nr) goto out; - ctx = table->table[id]; + ctx = rcu_dereference(table->table[id]); if (ctx && ctx->user_id == ctx_id) { percpu_ref_get(&ctx->users); ret = ctx; @@ -1297,20 +1304,10 @@ static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr, static long read_events(struct kioctx *ctx, long min_nr, long nr, struct io_event __user *event, - struct timespec __user *timeout) + ktime_t until) { - ktime_t until = KTIME_MAX; long ret = 0; - if (timeout) { - struct timespec ts; - - if (unlikely(copy_from_user(&ts, timeout, sizeof(ts)))) - return -EFAULT; - - until = timespec_to_ktime(ts); - } - /* * Note that aio_read_events() is being called as the conditional - i.e. * we're calling it after prepare_to_wait() has set task state to @@ -1826,6 +1823,25 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, return ret; } +static long do_io_getevents(aio_context_t ctx_id, + long min_nr, + long nr, + struct io_event __user *events, + struct timespec64 *ts) +{ + ktime_t until = ts ? timespec64_to_ktime(*ts) : KTIME_MAX; + struct kioctx *ioctx = lookup_ioctx(ctx_id); + long ret = -EINVAL; + + if (likely(ioctx)) { + if (likely(min_nr <= nr && min_nr >= 0)) + ret = read_events(ioctx, min_nr, nr, events, until); + percpu_ref_put(&ioctx->users); + } + + return ret; +} + /* io_getevents: * Attempts to read at least min_nr events and up to nr events from * the completion queue for the aio_context specified by ctx_id. If @@ -1844,15 +1860,14 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, struct io_event __user *, events, struct timespec __user *, timeout) { - struct kioctx *ioctx = lookup_ioctx(ctx_id); - long ret = -EINVAL; + struct timespec64 ts; - if (likely(ioctx)) { - if (likely(min_nr <= nr && min_nr >= 0)) - ret = read_events(ioctx, min_nr, nr, events, timeout); - percpu_ref_put(&ioctx->users); + if (timeout) { + if (unlikely(get_timespec64(&ts, timeout))) + return -EFAULT; } - return ret; + + return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); } #ifdef CONFIG_COMPAT @@ -1862,17 +1877,14 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, struct io_event __user *, events, struct compat_timespec __user *, timeout) { - struct timespec t; - struct timespec __user *ut = NULL; + struct timespec64 t; if (timeout) { - if (compat_get_timespec(&t, timeout)) + if (compat_get_timespec64(&t, timeout)) return -EFAULT; - ut = compat_alloc_user_space(sizeof(*ut)); - if (copy_to_user(ut, &t, sizeof(t))) - return -EFAULT; } - return sys_io_getevents(ctx_id, min_nr, nr, events, ut); + + return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); } #endif |