summaryrefslogtreecommitdiff
path: root/fs/aio.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/aio.c')
-rw-r--r--fs/aio.c96
1 files changed, 54 insertions, 42 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 5a2487217072..88d7927ffbc6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -68,9 +68,9 @@ struct aio_ring {
#define AIO_RING_PAGES 8
struct kioctx_table {
- struct rcu_head rcu;
- unsigned nr;
- struct kioctx *table[];
+ struct rcu_head rcu;
+ unsigned nr;
+ struct kioctx __rcu *table[];
};
struct kioctx_cpu {
@@ -115,7 +115,7 @@ struct kioctx {
struct page **ring_pages;
long nr_pages;
- struct work_struct free_work;
+ struct rcu_work free_rwork; /* see free_ioctx() */
/*
* signals when all in-flight requests are done
@@ -329,7 +329,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
for (i = 0; i < table->nr; i++) {
struct kioctx *ctx;
- ctx = table->table[i];
+ ctx = rcu_dereference(table->table[i]);
if (ctx && ctx->aio_ring_file == file) {
if (!atomic_read(&ctx->dead)) {
ctx->user_id = ctx->mmap_base = vma->vm_start;
@@ -576,7 +576,7 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
* actually has a cancel function, hence the cmpxchg()
*/
- cancel = ACCESS_ONCE(kiocb->ki_cancel);
+ cancel = READ_ONCE(kiocb->ki_cancel);
do {
if (!cancel || cancel == KIOCB_CANCELLED)
return -EINVAL;
@@ -588,10 +588,15 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
return cancel(&kiocb->common);
}
+/*
+ * free_ioctx() should be RCU delayed to synchronize against the RCU
+ * protected lookup_ioctx() and also needs process context to call
+ * aio_free_ring(). Use rcu_work.
+ */
static void free_ioctx(struct work_struct *work)
{
- struct kioctx *ctx = container_of(work, struct kioctx, free_work);
-
+ struct kioctx *ctx = container_of(to_rcu_work(work), struct kioctx,
+ free_rwork);
pr_debug("freeing %p\n", ctx);
aio_free_ring(ctx);
@@ -609,8 +614,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
complete(&ctx->rq_wait->comp);
- INIT_WORK(&ctx->free_work, free_ioctx);
- schedule_work(&ctx->free_work);
+ /* Synchronize against RCU protected table->table[] dereferences */
+ INIT_RCU_WORK(&ctx->free_rwork, free_ioctx);
+ queue_rcu_work(system_wq, &ctx->free_rwork);
}
/*
@@ -651,9 +657,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
while (1) {
if (table)
for (i = 0; i < table->nr; i++)
- if (!table->table[i]) {
+ if (!rcu_access_pointer(table->table[i])) {
ctx->id = i;
- table->table[i] = ctx;
+ rcu_assign_pointer(table->table[i], ctx);
spin_unlock(&mm->ioctx_lock);
/* While kioctx setup is in progress,
@@ -834,11 +840,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
}
table = rcu_dereference_raw(mm->ioctx_table);
- WARN_ON(ctx != table->table[ctx->id]);
- table->table[ctx->id] = NULL;
+ WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
+ RCU_INIT_POINTER(table->table[ctx->id], NULL);
spin_unlock(&mm->ioctx_lock);
- /* percpu_ref_kill() will do the necessary call_rcu() */
+ /* free_ioctx_reqs() will do the necessary RCU synchronization */
wake_up_all(&ctx->wait);
/*
@@ -880,7 +886,8 @@ void exit_aio(struct mm_struct *mm)
skipped = 0;
for (i = 0; i < table->nr; ++i) {
- struct kioctx *ctx = table->table[i];
+ struct kioctx *ctx =
+ rcu_dereference_protected(table->table[i], true);
if (!ctx) {
skipped++;
@@ -1069,7 +1076,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
if (!table || id >= table->nr)
goto out;
- ctx = table->table[id];
+ ctx = rcu_dereference(table->table[id]);
if (ctx && ctx->user_id == ctx_id) {
percpu_ref_get(&ctx->users);
ret = ctx;
@@ -1297,20 +1304,10 @@ static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
static long read_events(struct kioctx *ctx, long min_nr, long nr,
struct io_event __user *event,
- struct timespec __user *timeout)
+ ktime_t until)
{
- ktime_t until = KTIME_MAX;
long ret = 0;
- if (timeout) {
- struct timespec ts;
-
- if (unlikely(copy_from_user(&ts, timeout, sizeof(ts))))
- return -EFAULT;
-
- until = timespec_to_ktime(ts);
- }
-
/*
* Note that aio_read_events() is being called as the conditional - i.e.
* we're calling it after prepare_to_wait() has set task state to
@@ -1826,6 +1823,25 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
return ret;
}
+static long do_io_getevents(aio_context_t ctx_id,
+ long min_nr,
+ long nr,
+ struct io_event __user *events,
+ struct timespec64 *ts)
+{
+ ktime_t until = ts ? timespec64_to_ktime(*ts) : KTIME_MAX;
+ struct kioctx *ioctx = lookup_ioctx(ctx_id);
+ long ret = -EINVAL;
+
+ if (likely(ioctx)) {
+ if (likely(min_nr <= nr && min_nr >= 0))
+ ret = read_events(ioctx, min_nr, nr, events, until);
+ percpu_ref_put(&ioctx->users);
+ }
+
+ return ret;
+}
+
/* io_getevents:
* Attempts to read at least min_nr events and up to nr events from
* the completion queue for the aio_context specified by ctx_id. If
@@ -1844,15 +1860,14 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
struct io_event __user *, events,
struct timespec __user *, timeout)
{
- struct kioctx *ioctx = lookup_ioctx(ctx_id);
- long ret = -EINVAL;
+ struct timespec64 ts;
- if (likely(ioctx)) {
- if (likely(min_nr <= nr && min_nr >= 0))
- ret = read_events(ioctx, min_nr, nr, events, timeout);
- percpu_ref_put(&ioctx->users);
+ if (timeout) {
+ if (unlikely(get_timespec64(&ts, timeout)))
+ return -EFAULT;
}
- return ret;
+
+ return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
}
#ifdef CONFIG_COMPAT
@@ -1862,17 +1877,14 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
struct io_event __user *, events,
struct compat_timespec __user *, timeout)
{
- struct timespec t;
- struct timespec __user *ut = NULL;
+ struct timespec64 t;
if (timeout) {
- if (compat_get_timespec(&t, timeout))
+ if (compat_get_timespec64(&t, timeout))
return -EFAULT;
- ut = compat_alloc_user_space(sizeof(*ut));
- if (copy_to_user(ut, &t, sizeof(t)))
- return -EFAULT;
}
- return sys_io_getevents(ctx_id, min_nr, nr, events, ut);
+
+ return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
}
#endif