diff options
Diffstat (limited to 'fs/timerfd.c')
| -rw-r--r-- | fs/timerfd.c | 306 |
1 files changed, 205 insertions, 101 deletions
diff --git a/fs/timerfd.c b/fs/timerfd.c index 929312180dd0..9fcea7860ddf 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/timerfd.c * @@ -25,6 +26,7 @@ #include <linux/syscalls.h> #include <linux/compat.h> #include <linux/rcupdate.h> +#include <linux/time_namespace.h> struct timerfd_ctx { union { @@ -35,10 +37,12 @@ struct timerfd_ctx { ktime_t moffs; wait_queue_head_t wqh; u64 ticks; - int expired; int clockid; + short unsigned expired; + short unsigned settime_flags; /* to show in fdinfo */ struct rcu_head rcu; struct list_head clist; + spinlock_t cancel_lock; bool might_cancel; }; @@ -54,7 +58,7 @@ static inline bool isalarm(struct timerfd_ctx *ctx) /* * This gets called when the timer event triggers. We set the "expired" * flag, but we do not re-arm the timer (in case it's necessary, - * tintv.tv64 != 0) until the timer is accessed. + * tintv != 0) until the timer is accessed. */ static void timerfd_triggered(struct timerfd_ctx *ctx) { @@ -63,7 +67,7 @@ static void timerfd_triggered(struct timerfd_ctx *ctx) spin_lock_irqsave(&ctx->wqh.lock, flags); ctx->expired = 1; ctx->ticks++; - wake_up_locked(&ctx->wqh); + wake_up_locked_poll(&ctx->wqh, EPOLLIN); spin_unlock_irqrestore(&ctx->wqh.lock, flags); } @@ -75,13 +79,11 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) return HRTIMER_NORESTART; } -static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm, - ktime_t now) +static void timerfd_alarmproc(struct alarm *alarm, ktime_t now) { struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx, t.alarm); timerfd_triggered(ctx); - return ALARMTIMER_NORESTART; } /* @@ -92,7 +94,7 @@ static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm, */ void timerfd_clock_was_set(void) { - ktime_t moffs = ktime_get_monotonic_offset(); + ktime_t moffs = ktime_mono_to_real(0); struct timerfd_ctx *ctx; unsigned long flags; @@ -101,17 +103,33 @@ void timerfd_clock_was_set(void) if (!ctx->might_cancel) continue; spin_lock_irqsave(&ctx->wqh.lock, flags); - if (ctx->moffs.tv64 != moffs.tv64) { - ctx->moffs.tv64 = KTIME_MAX; + if (ctx->moffs != moffs) { + ctx->moffs = KTIME_MAX; ctx->ticks++; - wake_up_locked(&ctx->wqh); + wake_up_locked_poll(&ctx->wqh, EPOLLIN); } spin_unlock_irqrestore(&ctx->wqh.lock, flags); } rcu_read_unlock(); } -static void timerfd_remove_cancel(struct timerfd_ctx *ctx) +static void timerfd_resume_work(struct work_struct *work) +{ + timerfd_clock_was_set(); +} + +static DECLARE_WORK(timerfd_work, timerfd_resume_work); + +/* + * Invoked from timekeeping_resume(). Defer the actual update to work so + * timerfd_clock_was_set() runs in task context. + */ +void timerfd_resume(void) +{ + schedule_work(&timerfd_work); +} + +static void __timerfd_remove_cancel(struct timerfd_ctx *ctx) { if (ctx->might_cancel) { ctx->might_cancel = false; @@ -121,16 +139,24 @@ static void timerfd_remove_cancel(struct timerfd_ctx *ctx) } } +static void timerfd_remove_cancel(struct timerfd_ctx *ctx) +{ + spin_lock(&ctx->cancel_lock); + __timerfd_remove_cancel(ctx); + spin_unlock(&ctx->cancel_lock); +} + static bool timerfd_canceled(struct timerfd_ctx *ctx) { - if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) + if (!ctx->might_cancel || ctx->moffs != KTIME_MAX) return false; - ctx->moffs = ktime_get_monotonic_offset(); + ctx->moffs = ktime_mono_to_real(0); return true; } static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) { + spin_lock(&ctx->cancel_lock); if ((ctx->clockid == CLOCK_REALTIME || ctx->clockid == CLOCK_REALTIME_ALARM) && (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { @@ -140,9 +166,10 @@ static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) list_add_rcu(&ctx->clist, &cancel_list); spin_unlock(&cancel_lock); } - } else if (ctx->might_cancel) { - timerfd_remove_cancel(ctx); + } else { + __timerfd_remove_cancel(ctx); } + spin_unlock(&ctx->cancel_lock); } static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) @@ -152,13 +179,13 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) if (isalarm(ctx)) remaining = alarm_expires_remaining(&ctx->t.alarm); else - remaining = hrtimer_expires_remaining(&ctx->t.tmr); + remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr); - return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; + return remaining < 0 ? 0: remaining; } static int timerfd_setup(struct timerfd_ctx *ctx, int flags, - const struct itimerspec *ktmr) + const struct itimerspec64 *ktmr) { enum hrtimer_mode htmode; ktime_t texp; @@ -167,10 +194,10 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; - texp = timespec_to_ktime(ktmr->it_value); + texp = timespec64_to_ktime(ktmr->it_value); ctx->expired = 0; ctx->ticks = 0; - ctx->tintv = timespec_to_ktime(ktmr->it_interval); + ctx->tintv = timespec64_to_ktime(ktmr->it_interval); if (isalarm(ctx)) { alarm_init(&ctx->t.alarm, @@ -178,12 +205,13 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, ALARM_REALTIME : ALARM_BOOTTIME, timerfd_alarmproc); } else { - hrtimer_init(&ctx->t.tmr, clockid, htmode); + hrtimer_setup(&ctx->t.tmr, timerfd_tmrproc, clockid, htmode); hrtimer_set_expires(&ctx->t.tmr, texp); - ctx->t.tmr.function = timerfd_tmrproc; } - if (texp.tv64 != 0) { + if (texp != 0) { + if (flags & TFD_TIMER_ABSTIME) + texp = timens_ktime_to_host(clockid, texp); if (isalarm(ctx)) { if (flags & TFD_TIMER_ABSTIME) alarm_start(&ctx->t.alarm, texp); @@ -196,6 +224,8 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, if (timerfd_canceled(ctx)) return -ECANCELED; } + + ctx->settime_flags = flags & TFD_SETTIME_FLAGS; return 0; } @@ -213,33 +243,34 @@ static int timerfd_release(struct inode *inode, struct file *file) return 0; } -static unsigned int timerfd_poll(struct file *file, poll_table *wait) +static __poll_t timerfd_poll(struct file *file, poll_table *wait) { struct timerfd_ctx *ctx = file->private_data; - unsigned int events = 0; + __poll_t events = 0; unsigned long flags; poll_wait(file, &ctx->wqh, wait); spin_lock_irqsave(&ctx->wqh.lock, flags); if (ctx->ticks) - events |= POLLIN; + events |= EPOLLIN; spin_unlock_irqrestore(&ctx->wqh.lock, flags); return events; } -static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) +static ssize_t timerfd_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct file *file = iocb->ki_filp; struct timerfd_ctx *ctx = file->private_data; ssize_t res; u64 ticks = 0; - if (count < sizeof(ticks)) + if (iov_iter_count(to) < sizeof(ticks)) return -EINVAL; + spin_lock_irq(&ctx->wqh.lock); - if (file->f_flags & O_NONBLOCK) + if (file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT) res = -EAGAIN; else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); @@ -258,9 +289,9 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, if (ctx->ticks) { ticks = ctx->ticks; - if (ctx->expired && ctx->tintv.tv64) { + if (ctx->expired && ctx->tintv) { /* - * If tintv.tv64 != 0, this is a periodic timer that + * If tintv != 0, this is a periodic timer that * needs to be re-armed. We avoid doing it in the timer * callback to avoid DoS attacks specifying a very * short timer period. @@ -279,35 +310,91 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, ctx->ticks = 0; } spin_unlock_irq(&ctx->wqh.lock); - if (ticks) - res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks); + if (ticks) { + res = copy_to_iter(&ticks, sizeof(ticks), to); + if (!res) + res = -EFAULT; + } return res; } +#ifdef CONFIG_PROC_FS +static void timerfd_show(struct seq_file *m, struct file *file) +{ + struct timerfd_ctx *ctx = file->private_data; + struct timespec64 value, interval; + + spin_lock_irq(&ctx->wqh.lock); + value = ktime_to_timespec64(timerfd_get_remaining(ctx)); + interval = ktime_to_timespec64(ctx->tintv); + spin_unlock_irq(&ctx->wqh.lock); + + seq_printf(m, + "clockid: %d\n" + "ticks: %llu\n" + "settime flags: 0%o\n" + "it_value: (%llu, %llu)\n" + "it_interval: (%llu, %llu)\n", + ctx->clockid, + (unsigned long long)ctx->ticks, + ctx->settime_flags, + (unsigned long long)value.tv_sec, + (unsigned long long)value.tv_nsec, + (unsigned long long)interval.tv_sec, + (unsigned long long)interval.tv_nsec); +} +#else +#define timerfd_show NULL +#endif + +#ifdef CONFIG_CHECKPOINT_RESTORE +static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct timerfd_ctx *ctx = file->private_data; + int ret = 0; + + switch (cmd) { + case TFD_IOC_SET_TICKS: { + u64 ticks; + + if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks))) + return -EFAULT; + if (!ticks) + return -EINVAL; + + spin_lock_irq(&ctx->wqh.lock); + if (!timerfd_canceled(ctx)) { + ctx->ticks = ticks; + wake_up_locked_poll(&ctx->wqh, EPOLLIN); + } else + ret = -ECANCELED; + spin_unlock_irq(&ctx->wqh.lock); + break; + } + default: + ret = -ENOTTY; + break; + } + + return ret; +} +#else +#define timerfd_ioctl NULL +#endif + static const struct file_operations timerfd_fops = { .release = timerfd_release, .poll = timerfd_poll, - .read = timerfd_read, + .read_iter = timerfd_read_iter, .llseek = noop_llseek, + .show_fdinfo = timerfd_show, + .unlocked_ioctl = timerfd_ioctl, }; -static int timerfd_fget(int fd, struct fd *p) -{ - struct fd f = fdget(fd); - if (!f.file) - return -EBADF; - if (f.file->f_op != &timerfd_fops) { - fdput(f); - return -EINVAL; - } - *p = f; - return 0; -} - SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) { - int ufd; - struct timerfd_ctx *ctx; + struct timerfd_ctx *ctx __free(kfree) = NULL; + int ret; /* Check the TFD_* constants for consistency. */ BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); @@ -317,14 +404,21 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) (clockid != CLOCK_MONOTONIC && clockid != CLOCK_REALTIME && clockid != CLOCK_REALTIME_ALARM && + clockid != CLOCK_BOOTTIME && clockid != CLOCK_BOOTTIME_ALARM)) return -EINVAL; + if ((clockid == CLOCK_REALTIME_ALARM || + clockid == CLOCK_BOOTTIME_ALARM) && + !capable(CAP_WAKE_ALARM)) + return -EPERM; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; init_waitqueue_head(&ctx->wqh); + spin_lock_init(&ctx->cancel_lock); ctx->clockid = clockid; if (isalarm(ctx)) @@ -333,35 +427,41 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) ALARM_REALTIME : ALARM_BOOTTIME, timerfd_alarmproc); else - hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS); + hrtimer_setup(&ctx->t.tmr, timerfd_tmrproc, clockid, HRTIMER_MODE_ABS); - ctx->moffs = ktime_get_monotonic_offset(); + ctx->moffs = ktime_mono_to_real(0); - ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, - O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); - if (ufd < 0) - kfree(ctx); - - return ufd; + ret = FD_ADD(flags & TFD_SHARED_FCNTL_FLAGS, + anon_inode_getfile_fmode("[timerfd]", &timerfd_fops, ctx, + O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS), + FMODE_NOWAIT)); + if (ret >= 0) + retain_and_null_ptr(ctx); + return ret; } static int do_timerfd_settime(int ufd, int flags, - const struct itimerspec *new, - struct itimerspec *old) + const struct itimerspec64 *new, + struct itimerspec64 *old) { - struct fd f; struct timerfd_ctx *ctx; int ret; if ((flags & ~TFD_SETTIME_FLAGS) || - !timespec_valid(&new->it_value) || - !timespec_valid(&new->it_interval)) + !itimerspec64_valid(new)) return -EINVAL; - ret = timerfd_fget(ufd, &f); - if (ret) - return ret; - ctx = f.file->private_data; + CLASS(fd, f)(ufd); + if (fd_empty(f)) + return -EBADF; + + if (fd_file(f)->f_op != &timerfd_fops) + return -EINVAL; + + ctx = fd_file(f)->private_data; + + if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) + return -EPERM; timerfd_setup_cancel(ctx, flags); @@ -380,7 +480,11 @@ static int do_timerfd_settime(int ufd, int flags, break; } spin_unlock_irq(&ctx->wqh.lock); - cpu_relax(); + + if (isalarm(ctx)) + hrtimer_cancel_wait_running(&ctx->t.alarm.timer); + else + hrtimer_cancel_wait_running(&ctx->t.tmr); } /* @@ -389,15 +493,15 @@ static int do_timerfd_settime(int ufd, int flags, * We do not update "ticks" and "expired" since the timer will be * re-programmed again in the following timerfd_setup() call. */ - if (ctx->expired && ctx->tintv.tv64) { + if (ctx->expired && ctx->tintv) { if (isalarm(ctx)) alarm_forward_now(&ctx->t.alarm, ctx->tintv); else hrtimer_forward_now(&ctx->t.tmr, ctx->tintv); } - old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); - old->it_interval = ktime_to_timespec(ctx->tintv); + old->it_value = ktime_to_timespec64(timerfd_get_remaining(ctx)); + old->it_interval = ktime_to_timespec64(ctx->tintv); /* * Re-program the timer to the new value ... @@ -405,21 +509,22 @@ static int do_timerfd_settime(int ufd, int flags, ret = timerfd_setup(ctx, flags, new); spin_unlock_irq(&ctx->wqh.lock); - fdput(f); return ret; } -static int do_timerfd_gettime(int ufd, struct itimerspec *t) +static int do_timerfd_gettime(int ufd, struct itimerspec64 *t) { - struct fd f; struct timerfd_ctx *ctx; - int ret = timerfd_fget(ufd, &f); - if (ret) - return ret; - ctx = f.file->private_data; + CLASS(fd, f)(ufd); + + if (fd_empty(f)) + return -EBADF; + if (fd_file(f)->f_op != &timerfd_fops) + return -EINVAL; + ctx = fd_file(f)->private_data; spin_lock_irq(&ctx->wqh.lock); - if (ctx->expired && ctx->tintv.tv64) { + if (ctx->expired && ctx->tintv) { ctx->expired = 0; if (isalarm(ctx)) { @@ -434,65 +539,64 @@ static int do_timerfd_gettime(int ufd, struct itimerspec *t) hrtimer_restart(&ctx->t.tmr); } } - t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); - t->it_interval = ktime_to_timespec(ctx->tintv); + t->it_value = ktime_to_timespec64(timerfd_get_remaining(ctx)); + t->it_interval = ktime_to_timespec64(ctx->tintv); spin_unlock_irq(&ctx->wqh.lock); - fdput(f); return 0; } SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, - const struct itimerspec __user *, utmr, - struct itimerspec __user *, otmr) + const struct __kernel_itimerspec __user *, utmr, + struct __kernel_itimerspec __user *, otmr) { - struct itimerspec new, old; + struct itimerspec64 new, old; int ret; - if (copy_from_user(&new, utmr, sizeof(new))) + if (get_itimerspec64(&new, utmr)) return -EFAULT; ret = do_timerfd_settime(ufd, flags, &new, &old); if (ret) return ret; - if (otmr && copy_to_user(otmr, &old, sizeof(old))) + if (otmr && put_itimerspec64(&old, otmr)) return -EFAULT; return ret; } -SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) +SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *, otmr) { - struct itimerspec kotmr; + struct itimerspec64 kotmr; int ret = do_timerfd_gettime(ufd, &kotmr); if (ret) return ret; - return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; + return put_itimerspec64(&kotmr, otmr) ? -EFAULT : 0; } -#ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, - const struct compat_itimerspec __user *, utmr, - struct compat_itimerspec __user *, otmr) +#ifdef CONFIG_COMPAT_32BIT_TIME +SYSCALL_DEFINE4(timerfd_settime32, int, ufd, int, flags, + const struct old_itimerspec32 __user *, utmr, + struct old_itimerspec32 __user *, otmr) { - struct itimerspec new, old; + struct itimerspec64 new, old; int ret; - if (get_compat_itimerspec(&new, utmr)) + if (get_old_itimerspec32(&new, utmr)) return -EFAULT; ret = do_timerfd_settime(ufd, flags, &new, &old); if (ret) return ret; - if (otmr && put_compat_itimerspec(otmr, &old)) + if (otmr && put_old_itimerspec32(&old, otmr)) return -EFAULT; return ret; } -COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd, - struct compat_itimerspec __user *, otmr) +SYSCALL_DEFINE2(timerfd_gettime32, int, ufd, + struct old_itimerspec32 __user *, otmr) { - struct itimerspec kotmr; + struct itimerspec64 kotmr; int ret = do_timerfd_gettime(ufd, &kotmr); if (ret) return ret; - return put_compat_itimerspec(otmr, &kotmr) ? -EFAULT: 0; + return put_old_itimerspec32(&kotmr, otmr) ? -EFAULT : 0; } #endif |
