From 9d9e522010eb5685d8b53e8a24320653d9d4cbbf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Jun 2023 22:16:34 +0200 Subject: posix-timers: Prevent RT livelock in itimer_delete() itimer_delete() has a retry loop when the timer is concurrently expired. On non-RT kernels this just spin-waits until the timer callback has completed, except for posix CPU timers which have HAVE_POSIX_CPU_TIMERS_TASK_WORK enabled. In that case and on RT kernels the existing task could live lock when preempting the task which does the timer delivery. Replace spin_unlock() with an invocation of timer_wait_running() to handle it the same way as the other retry loops in the posix timer code. Fixes: ec8f954a40da ("posix-timers: Use a callback for cancel synchronization on PREEMPT_RT") Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/87v8g7c50d.ffs@tglx --- kernel/time/posix-timers.c | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 808a247205a9..ed3c4a954398 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1037,27 +1037,52 @@ retry_delete: } /* - * return timer owned by the process, used by exit_itimers + * Delete a timer if it is armed, remove it from the hash and schedule it + * for RCU freeing. */ static void itimer_delete(struct k_itimer *timer) { -retry_delete: - spin_lock_irq(&timer->it_lock); + unsigned long flags; + + /* + * irqsave is required to make timer_wait_running() work. + */ + spin_lock_irqsave(&timer->it_lock, flags); +retry_delete: + /* + * Even if the timer is not longer accessible from other tasks + * it still might be armed and queued in the underlying timer + * mechanism. Worse, that timer mechanism might run the expiry + * function concurrently. + */ if (timer_delete_hook(timer) == TIMER_RETRY) { - spin_unlock_irq(&timer->it_lock); + /* + * Timer is expired concurrently, prevent livelocks + * and pointless spinning on RT. + * + * timer_wait_running() drops timer::it_lock, which opens + * the possibility for another task to delete the timer. + * + * That's not possible here because this is invoked from + * do_exit() only for the last thread of the thread group. + * So no other task can access and delete that timer. + */ + if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer)) + return; + goto retry_delete; } list_del(&timer->list); - spin_unlock_irq(&timer->it_lock); + spin_unlock_irqrestore(&timer->it_lock, flags); release_posix_timer(timer, IT_ID_SET); } /* - * This is called by do_exit or de_thread, only when nobody else can - * modify the signal->posix_timers list. Yet we need sighand->siglock - * to prevent the race with /proc/pid/timers. + * Invoked from do_exit() when the last thread of a thread group exits. + * At that point no other task can access the timers of the dying + * task anymore. */ void exit_itimers(struct task_struct *tsk) { @@ -1067,10 +1092,12 @@ void exit_itimers(struct task_struct *tsk) if (list_empty(&tsk->signal->posix_timers)) return; + /* Protect against concurrent read via /proc/$PID/timers */ spin_lock_irq(&tsk->sighand->siglock); list_replace_init(&tsk->signal->posix_timers, &timers); spin_unlock_irq(&tsk->sighand->siglock); + /* The timers are not longer accessible via tsk::signal */ while (!list_empty(&timers)) { tmr = list_first_entry(&timers, struct k_itimer, list); itimer_delete(tmr); -- cgit From 8ce8849dd1e78dadcee0ec9acbd259d239b7069f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Jun 2023 20:58:47 +0200 Subject: posix-timers: Ensure timer ID search-loop limit is valid posix_timer_add() tries to allocate a posix timer ID by starting from the cached ID which was stored by the last successful allocation. This is done in a loop searching the ID space for a free slot one by one. The loop has to terminate when the search wrapped around to the starting point. But that's racy vs. establishing the starting point. That is read out lockless, which leads to the following problem: CPU0 CPU1 posix_timer_add() start = sig->posix_timer_id; lock(hash_lock); ... posix_timer_add() if (++sig->posix_timer_id < 0) start = sig->posix_timer_id; sig->posix_timer_id = 0; So CPU1 can observe a negative start value, i.e. -1, and the loop break never happens because the condition can never be true: if (sig->posix_timer_id == start) break; While this is unlikely to ever turn into an endless loop as the ID space is huge (INT_MAX), the racy read of the start value caught the attention of KCSAN and Dmitry unearthed that incorrectness. Rewrite it so that all id operations are under the hash lock. Reported-by: syzbot+5c54bd3eb218bb595aa9@syzkaller.appspotmail.com Reported-by: Dmitry Vyukov Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/87bkhzdn6g.ffs@tglx --- kernel/time/posix-timers.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index ed3c4a954398..2d6cf93ca370 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -140,25 +140,30 @@ static struct k_itimer *posix_timer_by_id(timer_t id) static int posix_timer_add(struct k_itimer *timer) { struct signal_struct *sig = current->signal; - int first_free_id = sig->posix_timer_id; struct hlist_head *head; - int ret = -ENOENT; + unsigned int cnt, id; - do { + /* + * FIXME: Replace this by a per signal struct xarray once there is + * a plan to handle the resulting CRIU regression gracefully. + */ + for (cnt = 0; cnt <= INT_MAX; cnt++) { spin_lock(&hash_lock); - head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)]; - if (!__posix_timers_find(head, sig, sig->posix_timer_id)) { + id = sig->next_posix_timer_id; + + /* Write the next ID back. Clamp it to the positive space */ + sig->next_posix_timer_id = (id + 1) & INT_MAX; + + head = &posix_timers_hashtable[hash(sig, id)]; + if (!__posix_timers_find(head, sig, id)) { hlist_add_head_rcu(&timer->t_hash, head); - ret = sig->posix_timer_id; + spin_unlock(&hash_lock); + return id; } - if (++sig->posix_timer_id < 0) - sig->posix_timer_id = 0; - if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT)) - /* Loop over all possible ids completed */ - ret = -EAGAIN; spin_unlock(&hash_lock); - } while (ret == -ENOENT); - return ret; + } + /* POSIX return code when no timer ID could be allocated */ + return -EAGAIN; } static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) -- cgit From 7d9909026645064cd31b20cee5939a0f72282261 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:00 +0200 Subject: posix-timers: Clarify timer_wait_running() comment Explain it better and add the CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y aspect for completeness. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183312.985681995@linutronix.de --- kernel/time/posix-timers.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 2d6cf93ca370..0c61f29ef5d5 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -836,10 +836,18 @@ static void common_timer_wait_running(struct k_itimer *timer) } /* - * On PREEMPT_RT this prevent priority inversion against softirq kthread in - * case it gets preempted while executing a timer callback. See comments in - * hrtimer_cancel_wait_running. For PREEMPT_RT=n this just results in a - * cpu_relax(). + * On PREEMPT_RT this prevents priority inversion and a potential livelock + * against the ksoftirqd thread in case that ksoftirqd gets preempted while + * executing a hrtimer callback. + * + * See the comments in hrtimer_cancel_wait_running(). For PREEMPT_RT=n this + * just results in a cpu_relax(). + * + * For POSIX CPU timers with CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n this is + * just a cpu_relax(). With CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y this + * prevents spinning on an eventually scheduled out task and a livelock + * when the task which tries to delete or disarm the timer has preempted + * the task which runs the expiry in task work context. */ static struct k_itimer *timer_wait_running(struct k_itimer *timer, unsigned long *flags) -- cgit From 8d44b958a1a088195524c884f0437660e0522380 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:01 +0200 Subject: posix-timers: Cleanup comments about timer ID tracking Describe the hash table properly and remove the IDR leftover comments. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.038444551@linutronix.de --- kernel/time/posix-timers.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 0c61f29ef5d5..79909a2ac3e4 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -35,20 +35,17 @@ #include "timekeeping.h" #include "posix-timers.h" -/* - * Management arrays for POSIX timers. Timers are now kept in static hash table - * with 512 entries. - * Timer ids are allocated by local routine, which selects proper hash head by - * key, constructed from current->signal address and per signal struct counter. - * This keeps timer ids unique per process, but now they can intersect between - * processes. - */ +static struct kmem_cache *posix_timers_cache; /* - * Lets keep our timers in a slab cache :-) + * Timers are managed in a hash table for lockless lookup. The hash key is + * constructed from current::signal and the timer ID and the timer is + * matched against current::signal and the timer ID when walking the hash + * bucket list. + * + * This allows checkpoint/restore to reconstruct the exact timer IDs for + * a process. */ -static struct kmem_cache *posix_timers_cache; - static DEFINE_HASHTABLE(posix_timers_hashtable, 9); static DEFINE_SPINLOCK(hash_lock); @@ -65,15 +62,6 @@ static const struct k_clock clock_realtime, clock_monotonic; #error "SIGEV_THREAD_ID must not share bit with other SIGEV values!" #endif -/* - * The timer ID is turned into a timer address by idr_find(). - * Verifying a valid ID consists of: - * - * a) checking that idr_find() returns other than -1. - * b) checking that the timer id matches the one in the timer itself. - * c) that the timer owner is in the callers thread group. - */ - /* * CLOCKs: The POSIX standard calls for a couple of clocks and allows us * to implement others. This structure defines the various -- cgit From ae88967d71f1b4ffb6e48043993d37a106da8109 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:03 +0200 Subject: posix-timers: Add comments about timer lookup Document how the timer ID validation in the hash table works. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.091081515@linutronix.de --- kernel/time/posix-timers.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 79909a2ac3e4..d7890ac703ae 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -506,6 +506,12 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, return -EAGAIN; spin_lock_init(&new_timer->it_lock); + + /* + * Add the timer to the hash table. The timer is not yet valid + * because new_timer::it_signal is still NULL. The timer id is also + * not yet visible to user space. + */ new_timer_id = posix_timer_add(new_timer); if (new_timer_id < 0) { error = new_timer_id; @@ -551,6 +557,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, goto out; spin_lock_irq(¤t->sighand->siglock); + /* This makes the timer valid in the hash table */ new_timer->it_signal = current->signal; list_add(&new_timer->list, ¤t->signal->posix_timers); spin_unlock_irq(¤t->sighand->siglock); @@ -597,13 +604,6 @@ COMPAT_SYSCALL_DEFINE3(timer_create, clockid_t, which_clock, } #endif -/* - * Locking issues: We need to protect the result of the id look up until - * we get the timer locked down so it is not deleted under us. The - * removal is done under the idr spinlock so we use that here to bridge - * the find to the timer lock. To avoid a dead lock, the timer id MUST - * be release with out holding the timer lock. - */ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) { struct k_itimer *timr; @@ -615,10 +615,35 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) if ((unsigned long long)timer_id > INT_MAX) return NULL; + /* + * The hash lookup and the timers are RCU protected. + * + * Timers are added to the hash in invalid state where + * timr::it_signal == NULL. timer::it_signal is only set after the + * rest of the initialization succeeded. + * + * Timer destruction happens in steps: + * 1) Set timr::it_signal to NULL with timr::it_lock held + * 2) Release timr::it_lock + * 3) Remove from the hash under hash_lock + * 4) Call RCU for removal after the grace period + * + * Holding rcu_read_lock() accross the lookup ensures that + * the timer cannot be freed. + * + * The lookup validates locklessly that timr::it_signal == + * current::it_signal and timr::it_id == @timer_id. timr::it_id + * can't change, but timr::it_signal becomes NULL during + * destruction. + */ rcu_read_lock(); timr = posix_timer_by_id(timer_id); if (timr) { spin_lock_irqsave(&timr->it_lock, *flags); + /* + * Validate under timr::it_lock that timr::it_signal is + * still valid. Pairs with #1 above. + */ if (timr->it_signal == current->signal) { rcu_read_unlock(); return timr; -- cgit From 028cf5eaa12846c4e32104132ff70ca1cd6f5943 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:05 +0200 Subject: posix-timers: Annotate concurrent access to k_itimer:: It_signal k_itimer::it_signal is read lockless in the RCU protected hash lookup, but it can be written concurrently in the timer_create() and timer_delete() path. Annotate these places with READ_ONCE() and WRITE_ONCE() Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.143596887@linutronix.de --- kernel/time/posix-timers.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index d7890ac703ae..de3fca8dae55 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -109,9 +109,9 @@ static struct k_itimer *__posix_timers_find(struct hlist_head *head, { struct k_itimer *timer; - hlist_for_each_entry_rcu(timer, head, t_hash, - lockdep_is_held(&hash_lock)) { - if ((timer->it_signal == sig) && (timer->it_id == id)) + hlist_for_each_entry_rcu(timer, head, t_hash, lockdep_is_held(&hash_lock)) { + /* timer->it_signal can be set concurrently */ + if ((READ_ONCE(timer->it_signal) == sig) && (timer->it_id == id)) return timer; } return NULL; @@ -558,7 +558,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, spin_lock_irq(¤t->sighand->siglock); /* This makes the timer valid in the hash table */ - new_timer->it_signal = current->signal; + WRITE_ONCE(new_timer->it_signal, current->signal); list_add(&new_timer->list, ¤t->signal->posix_timers); spin_unlock_irq(¤t->sighand->siglock); @@ -1052,10 +1052,10 @@ retry_delete: list_del(&timer->list); spin_unlock(¤t->sighand->siglock); /* - * This keeps any tasks waiting on the spin lock from thinking - * they got something (see the lock code above). + * A concurrent lookup could check timer::it_signal lockless. It + * will reevaluate with timer::it_lock held and observe the NULL. */ - timer->it_signal = NULL; + WRITE_ONCE(timer->it_signal, NULL); unlock_timer(timer, flags); release_posix_timer(timer, IT_ID_SET); -- cgit From 72786ff23d5acb7bf3e2535831b2f1dc55c7f44e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:06 +0200 Subject: posix-timers: Set k_itimer:: It_signal to NULL on exit() Technically it's not required to set k_itimer::it_signal to NULL on exit() because there is no other thread anymore which could lookup the timer concurrently. Set it to NULL for consistency sake and add a comment to that effect. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.196462644@linutronix.de --- kernel/time/posix-timers.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index de3fca8dae55..c1b77c597f5f 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1101,6 +1101,14 @@ retry_delete: } list_del(&timer->list); + /* + * Setting timer::it_signal to NULL is technically not required + * here as nothing can access the timer anymore legitimately via + * the hash table. Set it to NULL nevertheless so that all deletion + * paths are consistent. + */ + WRITE_ONCE(timer->it_signal, NULL); + spin_unlock_irqrestore(&timer->it_lock, flags); release_posix_timer(timer, IT_ID_SET); } -- cgit From 11fbe6cd41210c7b5173257158a22e11e225622d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:08 +0200 Subject: posix-timers: Remove pointless irqsafe from hash_lock All usage of hash_lock is in thread context. No point in using spin_lock_irqsave()/irqrestore() for a single usage site. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.249063953@linutronix.de --- kernel/time/posix-timers.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index c1b77c597f5f..ed7d260f51ef 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -471,10 +471,9 @@ static void k_itimer_rcu_free(struct rcu_head *head) static void release_posix_timer(struct k_itimer *tmr, int it_id_set) { if (it_id_set) { - unsigned long flags; - spin_lock_irqsave(&hash_lock, flags); + spin_lock(&hash_lock, flags); hlist_del_rcu(&tmr->t_hash); - spin_unlock_irqrestore(&hash_lock, flags); + spin_unlock(&hash_lock, flags); } put_pid(tmr->it_pid); sigqueue_free(tmr->sigq); -- cgit From 8cc96ca2c75f6da59de41321797c87562703c9e1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:09 +0200 Subject: posix-timers: Split release_posix_timers() release_posix_timers() is called for cleaning up both hashed and unhashed timers. The cases are differentiated by an argument and the usage is hideous. Seperate the actual free path out and use it for unhashed timers. Provide a function for hashed timers. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.301432503@linutronix.de --- kernel/time/posix-timers.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index ed7d260f51ef..6ac693331df9 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -466,20 +466,21 @@ static void k_itimer_rcu_free(struct rcu_head *head) kmem_cache_free(posix_timers_cache, tmr); } -#define IT_ID_SET 1 -#define IT_ID_NOT_SET 0 -static void release_posix_timer(struct k_itimer *tmr, int it_id_set) -{ - if (it_id_set) { - spin_lock(&hash_lock, flags); - hlist_del_rcu(&tmr->t_hash); - spin_unlock(&hash_lock, flags); - } +static void posix_timer_free(struct k_itimer *tmr) +{ put_pid(tmr->it_pid); sigqueue_free(tmr->sigq); call_rcu(&tmr->rcu, k_itimer_rcu_free); } +static void posix_timer_unhash_and_free(struct k_itimer *tmr) +{ + spin_lock(&hash_lock); + hlist_del_rcu(&tmr->t_hash); + spin_unlock(&hash_lock); + posix_timer_free(tmr); +} + static int common_timer_create(struct k_itimer *new_timer) { hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0); @@ -493,7 +494,6 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, const struct k_clock *kc = clockid_to_kclock(which_clock); struct k_itimer *new_timer; int error, new_timer_id; - int it_id_set = IT_ID_NOT_SET; if (!kc) return -EINVAL; @@ -513,11 +513,10 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, */ new_timer_id = posix_timer_add(new_timer); if (new_timer_id < 0) { - error = new_timer_id; - goto out; + posix_timer_free(new_timer); + return new_timer_id; } - it_id_set = IT_ID_SET; new_timer->it_id = (timer_t) new_timer_id; new_timer->it_clock = which_clock; new_timer->kclock = kc; @@ -569,7 +568,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, * new_timer after the unlock call. */ out: - release_posix_timer(new_timer, it_id_set); + posix_timer_unhash_and_free(new_timer); return error; } @@ -1057,7 +1056,7 @@ retry_delete: WRITE_ONCE(timer->it_signal, NULL); unlock_timer(timer, flags); - release_posix_timer(timer, IT_ID_SET); + posix_timer_unhash_and_free(timer); return 0; } @@ -1109,7 +1108,7 @@ retry_delete: WRITE_ONCE(timer->it_signal, NULL); spin_unlock_irqrestore(&timer->it_lock, flags); - release_posix_timer(timer, IT_ID_SET); + posix_timer_unhash_and_free(timer); } /* -- cgit From 01679b5db7172b898be325ff272e10aebd412911 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:11 +0200 Subject: posix-timers: Document sys_clock_getres() correctly The decades old comment about Posix clock resolution is confusing at best. Remove it and add a proper explanation to sys_clock_getres(). Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.356427330@linutronix.de --- kernel/time/posix-timers.c | 81 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 73 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 6ac693331df9..1acdd04e5d26 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -67,14 +67,6 @@ static const struct k_clock clock_realtime, clock_monotonic; * to implement others. This structure defines the various * clocks. * - * RESOLUTION: Clock resolution is used to round up timer and interval - * times, NOT to report clock times, which are reported with as - * much resolution as the system can muster. In some cases this - * resolution may depend on the underlying clock hardware and - * may not be quantifiable until run time, and only then is the - * necessary code is written. The standard says we should say - * something about this issue in the documentation... - * * FUNCTIONS: The CLOCKs structure defines possible functions to * handle various clock functions. * @@ -1198,6 +1190,79 @@ SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, return err; } +/** + * sys_clock_getres - Get the resolution of a clock + * @which_clock: The clock to get the resolution for + * @tp: Pointer to a a user space timespec64 for storage + * + * POSIX defines: + * + * "The clock_getres() function shall return the resolution of any + * clock. Clock resolutions are implementation-defined and cannot be set by + * a process. If the argument res is not NULL, the resolution of the + * specified clock shall be stored in the location pointed to by res. If + * res is NULL, the clock resolution is not returned. If the time argument + * of clock_settime() is not a multiple of res, then the value is truncated + * to a multiple of res." + * + * Due to the various hardware constraints the real resolution can vary + * wildly and even change during runtime when the underlying devices are + * replaced. The kernel also can use hardware devices with different + * resolutions for reading the time and for arming timers. + * + * The kernel therefore deviates from the POSIX spec in various aspects: + * + * 1) The resolution returned to user space + * + * For CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_TAI, + * CLOCK_REALTIME_ALARM, CLOCK_BOOTTIME_ALAREM and CLOCK_MONOTONIC_RAW + * the kernel differentiates only two cases: + * + * I) Low resolution mode: + * + * When high resolution timers are disabled at compile or runtime + * the resolution returned is nanoseconds per tick, which represents + * the precision at which timers expire. + * + * II) High resolution mode: + * + * When high resolution timers are enabled the resolution returned + * is always one nanosecond independent of the actual resolution of + * the underlying hardware devices. + * + * For CLOCK_*_ALARM the actual resolution depends on system + * state. When system is running the resolution is the same as the + * resolution of the other clocks. During suspend the actual + * resolution is the resolution of the underlying RTC device which + * might be way less precise than the clockevent device used during + * running state. + * + * For CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE the resolution + * returned is always nanoseconds per tick. + * + * For CLOCK_PROCESS_CPUTIME and CLOCK_THREAD_CPUTIME the resolution + * returned is always one nanosecond under the assumption that the + * underlying scheduler clock has a better resolution than nanoseconds + * per tick. + * + * For dynamic POSIX clocks (PTP devices) the resolution returned is + * always one nanosecond. + * + * 2) Affect on sys_clock_settime() + * + * The kernel does not truncate the time which is handed in to + * sys_clock_settime(). The kernel internal timekeeping is always using + * nanoseconds precision independent of the clocksource device which is + * used to read the time from. The resolution of that device only + * affects the presicion of the time returned by sys_clock_gettime(). + * + * Returns: + * 0 Success. @tp contains the resolution + * -EINVAL @which_clock is not a valid clock ID + * -EFAULT Copying the resolution to @tp faulted + * -ENODEV Dynamic POSIX clock is not backed by a device + * -EOPNOTSUPP Dynamic POSIX clock does not support getres() + */ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct __kernel_timespec __user *, tp) { -- cgit From a86e9284336729a8f79a65371eacd0c1c7fae142 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:12 +0200 Subject: posix-timers: Document common_clock_get() correctly Replace another confusing and inaccurate set of comments. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.409169321@linutronix.de --- kernel/time/posix-timers.c | 50 +++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 20 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 1acdd04e5d26..e1af74ca80ab 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -660,20 +660,16 @@ static s64 common_hrtimer_forward(struct k_itimer *timr, ktime_t now) } /* - * Get the time remaining on a POSIX.1b interval timer. This function - * is ALWAYS called with spin_lock_irq on the timer, thus it must not - * mess with irq. + * Get the time remaining on a POSIX.1b interval timer. * - * We have a couple of messes to clean up here. First there is the case - * of a timer that has a requeue pending. These timers should appear to - * be in the timer list with an expiry as if we were to requeue them - * now. + * Two issues to handle here: * - * The second issue is the SIGEV_NONE timer which may be active but is - * not really ever put in the timer list (to save system resources). - * This timer may be expired, and if so, we will do it here. Otherwise - * it is the same as a requeue pending timer WRT to what we should - * report. + * 1) The timer has a requeue pending. The return value must appear as + * if the timer has been requeued right now. + * + * 2) The timer is a SIGEV_NONE timer. These timers are never enqueued + * into the hrtimer queue and therefore never expired. Emulate expiry + * here taking #1 into account. */ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) { @@ -689,8 +685,12 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) cur_setting->it_interval = ktime_to_timespec64(iv); } else if (!timr->it_active) { /* - * SIGEV_NONE oneshot timers are never queued. Check them - * below. + * SIGEV_NONE oneshot timers are never queued and therefore + * timr->it_active is always false. The check below + * vs. remaining time will handle this case. + * + * For all other timers there is nothing to update here, so + * return. */ if (!sig_none) return; @@ -699,18 +699,29 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) now = kc->clock_get_ktime(timr->it_clock); /* - * When a requeue is pending or this is a SIGEV_NONE timer move the - * expiry time forward by intervals, so expiry is > now. + * If this is an interval timer and either has requeue pending or + * is a SIGEV_NONE timer move the expiry time forward by intervals, + * so expiry is > now. */ if (iv && (timr->it_requeue_pending & REQUEUE_PENDING || sig_none)) timr->it_overrun += kc->timer_forward(timr, now); remaining = kc->timer_remaining(timr, now); - /* Return 0 only, when the timer is expired and not pending */ + /* + * As @now is retrieved before a possible timer_forward() and + * cannot be reevaluated by the compiler @remaining is based on the + * same @now value. Therefore @remaining is consistent vs. @now. + * + * Consequently all interval timers, i.e. @iv > 0, cannot have a + * remaining time <= 0 because timer_forward() guarantees to move + * them forward so that the next timer expiry is > @now. + */ if (remaining <= 0) { /* - * A single shot SIGEV_NONE timer must return 0, when - * it is expired ! + * A single shot SIGEV_NONE timer must return 0, when it is + * expired! Timers which have a real signal delivery mode + * must return a remaining time greater than 0 because the + * signal has not yet been delivered. */ if (!sig_none) cur_setting->it_value.tv_nsec = 1; @@ -719,7 +730,6 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) } } -/* Get the time remaining on a POSIX.1b interval timer. */ static int do_timer_gettime(timer_t timer_id, struct itimerspec64 *setting) { struct k_itimer *timr; -- cgit From 65cade468dee537885b51897ba41ba05a4dcf6ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:14 +0200 Subject: posix-timers: Document sys_clock_getoverrun() Document the syscall in detail and with coherent sentences. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.462051641@linutronix.de --- kernel/time/posix-timers.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index e1af74ca80ab..191ecf59cb98 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -783,14 +783,23 @@ SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id, #endif -/* - * Get the number of overruns of a POSIX.1b interval timer. This is to - * be the overrun of the timer last delivered. At the same time we are - * accumulating overruns on the next timer. The overrun is frozen when - * the signal is delivered, either at the notify time (if the info block - * is not queued) or at the actual delivery time (as we are informed by - * the call back to posixtimer_rearm(). So all we need to do is - * to pick up the frozen overrun. +/** + * sys_timer_getoverrun - Get the number of overruns of a POSIX.1b interval timer + * @timer_id: The timer ID which identifies the timer + * + * The "overrun count" of a timer is one plus the number of expiration + * intervals which have elapsed between the first expiry, which queues the + * signal and the actual signal delivery. On signal delivery the "overrun + * count" is calculated and cached, so it can be returned directly here. + * + * As this is relative to the last queued signal the returned overrun count + * is meaningless outside of the signal delivery path and even there it + * does not accurately reflect the current state when user space evaluates + * it. + * + * Returns: + * -EINVAL @timer_id is invalid + * 1..INT_MAX The number of overruns related to the last delivered signal */ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) { -- cgit From 3561fcb402b7ab7fdb4c1746dae4995889506605 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:16 +0200 Subject: posix-timers: Document sys_clock_settime() permissions in place The documentation of sys_clock_settime() permissions is at a random place and mostly word salad. Remove it and add a concise comment into sys_clock_settime(). Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.514700292@linutronix.de --- kernel/time/posix-timers.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 191ecf59cb98..03ef6af1633b 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -74,13 +74,6 @@ static const struct k_clock clock_realtime, clock_monotonic; * following: 1.) The k_itimer struct (sched.h) is used for * the timer. 2.) The list, it_lock, it_clock, it_id and * it_pid fields are not modified by timer code. - * - * Permissions: It is assumed that the clock_settime() function defined - * for each clock will take care of permission checks. Some - * clocks may be set able by any user (i.e. local process - * clocks) others not. Currently the only set able clock we - * have is CLOCK_REALTIME and its high res counter part, both of - * which we beg off on and pass to do_sys_settimeofday(). */ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags); @@ -1159,6 +1152,10 @@ SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, if (get_timespec64(&new_tp, tp)) return -EFAULT; + /* + * Permission checks have to be done inside the clock specific + * setter callback. + */ return kc->clock_set(which_clock, &new_tp); } -- cgit From 640fe745d7d4b6e47f3b455cb5de99a08c6b6d23 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:17 +0200 Subject: posix-timers: Document nanosleep() details The descriptions for common_nsleep() is wrong and common_nsleep_timens() lacks any form of comment. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.567072835@linutronix.de --- kernel/time/posix-timers.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 03ef6af1633b..54adb4cfd4f2 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1370,7 +1370,7 @@ SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock, #endif /* - * nanosleep for monotonic and realtime clocks + * sys_clock_nanosleep() for CLOCK_REALTIME and CLOCK_TAI */ static int common_nsleep(const clockid_t which_clock, int flags, const struct timespec64 *rqtp) @@ -1382,8 +1382,13 @@ static int common_nsleep(const clockid_t which_clock, int flags, which_clock); } +/* + * sys_clock_nanosleep() for CLOCK_MONOTONIC and CLOCK_BOOTTIME + * + * Absolute nanosleeps for these clocks are time-namespace adjusted. + */ static int common_nsleep_timens(const clockid_t which_clock, int flags, - const struct timespec64 *rqtp) + const struct timespec64 *rqtp) { ktime_t texp = timespec64_to_ktime(*rqtp); -- cgit From 52f090b164b59c06a4da87c0599424809a6bba16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:19 +0200 Subject: posix-timers: Add proper comments in do_timer_create() The comment about timer lifetime at the end of the function is misplaced and uncomprehensible. Make it understandable and put it at the right place. Add a new comment about the visibility of the new timer ID to user space. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.619897296@linutronix.de --- kernel/time/posix-timers.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 54adb4cfd4f2..20d3b991ec49 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -529,12 +529,17 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, new_timer->sigq->info.si_tid = new_timer->it_id; new_timer->sigq->info.si_code = SI_TIMER; - if (copy_to_user(created_timer_id, - &new_timer_id, sizeof (new_timer_id))) { + if (copy_to_user(created_timer_id, &new_timer_id, sizeof (new_timer_id))) { error = -EFAULT; goto out; } - + /* + * After succesful copy out, the timer ID is visible to user space + * now but not yet valid because new_timer::signal is still NULL. + * + * Complete the initialization with the clock specific create + * callback. + */ error = kc->timer_create(new_timer); if (error) goto out; @@ -544,14 +549,11 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, WRITE_ONCE(new_timer->it_signal, current->signal); list_add(&new_timer->list, ¤t->signal->posix_timers); spin_unlock_irq(¤t->sighand->siglock); - - return 0; /* - * In the case of the timer belonging to another task, after - * the task is unlocked, the timer is owned by the other task - * and may cease to exist at any time. Don't use or modify - * new_timer after the unlock call. + * After unlocking sighand::siglock @new_timer is subject to + * concurrent removal and cannot be touched anymore */ + return 0; out: posix_timer_unhash_and_free(new_timer); return error; -- cgit From c575689d66378611bb04137b96e67ebb3ac8482b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:20 +0200 Subject: posix-timers: Comment SIGEV_THREAD_ID properly Replace the word salad. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.672220780@linutronix.de --- kernel/time/posix-timers.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 20d3b991ec49..c8b0f52e5eb6 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -53,12 +53,9 @@ static const struct k_clock * const posix_clocks[]; static const struct k_clock *clockid_to_kclock(const clockid_t id); static const struct k_clock clock_realtime, clock_monotonic; -/* - * we assume that the new SIGEV_THREAD_ID shares no bits with the other - * SIGEV values. Here we put out an error if this assumption fails. - */ +/* SIGEV_THREAD_ID cannot share a bit with the other SIGEV values. */ #if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \ - ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD)) + ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD)) #error "SIGEV_THREAD_ID must not share bit with other SIGEV values!" #endif -- cgit From 02972d7955341b711d4c392f14faa9f9cd69d551 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:22 +0200 Subject: posix-timers: Clarify posix_timer_rearm() comment Yet another incomprehensible piece of art. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.724863461@linutronix.de --- kernel/time/posix-timers.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index c8b0f52e5eb6..d8d2169408f4 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -275,15 +275,9 @@ static void common_hrtimer_rearm(struct k_itimer *timr) } /* - * This function is exported for use by the signal deliver code. It is - * called just prior to the info block being released and passes that - * block to us. It's function is to update the overrun entry AND to - * restart the timer. It should only be called if the timer is to be - * restarted (i.e. we have flagged this in the sys_private entry of the - * info block). - * - * To protect against the timer going away while the interrupt is queued, - * we require that the it_requeue_pending flag be set. + * This function is called from the signal delivery code if + * info->si_sys_private is not zero, which indicates that the timer has to + * be rearmed. Restart the timer and update info::si_overrun. */ void posixtimer_rearm(struct kernel_siginfo *info) { -- cgit From 84999b8bdb4969816c7bb7c14c3a55ed42aa4b94 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Jun 2023 21:07:37 +0200 Subject: posix-timers: Clarify posix_timer_fn() comments Make the issues vs. SIG_IGN understandable and remove the 15 years old promise that a proper solution is already on the horizon. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/874jnrdmrq.ffs@tglx --- kernel/time/posix-timers.c | 62 ++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 30 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index d8d2169408f4..ae8799e97fcd 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -326,11 +326,11 @@ int posix_timer_event(struct k_itimer *timr, int si_private) } /* - * This function gets called when a POSIX.1b interval timer expires. It - * is used as a callback from the kernel internal timer. The - * run_timer_list code ALWAYS calls with interrupts on. - - * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers. + * This function gets called when a POSIX.1b interval timer expires from + * the HRTIMER interrupt (soft interrupt on RT kernels). + * + * Handles CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME and CLOCK_TAI + * based timers. */ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) { @@ -348,9 +348,10 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) if (posix_timer_event(timr, si_private)) { /* - * signal was not sent because of sig_ignor - * we will not get a call back to restart it AND - * it should be restarted. + * The signal was not queued due to SIG_IGN. As a + * consequence the timer is not going to be rearmed from + * the signal delivery path. But as a real signal handler + * can be installed later the timer must be rearmed here. */ if (timr->it_interval != 0) { ktime_t now = hrtimer_cb_get_time(timer); @@ -359,34 +360,35 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) * FIXME: What we really want, is to stop this * timer completely and restart it in case the * SIG_IGN is removed. This is a non trivial - * change which involves sighand locking - * (sigh !), which we don't want to do late in - * the release cycle. + * change to the signal handling code. + * + * For now let timers with an interval less than a + * jiffie expire every jiffie and recheck for a + * valid signal handler. + * + * This avoids interrupt starvation in case of a + * very small interval, which would expire the + * timer immediately again. * - * For now we just let timers with an interval - * less than a jiffie expire every jiffie to - * avoid softirq starvation in case of SIG_IGN - * and a very small interval, which would put - * the timer right back on the softirq pending - * list. By moving now ahead of time we trick - * hrtimer_forward() to expire the timer - * later, while we still maintain the overrun - * accuracy, but have some inconsistency in - * the timer_gettime() case. This is at least - * better than a starved softirq. A more - * complex fix which solves also another related - * inconsistency is already in the pipeline. + * Moving now ahead of time by one jiffie tricks + * hrtimer_forward() to expire the timer later, + * while it still maintains the overrun accuracy + * for the price of a slight inconsistency in the + * timer_gettime() case. This is at least better + * than a timer storm. + * + * Only required when high resolution timers are + * enabled as the periodic tick based timers are + * automatically aligned to the next tick. */ -#ifdef CONFIG_HIGH_RES_TIMERS - { - ktime_t kj = NSEC_PER_SEC / HZ; + if (IS_ENABLED(CONFIG_HIGHRES_TIMERS)) { + ktime_t kj = TICK_NSEC; if (timr->it_interval < kj) now = ktime_add(now, kj); } -#endif - timr->it_overrun += hrtimer_forward(timer, now, - timr->it_interval); + + timr->it_overrun += hrtimer_forward(timer, now, timr->it_interval); ret = HRTIMER_RESTART; ++timr->it_requeue_pending; timr->it_active = 1; -- cgit From 200dbd6d14e6a3b22162d78b4f7a253426dba7ee Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:25 +0200 Subject: posix-timers: Remove pointless comments Documenting the obvious is just consuming space for no value. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.832240451@linutronix.de --- kernel/time/posix-timers.c | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index ae8799e97fcd..d357728b7f00 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -59,19 +59,6 @@ static const struct k_clock clock_realtime, clock_monotonic; #error "SIGEV_THREAD_ID must not share bit with other SIGEV values!" #endif -/* - * CLOCKs: The POSIX standard calls for a couple of clocks and allows us - * to implement others. This structure defines the various - * clocks. - * - * FUNCTIONS: The CLOCKs structure defines possible functions to - * handle various clock functions. - * - * The standard POSIX timer management code assumes the - * following: 1.) The k_itimer struct (sched.h) is used for - * the timer. 2.) The list, it_lock, it_clock, it_id and - * it_pid fields are not modified by timer code. - */ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags); #define lock_timer(tid, flags) \ @@ -141,7 +128,6 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) spin_unlock_irqrestore(&timr->it_lock, flags); } -/* Get clock_realtime */ static int posix_get_realtime_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_real_ts64(tp); @@ -153,7 +139,6 @@ static ktime_t posix_get_realtime_ktime(clockid_t which_clock) return ktime_get_real(); } -/* Set clock_realtime */ static int posix_clock_realtime_set(const clockid_t which_clock, const struct timespec64 *tp) { @@ -166,9 +151,6 @@ static int posix_clock_realtime_adj(const clockid_t which_clock, return do_adjtimex(t); } -/* - * Get monotonic time for posix timers - */ static int posix_get_monotonic_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_ts64(tp); @@ -181,9 +163,6 @@ static ktime_t posix_get_monotonic_ktime(clockid_t which_clock) return ktime_get(); } -/* - * Get monotonic-raw time for posix timers - */ static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp) { ktime_get_raw_ts64(tp); @@ -191,7 +170,6 @@ static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp) return 0; } - static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec64 *tp) { ktime_get_coarse_real_ts64(tp); @@ -242,9 +220,6 @@ static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp) return 0; } -/* - * Initialize everything, well, just everything in Posix clocks/timers ;) - */ static __init int init_posix_timers(void) { posix_timers_cache = kmem_cache_create("posix_timers_cache", -- cgit From b96ce4931fcd13d73e32c62c2df3fa8f9f467e33 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Apr 2023 20:49:27 +0200 Subject: posix-timers: Polish coding style in a few places Make it consistent with the TIP tree documentation. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20230425183313.888493625@linutronix.de --- kernel/time/posix-timers.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index d357728b7f00..e3cddd5f3c7f 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -309,10 +309,10 @@ int posix_timer_event(struct k_itimer *timr, int si_private) */ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) { + enum hrtimer_restart ret = HRTIMER_NORESTART; struct k_itimer *timr; unsigned long flags; int si_private = 0; - enum hrtimer_restart ret = HRTIMER_NORESTART; timr = container_of(timer, struct k_itimer, it.real.timer); spin_lock_irqsave(&timr->it_lock, flags); @@ -400,8 +400,8 @@ static struct pid *good_sigevent(sigevent_t * event) static struct k_itimer * alloc_posix_timer(void) { - struct k_itimer *tmr; - tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL); + struct k_itimer *tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL); + if (!tmr) return tmr; if (unlikely(!(tmr->sigq = sigqueue_alloc()))) { @@ -695,8 +695,8 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) static int do_timer_gettime(timer_t timer_id, struct itimerspec64 *setting) { - struct k_itimer *timr; const struct k_clock *kc; + struct k_itimer *timr; unsigned long flags; int ret = 0; @@ -767,8 +767,8 @@ SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id, SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) { struct k_itimer *timr; - int overrun; unsigned long flags; + int overrun; timr = lock_timer(timer_id, &flags); if (!timr) @@ -941,8 +941,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, const struct __kernel_itimerspec __user *, new_setting, struct __kernel_itimerspec __user *, old_setting) { - struct itimerspec64 new_spec, old_spec; - struct itimerspec64 *rtn = old_setting ? &old_spec : NULL; + struct itimerspec64 new_spec, old_spec, *rtn; int error = 0; if (!new_setting) @@ -951,6 +950,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, if (get_itimerspec64(&new_spec, new_setting)) return -EFAULT; + rtn = old_setting ? &old_spec : NULL; error = do_timer_settime(timer_id, flags, &new_spec, rtn); if (!error && old_setting) { if (put_itimerspec64(&old_spec, old_setting)) -- cgit From b9a40f24d8ea86f89f1299a3f2dccd601febe3e5 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 9 Jun 2023 11:46:43 +0200 Subject: posix-timers: Refer properly to CONFIG_HIGH_RES_TIMERS Commit c78f261e5dcb ("posix-timers: Clarify posix_timer_fn() comments") turns an ifdef CONFIG_HIGH_RES_TIMERS into an conditional on "IS_ENABLED(CONFIG_HIGHRES_TIMERS)"; note that the new conditional refers to "HIGHRES_TIMERS" not "HIGH_RES_TIMERS" as before. Fix this typo introduced in that refactoring. Fixes: c78f261e5dcb ("posix-timers: Clarify posix_timer_fn() comments") Signed-off-by: Lukas Bulwahn Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230609094643.26253-1-lukas.bulwahn@gmail.com --- kernel/time/posix-timers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index e3cddd5f3c7f..b924f0f096fa 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -356,7 +356,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) * enabled as the periodic tick based timers are * automatically aligned to the next tick. */ - if (IS_ENABLED(CONFIG_HIGHRES_TIMERS)) { + if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS)) { ktime_t kj = TICK_NSEC; if (timr->it_interval < kj) -- cgit From 986af8dc5af834071a8a7afdf2c3b7a57d562df2 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Sat, 10 Jun 2023 02:28:56 +0800 Subject: alarmtimer: Remove unnecessary initialization of variable 'ret' ret is assigned before checked, so it does not need to initialize the variable Signed-off-by: Li zeming Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230609182856.4660-1-zeming@nfschina.com --- kernel/time/alarmtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 82b28ab0f328..09c9cde06f20 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -847,7 +847,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, struct restart_block *restart = ¤t->restart_block; struct alarm alarm; ktime_t exp; - int ret = 0; + int ret; if (!alarmtimer_get_rtcdev()) return -EOPNOTSUPP; -- cgit From fc4b4d96f793c827a5abf23df3e793592dbd38ce Mon Sep 17 00:00:00 2001 From: Li zeming Date: Sat, 10 Jun 2023 02:20:59 +0800 Subject: alarmtimer: Remove unnecessary (void *) cast Pointers of type void * do not require a type cast when they are assigned to a real pointer. Signed-off-by: Li zeming Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230609182059.4509-1-zeming@nfschina.com --- kernel/time/alarmtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 09c9cde06f20..8d9f13d847f0 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -751,7 +751,7 @@ static int alarm_timer_create(struct k_itimer *new_timer) static enum alarmtimer_restart alarmtimer_nsleep_wakeup(struct alarm *alarm, ktime_t now) { - struct task_struct *task = (struct task_struct *)alarm->data; + struct task_struct *task = alarm->data; alarm->data = NULL; if (task) -- cgit From a7e282c77785c7eabf98836431b1f029481085ad Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 5 May 2023 00:12:53 +0800 Subject: tick/rcu: Fix bogus ratelimit condition The ratelimit logic in report_idle_softirq() is broken because the exit condition is always true: static int ratelimit; if (ratelimit < 10) return false; ---> always returns here ratelimit++; ---> no chance to run Make it check for >= 10 instead. Fixes: 0345691b24c0 ("tick/rcu: Stop allowing RCU_SOFTIRQ in idle") Signed-off-by: Wen Yang Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/tencent_5AAA3EEAB42095C9B7740BE62FBF9A67E007@qq.com --- kernel/time/tick-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 52254679ec48..89055050d1ac 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1030,7 +1030,7 @@ static bool report_idle_softirq(void) return false; } - if (ratelimit < 10) + if (ratelimit >= 10) return false; /* On RT, softirqs handling may be waiting on some lock */ -- cgit From ccaa4926c2264ca2a2fcad4b3511fe435d7d4d15 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 21 Jun 2023 08:59:28 +0100 Subject: hrtimer: Add missing sparse annotations to hrtimer locking Sparse warns about lock imbalance vs. the hrtimer_base lock due to missing sparse annotations: kernel/time/hrtimer.c:175:33: warning: context imbalance in 'lock_hrtimer_base' - wrong count at exit kernel/time/hrtimer.c:1301:28: warning: context imbalance in 'hrtimer_start_range_ns' - unexpected unlock kernel/time/hrtimer.c:1336:28: warning: context imbalance in 'hrtimer_try_to_cancel' - unexpected unlock kernel/time/hrtimer.c:1457:9: warning: context imbalance in '__hrtimer_get_remaining' - unexpected unlock Add the annotations to the relevant functions. Signed-off-by: Ben Dooks Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230621075928.394481-1-ben.dooks@codethink.co.uk --- kernel/time/hrtimer.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index e8c08292defc..238262e4aba7 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -164,6 +164,7 @@ static inline bool is_migration_base(struct hrtimer_clock_base *base) static struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) + __acquires(&timer->base->lock) { struct hrtimer_clock_base *base; @@ -280,6 +281,7 @@ static inline bool is_migration_base(struct hrtimer_clock_base *base) static inline struct hrtimer_clock_base * lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) + __acquires(&timer->base->cpu_base->lock) { struct hrtimer_clock_base *base = timer->base; @@ -1013,6 +1015,7 @@ void hrtimers_resume_local(void) */ static inline void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) + __releases(&timer->base->cpu_base->lock) { raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); } -- cgit