Diffstat (limited to 'kernel')
-rw-r--r--   kernel/time/ntp.c            | 229
-rw-r--r--   kernel/time/ntp_internal.h   |   7
-rw-r--r--   kernel/time/tick-sched.c     |  74
3 files changed, 193 insertions, 117 deletions
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 069ca78fb0bf..7404d3831527 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -494,65 +494,74 @@ out:
         return leap;
 }
 
+#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
 static void sync_hw_clock(struct work_struct *work);
-static DECLARE_DELAYED_WORK(sync_work, sync_hw_clock);
-
-static void sched_sync_hw_clock(struct timespec64 now,
-                                unsigned long target_nsec, bool fail)
+static DECLARE_WORK(sync_work, sync_hw_clock);
+static struct hrtimer sync_hrtimer;
+#define SYNC_PERIOD_NS (11UL * 60 * NSEC_PER_SEC)
+static enum hrtimer_restart sync_timer_callback(struct hrtimer *timer)
 {
-        struct timespec64 next;
-
-        ktime_get_real_ts64(&next);
-        if (!fail)
-                next.tv_sec = 659;
-        else {
-                /*
-                 * Try again as soon as possible. Delaying long periods
-                 * decreases the accuracy of the work queue timer. Due to this
-                 * the algorithm is very likely to require a short-sleep retry
-                 * after the above long sleep to synchronize ts_nsec.
-                 */
-                next.tv_sec = 0;
-        }
-
-        /* Compute the needed delay that will get to tv_nsec == target_nsec */
-        next.tv_nsec = target_nsec - next.tv_nsec;
-        if (next.tv_nsec <= 0)
-                next.tv_nsec += NSEC_PER_SEC;
-        if (next.tv_nsec >= NSEC_PER_SEC) {
-                next.tv_sec++;
-                next.tv_nsec -= NSEC_PER_SEC;
-        }
+        queue_work(system_power_efficient_wq, &sync_work);
 
-        queue_delayed_work(system_power_efficient_wq, &sync_work,
-                           timespec64_to_jiffies(&next));
+        return HRTIMER_NORESTART;
 }
 
-static void sync_rtc_clock(void)
+static void sched_sync_hw_clock(unsigned long offset_nsec, bool retry)
 {
-        unsigned long target_nsec;
-        struct timespec64 adjust, now;
-        int rc;
+        ktime_t exp = ktime_set(ktime_get_real_seconds(), 0);
 
-        if (!IS_ENABLED(CONFIG_RTC_SYSTOHC))
-                return;
+        if (retry)
+                exp = ktime_add_ns(exp, 2 * NSEC_PER_SEC - offset_nsec);
+        else
+                exp = ktime_add_ns(exp, SYNC_PERIOD_NS - offset_nsec);
 
-        ktime_get_real_ts64(&now);
+        hrtimer_start(&sync_hrtimer, exp, HRTIMER_MODE_ABS);
+}
 
-        adjust = now;
-        if (persistent_clock_is_local)
-                adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
+/*
+ * Check whether @now is correct versus the required time to update the RTC
+ * and calculate the value which needs to be written to the RTC so that the
+ * next seconds increment of the RTC after the write is aligned with the next
+ * seconds increment of clock REALTIME.
+ *
+ * tsched     t1 write(t2.tv_sec - 1sec))    t2 RTC increments seconds
+ *
+ * t2.tv_nsec == 0
+ * tsched = t2 - set_offset_nsec
+ * newval = t2 - NSEC_PER_SEC
+ *
+ * ==> neval = tsched + set_offset_nsec - NSEC_PER_SEC
+ *
+ * As the execution of this code is not guaranteed to happen exactly at
+ * tsched this allows it to happen within a fuzzy region:
+ *
+ *      abs(now - tsched) < FUZZ
+ *
+ * If @now is not inside the allowed window the function returns false.
+ */
+static inline bool rtc_tv_nsec_ok(unsigned long set_offset_nsec,
+                                  struct timespec64 *to_set,
+                                  const struct timespec64 *now)
+{
+        /* Allowed error in tv_nsec, arbitarily set to 5 jiffies in ns. */
+        const unsigned long TIME_SET_NSEC_FUZZ = TICK_NSEC * 5;
+        struct timespec64 delay = {.tv_sec = -1,
+                                   .tv_nsec = set_offset_nsec};
 
-        /*
-         * The current RTC in use will provide the target_nsec it wants to be
-         * called at, and does rtc_tv_nsec_ok internally.
-         */
-        rc = rtc_set_ntp_time(adjust, &target_nsec);
-        if (rc == -ENODEV)
-                return;
+        *to_set = timespec64_add(*now, delay);
+
+        if (to_set->tv_nsec < TIME_SET_NSEC_FUZZ) {
+                to_set->tv_nsec = 0;
+                return true;
+        }
 
-        sched_sync_hw_clock(now, target_nsec, rc);
+        if (to_set->tv_nsec > NSEC_PER_SEC - TIME_SET_NSEC_FUZZ) {
+                to_set->tv_sec++;
+                to_set->tv_nsec = 0;
+                return true;
+        }
+        return false;
 }
 
 #ifdef CONFIG_GENERIC_CMOS_UPDATE
@@ -560,48 +569,47 @@ int __weak update_persistent_clock64(struct timespec64 now64)
 {
         return -ENODEV;
 }
+#else
+static inline int update_persistent_clock64(struct timespec64 now64)
+{
+        return -ENODEV;
+}
 #endif
 
-static bool sync_cmos_clock(void)
+#ifdef CONFIG_RTC_SYSTOHC
+/* Save NTP synchronized time to the RTC */
+static int update_rtc(struct timespec64 *to_set, unsigned long *offset_nsec)
 {
-        static bool no_cmos;
-        struct timespec64 now;
-        struct timespec64 adjust;
-        int rc = -EPROTO;
-        long target_nsec = NSEC_PER_SEC / 2;
+        struct rtc_device *rtc;
+        struct rtc_time tm;
+        int err = -ENODEV;
 
-        if (!IS_ENABLED(CONFIG_GENERIC_CMOS_UPDATE))
-                return false;
+        rtc = rtc_class_open(CONFIG_RTC_SYSTOHC_DEVICE);
+        if (!rtc)
+                return -ENODEV;
 
-        if (no_cmos)
-                return false;
+        if (!rtc->ops || !rtc->ops->set_time)
+                goto out_close;
 
-        /*
-         * Historically update_persistent_clock64() has followed x86
-         * semantics, which match the MC146818A/etc RTC. This RTC will store
-         * 'adjust' and then in .5s it will advance once second.
-         *
-         * Architectures are strongly encouraged to use rtclib and not
-         * implement this legacy API.
-         */
-        ktime_get_real_ts64(&now);
-        if (rtc_tv_nsec_ok(-1 * target_nsec, &adjust, &now)) {
-                if (persistent_clock_is_local)
-                        adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
-                rc = update_persistent_clock64(adjust);
-                /*
-                 * The machine does not support update_persistent_clock64 even
-                 * though it defines CONFIG_GENERIC_CMOS_UPDATE.
-                 */
-                if (rc == -ENODEV) {
-                        no_cmos = true;
-                        return false;
-                }
+        /* First call might not have the correct offset */
+        if (*offset_nsec == rtc->set_offset_nsec) {
+                rtc_time64_to_tm(to_set->tv_sec, &tm);
+                err = rtc_set_time(rtc, &tm);
+        } else {
+                /* Store the update offset and let the caller try again */
+                *offset_nsec = rtc->set_offset_nsec;
+                err = -EAGAIN;
         }
-
-        sched_sync_hw_clock(now, target_nsec, rc);
-        return true;
+out_close:
+        rtc_class_close(rtc);
+        return err;
+}
+#else
+static inline int update_rtc(struct timespec64 *to_set, unsigned long *offset_nsec)
+{
+        return -ENODEV;
 }
+#endif
 
 /*
  * If we have an externally synchronized Linux clock, then update RTC clock
@@ -613,24 +621,64 @@ static bool sync_cmos_clock(void)
  */
 static void sync_hw_clock(struct work_struct *work)
 {
-        if (!ntp_synced())
-                return;
+        /*
+         * The default synchronization offset is 500ms for the deprecated
+         * update_persistent_clock64() under the assumption that it uses
+         * the infamous CMOS clock (MC146818).
+         */
+        static unsigned long offset_nsec = NSEC_PER_SEC / 2;
+        struct timespec64 now, to_set;
+        int res = -EAGAIN;
 
-        if (sync_cmos_clock())
+        /*
+         * Don't update if STA_UNSYNC is set and if ntp_notify_cmos_timer()
+         * managed to schedule the work between the timer firing and the
+         * work being able to rearm the timer. Wait for the timer to expire.
+         */
+        if (!ntp_synced() || hrtimer_is_queued(&sync_hrtimer))
                 return;
 
-        sync_rtc_clock();
+        ktime_get_real_ts64(&now);
+        /* If @now is not in the allowed window, try again */
+        if (!rtc_tv_nsec_ok(offset_nsec, &to_set, &now))
+                goto rearm;
+
+        /* Take timezone adjusted RTCs into account */
+        if (persistent_clock_is_local)
+                to_set.tv_sec -= (sys_tz.tz_minuteswest * 60);
+
+        /* Try the legacy RTC first. */
+        res = update_persistent_clock64(to_set);
+        if (res != -ENODEV)
+                goto rearm;
+
+        /* Try the RTC class */
+        res = update_rtc(&to_set, &offset_nsec);
+        if (res == -ENODEV)
+                return;
+rearm:
+        sched_sync_hw_clock(offset_nsec, res != 0);
 }
 
 void ntp_notify_cmos_timer(void)
 {
-        if (!ntp_synced())
-                return;
+        /*
+         * When the work is currently executed but has not yet the timer
+         * rearmed this queues the work immediately again. No big issue,
+         * just a pointless work scheduled.
+         */
+        if (ntp_synced() && !hrtimer_is_queued(&sync_hrtimer))
+                queue_work(system_power_efficient_wq, &sync_work);
+}
 
-        if (IS_ENABLED(CONFIG_GENERIC_CMOS_UPDATE) ||
-            IS_ENABLED(CONFIG_RTC_SYSTOHC))
-                queue_delayed_work(system_power_efficient_wq, &sync_work, 0);
+static void __init ntp_init_cmos_sync(void)
+{
+        hrtimer_init(&sync_hrtimer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+        sync_hrtimer.function = sync_timer_callback;
 }
+#else /* CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) */
+static inline void __init ntp_init_cmos_sync(void) { }
+#endif /* !CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) */
 
 /*
  * Propagate a new txc->status value into the NTP state:
@@ -1044,4 +1092,5 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
 void __init ntp_init(void)
 {
         ntp_clear();
+        ntp_init_cmos_sync();
 }
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 908ecaa65fc3..23d1b74c3065 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -12,4 +12,11 @@ extern int __do_adjtimex(struct __kernel_timex *txc,
                          const struct timespec64 *ts,
                          s32 *time_tai, struct audit_ntp_data *ad);
 extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts);
+
+#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
+extern void ntp_notify_cmos_timer(void);
+#else
+static inline void ntp_notify_cmos_timer(void) { }
+#endif
+
 #endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index cc7cba20382e..a9e68936822d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -57,36 +57,42 @@ static ktime_t last_jiffies_update;
 static void tick_do_update_jiffies64(ktime_t now)
 {
         unsigned long ticks = 1;
-        ktime_t delta;
+        ktime_t delta, nextp;
 
         /*
-         * Do a quick check without holding jiffies_lock. The READ_ONCE()
+         * 64bit can do a quick check without holding jiffies lock and
+         * without looking at the sequence count. The smp_load_acquire()
          * pairs with the update done later in this function.
          *
-         * This is also an intentional data race which is even safe on
-         * 32bit in theory. If there is a concurrent update then the check
-         * might give a random answer. It does not matter because if it
-         * returns then the concurrent update is already taking care, if it
-         * falls through then it will pointlessly contend on jiffies_lock.
-         *
-         * Though there is one nasty case on 32bit due to store tearing of
-         * the 64bit value. If the first 32bit store makes the quick check
-         * return on all other CPUs and the writing CPU context gets
-         * delayed to complete the second store (scheduled out on virt)
-         * then jiffies can become stale for up to ~2^32 nanoseconds
-         * without noticing. After that point all CPUs will wait for
-         * jiffies lock.
-         *
-         * OTOH, this is not any different than the situation with NOHZ=off
-         * where one CPU is responsible for updating jiffies and
-         * timekeeping. If that CPU goes out for lunch then all other CPUs
-         * will operate on stale jiffies until it decides to come back.
+         * 32bit cannot do that because the store of tick_next_period
+         * consists of two 32bit stores and the first store could move it
+         * to a random point in the future.
          */
-        if (ktime_before(now, READ_ONCE(tick_next_period)))
-                return;
+        if (IS_ENABLED(CONFIG_64BIT)) {
+                if (ktime_before(now, smp_load_acquire(&tick_next_period)))
+                        return;
+        } else {
+                unsigned int seq;
 
-        /* Reevaluate with jiffies_lock held */
+                /*
+                 * Avoid contention on jiffies_lock and protect the quick
+                 * check with the sequence count.
+                 */
+                do {
+                        seq = read_seqcount_begin(&jiffies_seq);
+                        nextp = tick_next_period;
+                } while (read_seqcount_retry(&jiffies_seq, seq));
+
+                if (ktime_before(now, nextp))
+                        return;
+        }
+
+        /* Quick check failed, i.e. update is required. */
         raw_spin_lock(&jiffies_lock);
+        /*
+         * Reevaluate with the lock held. Another CPU might have done the
+         * update already.
+         */
         if (ktime_before(now, tick_next_period)) {
                 raw_spin_unlock(&jiffies_lock);
                 return;
@@ -112,11 +118,25 @@ static void tick_do_update_jiffies64(ktime_t now)
         jiffies_64 += ticks;
 
         /*
-         * Keep the tick_next_period variable up to date. WRITE_ONCE()
-         * pairs with the READ_ONCE() in the lockless quick check above.
+         * Keep the tick_next_period variable up to date.
          */
-        WRITE_ONCE(tick_next_period,
-                   ktime_add_ns(last_jiffies_update, TICK_NSEC));
+        nextp = ktime_add_ns(last_jiffies_update, TICK_NSEC);
+
+        if (IS_ENABLED(CONFIG_64BIT)) {
+                /*
+                 * Pairs with smp_load_acquire() in the lockless quick
+                 * check above and ensures that the update to jiffies_64 is
+                 * not reordered vs. the store to tick_next_period, neither
+                 * by the compiler nor by the CPU.
+                 */
+                smp_store_release(&tick_next_period, nextp);
+        } else {
+                /*
+                 * A plain store is good enough on 32bit as the quick check
+                 * above is protected by the sequence count.
+                 */
+                tick_next_period = nextp;
+        }
 
         /*
         * Release the sequence count. calc_global_load() below is not
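
About the rtc_tv_nsec_ok() helper added above: it shifts @now by (-1 s + set_offset_nsec) and only accepts the result if it lands within five jiffies of a full second, in which case to_set is snapped to that second. The sketch below is a minimal userspace approximation of that check, not part of the patch: it uses a plain struct timespec, assumes HZ=1000 (so TICK_NSEC is 1000000 ns), and feeds in made-up example values from main().

/*
 * Illustrative userspace sketch of the rtc_tv_nsec_ok() window check.
 * Assumptions: HZ=1000 => TICK_NSEC = 1000000 ns; the values in main()
 * are examples only. This is not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC    1000000000L
#define TICK_NSEC       1000000L        /* assumed HZ=1000 */

static bool rtc_tv_nsec_ok(long set_offset_nsec, struct timespec *to_set,
                           const struct timespec *now)
{
        /* Allowed error in tv_nsec, 5 jiffies in ns, as in the patch */
        const long TIME_SET_NSEC_FUZZ = TICK_NSEC * 5;

        /* now + (-1s + set_offset_nsec), i.e. the timespec64_add() above */
        to_set->tv_sec  = now->tv_sec - 1;
        to_set->tv_nsec = now->tv_nsec + set_offset_nsec;
        if (to_set->tv_nsec >= NSEC_PER_SEC) {
                to_set->tv_sec++;
                to_set->tv_nsec -= NSEC_PER_SEC;
        }

        if (to_set->tv_nsec < TIME_SET_NSEC_FUZZ) {
                to_set->tv_nsec = 0;
                return true;
        }
        if (to_set->tv_nsec > NSEC_PER_SEC - TIME_SET_NSEC_FUZZ) {
                to_set->tv_sec++;
                to_set->tv_nsec = 0;
                return true;
        }
        return false;
}

int main(void)
{
        /* Example: the RTC wants its value 500 ms before the second boundary */
        long set_offset_nsec = NSEC_PER_SEC / 2;
        /* Pretend the work ran 2 ms late: now = 1000.502 s */
        struct timespec now = { .tv_sec = 1000, .tv_nsec = 502000000 };
        struct timespec to_set;

        if (rtc_tv_nsec_ok(set_offset_nsec, &to_set, &now))
                printf("write %lld to the RTC\n", (long long)to_set.tv_sec);
        else
                printf("outside the fuzz window, reschedule\n");
        return 0;
}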
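
The rearm arithmetic in sched_sync_hw_clock() is equally simple to check by hand: the expiry is seconds(now) plus either SYNC_PERIOD_NS (~11 minutes) or 2 seconds for a retry, minus offset_nsec, so the hrtimer fires offset_nsec before a wall clock second boundary. A small sketch with example numbers (the constants mirror the patch, the rest is illustrative):

/* Illustrative sketch of the sched_sync_hw_clock() expiry computation. */
#include <stdio.h>

#define NSEC_PER_SEC    1000000000ULL
#define SYNC_PERIOD_NS  (11ULL * 60 * NSEC_PER_SEC)     /* ~11 minutes */

int main(void)
{
        unsigned long long offset_nsec = NSEC_PER_SEC / 2;      /* 500 ms default */
        unsigned long long now_sec = 1000;      /* example ktime_get_real_seconds() */
        unsigned long long exp;

        /* Periodic case: fire offset_nsec before a second boundary ~11 min out */
        exp = now_sec * NSEC_PER_SEC + SYNC_PERIOD_NS - offset_nsec;
        printf("periodic expiry: %llu.%09llu\n",
               exp / NSEC_PER_SEC, exp % NSEC_PER_SEC);         /* 1659.500000000 */

        /* Retry case: try again roughly two seconds later */
        exp = now_sec * NSEC_PER_SEC + 2 * NSEC_PER_SEC - offset_nsec;
        printf("retry expiry:    %llu.%09llu\n",
               exp / NSEC_PER_SEC, exp % NSEC_PER_SEC);         /* 1001.500000000 */
        return 0;
}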
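
On the tick-sched.c side, the 64bit/32bit split exists because a 64-bit load or store of tick_next_period is a single access: an acquire load pairing with a release store is then sufficient, whereas a 32-bit machine would tear the store into two halves and therefore falls back to the jiffies seqcount. The following userspace analogy with C11 atomics sketches only the 64-bit fast path; the names and the single-threaded main() are illustrative assumptions, not the kernel implementation.

/*
 * Userspace analogy of the 64bit fast path: the writer publishes
 * tick_next_period with a release store after updating jiffies_64, and the
 * lockless reader pairs with it via an acquire load. Illustrative only.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define TICK_NSEC 1000000LL             /* assumed HZ=1000 */

static _Atomic int64_t tick_next_period;
static int64_t jiffies_64;              /* written only by the lock holder */

static int quick_check(int64_t now)
{
        /* Pairs with the release store in do_update() */
        return now < atomic_load_explicit(&tick_next_period,
                                          memory_order_acquire);
}

static void do_update(int64_t now)
{
        jiffies_64++;   /* done under jiffies_lock in the kernel */
        /* Publish after the jiffies_64 update, like smp_store_release() */
        atomic_store_explicit(&tick_next_period, now + TICK_NSEC,
                              memory_order_release);
}

int main(void)
{
        do_update(5 * TICK_NSEC);
        /* A reader that still sees now < tick_next_period returns early */
        printf("quick check at 5.5 ticks: %d\n",
               quick_check(5 * TICK_NSEC + TICK_NSEC / 2));     /* 1 */
        printf("quick check at 7 ticks:   %d\n",
               quick_check(7 * TICK_NSEC));                     /* 0 */
        return 0;
}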
