Diffstat (limited to 'kernel/time/clocksource.c')
-rw-r--r--  kernel/time/clocksource.c | 131
1 file changed, 67 insertions(+), 64 deletions(-)
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index d25ba49e313c..a1890a073196 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -20,9 +20,11 @@
 #include "tick-internal.h"
 #include "timekeeping_internal.h"
 
+static void clocksource_enqueue(struct clocksource *cs);
+
 static noinline u64 cycles_to_nsec_safe(struct clocksource *cs, u64 start, u64 end)
 {
-        u64 delta = clocksource_delta(end, start, cs->mask);
+        u64 delta = clocksource_delta(end, start, cs->mask, cs->max_raw_delta);
 
         if (likely(delta < cs->max_cycles))
                 return clocksource_cyc2ns(delta, cs->mult, cs->shift);
@@ -113,7 +115,6 @@ static u64 suspend_start;
 
 /*
  * Threshold: 0.0312s, when doubled: 0.0625s.
- * Also a default for cs->uncertainty_margin when registering clocks.
  */
 #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 5)
 
@@ -125,6 +126,13 @@ static u64 suspend_start;
  *
  * The default of 500 parts per million is based on NTP's limits.
  * If a clocksource is good enough for NTP, it is good enough for us!
+ *
+ * In other words, by default, even if a clocksource is extremely
+ * precise (for example, with a sub-nanosecond period), the maximum
+ * permissible skew between the clocksource watchdog and the clocksource
+ * under test is not permitted to go below the 500ppm minimum defined
+ * by MAX_SKEW_USEC. This 500ppm minimum may be overridden using the
+ * CLOCKSOURCE_WATCHDOG_MAX_SKEW_US Kconfig option.
  */
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
 #define MAX_SKEW_USEC CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
@@ -132,6 +140,13 @@ static u64 suspend_start;
 #define MAX_SKEW_USEC (125 * WATCHDOG_INTERVAL / HZ)
 #endif
 
+/*
+ * Default for maximum permissible skew when cs->uncertainty_margin is
+ * not specified, and the lower bound even when cs->uncertainty_margin
+ * is specified. This is also the default that is used when registering
+ * clocks with unspecified cs->uncertainty_margin, so this macro is used
+ * even in CONFIG_CLOCKSOURCE_WATCHDOG=n kernels.
+ */
 #define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC)
 
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
@@ -158,7 +173,6 @@ static inline void clocksource_watchdog_unlock(unsigned long *flags)
 }
 
 static int clocksource_watchdog_kthread(void *data);
-static void __clocksource_change_rating(struct clocksource *cs, int rating);
 
 static void clocksource_watchdog_work(struct work_struct *work)
 {
@@ -178,6 +192,13 @@ static void clocksource_watchdog_work(struct work_struct *work)
         kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
 }
 
+static void clocksource_change_rating(struct clocksource *cs, int rating)
+{
+        list_del(&cs->list);
+        cs->rating = rating;
+        clocksource_enqueue(cs);
+}
+
 static void __clocksource_unstable(struct clocksource *cs)
 {
         cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
@@ -231,6 +252,7 @@ enum wd_read_status {
 
 static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
 {
+        int64_t md = 2 * watchdog->uncertainty_margin;
         unsigned int nretries, max_retries;
         int64_t wd_delay, wd_seq_delay;
         u64 wd_end, wd_end2;
@@ -245,8 +267,8 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow,
                 local_irq_enable();
 
                 wd_delay = cycles_to_nsec_safe(watchdog, *wdnow, wd_end);
-                if (wd_delay <= WATCHDOG_MAX_SKEW) {
-                        if (nretries > 1 || nretries >= max_retries) {
+                if (wd_delay <= md + cs->uncertainty_margin) {
+                        if (nretries > 1 && nretries >= max_retries) {
                                 pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
                                         smp_processor_id(), watchdog->name, nretries);
                         }
@@ -258,12 +280,12 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow,
                  * there is too much external interferences that cause
                  * significant delay in reading both clocksource and watchdog.
                  *
-                 * If consecutive WD read-back delay > WATCHDOG_MAX_SKEW/2,
-                 * report system busy, reinit the watchdog and skip the current
+                 * If consecutive WD read-back delay > md, report
+                 * system busy, reinit the watchdog and skip the current
                  * watchdog test.
                  */
                 wd_seq_delay = cycles_to_nsec_safe(watchdog, wd_end, wd_end2);
-                if (wd_seq_delay > WATCHDOG_MAX_SKEW/2)
+                if (wd_seq_delay > md)
                         goto skip_test;
         }
 
@@ -288,7 +310,7 @@ static void clocksource_verify_choose_cpus(void)
 {
         int cpu, i, n = verify_n_cpus;
 
-        if (n < 0) {
+        if (n < 0 || n >= num_online_cpus()) {
                 /* Check all of the CPUs. */
                 cpumask_copy(&cpus_chosen, cpu_online_mask);
                 cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
@@ -301,9 +323,7 @@ static void clocksource_verify_choose_cpus(void)
                 return;
 
         /* Make sure to select at least one CPU other than the current CPU. */
-        cpu = cpumask_first(cpu_online_mask);
-        if (cpu == smp_processor_id())
-                cpu = cpumask_next(cpu, cpu_online_mask);
+        cpu = cpumask_any_but(cpu_online_mask, smp_processor_id());
         if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
                 return;
         cpumask_set_cpu(cpu, &cpus_chosen);
@@ -320,10 +340,7 @@ static void clocksource_verify_choose_cpus(void)
          * CPUs that are currently online.
          */
         for (i = 1; i < n; i++) {
-                cpu = get_random_u32_below(nr_cpu_ids);
-                cpu = cpumask_next(cpu - 1, cpu_online_mask);
-                if (cpu >= nr_cpu_ids)
-                        cpu = cpumask_first(cpu_online_mask);
+                cpu = cpumask_random(cpu_online_mask);
                 if (!WARN_ON_ONCE(cpu >= nr_cpu_ids))
                         cpumask_set_cpu(cpu, &cpus_chosen);
         }
@@ -351,16 +368,18 @@ void clocksource_verify_percpu(struct clocksource *cs)
         cpumask_clear(&cpus_ahead);
         cpumask_clear(&cpus_behind);
         cpus_read_lock();
-        preempt_disable();
+        migrate_disable();
         clocksource_verify_choose_cpus();
         if (cpumask_empty(&cpus_chosen)) {
-                preempt_enable();
+                migrate_enable();
                 cpus_read_unlock();
                 pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
                 return;
         }
         testcpu = smp_processor_id();
-        pr_warn("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
+        pr_info("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n",
+                cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
+        preempt_disable();
         for_each_cpu(cpu, &cpus_chosen) {
                 if (cpu == testcpu)
                         continue;
@@ -380,6 +399,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
                         cs_nsec_min = cs_nsec;
         }
         preempt_enable();
+        migrate_enable();
         cpus_read_unlock();
         if (!cpumask_empty(&cpus_ahead))
                 pr_warn("        CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
@@ -387,9 +407,8 @@ void clocksource_verify_percpu(struct clocksource *cs)
         if (!cpumask_empty(&cpus_behind))
                 pr_warn("        CPUs %*pbl behind CPU %d for clocksource %s.\n",
                         cpumask_pr_args(&cpus_behind), testcpu, cs->name);
-        if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind))
-                pr_warn("        CPU %d check durations %lldns - %lldns for clocksource %s.\n",
-                        testcpu, cs_nsec_min, cs_nsec_max, cs->name);
+        pr_info("        CPU %d check durations %lldns - %lldns for clocksource %s.\n",
+                testcpu, cs_nsec_min, cs_nsec_max, cs->name);
 }
 EXPORT_SYMBOL_GPL(clocksource_verify_percpu);
 
@@ -564,9 +583,7 @@ static void clocksource_watchdog(struct timer_list *unused)
          * Cycle through CPUs to check if the CPUs stay synchronized
          * to each other.
          */
-        next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
-        if (next_cpu >= nr_cpu_ids)
-                next_cpu = cpumask_first(cpu_online_mask);
+        next_cpu = cpumask_next_wrap(raw_smp_processor_id(), cpu_online_mask);
 
         /*
          * Arm timer if not already pending: could race with concurrent
@@ -594,7 +611,7 @@ static inline void clocksource_stop_watchdog(void)
 {
         if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
                 return;
-        del_timer(&watchdog_timer);
+        timer_delete(&watchdog_timer);
         watchdog_running = 0;
 }
 
@@ -683,7 +700,7 @@ static int __clocksource_watchdog_kthread(void)
         list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
                 if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
                         list_del_init(&cs->wd_list);
-                        __clocksource_change_rating(cs, 0);
+                        clocksource_change_rating(cs, 0);
                         select = 1;
                 }
                 if (cs->flags & CLOCK_SOURCE_RESELECT) {
@@ -971,6 +988,15 @@ static inline void clocksource_update_max_deferment(struct clocksource *cs)
         cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
                                                 cs->maxadj, cs->mask,
                                                 &cs->max_cycles);
+
+        /*
+         * Threshold for detecting negative motion in clocksource_delta().
+         *
+         * Allow for 0.875 of the counter width so that overly long idle
+         * sleeps, which go slightly over mask/2, do not trigger the
+         * negative motion detection.
+         */
+        cs->max_raw_delta = (cs->mask >> 1) + (cs->mask >> 2) + (cs->mask >> 3);
 }
 
 static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
@@ -1146,14 +1172,19 @@ void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq
         }
 
         /*
-         * If the uncertainty margin is not specified, calculate it.
-         * If both scale and freq are non-zero, calculate the clock
-         * period, but bound below at 2*WATCHDOG_MAX_SKEW. However,
-         * if either of scale or freq is zero, be very conservative and
-         * take the tens-of-milliseconds WATCHDOG_THRESHOLD value for the
-         * uncertainty margin. Allow stupidly small uncertainty margins
-         * to be specified by the caller for testing purposes, but warn
-         * to discourage production use of this capability.
+         * If the uncertainty margin is not specified, calculate it. If
+         * both scale and freq are non-zero, calculate the clock period, but
+         * bound below at 2*WATCHDOG_MAX_SKEW, that is, 500ppm by default.
+         * However, if either of scale or freq is zero, be very conservative
+         * and take the tens-of-milliseconds WATCHDOG_THRESHOLD value
+         * for the uncertainty margin. Allow stupidly small uncertainty
+         * margins to be specified by the caller for testing purposes,
+         * but warn to discourage production use of this capability.
+         *
+         * Bottom line: The sum of the uncertainty margins of the
+         * watchdog clocksource and the clocksource under test will be at
+         * least 500ppm by default. For more information, please see the
+         * comment preceding CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US above.
          */
         if (scale && freq && !cs->uncertainty_margin) {
                 cs->uncertainty_margin = NSEC_PER_SEC / (scale * freq);
@@ -1236,34 +1267,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 }
 EXPORT_SYMBOL_GPL(__clocksource_register_scale);
 
-static void __clocksource_change_rating(struct clocksource *cs, int rating)
-{
-        list_del(&cs->list);
-        cs->rating = rating;
-        clocksource_enqueue(cs);
-}
-
-/**
- * clocksource_change_rating - Change the rating of a registered clocksource
- * @cs:         clocksource to be changed
- * @rating:     new rating
- */
-void clocksource_change_rating(struct clocksource *cs, int rating)
-{
-        unsigned long flags;
-
-        mutex_lock(&clocksource_mutex);
-        clocksource_watchdog_lock(&flags);
-        __clocksource_change_rating(cs, rating);
-        clocksource_watchdog_unlock(&flags);
-
-        clocksource_select();
-        clocksource_select_watchdog(false);
-        clocksource_suspend_select(false);
-        mutex_unlock(&clocksource_mutex);
-}
-EXPORT_SYMBOL(clocksource_change_rating);
-
 /*
  * Unbind clocksource @cs. Called with clocksource_mutex held
  */
@@ -1499,7 +1502,7 @@ static int __init boot_override_clocksource(char* str)
 {
         mutex_lock(&clocksource_mutex);
         if (str)
-                strscpy(override_name, str, sizeof(override_name));
+                strscpy(override_name, str);
         mutex_unlock(&clocksource_mutex);
         return 1;
 }
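
A note for readers tracing the new bounds in cs_watchdog_read(): the accept/skip thresholds are now derived from the uncertainty margins of the two clocks rather than from the fixed WATCHDOG_MAX_SKEW. The following is a minimal userspace model of that accept/busy/retry decision; the control flow mirrors the patch, but the margin values and the classify_read() helper are illustrative assumptions, not kernel code.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical example margins in nanoseconds -- not kernel defaults. */
#define WD_UNCERTAINTY_NS 100000 /* watchdog->uncertainty_margin */
#define CS_UNCERTAINTY_NS 125000 /* cs->uncertainty_margin */

/*
 * Model of the patched cs_watchdog_read() bounds: a paired read is
 * accepted when the watchdog read-out delay fits within twice the
 * watchdog's margin plus the margin of the clocksource under test;
 * a consecutive watchdog read-back delay above 2*margin reports the
 * system as busy and skips the test; anything else retries.
 */
static const char *classify_read(int64_t wd_delay, int64_t wd_seq_delay)
{
        int64_t md = 2 * (int64_t)WD_UNCERTAINTY_NS;

        if (wd_delay <= md + CS_UNCERTAINTY_NS)
                return "accept";
        if (wd_seq_delay > md)
                return "skip: system busy";
        return "retry";
}

int main(void)
{
        printf("%s\n", classify_read(150000, 50000));  /* accept */
        printf("%s\n", classify_read(400000, 250000)); /* skip: system busy */
        printf("%s\n", classify_read(400000, 50000));  /* retry */
        return 0;
}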
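The 0.875 factor mentioned in the new max_raw_delta comment falls straight out of the shift arithmetic: mask/2 + mask/4 + mask/8 = 7/8 of the counter width. A quick standalone check, assuming a 32-bit counter mask purely for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Example: a 32-bit free-running counter. */
        uint64_t mask = 0xffffffffULL;
        uint64_t max_raw_delta = (mask >> 1) + (mask >> 2) + (mask >> 3);

        /* 1/2 + 1/4 + 1/8 = 7/8 = 0.875 of the counter width. */
        printf("max_raw_delta = %#llx (%.3f of mask)\n",
               (unsigned long long)max_raw_delta,
               (double)max_raw_delta / (double)mask);
        return 0;
}

Any raw delta above this threshold is treated as negative motion of the clocksource rather than as a genuine forward step, while deltas from long idle sleeps that land slightly past mask/2 still pass.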
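Finally, the reworked uncertainty-margin comment can be sanity-checked numerically. The default margin is the clock period NSEC_PER_SEC / (scale * freq), floored at 2*WATCHDOG_MAX_SKEW; with MAX_SKEW_USEC left at its HZ-derived default that floor comes to about 124us per clocksource, so the watchdog margin plus the margin of the clocksource under test is roughly 250us, i.e. the 500ppm of the 0.5s watchdog interval promised by the comment's bottom line. The sketch below mirrors that calculation in userspace; HZ=1000, the 0.5s WATCHDOG_INTERVAL, and the example frequencies are assumptions for illustration, not a given kernel configuration.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC  1000000000ULL
#define NSEC_PER_USEC 1000ULL

/* Assumed defaults: CONFIG_..._MAX_SKEW_US unset, HZ=1000. */
#define HZ                1000
#define WATCHDOG_INTERVAL (HZ >> 1)                      /* 0.5s */
#define MAX_SKEW_USEC     (125 * WATCHDOG_INTERVAL / HZ) /* 62us */
#define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC)

/* Default uncertainty margin: clock period, floored at 2*WATCHDOG_MAX_SKEW. */
static uint64_t default_margin(uint32_t scale, uint32_t freq)
{
        uint64_t margin = NSEC_PER_SEC / ((uint64_t)scale * freq);

        if (margin < 2 * WATCHDOG_MAX_SKEW)
                margin = 2 * WATCHDOG_MAX_SKEW;
        return margin;
}

int main(void)
{
        /* A fast 2.5GHz counter: sub-nanosecond period, so the floor applies. */
        printf("2.5GHz clock margin: %lluns\n",
               (unsigned long long)default_margin(1, 2500000000U));
        /* A slow 1kHz clock: 1ms period, well above the floor. */
        printf("1kHz clock margin:   %lluns\n",
               (unsigned long long)default_margin(1, 1000));
        return 0;
}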
