diff options
-rw-r--r-- | Documentation/ABI/stable/sysfs-kernel-time-aux-clocks | 5 | ||||
-rw-r--r-- | arch/arm64/include/asm/vdso/vsyscall.h | 7 | ||||
-rw-r--r-- | include/asm-generic/vdso/vsyscall.h | 6 | ||||
-rw-r--r-- | include/linux/posix-timers.h | 5 | ||||
-rw-r--r-- | include/linux/timekeeper_internal.h | 37 | ||||
-rw-r--r-- | include/linux/timekeeping.h | 12 | ||||
-rw-r--r-- | include/uapi/linux/time.h | 11 | ||||
-rw-r--r-- | include/vdso/auxclock.h | 13 | ||||
-rw-r--r-- | include/vdso/datapage.h | 5 | ||||
-rw-r--r-- | include/vdso/helpers.h | 50 | ||||
-rw-r--r-- | kernel/time/Kconfig | 15 | ||||
-rw-r--r-- | kernel/time/namespace.c | 5 | ||||
-rw-r--r-- | kernel/time/ntp.c | 72 | ||||
-rw-r--r-- | kernel/time/ntp_internal.h | 13 | ||||
-rw-r--r-- | kernel/time/posix-timers.c | 3 | ||||
-rw-r--r-- | kernel/time/posix-timers.h | 1 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 629 | ||||
-rw-r--r-- | kernel/time/timekeeping_internal.h | 3 | ||||
-rw-r--r-- | kernel/time/vsyscall.c | 70 | ||||
-rw-r--r-- | lib/vdso/gettimeofday.c | 224 |
20 files changed, 950 insertions, 236 deletions
diff --git a/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks b/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks new file mode 100644 index 000000000000..825508f42af6 --- /dev/null +++ b/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks @@ -0,0 +1,5 @@ +What: /sys/kernel/time/aux_clocks/<ID>/enable +Date: May 2025 +Contact: Thomas Gleixner <tglx@linutronix.de> +Description: + Controls the enablement of auxiliary clock timekeepers. diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index de58951b8df6..417aae5763a8 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -13,12 +13,11 @@ * Update the vDSO data page to keep in sync with kernel timekeeping. */ static __always_inline -void __arm64_update_vsyscall(struct vdso_time_data *vdata) +void __arch_update_vdso_clock(struct vdso_clock *vc) { - vdata->clock_data[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK; - vdata->clock_data[CS_RAW].mask = VDSO_PRECISION_MASK; + vc->mask = VDSO_PRECISION_MASK; } -#define __arch_update_vsyscall __arm64_update_vsyscall +#define __arch_update_vdso_clock __arch_update_vdso_clock /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index b550afa15ecd..7fc0b560007d 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -22,11 +22,11 @@ static __always_inline const struct vdso_rng_data *__arch_get_vdso_u_rng_data(vo #endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ -#ifndef __arch_update_vsyscall -static __always_inline void __arch_update_vsyscall(struct vdso_time_data *vdata) +#ifndef __arch_update_vdso_clock +static __always_inline void __arch_update_vdso_clock(struct vdso_clock *vc) { } -#endif /* __arch_update_vsyscall */ +#endif /* __arch_update_vdso_clock */ #ifndef __arch_sync_vdso_time_data static __always_inline void __arch_sync_vdso_time_data(struct vdso_time_data *vdata) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index dd48c64b605e..4d3dbcef379e 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -37,6 +37,11 @@ static inline int clockid_to_fd(const clockid_t clk) return ~(clk >> 3); } +static inline bool clockid_aux_valid(clockid_t id) +{ + return IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS) && id >= CLOCK_AUX && id <= CLOCK_AUX_LAST; +} + #ifdef CONFIG_POSIX_TIMERS #include <linux/signal_types.h> diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 785048a3b3e6..c27aac67cb3f 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -12,6 +12,22 @@ #include <linux/time.h> /** + * timekeeper_ids - IDs for various time keepers in the kernel + * @TIMEKEEPER_CORE: The central core timekeeper managing system time + * @TIMEKEEPER_AUX_FIRST: The first AUX timekeeper + * @TIMEKEEPER_AUX_LAST: The last AUX timekeeper + * @TIMEKEEPERS_MAX: The maximum number of timekeepers managed + */ +enum timekeeper_ids { + TIMEKEEPER_CORE, +#ifdef CONFIG_POSIX_AUX_CLOCKS + TIMEKEEPER_AUX_FIRST, + TIMEKEEPER_AUX_LAST = TIMEKEEPER_AUX_FIRST + MAX_AUX_CLOCKS - 1, +#endif + TIMEKEEPERS_MAX, +}; + +/** * struct tk_read_base - base structure for timekeeping readout * @clock: Current clocksource used for timekeeping. * @mask: Bitmask for two's complement subtraction of non 64bit clocks @@ -51,11 +67,14 @@ struct tk_read_base { * @offs_real: Offset clock monotonic -> clock realtime * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai + * @offs_aux: Offset clock monotonic -> clock AUX * @coarse_nsec: The nanoseconds part for coarse time getters + * @id: The timekeeper ID * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds * @clock_was_set_seq: The sequence number of clock was set events * @cs_was_changed_seq: The sequence number of clocksource change events + * @clock_valid: Indicator for valid clock * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP @@ -95,13 +114,16 @@ struct tk_read_base { * @monotonic_to_boottime is a timespec64 representation of @offs_boot to * accelerate the VDSO update for CLOCK_BOOTTIME. * + * @offs_aux is used by the auxiliary timekeepers which do not utilize any + * of the regular timekeeper offset fields. + * * The cacheline ordering of the structure is optimized for in kernel usage of * the ktime_get() and ktime_get_ts64() family of time accessors. Struct * timekeeper is prepended in the core timekeeping code with a sequence count, * which results in the following cacheline layout: * * 0: seqcount, tkr_mono - * 1: xtime_sec ... coarse_nsec + * 1: xtime_sec ... id * 2: tkr_raw, raw_sec * 3,4: Internal variables * @@ -121,8 +143,12 @@ struct timekeeper { struct timespec64 wall_to_monotonic; ktime_t offs_real; ktime_t offs_boot; - ktime_t offs_tai; + union { + ktime_t offs_tai; + ktime_t offs_aux; + }; u32 coarse_nsec; + enum timekeeper_ids id; /* Cacheline 2: */ struct tk_read_base tkr_raw; @@ -131,6 +157,7 @@ struct timekeeper { /* Cachline 3 and 4 (timekeeping internal variables): */ unsigned int clock_was_set_seq; u8 cs_was_changed_seq; + u8 clock_valid; struct timespec64 monotonic_to_boot; @@ -163,4 +190,10 @@ static inline void update_vsyscall_tz(void) } #endif +#if defined(CONFIG_GENERIC_GETTIMEOFDAY) && defined(CONFIG_POSIX_AUX_CLOCKS) +extern void vdso_time_update_aux(struct timekeeper *tk); +#else +static inline void vdso_time_update_aux(struct timekeeper *tk) { } +#endif + #endif /* _LINUX_TIMEKEEPER_INTERNAL_H */ diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 542773650200..aee2c1a46e47 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -44,6 +44,7 @@ extern void ktime_get_ts64(struct timespec64 *ts); extern void ktime_get_real_ts64(struct timespec64 *tv); extern void ktime_get_coarse_ts64(struct timespec64 *ts); extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); +extern void ktime_get_clock_ts64(clockid_t id, struct timespec64 *ts); /* Multigrain timestamp interfaces */ extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts); @@ -263,6 +264,17 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); +/* + * Auxiliary clock interfaces + */ +#ifdef CONFIG_POSIX_AUX_CLOCKS +extern bool ktime_get_aux(clockid_t id, ktime_t *kt); +extern bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *kt); +#else +static inline bool ktime_get_aux(clockid_t id, ktime_t *kt) { return false; } +static inline bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *kt) { return false; } +#endif + /** * struct system_time_snapshot - simultaneous raw/real time capture with * counter value diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index 4f4b6e48e01c..16ca1ac206fd 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -64,6 +64,17 @@ struct timezone { #define CLOCK_TAI 11 #define MAX_CLOCKS 16 + +/* + * AUX clock support. AUXiliary clocks are dynamically configured by + * enabling a clock ID. These clock can be steered independently of the + * core timekeeper. The kernel can support up to 8 auxiliary clocks, but + * the actual limit depends on eventual architecture constraints vs. VDSO. + */ +#define CLOCK_AUX MAX_CLOCKS +#define MAX_AUX_CLOCKS 8 +#define CLOCK_AUX_LAST (CLOCK_AUX + MAX_AUX_CLOCKS - 1) + #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) #define CLOCKS_MONO CLOCK_MONOTONIC diff --git a/include/vdso/auxclock.h b/include/vdso/auxclock.h new file mode 100644 index 000000000000..6d6e74cbc400 --- /dev/null +++ b/include/vdso/auxclock.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _VDSO_AUXCLOCK_H +#define _VDSO_AUXCLOCK_H + +#include <uapi/linux/time.h> +#include <uapi/linux/types.h> + +static __always_inline u64 aux_clock_resolution_ns(void) +{ + return 1; +} + +#endif /* _VDSO_AUXCLOCK_H */ diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 1864e76e8f69..02533038640e 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -5,6 +5,7 @@ #ifndef __ASSEMBLY__ #include <linux/compiler.h> +#include <uapi/linux/bits.h> #include <uapi/linux/time.h> #include <uapi/linux/types.h> #include <uapi/asm-generic/errno-base.h> @@ -38,6 +39,7 @@ struct vdso_arch_data { #endif #define VDSO_BASES (CLOCK_TAI + 1) +#define VDSO_BASE_AUX 0 #define VDSO_HRES (BIT(CLOCK_REALTIME) | \ BIT(CLOCK_MONOTONIC) | \ BIT(CLOCK_BOOTTIME) | \ @@ -45,6 +47,7 @@ struct vdso_arch_data { #define VDSO_COARSE (BIT(CLOCK_REALTIME_COARSE) | \ BIT(CLOCK_MONOTONIC_COARSE)) #define VDSO_RAW (BIT(CLOCK_MONOTONIC_RAW)) +#define VDSO_AUX __GENMASK(CLOCK_AUX_LAST, CLOCK_AUX) #define CS_HRES_COARSE 0 #define CS_RAW 1 @@ -117,6 +120,7 @@ struct vdso_clock { * @arch_data: architecture specific data (optional, defaults * to an empty struct) * @clock_data: clocksource related data (array) + * @aux_clock_data: auxiliary clocksource related data (array) * @tz_minuteswest: minutes west of Greenwich * @tz_dsttime: type of DST correction * @hrtimer_res: hrtimer resolution @@ -133,6 +137,7 @@ struct vdso_time_data { struct arch_vdso_time_data arch_data; struct vdso_clock clock_data[CS_BASES]; + struct vdso_clock aux_clock_data[MAX_AUX_CLOCKS]; s32 tz_minuteswest; s32 tz_dsttime; diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h index 0a98fed550ba..1a5ee9d9052c 100644 --- a/include/vdso/helpers.h +++ b/include/vdso/helpers.h @@ -28,17 +28,47 @@ static __always_inline u32 vdso_read_retry(const struct vdso_clock *vc, return seq != start; } -static __always_inline void vdso_write_begin(struct vdso_time_data *vd) +static __always_inline void vdso_write_seq_begin(struct vdso_clock *vc) { - struct vdso_clock *vc = vd->clock_data; + /* + * WRITE_ONCE() is required otherwise the compiler can validly tear + * updates to vc->seq and it is possible that the value seen by the + * reader is inconsistent. + */ + WRITE_ONCE(vc->seq, vc->seq + 1); +} +static __always_inline void vdso_write_seq_end(struct vdso_clock *vc) +{ /* * WRITE_ONCE() is required otherwise the compiler can validly tear - * updates to vd[x].seq and it is possible that the value seen by the + * updates to vc->seq and it is possible that the value seen by the * reader is inconsistent. */ - WRITE_ONCE(vc[CS_HRES_COARSE].seq, vc[CS_HRES_COARSE].seq + 1); - WRITE_ONCE(vc[CS_RAW].seq, vc[CS_RAW].seq + 1); + WRITE_ONCE(vc->seq, vc->seq + 1); +} + +static __always_inline void vdso_write_begin_clock(struct vdso_clock *vc) +{ + vdso_write_seq_begin(vc); + /* Ensure the sequence invalidation is visible before data is modified */ + smp_wmb(); +} + +static __always_inline void vdso_write_end_clock(struct vdso_clock *vc) +{ + /* Ensure the data update is visible before the sequence is set valid again */ + smp_wmb(); + vdso_write_seq_end(vc); +} + +static __always_inline void vdso_write_begin(struct vdso_time_data *vd) +{ + struct vdso_clock *vc = vd->clock_data; + + vdso_write_seq_begin(&vc[CS_HRES_COARSE]); + vdso_write_seq_begin(&vc[CS_RAW]); + /* Ensure the sequence invalidation is visible before data is modified */ smp_wmb(); } @@ -46,14 +76,10 @@ static __always_inline void vdso_write_end(struct vdso_time_data *vd) { struct vdso_clock *vc = vd->clock_data; + /* Ensure the data update is visible before the sequence is set valid again */ smp_wmb(); - /* - * WRITE_ONCE() is required otherwise the compiler can validly tear - * updates to vd[x].seq and it is possible that the value seen by the - * reader is inconsistent. - */ - WRITE_ONCE(vc[CS_HRES_COARSE].seq, vc[CS_HRES_COARSE].seq + 1); - WRITE_ONCE(vc[CS_RAW].seq, vc[CS_RAW].seq + 1); + vdso_write_seq_end(&vc[CS_HRES_COARSE]); + vdso_write_seq_end(&vc[CS_RAW]); } #endif /* !__ASSEMBLY__ */ diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index b0b97a60aaa6..7c6a52f7836c 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -82,9 +82,9 @@ config CONTEXT_TRACKING_IDLE help Tracks idle state on behalf of RCU. -if GENERIC_CLOCKEVENTS menu "Timers subsystem" +if GENERIC_CLOCKEVENTS # Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is # only related to the tick functionality. Oneshot clockevent devices # are supported independent of this. @@ -208,6 +208,17 @@ config CLOCKSOURCE_WATCHDOG_MAX_SKEW_US interval and NTP's maximum frequency drift of 500 parts per million. If the clocksource is good enough for NTP, it is good enough for the clocksource watchdog! +endif + +config POSIX_AUX_CLOCKS + bool "Enable auxiliary POSIX clocks" + depends on POSIX_TIMERS + help + Auxiliary POSIX clocks are clocks which can be steered + independently of the core timekeeper, which controls the + MONOTONIC, REALTIME, BOOTTIME and TAI clocks. They are useful to + provide e.g. lockless time accessors to independent PTP clocks + and other clock domains, which are not correlated to the TAI/NTP + notion of time. endmenu -endif diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index e3642278df43..667452768ed3 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -242,6 +242,11 @@ static void timens_set_vvar_page(struct task_struct *task, for (i = 0; i < CS_BASES; i++) timens_setup_vdso_clock_data(&vc[i], ns); + if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) { + for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++) + timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns); + } + out: mutex_unlock(&offset_lock); } diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index b837d3d9d325..97fa99b96dd0 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -18,6 +18,7 @@ #include <linux/module.h> #include <linux/rtc.h> #include <linux/audit.h> +#include <linux/timekeeper_internal.h> #include "ntp_internal.h" #include "timekeeping_internal.h" @@ -86,14 +87,16 @@ struct ntp_data { #endif }; -static struct ntp_data tk_ntp_data = { - .tick_usec = USER_TICK_USEC, - .time_state = TIME_OK, - .time_status = STA_UNSYNC, - .time_constant = 2, - .time_maxerror = NTP_PHASE_LIMIT, - .time_esterror = NTP_PHASE_LIMIT, - .ntp_next_leap_sec = TIME64_MAX, +static struct ntp_data tk_ntp_data[TIMEKEEPERS_MAX] = { + [ 0 ... TIMEKEEPERS_MAX - 1 ] = { + .tick_usec = USER_TICK_USEC, + .time_state = TIME_OK, + .time_status = STA_UNSYNC, + .time_constant = 2, + .time_maxerror = NTP_PHASE_LIMIT, + .time_esterror = NTP_PHASE_LIMIT, + .ntp_next_leap_sec = TIME64_MAX, + }, }; #define SECS_PER_DAY 86400 @@ -300,7 +303,7 @@ static void ntp_update_offset(struct ntp_data *ntpdata, long offset) * Select how the frequency is to be controlled * and in which mode (PLL or FLL). */ - real_secs = __ktime_get_real_seconds(); + real_secs = ktime_get_ntp_seconds(ntpdata - tk_ntp_data); secs = (long)(real_secs - ntpdata->time_reftime); if (unlikely(ntpdata->time_status & STA_FREQHOLD)) secs = 0; @@ -348,33 +351,38 @@ static void __ntp_clear(struct ntp_data *ntpdata) /** * ntp_clear - Clears the NTP state variables + * @tkid: Timekeeper ID to be able to select proper ntp data array member */ -void ntp_clear(void) +void ntp_clear(unsigned int tkid) { - __ntp_clear(&tk_ntp_data); + __ntp_clear(&tk_ntp_data[tkid]); } -u64 ntp_tick_length(void) +u64 ntp_tick_length(unsigned int tkid) { - return tk_ntp_data.tick_length; + return tk_ntp_data[tkid].tick_length; } /** * ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime_t + * @tkid: Timekeeper ID * - * Provides the time of the next leapsecond against CLOCK_REALTIME in - * a ktime_t format. Returns KTIME_MAX if no leapsecond is pending. + * Returns: For @tkid == TIMEKEEPER_CORE this provides the time of the next + * leap second against CLOCK_REALTIME in a ktime_t format if a + * leap second is pending. KTIME_MAX otherwise. */ -ktime_t ntp_get_next_leap(void) +ktime_t ntp_get_next_leap(unsigned int tkid) { - struct ntp_data *ntpdata = &tk_ntp_data; - ktime_t ret; + struct ntp_data *ntpdata = &tk_ntp_data[TIMEKEEPER_CORE]; + + if (tkid != TIMEKEEPER_CORE) + return KTIME_MAX; if ((ntpdata->time_state == TIME_INS) && (ntpdata->time_status & STA_INS)) return ktime_set(ntpdata->ntp_next_leap_sec, 0); - ret = KTIME_MAX; - return ret; + + return KTIME_MAX; } /* @@ -387,9 +395,9 @@ ktime_t ntp_get_next_leap(void) * * Also handles leap second processing, and returns leap offset */ -int second_overflow(time64_t secs) +int second_overflow(unsigned int tkid, time64_t secs) { - struct ntp_data *ntpdata = &tk_ntp_data; + struct ntp_data *ntpdata = &tk_ntp_data[tkid]; s64 delta; int leap = 0; s32 rem; @@ -605,7 +613,7 @@ static inline int update_rtc(struct timespec64 *to_set, unsigned long *offset_ns */ static inline bool ntp_synced(void) { - return !(tk_ntp_data.time_status & STA_UNSYNC); + return !(tk_ntp_data[TIMEKEEPER_CORE].time_status & STA_UNSYNC); } /* @@ -702,7 +710,7 @@ static inline void process_adj_status(struct ntp_data *ntpdata, const struct __k * reference time to current time. */ if (!(ntpdata->time_status & STA_PLL) && (txc->status & STA_PLL)) - ntpdata->time_reftime = __ktime_get_real_seconds(); + ntpdata->time_reftime = ktime_get_ntp_seconds(ntpdata - tk_ntp_data); /* only set allowed bits */ ntpdata->time_status &= STA_RONLY; @@ -759,10 +767,10 @@ static inline void process_adjtimex_modes(struct ntp_data *ntpdata, const struct * adjtimex() mainly allows reading (and writing, if superuser) of * kernel time-keeping variables. used by xntpd. */ -int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, - s32 *time_tai, struct audit_ntp_data *ad) +int ntp_adjtimex(unsigned int tkid, struct __kernel_timex *txc, const struct timespec64 *ts, + s32 *time_tai, struct audit_ntp_data *ad) { - struct ntp_data *ntpdata = &tk_ntp_data; + struct ntp_data *ntpdata = &tk_ntp_data[tkid]; int result; if (txc->modes & ADJ_ADJTIME) { @@ -1031,8 +1039,8 @@ static void hardpps_update_phase(struct ntp_data *ntpdata, long error) */ void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts) { + struct ntp_data *ntpdata = &tk_ntp_data[TIMEKEEPER_CORE]; struct pps_normtime pts_norm, freq_norm; - struct ntp_data *ntpdata = &tk_ntp_data; pts_norm = pps_normalize_ts(*phase_ts); @@ -1083,18 +1091,18 @@ void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_t static int __init ntp_tick_adj_setup(char *str) { - int rc = kstrtos64(str, 0, &tk_ntp_data.ntp_tick_adj); + int rc = kstrtos64(str, 0, &tk_ntp_data[TIMEKEEPER_CORE].ntp_tick_adj); if (rc) return rc; - tk_ntp_data.ntp_tick_adj <<= NTP_SCALE_SHIFT; + tk_ntp_data[TIMEKEEPER_CORE].ntp_tick_adj <<= NTP_SCALE_SHIFT; return 1; } - __setup("ntp_tick_adj=", ntp_tick_adj_setup); void __init ntp_init(void) { - ntp_clear(); + for (int id = 0; id < TIMEKEEPERS_MAX; id++) + __ntp_clear(tk_ntp_data + id); ntp_init_cmos_sync(); } diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index 5a633dce9057..7084d839c207 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -3,14 +3,13 @@ #define _LINUX_NTP_INTERNAL_H extern void ntp_init(void); -extern void ntp_clear(void); +extern void ntp_clear(unsigned int tkid); /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */ -extern u64 ntp_tick_length(void); -extern ktime_t ntp_get_next_leap(void); -extern int second_overflow(time64_t secs); -extern int __do_adjtimex(struct __kernel_timex *txc, - const struct timespec64 *ts, - s32 *time_tai, struct audit_ntp_data *ad); +extern u64 ntp_tick_length(unsigned int tkid); +extern ktime_t ntp_get_next_leap(unsigned int tkid); +extern int second_overflow(unsigned int tkid, time64_t secs); +extern int ntp_adjtimex(unsigned int tkid, struct __kernel_timex *txc, const struct timespec64 *ts, + s32 *time_tai, struct audit_ntp_data *ad); extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts); #if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 2053b1a4c9e4..8b582174b1f9 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1526,6 +1526,9 @@ static const struct k_clock * const posix_clocks[] = { [CLOCK_REALTIME_ALARM] = &alarm_clock, [CLOCK_BOOTTIME_ALARM] = &alarm_clock, [CLOCK_TAI] = &clock_tai, +#ifdef CONFIG_POSIX_AUX_CLOCKS + [CLOCK_AUX ... CLOCK_AUX_LAST] = &clock_aux, +#endif }; static const struct k_clock *clockid_to_kclock(const clockid_t id) diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h index 61906f0688c1..7f259e845d24 100644 --- a/kernel/time/posix-timers.h +++ b/kernel/time/posix-timers.h @@ -41,6 +41,7 @@ extern const struct k_clock clock_posix_dynamic; extern const struct k_clock clock_process; extern const struct k_clock clock_thread; extern const struct k_clock alarm_clock; +extern const struct k_clock clock_aux; void posix_timer_queue_signal(struct k_itimer *timr); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 83c65f3afcca..059fa8b79be6 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -6,6 +6,7 @@ #include <linux/timekeeper_internal.h> #include <linux/module.h> #include <linux/interrupt.h> +#include <linux/kobject.h> #include <linux/percpu.h> #include <linux/init.h> #include <linux/mm.h> @@ -25,6 +26,8 @@ #include <linux/audit.h> #include <linux/random.h> +#include <vdso/auxclock.h> + #include "tick-internal.h" #include "ntp_internal.h" #include "timekeeping_internal.h" @@ -53,7 +56,32 @@ struct tk_data { raw_spinlock_t lock; } ____cacheline_aligned; -static struct tk_data tk_core; +static struct tk_data timekeeper_data[TIMEKEEPERS_MAX]; + +/* The core timekeeper */ +#define tk_core (timekeeper_data[TIMEKEEPER_CORE]) + +#ifdef CONFIG_POSIX_AUX_CLOCKS +static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts) +{ + return ktime_get_aux_ts64(CLOCK_AUX + tkid - TIMEKEEPER_AUX_FIRST, ts); +} + +static inline bool tk_is_aux(const struct timekeeper *tk) +{ + return tk->id >= TIMEKEEPER_AUX_FIRST && tk->id <= TIMEKEEPER_AUX_LAST; +} +#else +static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts) +{ + return false; +} + +static inline bool tk_is_aux(const struct timekeeper *tk) +{ + return false; +} +#endif /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -113,6 +141,16 @@ static struct tk_fast tk_fast_raw ____cacheline_aligned = { .base[1] = FAST_TK_INIT, }; +#ifdef CONFIG_POSIX_AUX_CLOCKS +static __init void tk_aux_setup(void); +static void tk_aux_update_clocksource(void); +static void tk_aux_advance(void); +#else +static inline void tk_aux_setup(void) { } +static inline void tk_aux_update_clocksource(void) { } +static inline void tk_aux_advance(void) { } +#endif + unsigned long timekeeper_lock_irqsave(void) { unsigned long flags; @@ -601,7 +639,7 @@ EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); */ static inline void tk_update_leap_state(struct timekeeper *tk) { - tk->next_leap_ktime = ntp_get_next_leap(); + tk->next_leap_ktime = ntp_get_next_leap(tk->id); if (tk->next_leap_ktime != KTIME_MAX) /* Convert to monotonic time */ tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real); @@ -663,7 +701,7 @@ static void timekeeping_restore_shadow(struct tk_data *tkd) static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int action) { - struct timekeeper *tk = &tk_core.shadow_timekeeper; + struct timekeeper *tk = &tkd->shadow_timekeeper; lockdep_assert_held(&tkd->lock); @@ -678,18 +716,22 @@ static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int act if (action & TK_CLEAR_NTP) { tk->ntp_error = 0; - ntp_clear(); + ntp_clear(tk->id); } tk_update_leap_state(tk); tk_update_ktime_data(tk); + tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real; - update_vsyscall(tk); - update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); + if (tk->id == TIMEKEEPER_CORE) { + update_vsyscall(tk); + update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); - tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real; - update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); - update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); + update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); + update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); + } else if (tk_is_aux(tk)) { + vdso_time_update_aux(tk); + } if (action & TK_CLOCK_WAS_SET) tk->clock_was_set_seq++; @@ -975,9 +1017,14 @@ time64_t ktime_get_real_seconds(void) EXPORT_SYMBOL_GPL(ktime_get_real_seconds); /** - * __ktime_get_real_seconds - The same as ktime_get_real_seconds - * but without the sequence counter protect. This internal function - * is called just when timekeeping lock is already held. + * __ktime_get_real_seconds - Unprotected access to CLOCK_REALTIME seconds + * + * The same as ktime_get_real_seconds() but without the sequence counter + * protection. This function is used in restricted contexts like the x86 MCE + * handler and in KGDB. It's unprotected on 32-bit vs. concurrent half + * completed modification and only to be used for such critical contexts. + * + * Returns: Racy snapshot of the CLOCK_REALTIME seconds value */ noinstr time64_t __ktime_get_real_seconds(void) { @@ -1412,41 +1459,73 @@ int do_settimeofday64(const struct timespec64 *ts) } EXPORT_SYMBOL(do_settimeofday64); +static inline bool timekeeper_is_core_tk(struct timekeeper *tk) +{ + return !IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS) || tk->id == TIMEKEEPER_CORE; +} + /** - * timekeeping_inject_offset - Adds or subtracts from the current time. + * __timekeeping_inject_offset - Adds or subtracts from the current time. + * @tkd: Pointer to the timekeeper to modify * @ts: Pointer to the timespec variable containing the offset * * Adds or subtracts an offset value from the current time. */ -static int timekeeping_inject_offset(const struct timespec64 *ts) +static int __timekeeping_inject_offset(struct tk_data *tkd, const struct timespec64 *ts) { + struct timekeeper *tks = &tkd->shadow_timekeeper; + struct timespec64 tmp; + if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - scoped_guard (raw_spinlock_irqsave, &tk_core.lock) { - struct timekeeper *tks = &tk_core.shadow_timekeeper; - struct timespec64 tmp; - - timekeeping_forward_now(tks); + timekeeping_forward_now(tks); + if (timekeeper_is_core_tk(tks)) { /* Make sure the proposed value is valid */ tmp = timespec64_add(tk_xtime(tks), *ts); if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 || !timespec64_valid_settod(&tmp)) { - timekeeping_restore_shadow(&tk_core); + timekeeping_restore_shadow(tkd); return -EINVAL; } tk_xtime_add(tks, ts); tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, *ts)); - timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL); + } else { + struct tk_read_base *tkr_mono = &tks->tkr_mono; + ktime_t now, offs; + + /* Get the current time */ + now = ktime_add_ns(tkr_mono->base, timekeeping_get_ns(tkr_mono)); + /* Add the relative offset change */ + offs = ktime_add(tks->offs_aux, timespec64_to_ktime(*ts)); + + /* Prevent that the resulting time becomes negative */ + if (ktime_add(now, offs) < 0) { + timekeeping_restore_shadow(tkd); + return -EINVAL; + } + tks->offs_aux = offs; } - /* Signal hrtimers about time change */ - clock_was_set(CLOCK_SET_WALL); + timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL); return 0; } +static int timekeeping_inject_offset(const struct timespec64 *ts) +{ + int ret; + + scoped_guard (raw_spinlock_irqsave, &tk_core.lock) + ret = __timekeeping_inject_offset(&tk_core, ts); + + /* Signal hrtimers about time change */ + if (!ret) + clock_was_set(CLOCK_SET_WALL); + return ret; +} + /* * Indicates if there is an offset between the system clock and the hardware * clock/persistent clock/rtc. @@ -1522,6 +1601,8 @@ static int change_clocksource(void *data) timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL); } + tk_aux_update_clocksource(); + if (old) { if (old->disable) old->disable(old); @@ -1573,6 +1654,39 @@ void ktime_get_raw_ts64(struct timespec64 *ts) } EXPORT_SYMBOL(ktime_get_raw_ts64); +/** + * ktime_get_clock_ts64 - Returns time of a clock in a timespec + * @id: POSIX clock ID of the clock to read + * @ts: Pointer to the timespec64 to be set + * + * The timestamp is invalidated (@ts->sec is set to -1) if the + * clock @id is not available. + */ +void ktime_get_clock_ts64(clockid_t id, struct timespec64 *ts) +{ + /* Invalidate time stamp */ + ts->tv_sec = -1; + ts->tv_nsec = 0; + + switch (id) { + case CLOCK_REALTIME: + ktime_get_real_ts64(ts); + return; + case CLOCK_MONOTONIC: + ktime_get_ts64(ts); + return; + case CLOCK_MONOTONIC_RAW: + ktime_get_raw_ts64(ts); + return; + case CLOCK_AUX ... CLOCK_AUX_LAST: + if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) + ktime_get_aux_ts64(id, ts); + return; + default: + WARN_ON_ONCE(1); + } +} +EXPORT_SYMBOL_GPL(ktime_get_clock_ts64); /** * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres @@ -1649,10 +1763,12 @@ read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, *boot_offset = ns_to_timespec64(local_clock()); } -static __init void tkd_basic_setup(struct tk_data *tkd) +static __init void tkd_basic_setup(struct tk_data *tkd, enum timekeeper_ids tk_id, bool valid) { raw_spin_lock_init(&tkd->lock); seqcount_raw_spinlock_init(&tkd->seq, &tkd->lock); + tkd->timekeeper.id = tkd->shadow_timekeeper.id = tk_id; + tkd->timekeeper.clock_valid = tkd->shadow_timekeeper.clock_valid = valid; } /* @@ -1682,7 +1798,8 @@ void __init timekeeping_init(void) struct timekeeper *tks = &tk_core.shadow_timekeeper; struct clocksource *clock; - tkd_basic_setup(&tk_core); + tkd_basic_setup(&tk_core, TIMEKEEPER_CORE, true); + tk_aux_setup(); read_persistent_wall_and_boot_offset(&wall_time, &boot_offset); if (timespec64_valid_settod(&wall_time) && @@ -2034,7 +2151,7 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, */ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) { - u64 ntp_tl = ntp_tick_length(); + u64 ntp_tl = ntp_tick_length(tk->id); u32 mult; /* @@ -2115,7 +2232,7 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) } /* Figure out if its a leap sec and apply if needed */ - leap = second_overflow(tk->xtime_sec); + leap = second_overflow(tk->id, tk->xtime_sec); if (unlikely(leap)) { struct timespec64 ts; @@ -2181,16 +2298,14 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset, * timekeeping_advance - Updates the timekeeper to the current time and * current NTP tick length */ -static bool timekeeping_advance(enum timekeeping_adv_mode mode) +static bool __timekeeping_advance(struct tk_data *tkd, enum timekeeping_adv_mode mode) { - struct timekeeper *tk = &tk_core.shadow_timekeeper; - struct timekeeper *real_tk = &tk_core.timekeeper; + struct timekeeper *tk = &tkd->shadow_timekeeper; + struct timekeeper *real_tk = &tkd->timekeeper; unsigned int clock_set = 0; int shift = 0, maxshift; u64 offset, orig_offset; - guard(raw_spinlock_irqsave)(&tk_core.lock); - /* Make sure we're fully resumed: */ if (unlikely(timekeeping_suspended)) return false; @@ -2214,7 +2329,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) shift = ilog2(offset) - ilog2(tk->cycle_interval); shift = max(0, shift); /* Bound shift to one less than what overflows tick_length */ - maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; + maxshift = (64 - (ilog2(ntp_tick_length(tk->id)) + 1)) - 1; shift = min(shift, maxshift); while (offset >= tk->cycle_interval) { offset = logarithmic_accumulation(tk, offset, shift, &clock_set); @@ -2239,19 +2354,27 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) if (orig_offset != offset) tk_update_coarse_nsecs(tk); - timekeeping_update_from_shadow(&tk_core, clock_set); + timekeeping_update_from_shadow(tkd, clock_set); return !!clock_set; } +static bool timekeeping_advance(enum timekeeping_adv_mode mode) +{ + guard(raw_spinlock_irqsave)(&tk_core.lock); + return __timekeeping_advance(&tk_core, mode); +} + /** * update_wall_time - Uses the current clocksource to increment the wall time * + * It also updates the enabled auxiliary clock timekeepers */ void update_wall_time(void) { if (timekeeping_advance(TK_ADV_TICK)) clock_was_set_delayed(); + tk_aux_advance(); } /** @@ -2449,7 +2572,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, /* * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex */ -static int timekeeping_validate_timex(const struct __kernel_timex *txc) +static int timekeeping_validate_timex(const struct __kernel_timex *txc, bool aux_clock) { if (txc->modes & ADJ_ADJTIME) { /* singleshot must not be used with any other mode bits */ @@ -2508,6 +2631,20 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc) return -EINVAL; } + if (aux_clock) { + /* Auxiliary clocks are similar to TAI and do not have leap seconds */ + if (txc->status & (STA_INS | STA_DEL)) + return -EINVAL; + + /* No TAI offset setting */ + if (txc->modes & ADJ_TAI) + return -EINVAL; + + /* No PPS support either */ + if (txc->status & (STA_PPSFREQ | STA_PPSTIME)) + return -EINVAL; + } + return 0; } @@ -2526,74 +2663,103 @@ unsigned long random_get_entropy_fallback(void) } EXPORT_SYMBOL_GPL(random_get_entropy_fallback); -/** - * do_adjtimex() - Accessor function to NTP __do_adjtimex function - * @txc: Pointer to kernel_timex structure containing NTP parameters - */ -int do_adjtimex(struct __kernel_timex *txc) +struct adjtimex_result { + struct audit_ntp_data ad; + struct timespec64 delta; + bool clock_set; +}; + +static int __do_adjtimex(struct tk_data *tkd, struct __kernel_timex *txc, + struct adjtimex_result *result) { - struct audit_ntp_data ad; - bool offset_set = false; - bool clock_set = false; + struct timekeeper *tks = &tkd->shadow_timekeeper; + bool aux_clock = !timekeeper_is_core_tk(tks); struct timespec64 ts; + s32 orig_tai, tai; int ret; /* Validate the data before disabling interrupts */ - ret = timekeeping_validate_timex(txc); + ret = timekeeping_validate_timex(txc, aux_clock); if (ret) return ret; add_device_randomness(txc, sizeof(*txc)); - if (txc->modes & ADJ_SETOFFSET) { - struct timespec64 delta; + if (!aux_clock) + ktime_get_real_ts64(&ts); + else + tk_get_aux_ts64(tkd->timekeeper.id, &ts); - delta.tv_sec = txc->time.tv_sec; - delta.tv_nsec = txc->time.tv_usec; + add_device_randomness(&ts, sizeof(ts)); + + guard(raw_spinlock_irqsave)(&tkd->lock); + + if (!tks->clock_valid) + return -ENODEV; + + if (txc->modes & ADJ_SETOFFSET) { + result->delta.tv_sec = txc->time.tv_sec; + result->delta.tv_nsec = txc->time.tv_usec; if (!(txc->modes & ADJ_NANO)) - delta.tv_nsec *= 1000; - ret = timekeeping_inject_offset(&delta); + result->delta.tv_nsec *= 1000; + ret = __timekeeping_inject_offset(tkd, &result->delta); if (ret) return ret; - - offset_set = delta.tv_sec != 0; - audit_tk_injoffset(delta); + result->clock_set = true; } - audit_ntp_init(&ad); + orig_tai = tai = tks->tai_offset; + ret = ntp_adjtimex(tks->id, txc, &ts, &tai, &result->ad); - ktime_get_real_ts64(&ts); - add_device_randomness(&ts, sizeof(ts)); + if (tai != orig_tai) { + __timekeeping_set_tai_offset(tks, tai); + timekeeping_update_from_shadow(tkd, TK_CLOCK_WAS_SET); + result->clock_set = true; + } else { + tk_update_leap_state_all(&tk_core); + } - scoped_guard (raw_spinlock_irqsave, &tk_core.lock) { - struct timekeeper *tks = &tk_core.shadow_timekeeper; - s32 orig_tai, tai; + /* Update the multiplier immediately if frequency was set directly */ + if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK)) + result->clock_set |= __timekeeping_advance(tkd, TK_ADV_FREQ); - orig_tai = tai = tks->tai_offset; - ret = __do_adjtimex(txc, &ts, &tai, &ad); + return ret; +} - if (tai != orig_tai) { - __timekeeping_set_tai_offset(tks, tai); - timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET); - clock_set = true; - } else { - tk_update_leap_state_all(&tk_core); - } - } +/** + * do_adjtimex() - Accessor function to NTP __do_adjtimex function + * @txc: Pointer to kernel_timex structure containing NTP parameters + */ +int do_adjtimex(struct __kernel_timex *txc) +{ + struct adjtimex_result result = { }; + int ret; - audit_ntp_log(&ad); + ret = __do_adjtimex(&tk_core, txc, &result); + if (ret < 0) + return ret; - /* Update the multiplier immediately if frequency was set directly */ - if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK)) - clock_set |= timekeeping_advance(TK_ADV_FREQ); + if (txc->modes & ADJ_SETOFFSET) + audit_tk_injoffset(result.delta); - if (clock_set) + audit_ntp_log(&result.ad); + + if (result.clock_set) clock_was_set(CLOCK_SET_WALL); - ntp_notify_cmos_timer(offset_set); + ntp_notify_cmos_timer(result.delta.tv_sec != 0); return ret; } +/* + * Invoked from NTP with the time keeper lock held, so lockless access is + * fine. + */ +long ktime_get_ntp_seconds(unsigned int id) +{ + return timekeeper_data[id].timekeeper.xtime_sec; +} + #ifdef CONFIG_NTP_PPS /** * hardpps() - Accessor function to NTP __hardpps function @@ -2607,3 +2773,316 @@ void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts) } EXPORT_SYMBOL(hardpps); #endif /* CONFIG_NTP_PPS */ + +#ifdef CONFIG_POSIX_AUX_CLOCKS +#include "posix-timers.h" + +/* + * Bitmap for the activated auxiliary timekeepers to allow lockless quick + * checks in the hot paths without touching extra cache lines. If set, then + * the state of the corresponding timekeeper has to be re-checked under + * timekeeper::lock. + */ +static unsigned long aux_timekeepers; + +static inline unsigned int clockid_to_tkid(unsigned int id) +{ + return TIMEKEEPER_AUX_FIRST + id - CLOCK_AUX; +} + +static inline struct tk_data *aux_get_tk_data(clockid_t id) +{ + if (!clockid_aux_valid(id)) + return NULL; + return &timekeeper_data[clockid_to_tkid(id)]; +} + +/* Invoked from timekeeping after a clocksource change */ +static void tk_aux_update_clocksource(void) +{ + unsigned long active = READ_ONCE(aux_timekeepers); + unsigned int id; + + for_each_set_bit(id, &active, BITS_PER_LONG) { + struct tk_data *tkd = &timekeeper_data[id + TIMEKEEPER_AUX_FIRST]; + struct timekeeper *tks = &tkd->shadow_timekeeper; + + guard(raw_spinlock_irqsave)(&tkd->lock); + if (!tks->clock_valid) + continue; + + timekeeping_forward_now(tks); + tk_setup_internals(tks, tk_core.timekeeper.tkr_mono.clock); + timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL); + } +} + +static void tk_aux_advance(void) +{ + unsigned long active = READ_ONCE(aux_timekeepers); + unsigned int id; + + /* Lockless quick check to avoid extra cache lines */ + for_each_set_bit(id, &active, BITS_PER_LONG) { + struct tk_data *aux_tkd = &timekeeper_data[id + TIMEKEEPER_AUX_FIRST]; + + guard(raw_spinlock)(&aux_tkd->lock); + if (aux_tkd->shadow_timekeeper.clock_valid) + __timekeeping_advance(aux_tkd, TK_ADV_TICK); + } +} + +/** + * ktime_get_aux - Get time for a AUX clock + * @id: ID of the clock to read (CLOCK_AUX...) + * @kt: Pointer to ktime_t to store the time stamp + * + * Returns: True if the timestamp is valid, false otherwise + */ +bool ktime_get_aux(clockid_t id, ktime_t *kt) +{ + struct tk_data *aux_tkd = aux_get_tk_data(id); + struct timekeeper *aux_tk; + unsigned int seq; + ktime_t base; + u64 nsecs; + + WARN_ON(timekeeping_suspended); + + if (!aux_tkd) + return false; + + aux_tk = &aux_tkd->timekeeper; + do { + seq = read_seqcount_begin(&aux_tkd->seq); + if (!aux_tk->clock_valid) + return false; + + base = ktime_add(aux_tk->tkr_mono.base, aux_tk->offs_aux); + nsecs = timekeeping_get_ns(&aux_tk->tkr_mono); + } while (read_seqcount_retry(&aux_tkd->seq, seq)); + + *kt = ktime_add_ns(base, nsecs); + return true; +} +EXPORT_SYMBOL_GPL(ktime_get_aux); + +/** + * ktime_get_aux_ts64 - Get time for a AUX clock + * @id: ID of the clock to read (CLOCK_AUX...) + * @ts: Pointer to timespec64 to store the time stamp + * + * Returns: True if the timestamp is valid, false otherwise + */ +bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *ts) +{ + ktime_t now; + + if (!ktime_get_aux(id, &now)) + return false; + *ts = ktime_to_timespec64(now); + return true; +} +EXPORT_SYMBOL_GPL(ktime_get_aux_ts64); + +static int aux_get_res(clockid_t id, struct timespec64 *tp) +{ + if (!clockid_aux_valid(id)) + return -ENODEV; + + tp->tv_sec = aux_clock_resolution_ns() / NSEC_PER_SEC; + tp->tv_nsec = aux_clock_resolution_ns() % NSEC_PER_SEC; + return 0; +} + +static int aux_get_timespec(clockid_t id, struct timespec64 *tp) +{ + return ktime_get_aux_ts64(id, tp) ? 0 : -ENODEV; +} + +static int aux_clock_set(const clockid_t id, const struct timespec64 *tnew) +{ + struct tk_data *aux_tkd = aux_get_tk_data(id); + struct timekeeper *aux_tks; + ktime_t tnow, nsecs; + + if (!timespec64_valid_settod(tnew)) + return -EINVAL; + if (!aux_tkd) + return -ENODEV; + + aux_tks = &aux_tkd->shadow_timekeeper; + + guard(raw_spinlock_irq)(&aux_tkd->lock); + if (!aux_tks->clock_valid) + return -ENODEV; + + /* Forward the timekeeper base time */ + timekeeping_forward_now(aux_tks); + /* + * Get the updated base time. tkr_mono.base has not been + * updated yet, so do that first. That makes the update + * in timekeeping_update_from_shadow() redundant, but + * that's harmless. After that @tnow can be calculated + * by using tkr_mono::cycle_last, which has been set + * by timekeeping_forward_now(). + */ + tk_update_ktime_data(aux_tks); + nsecs = timekeeping_cycles_to_ns(&aux_tks->tkr_mono, aux_tks->tkr_mono.cycle_last); + tnow = ktime_add(aux_tks->tkr_mono.base, nsecs); + + /* + * Calculate the new AUX offset as delta to @tnow ("monotonic"). + * That avoids all the tk::xtime back and forth conversions as + * xtime ("realtime") is not applicable for auxiliary clocks and + * kept in sync with "monotonic". + */ + aux_tks->offs_aux = ktime_sub(timespec64_to_ktime(*tnew), tnow); + + timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); + return 0; +} + +static int aux_clock_adj(const clockid_t id, struct __kernel_timex *txc) +{ + struct tk_data *aux_tkd = aux_get_tk_data(id); + struct adjtimex_result result = { }; + + if (!aux_tkd) + return -ENODEV; + + /* + * @result is ignored for now as there are neither hrtimers nor a + * RTC related to auxiliary clocks for now. + */ + return __do_adjtimex(aux_tkd, txc, &result); +} + +const struct k_clock clock_aux = { + .clock_getres = aux_get_res, + .clock_get_timespec = aux_get_timespec, + .clock_set = aux_clock_set, + .clock_adj = aux_clock_adj, +}; + +static void aux_clock_enable(clockid_t id) +{ + struct tk_read_base *tkr_raw = &tk_core.timekeeper.tkr_raw; + struct tk_data *aux_tkd = aux_get_tk_data(id); + struct timekeeper *aux_tks = &aux_tkd->shadow_timekeeper; + + /* Prevent the core timekeeper from changing. */ + guard(raw_spinlock_irq)(&tk_core.lock); + + /* + * Setup the auxiliary clock assuming that the raw core timekeeper + * clock frequency conversion is close enough. Userspace has to + * adjust for the deviation via clock_adjtime(2). + */ + guard(raw_spinlock_nested)(&aux_tkd->lock); + + /* Remove leftovers of a previous registration */ + memset(aux_tks, 0, sizeof(*aux_tks)); + /* Restore the timekeeper id */ + aux_tks->id = aux_tkd->timekeeper.id; + /* Setup the timekeeper based on the current system clocksource */ + tk_setup_internals(aux_tks, tkr_raw->clock); + + /* Mark it valid and set it live */ + aux_tks->clock_valid = true; + timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); +} + +static void aux_clock_disable(clockid_t id) +{ + struct tk_data *aux_tkd = aux_get_tk_data(id); + + guard(raw_spinlock_irq)(&aux_tkd->lock); + aux_tkd->shadow_timekeeper.clock_valid = false; + timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); +} + +static DEFINE_MUTEX(aux_clock_mutex); + +static ssize_t aux_clock_enable_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + /* Lazy atoi() as name is "0..7" */ + int id = kobj->name[0] & 0x7; + bool enable; + + if (!capable(CAP_SYS_TIME)) + return -EPERM; + + if (kstrtobool(buf, &enable) < 0) + return -EINVAL; + + guard(mutex)(&aux_clock_mutex); + if (enable == test_bit(id, &aux_timekeepers)) + return count; + + if (enable) { + aux_clock_enable(CLOCK_AUX + id); + set_bit(id, &aux_timekeepers); + } else { + aux_clock_disable(CLOCK_AUX + id); + clear_bit(id, &aux_timekeepers); + } + return count; +} + +static ssize_t aux_clock_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + unsigned long active = READ_ONCE(aux_timekeepers); + /* Lazy atoi() as name is "0..7" */ + int id = kobj->name[0] & 0x7; + + return sysfs_emit(buf, "%d\n", test_bit(id, &active)); +} + +static struct kobj_attribute aux_clock_enable_attr = __ATTR_RW(aux_clock_enable); + +static struct attribute *aux_clock_enable_attrs[] = { + &aux_clock_enable_attr.attr, + NULL +}; + +static const struct attribute_group aux_clock_enable_attr_group = { + .attrs = aux_clock_enable_attrs, +}; + +static int __init tk_aux_sysfs_init(void) +{ + struct kobject *auxo, *tko = kobject_create_and_add("time", kernel_kobj); + + if (!tko) + return -ENOMEM; + + auxo = kobject_create_and_add("aux_clocks", tko); + if (!auxo) { + kobject_put(tko); + return -ENOMEM; + } + + for (int i = 0; i <= MAX_AUX_CLOCKS; i++) { + char id[2] = { [0] = '0' + i, }; + struct kobject *clk = kobject_create_and_add(id, auxo); + + if (!clk) + return -ENOMEM; + + int ret = sysfs_create_group(clk, &aux_clock_enable_attr_group); + + if (ret) + return ret; + } + return 0; +} +late_initcall(tk_aux_sysfs_init); + +static __init void tk_aux_setup(void) +{ + for (int i = TIMEKEEPER_AUX_FIRST; i <= TIMEKEEPER_AUX_LAST; i++) + tkd_basic_setup(&timekeeper_data[i], i, false); +} +#endif /* CONFIG_POSIX_AUX_CLOCKS */ diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h index 8c9079108ffb..973ede670a36 100644 --- a/kernel/time/timekeeping_internal.h +++ b/kernel/time/timekeeping_internal.h @@ -45,4 +45,7 @@ static inline u64 clocksource_delta(u64 now, u64 last, u64 mask, u64 max_delta) unsigned long timekeeper_lock_irqsave(void); void timekeeper_unlock_irqrestore(unsigned long flags); +/* NTP specific interface to access the current seconds value */ +long ktime_get_ntp_seconds(unsigned int id); + #endif /* _TIMEKEEPING_INTERNAL_H */ diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 32ef27c71b57..8ba8b0d8a387 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -15,26 +15,25 @@ #include "timekeeping_internal.h" +static inline void fill_clock_configuration(struct vdso_clock *vc, const struct tk_read_base *base) +{ + vc->cycle_last = base->cycle_last; +#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT + vc->max_cycles = base->clock->max_cycles; +#endif + vc->mask = base->mask; + vc->mult = base->mult; + vc->shift = base->shift; +} + static inline void update_vdso_time_data(struct vdso_time_data *vdata, struct timekeeper *tk) { struct vdso_clock *vc = vdata->clock_data; struct vdso_timestamp *vdso_ts; u64 nsec, sec; - vc[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last; -#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT - vc[CS_HRES_COARSE].max_cycles = tk->tkr_mono.clock->max_cycles; -#endif - vc[CS_HRES_COARSE].mask = tk->tkr_mono.mask; - vc[CS_HRES_COARSE].mult = tk->tkr_mono.mult; - vc[CS_HRES_COARSE].shift = tk->tkr_mono.shift; - vc[CS_RAW].cycle_last = tk->tkr_raw.cycle_last; -#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT - vc[CS_RAW].max_cycles = tk->tkr_raw.clock->max_cycles; -#endif - vc[CS_RAW].mask = tk->tkr_raw.mask; - vc[CS_RAW].mult = tk->tkr_raw.mult; - vc[CS_RAW].shift = tk->tkr_raw.shift; + fill_clock_configuration(&vc[CS_HRES_COARSE], &tk->tkr_mono); + fill_clock_configuration(&vc[CS_RAW], &tk->tkr_raw); /* CLOCK_MONOTONIC */ vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC]; @@ -119,7 +118,8 @@ void update_vsyscall(struct timekeeper *tk) if (clock_mode != VDSO_CLOCKMODE_NONE) update_vdso_time_data(vdata, tk); - __arch_update_vsyscall(vdata); + __arch_update_vdso_clock(&vc[CS_HRES_COARSE]); + __arch_update_vdso_clock(&vc[CS_RAW]); vdso_write_end(vdata); @@ -136,6 +136,46 @@ void update_vsyscall_tz(void) __arch_sync_vdso_time_data(vdata); } +#ifdef CONFIG_POSIX_AUX_CLOCKS +void vdso_time_update_aux(struct timekeeper *tk) +{ + struct vdso_time_data *vdata = vdso_k_time_data; + struct vdso_timestamp *vdso_ts; + struct vdso_clock *vc; + s32 clock_mode; + u64 nsec; + + vc = &vdata->aux_clock_data[tk->id - TIMEKEEPER_AUX_FIRST]; + vdso_ts = &vc->basetime[VDSO_BASE_AUX]; + clock_mode = tk->tkr_mono.clock->vdso_clock_mode; + if (!tk->clock_valid) + clock_mode = VDSO_CLOCKMODE_NONE; + + /* copy vsyscall data */ + vdso_write_begin_clock(vc); + + vc->clock_mode = clock_mode; + + if (clock_mode != VDSO_CLOCKMODE_NONE) { + fill_clock_configuration(vc, &tk->tkr_mono); + + vdso_ts->sec = tk->xtime_sec; + + nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + nsec += tk->offs_aux; + vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec); + nsec = nsec << tk->tkr_mono.shift; + vdso_ts->nsec = nsec; + } + + __arch_update_vdso_clock(vc); + + vdso_write_end_clock(vc); + + __arch_sync_vdso_time_data(vdata); +} +#endif + /** * vdso_update_begin - Start of a VDSO update section * diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 93ef801a97ef..02ea19f67164 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -2,6 +2,7 @@ /* * Generic userspace implementations of gettimeofday() and similar. */ +#include <vdso/auxclock.h> #include <vdso/datapage.h> #include <vdso/helpers.h> @@ -71,6 +72,42 @@ static inline bool vdso_cycles_ok(u64 cycles) } #endif +static __always_inline bool vdso_clockid_valid(clockid_t clock) +{ + /* Check for negative values or invalid clocks */ + return likely((u32) clock <= CLOCK_AUX_LAST); +} + +/* + * Must not be invoked within the sequence read section as a race inside + * that loop could result in __iter_div_u64_rem() being extremely slow. + */ +static __always_inline void vdso_set_timespec(struct __kernel_timespec *ts, u64 sec, u64 ns) +{ + ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; +} + +static __always_inline +bool vdso_get_timestamp(const struct vdso_time_data *vd, const struct vdso_clock *vc, + unsigned int clkidx, u64 *sec, u64 *ns) +{ + const struct vdso_timestamp *vdso_ts = &vc->basetime[clkidx]; + u64 cycles; + + if (unlikely(!vdso_clocksource_ok(vc))) + return false; + + cycles = __arch_get_hw_counter(vc->clock_mode, vd); + if (unlikely(!vdso_cycles_ok(cycles))) + return false; + + *ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec); + *sec = vdso_ts->sec; + + return true; +} + #ifdef CONFIG_TIME_NS #ifdef CONFIG_GENERIC_VDSO_DATA_STORE @@ -82,48 +119,35 @@ const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_tim #endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ static __always_inline -int do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) +bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns); const struct timens_offset *offs = &vcns->offset[clk]; const struct vdso_clock *vc = vd->clock_data; - const struct vdso_timestamp *vdso_ts; - u64 cycles, ns; u32 seq; s64 sec; + u64 ns; if (clk != CLOCK_MONOTONIC_RAW) vc = &vc[CS_HRES_COARSE]; else vc = &vc[CS_RAW]; - vdso_ts = &vc->basetime[clk]; do { seq = vdso_read_begin(vc); - if (unlikely(!vdso_clocksource_ok(vc))) - return -1; - - cycles = __arch_get_hw_counter(vc->clock_mode, vd); - if (unlikely(!vdso_cycles_ok(cycles))) - return -1; - ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec); - sec = vdso_ts->sec; + if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns)) + return false; } while (unlikely(vdso_read_retry(vc, seq))); /* Add the namespace offset */ sec += offs->sec; ns += offs->nsec; - /* - * Do this outside the loop: a race inside the loop could result - * in __iter_div_u64_rem() being extremely slow. - */ - ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; + vdso_set_timespec(ts, sec, ns); - return 0; + return true; } #else static __always_inline @@ -133,24 +157,23 @@ const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_tim } static __always_inline -int do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) +bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { - return -EINVAL; + return false; } #endif static __always_inline -int do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, - clockid_t clk, struct __kernel_timespec *ts) +bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, + clockid_t clk, struct __kernel_timespec *ts) { - const struct vdso_timestamp *vdso_ts = &vc->basetime[clk]; - u64 cycles, sec, ns; + u64 sec, ns; u32 seq; /* Allows to compile the high resolution parts out */ if (!__arch_vdso_hres_capable()) - return -1; + return false; do { /* @@ -172,30 +195,19 @@ int do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, } smp_rmb(); - if (unlikely(!vdso_clocksource_ok(vc))) - return -1; - - cycles = __arch_get_hw_counter(vc->clock_mode, vd); - if (unlikely(!vdso_cycles_ok(cycles))) - return -1; - ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec); - sec = vdso_ts->sec; + if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns)) + return false; } while (unlikely(vdso_read_retry(vc, seq))); - /* - * Do this outside the loop: a race inside the loop could result - * in __iter_div_u64_rem() being extremely slow. - */ - ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; + vdso_set_timespec(ts, sec, ns); - return 0; + return true; } #ifdef CONFIG_TIME_NS static __always_inline -int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) +bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns); const struct timens_offset *offs = &vcns->offset[clk]; @@ -217,26 +229,22 @@ int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock sec += offs->sec; nsec += offs->nsec; - /* - * Do this outside the loop: a race inside the loop could result - * in __iter_div_u64_rem() being extremely slow. - */ - ts->tv_sec = sec + __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec); - ts->tv_nsec = nsec; - return 0; + vdso_set_timespec(ts, sec, nsec); + + return true; } #else static __always_inline -int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) +bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { - return -1; + return false; } #endif static __always_inline -int do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc, - clockid_t clk, struct __kernel_timespec *ts) +bool do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc, + clockid_t clk, struct __kernel_timespec *ts) { const struct vdso_timestamp *vdso_ts = &vc->basetime[clk]; u32 seq; @@ -258,19 +266,60 @@ int do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc, ts->tv_nsec = vdso_ts->nsec; } while (unlikely(vdso_read_retry(vc, seq))); - return 0; + return true; +} + +static __always_inline +bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts) +{ + const struct vdso_clock *vc; + u32 seq, idx; + u64 sec, ns; + + if (!IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) + return false; + + idx = clock - CLOCK_AUX; + vc = &vd->aux_clock_data[idx]; + + do { + /* + * Open coded function vdso_read_begin() to handle + * VDSO_CLOCK_TIMENS. See comment in do_hres(). + */ + while ((seq = READ_ONCE(vc->seq)) & 1) { + if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) { + vd = __arch_get_vdso_u_timens_data(vd); + vc = &vd->aux_clock_data[idx]; + /* Re-read from the real time data page */ + continue; + } + cpu_relax(); + } + smp_rmb(); + + /* Auxclock disabled? */ + if (vc->clock_mode == VDSO_CLOCKMODE_NONE) + return false; + + if (!vdso_get_timestamp(vd, vc, VDSO_BASE_AUX, &sec, &ns)) + return false; + } while (unlikely(vdso_read_retry(vc, seq))); + + vdso_set_timespec(ts, sec, ns); + + return true; } -static __always_inline int +static __always_inline bool __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts) { const struct vdso_clock *vc = vd->clock_data; u32 msk; - /* Check for negative values or invalid clocks */ - if (unlikely((u32) clock >= MAX_CLOCKS)) - return -1; + if (!vdso_clockid_valid(clock)) + return false; /* * Convert the clockid to a bitmask and use it to check which @@ -283,8 +332,10 @@ __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock, return do_coarse(vd, &vc[CS_HRES_COARSE], clock, ts); else if (msk & VDSO_RAW) vc = &vc[CS_RAW]; + else if (msk & VDSO_AUX) + return do_aux(vd, clock, ts); else - return -1; + return false; return do_hres(vd, vc, clock, ts); } @@ -293,9 +344,11 @@ static __maybe_unused int __cvdso_clock_gettime_data(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts) { - int ret = __cvdso_clock_gettime_common(vd, clock, ts); + bool ok; + + ok = __cvdso_clock_gettime_common(vd, clock, ts); - if (unlikely(ret)) + if (unlikely(!ok)) return clock_gettime_fallback(clock, ts); return 0; } @@ -312,18 +365,18 @@ __cvdso_clock_gettime32_data(const struct vdso_time_data *vd, clockid_t clock, struct old_timespec32 *res) { struct __kernel_timespec ts; - int ret; + bool ok; - ret = __cvdso_clock_gettime_common(vd, clock, &ts); + ok = __cvdso_clock_gettime_common(vd, clock, &ts); - if (unlikely(ret)) + if (unlikely(!ok)) return clock_gettime32_fallback(clock, res); - /* For ret == 0 */ + /* For ok == true */ res->tv_sec = ts.tv_sec; res->tv_nsec = ts.tv_nsec; - return ret; + return 0; } static __maybe_unused int @@ -342,7 +395,7 @@ __cvdso_gettimeofday_data(const struct vdso_time_data *vd, if (likely(tv != NULL)) { struct __kernel_timespec ts; - if (do_hres(vd, &vc[CS_HRES_COARSE], CLOCK_REALTIME, &ts)) + if (!do_hres(vd, &vc[CS_HRES_COARSE], CLOCK_REALTIME, &ts)) return gettimeofday_fallback(tv, tz); tv->tv_sec = ts.tv_sec; @@ -396,16 +449,15 @@ static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time #ifdef VDSO_HAS_CLOCK_GETRES static __maybe_unused -int __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock, - struct __kernel_timespec *res) +bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock, + struct __kernel_timespec *res) { const struct vdso_clock *vc = vd->clock_data; u32 msk; u64 ns; - /* Check for negative values or invalid clocks */ - if (unlikely((u32) clock >= MAX_CLOCKS)) - return -1; + if (!vdso_clockid_valid(clock)) + return false; if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) @@ -426,24 +478,28 @@ int __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock * Preserves the behaviour of posix_get_coarse_res(). */ ns = LOW_RES_NSEC; + } else if (msk & VDSO_AUX) { + ns = aux_clock_resolution_ns(); } else { - return -1; + return false; } if (likely(res)) { res->tv_sec = 0; res->tv_nsec = ns; } - return 0; + return true; } static __maybe_unused int __cvdso_clock_getres_data(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *res) { - int ret = __cvdso_clock_getres_common(vd, clock, res); + bool ok; - if (unlikely(ret)) + ok = __cvdso_clock_getres_common(vd, clock, res); + + if (unlikely(!ok)) return clock_getres_fallback(clock, res); return 0; } @@ -460,18 +516,18 @@ __cvdso_clock_getres_time32_data(const struct vdso_time_data *vd, clockid_t cloc struct old_timespec32 *res) { struct __kernel_timespec ts; - int ret; + bool ok; - ret = __cvdso_clock_getres_common(vd, clock, &ts); + ok = __cvdso_clock_getres_common(vd, clock, &ts); - if (unlikely(ret)) + if (unlikely(!ok)) return clock_getres32_fallback(clock, res); if (likely(res)) { res->tv_sec = ts.tv_sec; res->tv_nsec = ts.tv_nsec; } - return ret; + return 0; } static __maybe_unused int |