summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/stable/sysfs-kernel-time-aux-clocks5
-rw-r--r--arch/arm64/include/asm/vdso/vsyscall.h7
-rw-r--r--include/asm-generic/vdso/vsyscall.h6
-rw-r--r--include/linux/posix-timers.h5
-rw-r--r--include/linux/timekeeper_internal.h37
-rw-r--r--include/linux/timekeeping.h12
-rw-r--r--include/uapi/linux/time.h11
-rw-r--r--include/vdso/auxclock.h13
-rw-r--r--include/vdso/datapage.h5
-rw-r--r--include/vdso/helpers.h50
-rw-r--r--kernel/time/Kconfig15
-rw-r--r--kernel/time/namespace.c5
-rw-r--r--kernel/time/ntp.c72
-rw-r--r--kernel/time/ntp_internal.h13
-rw-r--r--kernel/time/posix-timers.c3
-rw-r--r--kernel/time/posix-timers.h1
-rw-r--r--kernel/time/timekeeping.c629
-rw-r--r--kernel/time/timekeeping_internal.h3
-rw-r--r--kernel/time/vsyscall.c70
-rw-r--r--lib/vdso/gettimeofday.c224
20 files changed, 950 insertions, 236 deletions
diff --git a/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks b/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks
new file mode 100644
index 000000000000..825508f42af6
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks
@@ -0,0 +1,5 @@
+What: /sys/kernel/time/aux_clocks/<ID>/enable
+Date: May 2025
+Contact: Thomas Gleixner <tglx@linutronix.de>
+Description:
+ Controls the enablement of auxiliary clock timekeepers.
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
index de58951b8df6..417aae5763a8 100644
--- a/arch/arm64/include/asm/vdso/vsyscall.h
+++ b/arch/arm64/include/asm/vdso/vsyscall.h
@@ -13,12 +13,11 @@
* Update the vDSO data page to keep in sync with kernel timekeeping.
*/
static __always_inline
-void __arm64_update_vsyscall(struct vdso_time_data *vdata)
+void __arch_update_vdso_clock(struct vdso_clock *vc)
{
- vdata->clock_data[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
- vdata->clock_data[CS_RAW].mask = VDSO_PRECISION_MASK;
+ vc->mask = VDSO_PRECISION_MASK;
}
-#define __arch_update_vsyscall __arm64_update_vsyscall
+#define __arch_update_vdso_clock __arch_update_vdso_clock
/* The asm-generic header needs to be included after the definitions above */
#include <asm-generic/vdso/vsyscall.h>
diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h
index b550afa15ecd..7fc0b560007d 100644
--- a/include/asm-generic/vdso/vsyscall.h
+++ b/include/asm-generic/vdso/vsyscall.h
@@ -22,11 +22,11 @@ static __always_inline const struct vdso_rng_data *__arch_get_vdso_u_rng_data(vo
#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */
-#ifndef __arch_update_vsyscall
-static __always_inline void __arch_update_vsyscall(struct vdso_time_data *vdata)
+#ifndef __arch_update_vdso_clock
+static __always_inline void __arch_update_vdso_clock(struct vdso_clock *vc)
{
}
-#endif /* __arch_update_vsyscall */
+#endif /* __arch_update_vdso_clock */
#ifndef __arch_sync_vdso_time_data
static __always_inline void __arch_sync_vdso_time_data(struct vdso_time_data *vdata)
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index dd48c64b605e..4d3dbcef379e 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -37,6 +37,11 @@ static inline int clockid_to_fd(const clockid_t clk)
return ~(clk >> 3);
}
+static inline bool clockid_aux_valid(clockid_t id)
+{
+ return IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS) && id >= CLOCK_AUX && id <= CLOCK_AUX_LAST;
+}
+
#ifdef CONFIG_POSIX_TIMERS
#include <linux/signal_types.h>
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 785048a3b3e6..c27aac67cb3f 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -12,6 +12,22 @@
#include <linux/time.h>
/**
+ * enum timekeeper_ids - IDs for various time keepers in the kernel
+ * @TIMEKEEPER_CORE: The central core timekeeper managing system time
+ * @TIMEKEEPER_AUX_FIRST: The first AUX timekeeper
+ * @TIMEKEEPER_AUX_LAST: The last AUX timekeeper
+ * @TIMEKEEPERS_MAX: The maximum number of timekeepers managed
+ */
+enum timekeeper_ids {
+ TIMEKEEPER_CORE,
+#ifdef CONFIG_POSIX_AUX_CLOCKS
+ TIMEKEEPER_AUX_FIRST,
+ TIMEKEEPER_AUX_LAST = TIMEKEEPER_AUX_FIRST + MAX_AUX_CLOCKS - 1,
+#endif
+ TIMEKEEPERS_MAX,
+};
+
+/**
* struct tk_read_base - base structure for timekeeping readout
* @clock: Current clocksource used for timekeeping.
* @mask: Bitmask for two's complement subtraction of non 64bit clocks
@@ -51,11 +67,14 @@ struct tk_read_base {
* @offs_real: Offset clock monotonic -> clock realtime
* @offs_boot: Offset clock monotonic -> clock boottime
* @offs_tai: Offset clock monotonic -> clock tai
+ * @offs_aux: Offset clock monotonic -> clock AUX
* @coarse_nsec: The nanoseconds part for coarse time getters
+ * @id: The timekeeper ID
* @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
* @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
* @clock_was_set_seq: The sequence number of clock was set events
* @cs_was_changed_seq: The sequence number of clocksource change events
+ * @clock_valid: Indicator for valid clock
* @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
* @cycle_interval: Number of clock cycles in one NTP interval
* @xtime_interval: Number of clock shifted nano seconds in one NTP
@@ -95,13 +114,16 @@ struct tk_read_base {
* @monotonic_to_boottime is a timespec64 representation of @offs_boot to
* accelerate the VDSO update for CLOCK_BOOTTIME.
*
+ * @offs_aux is used by the auxiliary timekeepers which do not utilize any
+ * of the regular timekeeper offset fields.
+ *
* The cacheline ordering of the structure is optimized for in kernel usage of
* the ktime_get() and ktime_get_ts64() family of time accessors. Struct
* timekeeper is prepended in the core timekeeping code with a sequence count,
* which results in the following cacheline layout:
*
* 0: seqcount, tkr_mono
- * 1: xtime_sec ... coarse_nsec
+ * 1: xtime_sec ... id
* 2: tkr_raw, raw_sec
* 3,4: Internal variables
*
@@ -121,8 +143,12 @@ struct timekeeper {
struct timespec64 wall_to_monotonic;
ktime_t offs_real;
ktime_t offs_boot;
- ktime_t offs_tai;
+ union {
+ ktime_t offs_tai;
+ ktime_t offs_aux;
+ };
u32 coarse_nsec;
+ enum timekeeper_ids id;
/* Cacheline 2: */
struct tk_read_base tkr_raw;
@@ -131,6 +157,7 @@ struct timekeeper {
/* Cachline 3 and 4 (timekeeping internal variables): */
unsigned int clock_was_set_seq;
u8 cs_was_changed_seq;
+ u8 clock_valid;
struct timespec64 monotonic_to_boot;
@@ -163,4 +190,10 @@ static inline void update_vsyscall_tz(void)
}
#endif
+#if defined(CONFIG_GENERIC_GETTIMEOFDAY) && defined(CONFIG_POSIX_AUX_CLOCKS)
+extern void vdso_time_update_aux(struct timekeeper *tk);
+#else
+static inline void vdso_time_update_aux(struct timekeeper *tk) { }
+#endif
+
#endif /* _LINUX_TIMEKEEPER_INTERNAL_H */
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 542773650200..aee2c1a46e47 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -44,6 +44,7 @@ extern void ktime_get_ts64(struct timespec64 *ts);
extern void ktime_get_real_ts64(struct timespec64 *tv);
extern void ktime_get_coarse_ts64(struct timespec64 *ts);
extern void ktime_get_coarse_real_ts64(struct timespec64 *ts);
+extern void ktime_get_clock_ts64(clockid_t id, struct timespec64 *ts);
/* Multigrain timestamp interfaces */
extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts);
@@ -263,6 +264,17 @@ extern bool timekeeping_rtc_skipresume(void);
extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
+/*
+ * Auxiliary clock interfaces
+ */
+#ifdef CONFIG_POSIX_AUX_CLOCKS
+extern bool ktime_get_aux(clockid_t id, ktime_t *kt);
+extern bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *kt);
+#else
+static inline bool ktime_get_aux(clockid_t id, ktime_t *kt) { return false; }
+static inline bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *kt) { return false; }
+#endif
+
/**
* struct system_time_snapshot - simultaneous raw/real time capture with
* counter value
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index 4f4b6e48e01c..16ca1ac206fd 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -64,6 +64,17 @@ struct timezone {
#define CLOCK_TAI 11
#define MAX_CLOCKS 16
+
+/*
+ * AUX clock support. AUXiliary clocks are dynamically configured by
+ * enabling a clock ID. These clocks can be steered independently of the
+ * core timekeeper. The kernel can support up to 8 auxiliary clocks, but
+ * the actual limit depends on eventual architecture constraints vs. VDSO.
+ */
+#define CLOCK_AUX MAX_CLOCKS
+#define MAX_AUX_CLOCKS 8
+#define CLOCK_AUX_LAST (CLOCK_AUX + MAX_AUX_CLOCKS - 1)
+
#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC)
#define CLOCKS_MONO CLOCK_MONOTONIC
diff --git a/include/vdso/auxclock.h b/include/vdso/auxclock.h
new file mode 100644
index 000000000000..6d6e74cbc400
--- /dev/null
+++ b/include/vdso/auxclock.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _VDSO_AUXCLOCK_H
+#define _VDSO_AUXCLOCK_H
+
+#include <uapi/linux/time.h>
+#include <uapi/linux/types.h>
+
+static __always_inline u64 aux_clock_resolution_ns(void)
+{
+ return 1;
+}
+
+#endif /* _VDSO_AUXCLOCK_H */
diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h
index 1864e76e8f69..02533038640e 100644
--- a/include/vdso/datapage.h
+++ b/include/vdso/datapage.h
@@ -5,6 +5,7 @@
#ifndef __ASSEMBLY__
#include <linux/compiler.h>
+#include <uapi/linux/bits.h>
#include <uapi/linux/time.h>
#include <uapi/linux/types.h>
#include <uapi/asm-generic/errno-base.h>
@@ -38,6 +39,7 @@ struct vdso_arch_data {
#endif
#define VDSO_BASES (CLOCK_TAI + 1)
+#define VDSO_BASE_AUX 0
#define VDSO_HRES (BIT(CLOCK_REALTIME) | \
BIT(CLOCK_MONOTONIC) | \
BIT(CLOCK_BOOTTIME) | \
@@ -45,6 +47,7 @@ struct vdso_arch_data {
#define VDSO_COARSE (BIT(CLOCK_REALTIME_COARSE) | \
BIT(CLOCK_MONOTONIC_COARSE))
#define VDSO_RAW (BIT(CLOCK_MONOTONIC_RAW))
+#define VDSO_AUX __GENMASK(CLOCK_AUX_LAST, CLOCK_AUX)
#define CS_HRES_COARSE 0
#define CS_RAW 1
@@ -117,6 +120,7 @@ struct vdso_clock {
* @arch_data: architecture specific data (optional, defaults
* to an empty struct)
* @clock_data: clocksource related data (array)
+ * @aux_clock_data: auxiliary clocksource related data (array)
* @tz_minuteswest: minutes west of Greenwich
* @tz_dsttime: type of DST correction
* @hrtimer_res: hrtimer resolution
@@ -133,6 +137,7 @@ struct vdso_time_data {
struct arch_vdso_time_data arch_data;
struct vdso_clock clock_data[CS_BASES];
+ struct vdso_clock aux_clock_data[MAX_AUX_CLOCKS];
s32 tz_minuteswest;
s32 tz_dsttime;
diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h
index 0a98fed550ba..1a5ee9d9052c 100644
--- a/include/vdso/helpers.h
+++ b/include/vdso/helpers.h
@@ -28,17 +28,47 @@ static __always_inline u32 vdso_read_retry(const struct vdso_clock *vc,
return seq != start;
}
-static __always_inline void vdso_write_begin(struct vdso_time_data *vd)
+static __always_inline void vdso_write_seq_begin(struct vdso_clock *vc)
{
- struct vdso_clock *vc = vd->clock_data;
+ /*
+ * WRITE_ONCE() is required otherwise the compiler can validly tear
+ * updates to vc->seq and it is possible that the value seen by the
+ * reader is inconsistent.
+ */
+ WRITE_ONCE(vc->seq, vc->seq + 1);
+}
+static __always_inline void vdso_write_seq_end(struct vdso_clock *vc)
+{
/*
* WRITE_ONCE() is required otherwise the compiler can validly tear
- * updates to vd[x].seq and it is possible that the value seen by the
+ * updates to vc->seq and it is possible that the value seen by the
* reader is inconsistent.
*/
- WRITE_ONCE(vc[CS_HRES_COARSE].seq, vc[CS_HRES_COARSE].seq + 1);
- WRITE_ONCE(vc[CS_RAW].seq, vc[CS_RAW].seq + 1);
+ WRITE_ONCE(vc->seq, vc->seq + 1);
+}
+
+static __always_inline void vdso_write_begin_clock(struct vdso_clock *vc)
+{
+ vdso_write_seq_begin(vc);
+ /* Ensure the sequence invalidation is visible before data is modified */
+ smp_wmb();
+}
+
+static __always_inline void vdso_write_end_clock(struct vdso_clock *vc)
+{
+ /* Ensure the data update is visible before the sequence is set valid again */
+ smp_wmb();
+ vdso_write_seq_end(vc);
+}
+
+static __always_inline void vdso_write_begin(struct vdso_time_data *vd)
+{
+ struct vdso_clock *vc = vd->clock_data;
+
+ vdso_write_seq_begin(&vc[CS_HRES_COARSE]);
+ vdso_write_seq_begin(&vc[CS_RAW]);
+ /* Ensure the sequence invalidation is visible before data is modified */
smp_wmb();
}
@@ -46,14 +76,10 @@ static __always_inline void vdso_write_end(struct vdso_time_data *vd)
{
struct vdso_clock *vc = vd->clock_data;
+ /* Ensure the data update is visible before the sequence is set valid again */
smp_wmb();
- /*
- * WRITE_ONCE() is required otherwise the compiler can validly tear
- * updates to vd[x].seq and it is possible that the value seen by the
- * reader is inconsistent.
- */
- WRITE_ONCE(vc[CS_HRES_COARSE].seq, vc[CS_HRES_COARSE].seq + 1);
- WRITE_ONCE(vc[CS_RAW].seq, vc[CS_RAW].seq + 1);
+ vdso_write_seq_end(&vc[CS_HRES_COARSE]);
+ vdso_write_seq_end(&vc[CS_RAW]);
}
#endif /* !__ASSEMBLY__ */
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index b0b97a60aaa6..7c6a52f7836c 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -82,9 +82,9 @@ config CONTEXT_TRACKING_IDLE
help
Tracks idle state on behalf of RCU.
-if GENERIC_CLOCKEVENTS
menu "Timers subsystem"
+if GENERIC_CLOCKEVENTS
# Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is
# only related to the tick functionality. Oneshot clockevent devices
# are supported independent of this.
@@ -208,6 +208,17 @@ config CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
interval and NTP's maximum frequency drift of 500 parts
per million. If the clocksource is good enough for NTP,
it is good enough for the clocksource watchdog!
+endif
+
+config POSIX_AUX_CLOCKS
+ bool "Enable auxiliary POSIX clocks"
+ depends on POSIX_TIMERS
+ help
+ Auxiliary POSIX clocks are clocks which can be steered
+ independently of the core timekeeper, which controls the
+ MONOTONIC, REALTIME, BOOTTIME and TAI clocks. They are useful to
+ provide e.g. lockless time accessors to independent PTP clocks
+ and other clock domains, which are not correlated to the TAI/NTP
+ notion of time.
endmenu
-endif
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index e3642278df43..667452768ed3 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -242,6 +242,11 @@ static void timens_set_vvar_page(struct task_struct *task,
for (i = 0; i < CS_BASES; i++)
timens_setup_vdso_clock_data(&vc[i], ns);
+ if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) {
+ for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++)
+ timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns);
+ }
+
out:
mutex_unlock(&offset_lock);
}
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index b837d3d9d325..97fa99b96dd0 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -18,6 +18,7 @@
#include <linux/module.h>
#include <linux/rtc.h>
#include <linux/audit.h>
+#include <linux/timekeeper_internal.h>
#include "ntp_internal.h"
#include "timekeeping_internal.h"
@@ -86,14 +87,16 @@ struct ntp_data {
#endif
};
-static struct ntp_data tk_ntp_data = {
- .tick_usec = USER_TICK_USEC,
- .time_state = TIME_OK,
- .time_status = STA_UNSYNC,
- .time_constant = 2,
- .time_maxerror = NTP_PHASE_LIMIT,
- .time_esterror = NTP_PHASE_LIMIT,
- .ntp_next_leap_sec = TIME64_MAX,
+static struct ntp_data tk_ntp_data[TIMEKEEPERS_MAX] = {
+ [ 0 ... TIMEKEEPERS_MAX - 1 ] = {
+ .tick_usec = USER_TICK_USEC,
+ .time_state = TIME_OK,
+ .time_status = STA_UNSYNC,
+ .time_constant = 2,
+ .time_maxerror = NTP_PHASE_LIMIT,
+ .time_esterror = NTP_PHASE_LIMIT,
+ .ntp_next_leap_sec = TIME64_MAX,
+ },
};
#define SECS_PER_DAY 86400
@@ -300,7 +303,7 @@ static void ntp_update_offset(struct ntp_data *ntpdata, long offset)
* Select how the frequency is to be controlled
* and in which mode (PLL or FLL).
*/
- real_secs = __ktime_get_real_seconds();
+ real_secs = ktime_get_ntp_seconds(ntpdata - tk_ntp_data);
secs = (long)(real_secs - ntpdata->time_reftime);
if (unlikely(ntpdata->time_status & STA_FREQHOLD))
secs = 0;
@@ -348,33 +351,38 @@ static void __ntp_clear(struct ntp_data *ntpdata)
/**
* ntp_clear - Clears the NTP state variables
+ * @tkid: Timekeeper ID to be able to select proper ntp data array member
*/
-void ntp_clear(void)
+void ntp_clear(unsigned int tkid)
{
- __ntp_clear(&tk_ntp_data);
+ __ntp_clear(&tk_ntp_data[tkid]);
}
-u64 ntp_tick_length(void)
+u64 ntp_tick_length(unsigned int tkid)
{
- return tk_ntp_data.tick_length;
+ return tk_ntp_data[tkid].tick_length;
}
/**
* ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime_t
+ * @tkid: Timekeeper ID
*
- * Provides the time of the next leapsecond against CLOCK_REALTIME in
- * a ktime_t format. Returns KTIME_MAX if no leapsecond is pending.
+ * Returns: For @tkid == TIMEKEEPER_CORE this provides the time of the next
+ * leap second against CLOCK_REALTIME in a ktime_t format if a
+ * leap second is pending. KTIME_MAX otherwise.
*/
-ktime_t ntp_get_next_leap(void)
+ktime_t ntp_get_next_leap(unsigned int tkid)
{
- struct ntp_data *ntpdata = &tk_ntp_data;
- ktime_t ret;
+ struct ntp_data *ntpdata = &tk_ntp_data[TIMEKEEPER_CORE];
+
+ if (tkid != TIMEKEEPER_CORE)
+ return KTIME_MAX;
if ((ntpdata->time_state == TIME_INS) && (ntpdata->time_status & STA_INS))
return ktime_set(ntpdata->ntp_next_leap_sec, 0);
- ret = KTIME_MAX;
- return ret;
+
+ return KTIME_MAX;
}
/*
@@ -387,9 +395,9 @@ ktime_t ntp_get_next_leap(void)
*
* Also handles leap second processing, and returns leap offset
*/
-int second_overflow(time64_t secs)
+int second_overflow(unsigned int tkid, time64_t secs)
{
- struct ntp_data *ntpdata = &tk_ntp_data;
+ struct ntp_data *ntpdata = &tk_ntp_data[tkid];
s64 delta;
int leap = 0;
s32 rem;
@@ -605,7 +613,7 @@ static inline int update_rtc(struct timespec64 *to_set, unsigned long *offset_ns
*/
static inline bool ntp_synced(void)
{
- return !(tk_ntp_data.time_status & STA_UNSYNC);
+ return !(tk_ntp_data[TIMEKEEPER_CORE].time_status & STA_UNSYNC);
}
/*
@@ -702,7 +710,7 @@ static inline void process_adj_status(struct ntp_data *ntpdata, const struct __k
* reference time to current time.
*/
if (!(ntpdata->time_status & STA_PLL) && (txc->status & STA_PLL))
- ntpdata->time_reftime = __ktime_get_real_seconds();
+ ntpdata->time_reftime = ktime_get_ntp_seconds(ntpdata - tk_ntp_data);
/* only set allowed bits */
ntpdata->time_status &= STA_RONLY;
@@ -759,10 +767,10 @@ static inline void process_adjtimex_modes(struct ntp_data *ntpdata, const struct
* adjtimex() mainly allows reading (and writing, if superuser) of
* kernel time-keeping variables. used by xntpd.
*/
-int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts,
- s32 *time_tai, struct audit_ntp_data *ad)
+int ntp_adjtimex(unsigned int tkid, struct __kernel_timex *txc, const struct timespec64 *ts,
+ s32 *time_tai, struct audit_ntp_data *ad)
{
- struct ntp_data *ntpdata = &tk_ntp_data;
+ struct ntp_data *ntpdata = &tk_ntp_data[tkid];
int result;
if (txc->modes & ADJ_ADJTIME) {
@@ -1031,8 +1039,8 @@ static void hardpps_update_phase(struct ntp_data *ntpdata, long error)
*/
void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
{
+ struct ntp_data *ntpdata = &tk_ntp_data[TIMEKEEPER_CORE];
struct pps_normtime pts_norm, freq_norm;
- struct ntp_data *ntpdata = &tk_ntp_data;
pts_norm = pps_normalize_ts(*phase_ts);
@@ -1083,18 +1091,18 @@ void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_t
static int __init ntp_tick_adj_setup(char *str)
{
- int rc = kstrtos64(str, 0, &tk_ntp_data.ntp_tick_adj);
+ int rc = kstrtos64(str, 0, &tk_ntp_data[TIMEKEEPER_CORE].ntp_tick_adj);
if (rc)
return rc;
- tk_ntp_data.ntp_tick_adj <<= NTP_SCALE_SHIFT;
+ tk_ntp_data[TIMEKEEPER_CORE].ntp_tick_adj <<= NTP_SCALE_SHIFT;
return 1;
}
-
__setup("ntp_tick_adj=", ntp_tick_adj_setup);
void __init ntp_init(void)
{
- ntp_clear();
+ for (int id = 0; id < TIMEKEEPERS_MAX; id++)
+ __ntp_clear(tk_ntp_data + id);
ntp_init_cmos_sync();
}
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 5a633dce9057..7084d839c207 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -3,14 +3,13 @@
#define _LINUX_NTP_INTERNAL_H
extern void ntp_init(void);
-extern void ntp_clear(void);
+extern void ntp_clear(unsigned int tkid);
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
-extern u64 ntp_tick_length(void);
-extern ktime_t ntp_get_next_leap(void);
-extern int second_overflow(time64_t secs);
-extern int __do_adjtimex(struct __kernel_timex *txc,
- const struct timespec64 *ts,
- s32 *time_tai, struct audit_ntp_data *ad);
+extern u64 ntp_tick_length(unsigned int tkid);
+extern ktime_t ntp_get_next_leap(unsigned int tkid);
+extern int second_overflow(unsigned int tkid, time64_t secs);
+extern int ntp_adjtimex(unsigned int tkid, struct __kernel_timex *txc, const struct timespec64 *ts,
+ s32 *time_tai, struct audit_ntp_data *ad);
extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts);
#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 2053b1a4c9e4..8b582174b1f9 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1526,6 +1526,9 @@ static const struct k_clock * const posix_clocks[] = {
[CLOCK_REALTIME_ALARM] = &alarm_clock,
[CLOCK_BOOTTIME_ALARM] = &alarm_clock,
[CLOCK_TAI] = &clock_tai,
+#ifdef CONFIG_POSIX_AUX_CLOCKS
+ [CLOCK_AUX ... CLOCK_AUX_LAST] = &clock_aux,
+#endif
};
static const struct k_clock *clockid_to_kclock(const clockid_t id)
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index 61906f0688c1..7f259e845d24 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -41,6 +41,7 @@ extern const struct k_clock clock_posix_dynamic;
extern const struct k_clock clock_process;
extern const struct k_clock clock_thread;
extern const struct k_clock alarm_clock;
+extern const struct k_clock clock_aux;
void posix_timer_queue_signal(struct k_itimer *timr);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 83c65f3afcca..059fa8b79be6 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -6,6 +6,7 @@
#include <linux/timekeeper_internal.h>
#include <linux/module.h>
#include <linux/interrupt.h>
+#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
@@ -25,6 +26,8 @@
#include <linux/audit.h>
#include <linux/random.h>
+#include <vdso/auxclock.h>
+
#include "tick-internal.h"
#include "ntp_internal.h"
#include "timekeeping_internal.h"
@@ -53,7 +56,32 @@ struct tk_data {
raw_spinlock_t lock;
} ____cacheline_aligned;
-static struct tk_data tk_core;
+static struct tk_data timekeeper_data[TIMEKEEPERS_MAX];
+
+/* The core timekeeper */
+#define tk_core (timekeeper_data[TIMEKEEPER_CORE])
+
+#ifdef CONFIG_POSIX_AUX_CLOCKS
+static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts)
+{
+ return ktime_get_aux_ts64(CLOCK_AUX + tkid - TIMEKEEPER_AUX_FIRST, ts);
+}
+
+static inline bool tk_is_aux(const struct timekeeper *tk)
+{
+ return tk->id >= TIMEKEEPER_AUX_FIRST && tk->id <= TIMEKEEPER_AUX_LAST;
+}
+#else
+static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts)
+{
+ return false;
+}
+
+static inline bool tk_is_aux(const struct timekeeper *tk)
+{
+ return false;
+}
+#endif
/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;
@@ -113,6 +141,16 @@ static struct tk_fast tk_fast_raw ____cacheline_aligned = {
.base[1] = FAST_TK_INIT,
};
+#ifdef CONFIG_POSIX_AUX_CLOCKS
+static __init void tk_aux_setup(void);
+static void tk_aux_update_clocksource(void);
+static void tk_aux_advance(void);
+#else
+static inline void tk_aux_setup(void) { }
+static inline void tk_aux_update_clocksource(void) { }
+static inline void tk_aux_advance(void) { }
+#endif
+
unsigned long timekeeper_lock_irqsave(void)
{
unsigned long flags;
@@ -601,7 +639,7 @@ EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
*/
static inline void tk_update_leap_state(struct timekeeper *tk)
{
- tk->next_leap_ktime = ntp_get_next_leap();
+ tk->next_leap_ktime = ntp_get_next_leap(tk->id);
if (tk->next_leap_ktime != KTIME_MAX)
/* Convert to monotonic time */
tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
@@ -663,7 +701,7 @@ static void timekeeping_restore_shadow(struct tk_data *tkd)
static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int action)
{
- struct timekeeper *tk = &tk_core.shadow_timekeeper;
+ struct timekeeper *tk = &tkd->shadow_timekeeper;
lockdep_assert_held(&tkd->lock);
@@ -678,18 +716,22 @@ static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int act
if (action & TK_CLEAR_NTP) {
tk->ntp_error = 0;
- ntp_clear();
+ ntp_clear(tk->id);
}
tk_update_leap_state(tk);
tk_update_ktime_data(tk);
+ tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real;
- update_vsyscall(tk);
- update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
+ if (tk->id == TIMEKEEPER_CORE) {
+ update_vsyscall(tk);
+ update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
- tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real;
- update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
- update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
+ update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
+ update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
+ } else if (tk_is_aux(tk)) {
+ vdso_time_update_aux(tk);
+ }
if (action & TK_CLOCK_WAS_SET)
tk->clock_was_set_seq++;
@@ -975,9 +1017,14 @@ time64_t ktime_get_real_seconds(void)
EXPORT_SYMBOL_GPL(ktime_get_real_seconds);
/**
- * __ktime_get_real_seconds - The same as ktime_get_real_seconds
- * but without the sequence counter protect. This internal function
- * is called just when timekeeping lock is already held.
+ * __ktime_get_real_seconds - Unprotected access to CLOCK_REALTIME seconds
+ *
+ * The same as ktime_get_real_seconds() but without the sequence counter
+ * protection. This function is used in restricted contexts like the x86 MCE
+ * handler and in KGDB. On 32-bit it is unprotected against concurrent half
+ * completed modifications and must only be used in such critical contexts.
+ *
+ * Returns: Racy snapshot of the CLOCK_REALTIME seconds value
*/
noinstr time64_t __ktime_get_real_seconds(void)
{
@@ -1412,41 +1459,73 @@ int do_settimeofday64(const struct timespec64 *ts)
}
EXPORT_SYMBOL(do_settimeofday64);
+static inline bool timekeeper_is_core_tk(struct timekeeper *tk)
+{
+ return !IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS) || tk->id == TIMEKEEPER_CORE;
+}
+
/**
- * timekeeping_inject_offset - Adds or subtracts from the current time.
+ * __timekeeping_inject_offset - Adds or subtracts from the current time.
+ * @tkd: Pointer to the timekeeper to modify
* @ts: Pointer to the timespec variable containing the offset
*
* Adds or subtracts an offset value from the current time.
*/
-static int timekeeping_inject_offset(const struct timespec64 *ts)
+static int __timekeeping_inject_offset(struct tk_data *tkd, const struct timespec64 *ts)
{
+ struct timekeeper *tks = &tkd->shadow_timekeeper;
+ struct timespec64 tmp;
+
if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
- scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
- struct timekeeper *tks = &tk_core.shadow_timekeeper;
- struct timespec64 tmp;
-
- timekeeping_forward_now(tks);
+ timekeeping_forward_now(tks);
+ if (timekeeper_is_core_tk(tks)) {
/* Make sure the proposed value is valid */
tmp = timespec64_add(tk_xtime(tks), *ts);
if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 ||
!timespec64_valid_settod(&tmp)) {
- timekeeping_restore_shadow(&tk_core);
+ timekeeping_restore_shadow(tkd);
return -EINVAL;
}
tk_xtime_add(tks, ts);
tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, *ts));
- timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
+ } else {
+ struct tk_read_base *tkr_mono = &tks->tkr_mono;
+ ktime_t now, offs;
+
+ /* Get the current time */
+ now = ktime_add_ns(tkr_mono->base, timekeeping_get_ns(tkr_mono));
+ /* Add the relative offset change */
+ offs = ktime_add(tks->offs_aux, timespec64_to_ktime(*ts));
+
+ /* Prevent that the resulting time becomes negative */
+ if (ktime_add(now, offs) < 0) {
+ timekeeping_restore_shadow(tkd);
+ return -EINVAL;
+ }
+ tks->offs_aux = offs;
}
- /* Signal hrtimers about time change */
- clock_was_set(CLOCK_SET_WALL);
+ timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL);
return 0;
}
+static int timekeeping_inject_offset(const struct timespec64 *ts)
+{
+ int ret;
+
+ scoped_guard (raw_spinlock_irqsave, &tk_core.lock)
+ ret = __timekeeping_inject_offset(&tk_core, ts);
+
+ /* Signal hrtimers about time change */
+ if (!ret)
+ clock_was_set(CLOCK_SET_WALL);
+ return ret;
+}
+
/*
* Indicates if there is an offset between the system clock and the hardware
* clock/persistent clock/rtc.
@@ -1522,6 +1601,8 @@ static int change_clocksource(void *data)
timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
}
+ tk_aux_update_clocksource();
+
if (old) {
if (old->disable)
old->disable(old);
@@ -1573,6 +1654,39 @@ void ktime_get_raw_ts64(struct timespec64 *ts)
}
EXPORT_SYMBOL(ktime_get_raw_ts64);
+/**
+ * ktime_get_clock_ts64 - Returns time of a clock in a timespec
+ * @id: POSIX clock ID of the clock to read
+ * @ts: Pointer to the timespec64 to be set
+ *
+ * The timestamp is invalidated (@ts->tv_sec is set to -1) if the
+ * clock @id is not available.
+ */
+void ktime_get_clock_ts64(clockid_t id, struct timespec64 *ts)
+{
+ /* Invalidate time stamp */
+ ts->tv_sec = -1;
+ ts->tv_nsec = 0;
+
+ switch (id) {
+ case CLOCK_REALTIME:
+ ktime_get_real_ts64(ts);
+ return;
+ case CLOCK_MONOTONIC:
+ ktime_get_ts64(ts);
+ return;
+ case CLOCK_MONOTONIC_RAW:
+ ktime_get_raw_ts64(ts);
+ return;
+ case CLOCK_AUX ... CLOCK_AUX_LAST:
+ if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS))
+ ktime_get_aux_ts64(id, ts);
+ return;
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+EXPORT_SYMBOL_GPL(ktime_get_clock_ts64);
/**
* timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
@@ -1649,10 +1763,12 @@ read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
*boot_offset = ns_to_timespec64(local_clock());
}
-static __init void tkd_basic_setup(struct tk_data *tkd)
+static __init void tkd_basic_setup(struct tk_data *tkd, enum timekeeper_ids tk_id, bool valid)
{
raw_spin_lock_init(&tkd->lock);
seqcount_raw_spinlock_init(&tkd->seq, &tkd->lock);
+ tkd->timekeeper.id = tkd->shadow_timekeeper.id = tk_id;
+ tkd->timekeeper.clock_valid = tkd->shadow_timekeeper.clock_valid = valid;
}
/*
@@ -1682,7 +1798,8 @@ void __init timekeeping_init(void)
struct timekeeper *tks = &tk_core.shadow_timekeeper;
struct clocksource *clock;
- tkd_basic_setup(&tk_core);
+ tkd_basic_setup(&tk_core, TIMEKEEPER_CORE, true);
+ tk_aux_setup();
read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
if (timespec64_valid_settod(&wall_time) &&
@@ -2034,7 +2151,7 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
*/
static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
{
- u64 ntp_tl = ntp_tick_length();
+ u64 ntp_tl = ntp_tick_length(tk->id);
u32 mult;
/*
@@ -2115,7 +2232,7 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
}
/* Figure out if its a leap sec and apply if needed */
- leap = second_overflow(tk->xtime_sec);
+ leap = second_overflow(tk->id, tk->xtime_sec);
if (unlikely(leap)) {
struct timespec64 ts;
@@ -2181,16 +2298,14 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
* timekeeping_advance - Updates the timekeeper to the current time and
* current NTP tick length
*/
-static bool timekeeping_advance(enum timekeeping_adv_mode mode)
+static bool __timekeeping_advance(struct tk_data *tkd, enum timekeeping_adv_mode mode)
{
- struct timekeeper *tk = &tk_core.shadow_timekeeper;
- struct timekeeper *real_tk = &tk_core.timekeeper;
+ struct timekeeper *tk = &tkd->shadow_timekeeper;
+ struct timekeeper *real_tk = &tkd->timekeeper;
unsigned int clock_set = 0;
int shift = 0, maxshift;
u64 offset, orig_offset;
- guard(raw_spinlock_irqsave)(&tk_core.lock);
-
/* Make sure we're fully resumed: */
if (unlikely(timekeeping_suspended))
return false;
@@ -2214,7 +2329,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode)
shift = ilog2(offset) - ilog2(tk->cycle_interval);
shift = max(0, shift);
/* Bound shift to one less than what overflows tick_length */
- maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
+ maxshift = (64 - (ilog2(ntp_tick_length(tk->id)) + 1)) - 1;
shift = min(shift, maxshift);
while (offset >= tk->cycle_interval) {
offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
@@ -2239,19 +2354,27 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode)
if (orig_offset != offset)
tk_update_coarse_nsecs(tk);
- timekeeping_update_from_shadow(&tk_core, clock_set);
+ timekeeping_update_from_shadow(tkd, clock_set);
return !!clock_set;
}
+/*
+ * Core timekeeper entry point: take tk_core.lock through the irqsave guard
+ * and run __timekeeping_advance() on tk_core. The worker expects its caller
+ * to hold the lock of the tk_data it operates on. Returns the worker's
+ * result unchanged.
+ */
+static bool timekeeping_advance(enum timekeeping_adv_mode mode)
+{
+ guard(raw_spinlock_irqsave)(&tk_core.lock);
+ return __timekeeping_advance(&tk_core, mode);
+}
+
/**
* update_wall_time - Uses the current clocksource to increment the wall time
*
+ * It also updates the enabled auxiliary clock timekeepers
*/
void update_wall_time(void)
{
if (timekeeping_advance(TK_ADV_TICK))
clock_was_set_delayed();
+ tk_aux_advance();
}
/**
@@ -2449,7 +2572,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
/*
* timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex
*/
-static int timekeeping_validate_timex(const struct __kernel_timex *txc)
+static int timekeeping_validate_timex(const struct __kernel_timex *txc, bool aux_clock)
{
if (txc->modes & ADJ_ADJTIME) {
/* singleshot must not be used with any other mode bits */
@@ -2508,6 +2631,20 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc)
return -EINVAL;
}
+ if (aux_clock) {
+ /* Auxiliary clocks are similar to TAI and do not have leap seconds */
+ if (txc->status & (STA_INS | STA_DEL))
+ return -EINVAL;
+
+ /* No TAI offset setting */
+ if (txc->modes & ADJ_TAI)
+ return -EINVAL;
+
+ /* No PPS support either */
+ if (txc->status & (STA_PPSFREQ | STA_PPSTIME))
+ return -EINVAL;
+ }
+
return 0;
}
@@ -2526,74 +2663,103 @@ unsigned long random_get_entropy_fallback(void)
}
EXPORT_SYMBOL_GPL(random_get_entropy_fallback);
-/**
- * do_adjtimex() - Accessor function to NTP __do_adjtimex function
- * @txc: Pointer to kernel_timex structure containing NTP parameters
- */
-int do_adjtimex(struct __kernel_timex *txc)
+struct adjtimex_result {
+ struct audit_ntp_data ad;
+ struct timespec64 delta;
+ bool clock_set;
+};
+
+static int __do_adjtimex(struct tk_data *tkd, struct __kernel_timex *txc,
+ struct adjtimex_result *result)
{
- struct audit_ntp_data ad;
- bool offset_set = false;
- bool clock_set = false;
+ struct timekeeper *tks = &tkd->shadow_timekeeper;
+ bool aux_clock = !timekeeper_is_core_tk(tks);
struct timespec64 ts;
+ s32 orig_tai, tai;
int ret;
/* Validate the data before disabling interrupts */
- ret = timekeeping_validate_timex(txc);
+ ret = timekeeping_validate_timex(txc, aux_clock);
if (ret)
return ret;
add_device_randomness(txc, sizeof(*txc));
- if (txc->modes & ADJ_SETOFFSET) {
- struct timespec64 delta;
+ if (!aux_clock)
+ ktime_get_real_ts64(&ts);
+ else
+ tk_get_aux_ts64(tkd->timekeeper.id, &ts);
- delta.tv_sec = txc->time.tv_sec;
- delta.tv_nsec = txc->time.tv_usec;
+ add_device_randomness(&ts, sizeof(ts));
+
+ guard(raw_spinlock_irqsave)(&tkd->lock);
+
+ if (!tks->clock_valid)
+ return -ENODEV;
+
+ if (txc->modes & ADJ_SETOFFSET) {
+ result->delta.tv_sec = txc->time.tv_sec;
+ result->delta.tv_nsec = txc->time.tv_usec;
if (!(txc->modes & ADJ_NANO))
- delta.tv_nsec *= 1000;
- ret = timekeeping_inject_offset(&delta);
+ result->delta.tv_nsec *= 1000;
+ ret = __timekeeping_inject_offset(tkd, &result->delta);
if (ret)
return ret;
-
- offset_set = delta.tv_sec != 0;
- audit_tk_injoffset(delta);
+ result->clock_set = true;
}
- audit_ntp_init(&ad);
+ orig_tai = tai = tks->tai_offset;
+ ret = ntp_adjtimex(tks->id, txc, &ts, &tai, &result->ad);
- ktime_get_real_ts64(&ts);
- add_device_randomness(&ts, sizeof(ts));
+ if (tai != orig_tai) {
+ __timekeeping_set_tai_offset(tks, tai);
+ timekeeping_update_from_shadow(tkd, TK_CLOCK_WAS_SET);
+ result->clock_set = true;
+ } else {
+ tk_update_leap_state_all(&tk_core);
+ }
- scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
- struct timekeeper *tks = &tk_core.shadow_timekeeper;
- s32 orig_tai, tai;
+ /* Update the multiplier immediately if frequency was set directly */
+ if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK))
+ result->clock_set |= __timekeeping_advance(tkd, TK_ADV_FREQ);
- orig_tai = tai = tks->tai_offset;
- ret = __do_adjtimex(txc, &ts, &tai, &ad);
+ return ret;
+}
- if (tai != orig_tai) {
- __timekeeping_set_tai_offset(tks, tai);
- timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET);
- clock_set = true;
- } else {
- tk_update_leap_state_all(&tk_core);
- }
- }
+/**
+ * do_adjtimex() - Accessor function to the NTP ntp_adjtimex() function for the core timekeeper
+ * @txc: Pointer to kernel_timex structure containing NTP parameters
+ */
+int do_adjtimex(struct __kernel_timex *txc)
+{
+ struct adjtimex_result result = { };
+ int ret;
- audit_ntp_log(&ad);
+ ret = __do_adjtimex(&tk_core, txc, &result);
+ if (ret < 0)
+ return ret;
- /* Update the multiplier immediately if frequency was set directly */
- if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK))
- clock_set |= timekeeping_advance(TK_ADV_FREQ);
+ if (txc->modes & ADJ_SETOFFSET)
+ audit_tk_injoffset(result.delta);
- if (clock_set)
+ audit_ntp_log(&result.ad);
+
+ if (result.clock_set)
clock_was_set(CLOCK_SET_WALL);
- ntp_notify_cmos_timer(offset_set);
+ ntp_notify_cmos_timer(result.delta.tv_sec != 0);
return ret;
}
+/*
+ * Invoked from NTP with the time keeper lock held, so lockless access is
+ * fine.
+ *
+ * @id indexes timekeeper_data[] directly and is not range checked here.
+ * Returns xtime_sec of the live (non-shadow) timekeeper of that slot.
+ */
+long ktime_get_ntp_seconds(unsigned int id)
+{
+ return timekeeper_data[id].timekeeper.xtime_sec;
+}
+
#ifdef CONFIG_NTP_PPS
/**
* hardpps() - Accessor function to NTP __hardpps function
@@ -2607,3 +2773,316 @@ void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
}
EXPORT_SYMBOL(hardpps);
#endif /* CONFIG_NTP_PPS */
+
+#ifdef CONFIG_POSIX_AUX_CLOCKS
+#include "posix-timers.h"
+
+/*
+ * Bitmap for the activated auxiliary timekeepers to allow lockless quick
+ * checks in the hot paths without touching extra cache lines. Bit N
+ * corresponds to CLOCK_AUX + N. If a bit is set, then the state of the
+ * corresponding timekeeper has to be re-checked under tk_data::lock.
+ */
+static unsigned long aux_timekeepers;
+
+/* Translate an auxiliary clock id (CLOCK_AUX based) into its timekeeper_data[] index */
+static inline unsigned int clockid_to_tkid(unsigned int id)
+{
+ unsigned int aux_idx = id - CLOCK_AUX;
+
+ return TIMEKEEPER_AUX_FIRST + aux_idx;
+}
+
+/* Look up the tk_data slot of an auxiliary clock; NULL if @id is not a valid AUX clock id */
+static inline struct tk_data *aux_get_tk_data(clockid_t id)
+{
+ return clockid_aux_valid(id) ? &timekeeper_data[clockid_to_tkid(id)] : NULL;
+}
+
+/*
+ * Invoked from timekeeping after a clocksource change.
+ *
+ * Walks a snapshot of the aux_timekeepers bitmap. A set bit is only a hint:
+ * clock_valid is re-checked under the per-timekeeper lock before
+ * timekeeping_forward_now() is applied to the shadow timekeeper, its
+ * internals are set up for the clocksource of the core timekeeper's
+ * tkr_mono and the result is published via
+ * timekeeping_update_from_shadow().
+ */
+static void tk_aux_update_clocksource(void)
+{
+ unsigned long active = READ_ONCE(aux_timekeepers);
+ unsigned int id;
+
+ for_each_set_bit(id, &active, BITS_PER_LONG) {
+ struct tk_data *tkd = &timekeeper_data[id + TIMEKEEPER_AUX_FIRST];
+ struct timekeeper *tks = &tkd->shadow_timekeeper;
+
+ /* The guard is scoped to one loop iteration */
+ guard(raw_spinlock_irqsave)(&tkd->lock);
+ if (!tks->clock_valid)
+ continue;
+
+ timekeeping_forward_now(tks);
+ tk_setup_internals(tks, tk_core.timekeeper.tkr_mono.clock);
+ timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL);
+ }
+}
+
+/*
+ * Advance every enabled auxiliary timekeeper with TK_ADV_TICK. Called from
+ * update_wall_time() after the core timekeeper has been advanced.
+ *
+ * The plain raw_spinlock guard is used, i.e. the interrupt state is not
+ * changed here - presumably because the tick path already runs with
+ * interrupts disabled; TODO confirm against the callers of
+ * update_wall_time().
+ */
+static void tk_aux_advance(void)
+{
+ unsigned long active = READ_ONCE(aux_timekeepers);
+ unsigned int id;
+
+ /* Lockless quick check to avoid extra cache lines */
+ for_each_set_bit(id, &active, BITS_PER_LONG) {
+ struct tk_data *aux_tkd = &timekeeper_data[id + TIMEKEEPER_AUX_FIRST];
+
+ guard(raw_spinlock)(&aux_tkd->lock);
+ /* The bitmap is only a hint, validity is decided under the lock */
+ if (aux_tkd->shadow_timekeeper.clock_valid)
+ __timekeeping_advance(aux_tkd, TK_ADV_TICK);
+ }
+}
+
+/**
+ * ktime_get_aux - Get time for an AUX clock
+ * @id: ID of the clock to read (CLOCK_AUX...)
+ * @kt: Pointer to ktime_t to store the time stamp
+ *
+ * @kt is only written when the function returns true. False is returned
+ * when @id is not a valid AUX clock id or when the clock is not marked
+ * valid.
+ *
+ * Returns: True if the timestamp is valid, false otherwise
+ */
+bool ktime_get_aux(clockid_t id, ktime_t *kt)
+{
+ struct tk_data *aux_tkd = aux_get_tk_data(id);
+ struct timekeeper *aux_tk;
+ unsigned int seq;
+ ktime_t base;
+ u64 nsecs;
+
+ WARN_ON(timekeeping_suspended);
+
+ if (!aux_tkd)
+ return false;
+
+ /* Readers use the live timekeeper, protected by the sequence count */
+ aux_tk = &aux_tkd->timekeeper;
+ do {
+ seq = read_seqcount_begin(&aux_tkd->seq);
+ if (!aux_tk->clock_valid)
+ return false;
+
+ base = ktime_add(aux_tk->tkr_mono.base, aux_tk->offs_aux);
+ nsecs = timekeeping_get_ns(&aux_tk->tkr_mono);
+ } while (read_seqcount_retry(&aux_tkd->seq, seq));
+
+ *kt = ktime_add_ns(base, nsecs);
+ return true;
+}
+EXPORT_SYMBOL_GPL(ktime_get_aux);
+
+/**
+ * ktime_get_aux_ts64 - Get time for an AUX clock as timespec64
+ * @id: ID of the clock to read (CLOCK_AUX...)
+ * @ts: Pointer to timespec64 to store the time stamp
+ *
+ * @ts is left untouched when no valid timestamp could be read.
+ *
+ * Returns: True if the timestamp is valid, false otherwise
+ */
+bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *ts)
+{
+ ktime_t stamp;
+ bool valid = ktime_get_aux(id, &stamp);
+
+ if (valid)
+ *ts = ktime_to_timespec64(stamp);
+
+ return valid;
+}
+EXPORT_SYMBOL_GPL(ktime_get_aux_ts64);
+
+/*
+ * clock_getres() callback: report the value of aux_clock_resolution_ns()
+ * split into seconds and nanoseconds. Only the clock id range is
+ * validated; whether the clock is currently enabled is not checked here.
+ *
+ * Returns: 0 on success, -ENODEV for an invalid AUX clock id
+ */
+static int aux_get_res(clockid_t id, struct timespec64 *tp)
+{
+ if (!clockid_aux_valid(id))
+ return -ENODEV;
+
+ tp->tv_sec = aux_clock_resolution_ns() / NSEC_PER_SEC;
+ tp->tv_nsec = aux_clock_resolution_ns() % NSEC_PER_SEC;
+ return 0;
+}
+
+/* clock_gettime() callback: -ENODEV when ktime_get_aux_ts64() reports no valid timestamp */
+static int aux_get_timespec(clockid_t id, struct timespec64 *tp)
+{
+ if (!ktime_get_aux_ts64(id, tp))
+ return -ENODEV;
+
+ return 0;
+}
+
+/*
+ * clock_settime() callback for auxiliary clocks.
+ *
+ * The new time is validated before the clock id, so an invalid @tnew yields
+ * -EINVAL even for an invalid id. The clock is set by recomputing offs_aux
+ * on the shadow timekeeper under the timekeeper lock and publishing the
+ * result with TK_UPDATE_ALL.
+ *
+ * Returns: 0 on success, -EINVAL for an invalid @tnew, -ENODEV when @id is
+ * not a valid AUX clock id or the clock is not enabled
+ */
+static int aux_clock_set(const clockid_t id, const struct timespec64 *tnew)
+{
+ struct tk_data *aux_tkd = aux_get_tk_data(id);
+ struct timekeeper *aux_tks;
+ ktime_t tnow, nsecs;
+
+ if (!timespec64_valid_settod(tnew))
+ return -EINVAL;
+ if (!aux_tkd)
+ return -ENODEV;
+
+ aux_tks = &aux_tkd->shadow_timekeeper;
+
+ guard(raw_spinlock_irq)(&aux_tkd->lock);
+ if (!aux_tks->clock_valid)
+ return -ENODEV;
+
+ /* Forward the timekeeper base time */
+ timekeeping_forward_now(aux_tks);
+ /*
+ * Get the updated base time. tkr_mono.base has not been
+ * updated yet, so do that first. That makes the update
+ * in timekeeping_update_from_shadow() redundant, but
+ * that's harmless. After that @tnow can be calculated
+ * by using tkr_mono::cycle_last, which has been set
+ * by timekeeping_forward_now().
+ */
+ tk_update_ktime_data(aux_tks);
+ nsecs = timekeeping_cycles_to_ns(&aux_tks->tkr_mono, aux_tks->tkr_mono.cycle_last);
+ tnow = ktime_add(aux_tks->tkr_mono.base, nsecs);
+
+ /*
+ * Calculate the new AUX offset as delta to @tnow ("monotonic").
+ * That avoids all the tk::xtime back and forth conversions as
+ * xtime ("realtime") is not applicable for auxiliary clocks and
+ * kept in sync with "monotonic".
+ */
+ aux_tks->offs_aux = ktime_sub(timespec64_to_ktime(*tnew), tnow);
+
+ timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL);
+ return 0;
+}
+
+/*
+ * clock_adjtime() callback for auxiliary clocks: run the shared
+ * __do_adjtimex() on the tk_data slot of @id.
+ *
+ * Returns: -ENODEV for an invalid AUX clock id, otherwise the result of
+ * __do_adjtimex()
+ */
+static int aux_clock_adj(const clockid_t id, struct __kernel_timex *txc)
+{
+ struct tk_data *aux_tkd = aux_get_tk_data(id);
+ struct adjtimex_result result = { };
+
+ if (!aux_tkd)
+ return -ENODEV;
+
+ /*
+ * @result is ignored for now as there are neither hrtimers nor a
+ * RTC related to auxiliary clocks for now.
+ */
+ return __do_adjtimex(aux_tkd, txc, &result);
+}
+
+/*
+ * POSIX clock operations for the auxiliary clocks. Only resolution, read,
+ * set and adjust callbacks are provided.
+ */
+const struct k_clock clock_aux = {
+ .clock_getres = aux_get_res,
+ .clock_get_timespec = aux_get_timespec,
+ .clock_set = aux_clock_set,
+ .clock_adj = aux_clock_adj,
+};
+
+/*
+ * Bring an auxiliary timekeeper into service.
+ *
+ * @id is not validated here: the result of aux_get_tk_data() is
+ * dereferenced unconditionally, so callers must pass a valid AUX clock id.
+ * aux_clock_enable_store() passes CLOCK_AUX + 0..7.
+ *
+ * Lock order: tk_core.lock (taken with the irq guard) first, then the
+ * auxiliary timekeeper's lock nested inside.
+ */
+static void aux_clock_enable(clockid_t id)
+{
+ struct tk_read_base *tkr_raw = &tk_core.timekeeper.tkr_raw;
+ struct tk_data *aux_tkd = aux_get_tk_data(id);
+ struct timekeeper *aux_tks = &aux_tkd->shadow_timekeeper;
+
+ /* Prevent the core timekeeper from changing. */
+ guard(raw_spinlock_irq)(&tk_core.lock);
+
+ /*
+ * Setup the auxiliary clock assuming that the raw core timekeeper
+ * clock frequency conversion is close enough. Userspace has to
+ * adjust for the deviation via clock_adjtime(2).
+ */
+ guard(raw_spinlock_nested)(&aux_tkd->lock);
+
+ /* Remove leftovers of a previous registration */
+ memset(aux_tks, 0, sizeof(*aux_tks));
+ /* Restore the timekeeper id */
+ aux_tks->id = aux_tkd->timekeeper.id;
+ /* Setup the timekeeper based on the current system clocksource */
+ tk_setup_internals(aux_tks, tkr_raw->clock);
+
+ /* Mark it valid and set it live */
+ aux_tks->clock_valid = true;
+ timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL);
+}
+
+/*
+ * Take an auxiliary timekeeper out of service: clear clock_valid on the
+ * shadow timekeeper under the timekeeper lock and publish that state with
+ * TK_UPDATE_ALL.
+ *
+ * As in aux_clock_enable(), @id is not validated and must be a valid AUX
+ * clock id.
+ */
+static void aux_clock_disable(clockid_t id)
+{
+ struct tk_data *aux_tkd = aux_get_tk_data(id);
+
+ guard(raw_spinlock_irq)(&aux_tkd->lock);
+ aux_tkd->shadow_timekeeper.clock_valid = false;
+ timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL);
+}
+
+static DEFINE_MUTEX(aux_clock_mutex);
+
+/*
+ * sysfs store handler which enables or disables one auxiliary clock.
+ *
+ * The clock index is derived from the first character of the kobject name.
+ * Requires CAP_SYS_TIME. The input is parsed with kstrtobool(). State
+ * changes are serialized by aux_clock_mutex; writing the state which is
+ * already active is a successful no-op.
+ *
+ * The bitmap bit is set only after the timekeeper has been enabled and
+ * cleared only after it has been disabled.
+ *
+ * Returns: @count on success, -EPERM or -EINVAL on failure
+ */
+static ssize_t aux_clock_enable_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ /* Lazy atoi() as name is "0..7" */
+ int id = kobj->name[0] & 0x7;
+ bool enable;
+
+ if (!capable(CAP_SYS_TIME))
+ return -EPERM;
+
+ if (kstrtobool(buf, &enable) < 0)
+ return -EINVAL;
+
+ guard(mutex)(&aux_clock_mutex);
+ if (enable == test_bit(id, &aux_timekeepers))
+ return count;
+
+ if (enable) {
+ aux_clock_enable(CLOCK_AUX + id);
+ set_bit(id, &aux_timekeepers);
+ } else {
+ aux_clock_disable(CLOCK_AUX + id);
+ clear_bit(id, &aux_timekeepers);
+ }
+ return count;
+}
+
+/*
+ * sysfs show handler: emit "1" or "0" depending on whether the bit of this
+ * clock is set in a snapshot of the aux_timekeepers bitmap.
+ */
+static ssize_t aux_clock_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ /* Lazy atoi() as name is "0..7" */
+ int id = kobj->name[0] & 0x7;
+ unsigned long snapshot = READ_ONCE(aux_timekeepers);
+
+ return sysfs_emit(buf, "%d\n", test_bit(id, &snapshot));
+}
+
+static struct kobj_attribute aux_clock_enable_attr = __ATTR_RW(aux_clock_enable);
+
+static struct attribute *aux_clock_enable_attrs[] = {
+ &aux_clock_enable_attr.attr,
+ NULL
+};
+
+static const struct attribute_group aux_clock_enable_attr_group = {
+ .attrs = aux_clock_enable_attrs,
+};
+
+/*
+ * Create the sysfs hierarchy "time/aux_clocks/<ID>" below kernel_kobj, one
+ * directory per auxiliary clock, each carrying the enable attribute group.
+ *
+ * The directory names are the single digits which aux_clock_enable_store()
+ * and aux_clock_enable_show() decode with "name[0] & 0x7" ("0..7"), so
+ * exactly MAX_AUX_CLOCKS directories are created; one more would alias
+ * clock 0. On failure every kobject created so far is released again.
+ *
+ * Returns: 0 on success, -ENOMEM or the sysfs_create_group() error code
+ */
+static int __init tk_aux_sysfs_init(void)
+{
+ struct kobject *auxo, *tko = kobject_create_and_add("time", kernel_kobj);
+ struct kobject *clks[MAX_AUX_CLOCKS] = { };
+ int ret = -ENOMEM;
+
+ if (!tko)
+ return ret;
+
+ auxo = kobject_create_and_add("aux_clocks", tko);
+ if (!auxo)
+ goto err_put;
+
+ for (int i = 0; i < MAX_AUX_CLOCKS; i++) {
+ char id[2] = { [0] = '0' + i, };
+
+ clks[i] = kobject_create_and_add(id, auxo);
+ if (!clks[i]) {
+ ret = -ENOMEM;
+ goto err_put;
+ }
+
+ ret = sysfs_create_group(clks[i], &aux_clock_enable_attr_group);
+ if (ret)
+ goto err_put;
+ }
+ return 0;
+
+err_put:
+ /* kobject_put() ignores NULL, so unused slots and a NULL auxo are fine */
+ for (int i = 0; i < MAX_AUX_CLOCKS; i++)
+ kobject_put(clks[i]);
+ kobject_put(auxo);
+ kobject_put(tko);
+ return ret;
+}
+late_initcall(tk_aux_sysfs_init);
+
+/* Boot-time setup of all auxiliary tk_data slots; every slot starts out as not valid */
+static __init void tk_aux_setup(void)
+{
+ int tk_id;
+
+ for (tk_id = TIMEKEEPER_AUX_FIRST; tk_id <= TIMEKEEPER_AUX_LAST; tk_id++) {
+ struct tk_data *tkd = &timekeeper_data[tk_id];
+
+ tkd_basic_setup(tkd, tk_id, false);
+ }
+}
+#endif /* CONFIG_POSIX_AUX_CLOCKS */
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index 8c9079108ffb..973ede670a36 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -45,4 +45,7 @@ static inline u64 clocksource_delta(u64 now, u64 last, u64 mask, u64 max_delta)
unsigned long timekeeper_lock_irqsave(void);
void timekeeper_unlock_irqrestore(unsigned long flags);
+/* NTP specific interface to access the current seconds value */
+long ktime_get_ntp_seconds(unsigned int id);
+
#endif /* _TIMEKEEPING_INTERNAL_H */
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
index 32ef27c71b57..8ba8b0d8a387 100644
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -15,26 +15,25 @@
#include "timekeeping_internal.h"
+/*
+ * Copy the conversion parameters of a timekeeper read base into a vDSO
+ * clock: cycle_last, mask, mult and shift, plus the clocksource's
+ * max_cycles when CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT is enabled.
+ */
+static inline void fill_clock_configuration(struct vdso_clock *vc, const struct tk_read_base *base)
+{
+ vc->cycle_last = base->cycle_last;
+#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
+ vc->max_cycles = base->clock->max_cycles;
+#endif
+ vc->mask = base->mask;
+ vc->mult = base->mult;
+ vc->shift = base->shift;
+}
+
static inline void update_vdso_time_data(struct vdso_time_data *vdata, struct timekeeper *tk)
{
struct vdso_clock *vc = vdata->clock_data;
struct vdso_timestamp *vdso_ts;
u64 nsec, sec;
- vc[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
-#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
- vc[CS_HRES_COARSE].max_cycles = tk->tkr_mono.clock->max_cycles;
-#endif
- vc[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
- vc[CS_HRES_COARSE].mult = tk->tkr_mono.mult;
- vc[CS_HRES_COARSE].shift = tk->tkr_mono.shift;
- vc[CS_RAW].cycle_last = tk->tkr_raw.cycle_last;
-#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
- vc[CS_RAW].max_cycles = tk->tkr_raw.clock->max_cycles;
-#endif
- vc[CS_RAW].mask = tk->tkr_raw.mask;
- vc[CS_RAW].mult = tk->tkr_raw.mult;
- vc[CS_RAW].shift = tk->tkr_raw.shift;
+ fill_clock_configuration(&vc[CS_HRES_COARSE], &tk->tkr_mono);
+ fill_clock_configuration(&vc[CS_RAW], &tk->tkr_raw);
/* CLOCK_MONOTONIC */
vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC];
@@ -119,7 +118,8 @@ void update_vsyscall(struct timekeeper *tk)
if (clock_mode != VDSO_CLOCKMODE_NONE)
update_vdso_time_data(vdata, tk);
- __arch_update_vsyscall(vdata);
+ __arch_update_vdso_clock(&vc[CS_HRES_COARSE]);
+ __arch_update_vdso_clock(&vc[CS_RAW]);
vdso_write_end(vdata);
@@ -136,6 +136,46 @@ void update_vsyscall_tz(void)
__arch_sync_vdso_time_data(vdata);
}
+#ifdef CONFIG_POSIX_AUX_CLOCKS
+/*
+ * Publish the state of one auxiliary timekeeper in the vDSO data page.
+ *
+ * The vDSO clock slot is selected by tk->id - TIMEKEEPER_AUX_FIRST. A
+ * timekeeper which is not valid is published with VDSO_CLOCKMODE_NONE and
+ * without a timestamp update; do_aux() in lib/vdso/gettimeofday.c treats
+ * that mode as "Auxclock disabled".
+ */
+void vdso_time_update_aux(struct timekeeper *tk)
+{
+ struct vdso_time_data *vdata = vdso_k_time_data;
+ struct vdso_timestamp *vdso_ts;
+ struct vdso_clock *vc;
+ s32 clock_mode;
+ u64 nsec;
+
+ vc = &vdata->aux_clock_data[tk->id - TIMEKEEPER_AUX_FIRST];
+ vdso_ts = &vc->basetime[VDSO_BASE_AUX];
+ clock_mode = tk->tkr_mono.clock->vdso_clock_mode;
+ if (!tk->clock_valid)
+ clock_mode = VDSO_CLOCKMODE_NONE;
+
+ /* copy vsyscall data */
+ vdso_write_begin_clock(vc);
+
+ vc->clock_mode = clock_mode;
+
+ if (clock_mode != VDSO_CLOCKMODE_NONE) {
+ fill_clock_configuration(vc, &tk->tkr_mono);
+
+ vdso_ts->sec = tk->xtime_sec;
+
+ /*
+ * The stored nsec value is kept in shifted form: shift down,
+ * add the AUX offset, carry whole seconds, shift back up.
+ *
+ * NOTE(review): the complete offs_aux value is fed into
+ * __iter_div_u64_rem(), which lib/vdso/gettimeofday.c
+ * documents as potentially "extremely slow" for large
+ * inputs. Confirm that this is acceptable in the update
+ * path when offs_aux is large.
+ */
+ nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+ nsec += tk->offs_aux;
+ vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec);
+ nsec = nsec << tk->tkr_mono.shift;
+ vdso_ts->nsec = nsec;
+ }
+
+ __arch_update_vdso_clock(vc);
+
+ vdso_write_end_clock(vc);
+
+ __arch_sync_vdso_time_data(vdata);
+}
+#endif
+
/**
* vdso_update_begin - Start of a VDSO update section
*
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index 93ef801a97ef..02ea19f67164 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -2,6 +2,7 @@
/*
* Generic userspace implementations of gettimeofday() and similar.
*/
+#include <vdso/auxclock.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
@@ -71,6 +72,42 @@ static inline bool vdso_cycles_ok(u64 cycles)
}
#endif
+/* Range check for clock ids handled by the vDSO: 0 up to and including CLOCK_AUX_LAST */
+static __always_inline bool vdso_clockid_valid(clockid_t clock)
+{
+ /* The unsigned cast folds negative ids into the "too large" case */
+ u32 uclock = (u32) clock;
+
+ return likely(uclock <= CLOCK_AUX_LAST);
+}
+
+/*
+ * Store @sec plus the whole seconds contained in @ns into @ts->tv_sec and
+ * the remaining nanoseconds into @ts->tv_nsec.
+ *
+ * Must not be invoked within the sequence read section as a race inside
+ * that loop could result in __iter_div_u64_rem() being extremely slow.
+ */
+static __always_inline void vdso_set_timespec(struct __kernel_timespec *ts, u64 sec, u64 ns)
+{
+ u32 carry_sec = __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+
+ ts->tv_sec = sec + carry_sec;
+ ts->tv_nsec = ns;
+}
+
+/*
+ * Read one raw timestamp for base time index @clkidx of @vc.
+ *
+ * Returns false without touching @sec and @ns when the clocksource is not
+ * usable or the hardware counter value is rejected by vdso_cycles_ok().
+ * Otherwise @sec receives the base seconds and @ns the result of
+ * vdso_calc_ns(); @ns is not normalized here, see vdso_set_timespec().
+ *
+ * The callers in this file invoke it inside their sequence count retry
+ * loops, so a true return only matters once the retry check has passed.
+ */
+static __always_inline
+bool vdso_get_timestamp(const struct vdso_time_data *vd, const struct vdso_clock *vc,
+ unsigned int clkidx, u64 *sec, u64 *ns)
+{
+ const struct vdso_timestamp *vdso_ts = &vc->basetime[clkidx];
+ u64 cycles;
+
+ if (unlikely(!vdso_clocksource_ok(vc)))
+ return false;
+
+ cycles = __arch_get_hw_counter(vc->clock_mode, vd);
+ if (unlikely(!vdso_cycles_ok(cycles)))
+ return false;
+
+ *ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec);
+ *sec = vdso_ts->sec;
+
+ return true;
+}
+
#ifdef CONFIG_TIME_NS
#ifdef CONFIG_GENERIC_VDSO_DATA_STORE
@@ -82,48 +119,35 @@ const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_tim
#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */
static __always_inline
-int do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
- clockid_t clk, struct __kernel_timespec *ts)
+bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
+ clockid_t clk, struct __kernel_timespec *ts)
{
const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns);
const struct timens_offset *offs = &vcns->offset[clk];
const struct vdso_clock *vc = vd->clock_data;
- const struct vdso_timestamp *vdso_ts;
- u64 cycles, ns;
u32 seq;
s64 sec;
+ u64 ns;
if (clk != CLOCK_MONOTONIC_RAW)
vc = &vc[CS_HRES_COARSE];
else
vc = &vc[CS_RAW];
- vdso_ts = &vc->basetime[clk];
do {
seq = vdso_read_begin(vc);
- if (unlikely(!vdso_clocksource_ok(vc)))
- return -1;
-
- cycles = __arch_get_hw_counter(vc->clock_mode, vd);
- if (unlikely(!vdso_cycles_ok(cycles)))
- return -1;
- ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec);
- sec = vdso_ts->sec;
+ if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns))
+ return false;
} while (unlikely(vdso_read_retry(vc, seq)));
/* Add the namespace offset */
sec += offs->sec;
ns += offs->nsec;
- /*
- * Do this outside the loop: a race inside the loop could result
- * in __iter_div_u64_rem() being extremely slow.
- */
- ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
- ts->tv_nsec = ns;
+ vdso_set_timespec(ts, sec, ns);
- return 0;
+ return true;
}
#else
static __always_inline
@@ -133,24 +157,23 @@ const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_tim
}
static __always_inline
-int do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
- clockid_t clk, struct __kernel_timespec *ts)
+bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
+ clockid_t clk, struct __kernel_timespec *ts)
{
- return -EINVAL;
+ return false;
}
#endif
static __always_inline
-int do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc,
- clockid_t clk, struct __kernel_timespec *ts)
+bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc,
+ clockid_t clk, struct __kernel_timespec *ts)
{
- const struct vdso_timestamp *vdso_ts = &vc->basetime[clk];
- u64 cycles, sec, ns;
+ u64 sec, ns;
u32 seq;
/* Allows to compile the high resolution parts out */
if (!__arch_vdso_hres_capable())
- return -1;
+ return false;
do {
/*
@@ -172,30 +195,19 @@ int do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc,
}
smp_rmb();
- if (unlikely(!vdso_clocksource_ok(vc)))
- return -1;
-
- cycles = __arch_get_hw_counter(vc->clock_mode, vd);
- if (unlikely(!vdso_cycles_ok(cycles)))
- return -1;
- ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec);
- sec = vdso_ts->sec;
+ if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns))
+ return false;
} while (unlikely(vdso_read_retry(vc, seq)));
- /*
- * Do this outside the loop: a race inside the loop could result
- * in __iter_div_u64_rem() being extremely slow.
- */
- ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
- ts->tv_nsec = ns;
+ vdso_set_timespec(ts, sec, ns);
- return 0;
+ return true;
}
#ifdef CONFIG_TIME_NS
static __always_inline
-int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
- clockid_t clk, struct __kernel_timespec *ts)
+bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
+ clockid_t clk, struct __kernel_timespec *ts)
{
const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns);
const struct timens_offset *offs = &vcns->offset[clk];
@@ -217,26 +229,22 @@ int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock
sec += offs->sec;
nsec += offs->nsec;
- /*
- * Do this outside the loop: a race inside the loop could result
- * in __iter_div_u64_rem() being extremely slow.
- */
- ts->tv_sec = sec + __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec);
- ts->tv_nsec = nsec;
- return 0;
+ vdso_set_timespec(ts, sec, nsec);
+
+ return true;
}
#else
static __always_inline
-int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
- clockid_t clk, struct __kernel_timespec *ts)
+bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns,
+ clockid_t clk, struct __kernel_timespec *ts)
{
- return -1;
+ return false;
}
#endif
static __always_inline
-int do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc,
- clockid_t clk, struct __kernel_timespec *ts)
+bool do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc,
+ clockid_t clk, struct __kernel_timespec *ts)
{
const struct vdso_timestamp *vdso_ts = &vc->basetime[clk];
u32 seq;
@@ -258,19 +266,60 @@ int do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc,
ts->tv_nsec = vdso_ts->nsec;
} while (unlikely(vdso_read_retry(vc, seq)));
- return 0;
+ return true;
+}
+
+/*
+ * Read an auxiliary clock from the vDSO data page.
+ *
+ * The clock slot is selected by @clock - CLOCK_AUX. Returns false, which
+ * the callers turn into the syscall fallback, when auxiliary clocks are
+ * compiled out, when the slot is published with VDSO_CLOCKMODE_NONE or
+ * when vdso_get_timestamp() fails. On success @ts holds the normalized
+ * time stamp.
+ */
+static __always_inline
+bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts)
+{
+ const struct vdso_clock *vc;
+ u32 seq, idx;
+ u64 sec, ns;
+
+ if (!IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS))
+ return false;
+
+ idx = clock - CLOCK_AUX;
+ vc = &vd->aux_clock_data[idx];
+
+ do {
+ /*
+ * Open coded function vdso_read_begin() to handle
+ * VDSO_CLOCK_TIMENS. See comment in do_hres().
+ */
+ while ((seq = READ_ONCE(vc->seq)) & 1) {
+ if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) {
+ vd = __arch_get_vdso_u_timens_data(vd);
+ vc = &vd->aux_clock_data[idx];
+ /* Re-read from the real time data page */
+ continue;
+ }
+ cpu_relax();
+ }
+ smp_rmb();
+
+ /* Auxclock disabled? */
+ if (vc->clock_mode == VDSO_CLOCKMODE_NONE)
+ return false;
+
+ if (!vdso_get_timestamp(vd, vc, VDSO_BASE_AUX, &sec, &ns))
+ return false;
+ } while (unlikely(vdso_read_retry(vc, seq)));
+
+ /* Normalize outside of the retry loop, see vdso_set_timespec() */
+ vdso_set_timespec(ts, sec, ns);
+
+ return true;
+}
-static __always_inline int
+static __always_inline bool
__cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock,
struct __kernel_timespec *ts)
{
const struct vdso_clock *vc = vd->clock_data;
u32 msk;
- /* Check for negative values or invalid clocks */
- if (unlikely((u32) clock >= MAX_CLOCKS))
- return -1;
+ if (!vdso_clockid_valid(clock))
+ return false;
/*
* Convert the clockid to a bitmask and use it to check which
@@ -283,8 +332,10 @@ __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock,
return do_coarse(vd, &vc[CS_HRES_COARSE], clock, ts);
else if (msk & VDSO_RAW)
vc = &vc[CS_RAW];
+ else if (msk & VDSO_AUX)
+ return do_aux(vd, clock, ts);
else
- return -1;
+ return false;
return do_hres(vd, vc, clock, ts);
}
@@ -293,9 +344,11 @@ static __maybe_unused int
__cvdso_clock_gettime_data(const struct vdso_time_data *vd, clockid_t clock,
struct __kernel_timespec *ts)
{
- int ret = __cvdso_clock_gettime_common(vd, clock, ts);
+ bool ok;
+
+ ok = __cvdso_clock_gettime_common(vd, clock, ts);
- if (unlikely(ret))
+ if (unlikely(!ok))
return clock_gettime_fallback(clock, ts);
return 0;
}
@@ -312,18 +365,18 @@ __cvdso_clock_gettime32_data(const struct vdso_time_data *vd, clockid_t clock,
struct old_timespec32 *res)
{
struct __kernel_timespec ts;
- int ret;
+ bool ok;
- ret = __cvdso_clock_gettime_common(vd, clock, &ts);
+ ok = __cvdso_clock_gettime_common(vd, clock, &ts);
- if (unlikely(ret))
+ if (unlikely(!ok))
return clock_gettime32_fallback(clock, res);
- /* For ret == 0 */
+ /* For ok == true */
res->tv_sec = ts.tv_sec;
res->tv_nsec = ts.tv_nsec;
- return ret;
+ return 0;
}
static __maybe_unused int
@@ -342,7 +395,7 @@ __cvdso_gettimeofday_data(const struct vdso_time_data *vd,
if (likely(tv != NULL)) {
struct __kernel_timespec ts;
- if (do_hres(vd, &vc[CS_HRES_COARSE], CLOCK_REALTIME, &ts))
+ if (!do_hres(vd, &vc[CS_HRES_COARSE], CLOCK_REALTIME, &ts))
return gettimeofday_fallback(tv, tz);
tv->tv_sec = ts.tv_sec;
@@ -396,16 +449,15 @@ static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time
#ifdef VDSO_HAS_CLOCK_GETRES
static __maybe_unused
-int __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock,
- struct __kernel_timespec *res)
+bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock,
+ struct __kernel_timespec *res)
{
const struct vdso_clock *vc = vd->clock_data;
u32 msk;
u64 ns;
- /* Check for negative values or invalid clocks */
- if (unlikely((u32) clock >= MAX_CLOCKS))
- return -1;
+ if (!vdso_clockid_valid(clock))
+ return false;
if (IS_ENABLED(CONFIG_TIME_NS) &&
vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
@@ -426,24 +478,28 @@ int __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock
* Preserves the behaviour of posix_get_coarse_res().
*/
ns = LOW_RES_NSEC;
+ } else if (msk & VDSO_AUX) {
+ ns = aux_clock_resolution_ns();
} else {
- return -1;
+ return false;
}
if (likely(res)) {
res->tv_sec = 0;
res->tv_nsec = ns;
}
- return 0;
+ return true;
}
static __maybe_unused
int __cvdso_clock_getres_data(const struct vdso_time_data *vd, clockid_t clock,
struct __kernel_timespec *res)
{
- int ret = __cvdso_clock_getres_common(vd, clock, res);
+ bool ok;
- if (unlikely(ret))
+ ok = __cvdso_clock_getres_common(vd, clock, res);
+
+ if (unlikely(!ok))
return clock_getres_fallback(clock, res);
return 0;
}
@@ -460,18 +516,18 @@ __cvdso_clock_getres_time32_data(const struct vdso_time_data *vd, clockid_t cloc
struct old_timespec32 *res)
{
struct __kernel_timespec ts;
- int ret;
+ bool ok;
- ret = __cvdso_clock_getres_common(vd, clock, &ts);
+ ok = __cvdso_clock_getres_common(vd, clock, &ts);
- if (unlikely(ret))
+ if (unlikely(!ok))
return clock_getres32_fallback(clock, res);
if (likely(res)) {
res->tv_sec = ts.tv_sec;
res->tv_nsec = ts.tv_nsec;
}
- return ret;
+ return 0;
}
static __maybe_unused int