diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-01-27 16:47:05 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-01-27 16:47:05 -0800 |
commit | e279160f491392f1345f6eb4b0eeec5a6a2ecdd7 (patch) | |
tree | 018410c41f919774f70e6350fc251556bf789944 /drivers/clocksource/hyperv_timer.c | |
parent | 534b0a8b677443c0aa8c4c71ff7887f08a2b9b41 (diff) | |
parent | fd928f3e32ba09381b287f8b732418434d932855 (diff) |
Merge tag 'timers-core-2020-01-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer updates from Thomas Gleixner:
"The timekeeping and timers departement provides:
- Time namespace support:
If a container migrates from one host to another then it expects
that clocks based on MONOTONIC and BOOTTIME are not subject to
disruption. Due to different boot time and non-suspended runtime
these clocks can differ significantly on two hosts, in the worst
case time goes backwards which is a violation of the POSIX
requirements.
The time namespace addresses this problem. It allows to set offsets
for clock MONOTONIC and BOOTTIME once after creation and before
tasks are associated with the namespace. These offsets are taken
into account by timers and timekeeping including the VDSO.
Offsets for wall clock based clocks (REALTIME/TAI) are not provided
by this mechanism. While in theory possible, the overhead and code
complexity would be immense and not justified by the esoteric
potential use cases which were discussed at Plumbers '18.
The overhead for tasks in the root namespace (ie where host time
offsets = 0) is in the noise and great effort was made to ensure
that especially in the VDSO. If time namespace is disabled in the
kernel configuration the code is compiled out.
Kudos to Andrei Vagin and Dmitry Sofanov who implemented this
feature and kept on for more than a year addressing review
comments, finding better solutions. A pleasant experience.
- Overhaul of the alarmtimer device dependency handling to ensure
that the init/suspend/resume ordering is correct.
- A new clocksource/event driver for Microchip PIT64
- Suspend/resume support for the Hyper-V clocksource
- The usual pile of fixes, updates and improvements mostly in the
driver code"
* tag 'timers-core-2020-01-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (71 commits)
alarmtimer: Make alarmtimer_get_rtcdev() a stub when CONFIG_RTC_CLASS=n
alarmtimer: Use wakeup source from alarmtimer platform device
alarmtimer: Make alarmtimer platform device child of RTC device
alarmtimer: Update alarmtimer_get_rtcdev() docs to reflect reality
hrtimer: Add missing sparse annotation for __run_timer()
lib/vdso: Only read hrtimer_res when needed in __cvdso_clock_getres()
MIPS: vdso: Define BUILD_VDSO32 when building a 32bit kernel
clocksource/drivers/hyper-v: Set TSC clocksource as default w/ InvariantTSC
clocksource/drivers/hyper-v: Untangle stimers and timesync from clocksources
clocksource/drivers/timer-microchip-pit64b: Fix sparse warning
clocksource/drivers/exynos_mct: Rename Exynos to lowercase
clocksource/drivers/timer-ti-dm: Fix uninitialized pointer access
clocksource/drivers/timer-ti-dm: Switch to platform_get_irq
clocksource/drivers/timer-ti-dm: Convert to devm_platform_ioremap_resource
clocksource/drivers/em_sti: Fix variable declaration in em_sti_probe
clocksource/drivers/em_sti: Convert to devm_platform_ioremap_resource
clocksource/drivers/bcm2835_timer: Fix memory leak of timer
clocksource/drivers/cadence-ttc: Use ttc driver as platform driver
clocksource/drivers/timer-microchip-pit64b: Add Microchip PIT64B support
clocksource/drivers/hyper-v: Reserve PAGE_SIZE space for tsc page
...
Diffstat (limited to 'drivers/clocksource/hyperv_timer.c')
-rw-r--r-- | drivers/clocksource/hyperv_timer.c | 84 |
1 files changed, 65 insertions, 19 deletions
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 287d8d58c21a..9d808d595ca8 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -66,7 +66,7 @@ static int hv_ce_set_next_event(unsigned long delta, { u64 current_tick; - current_tick = hyperv_cs->read(NULL); + current_tick = hv_read_reference_counter(); current_tick += delta; hv_init_timer(0, current_tick); return 0; @@ -302,22 +302,33 @@ EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); * the other that uses the TSC reference page feature as defined in the * TLFS. The MSR version is for compatibility with old versions of * Hyper-V and 32-bit x86. The TSC reference page version is preferred. + * + * The Hyper-V clocksource ratings of 250 are chosen to be below the + * TSC clocksource rating of 300. In configurations where Hyper-V offers + * an InvariantTSC, the TSC is not marked "unstable", so the TSC clocksource + * is available and preferred. With the higher rating, it will be the + * default. On older hardware and Hyper-V versions, the TSC is marked + * "unstable", so no TSC clocksource is created and the selected Hyper-V + * clocksource will be the default. */ -struct clocksource *hyperv_cs; -EXPORT_SYMBOL_GPL(hyperv_cs); +u64 (*hv_read_reference_counter)(void); +EXPORT_SYMBOL_GPL(hv_read_reference_counter); -static struct ms_hyperv_tsc_page tsc_pg __aligned(PAGE_SIZE); +static union { + struct ms_hyperv_tsc_page page; + u8 reserved[PAGE_SIZE]; +} tsc_pg __aligned(PAGE_SIZE); struct ms_hyperv_tsc_page *hv_get_tsc_page(void) { - return &tsc_pg; + return &tsc_pg.page; } EXPORT_SYMBOL_GPL(hv_get_tsc_page); -static u64 notrace read_hv_clock_tsc(struct clocksource *arg) +static u64 notrace read_hv_clock_tsc(void) { - u64 current_tick = hv_read_tsc_page(&tsc_pg); + u64 current_tick = hv_read_tsc_page(hv_get_tsc_page()); if (current_tick == U64_MAX) hv_get_time_ref_count(current_tick); @@ -325,20 +336,50 @@ static u64 notrace read_hv_clock_tsc(struct clocksource *arg) return current_tick; } +static u64 notrace read_hv_clock_tsc_cs(struct clocksource *arg) +{ + return read_hv_clock_tsc(); +} + static u64 read_hv_sched_clock_tsc(void) { - return read_hv_clock_tsc(NULL) - hv_sched_clock_offset; + return read_hv_clock_tsc() - hv_sched_clock_offset; +} + +static void suspend_hv_clock_tsc(struct clocksource *arg) +{ + u64 tsc_msr; + + /* Disable the TSC page */ + hv_get_reference_tsc(tsc_msr); + tsc_msr &= ~BIT_ULL(0); + hv_set_reference_tsc(tsc_msr); +} + + +static void resume_hv_clock_tsc(struct clocksource *arg) +{ + phys_addr_t phys_addr = virt_to_phys(&tsc_pg); + u64 tsc_msr; + + /* Re-enable the TSC page */ + hv_get_reference_tsc(tsc_msr); + tsc_msr &= GENMASK_ULL(11, 0); + tsc_msr |= BIT_ULL(0) | (u64)phys_addr; + hv_set_reference_tsc(tsc_msr); } static struct clocksource hyperv_cs_tsc = { .name = "hyperv_clocksource_tsc_page", - .rating = 400, - .read = read_hv_clock_tsc, + .rating = 250, + .read = read_hv_clock_tsc_cs, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .suspend= suspend_hv_clock_tsc, + .resume = resume_hv_clock_tsc, }; -static u64 notrace read_hv_clock_msr(struct clocksource *arg) +static u64 notrace read_hv_clock_msr(void) { u64 current_tick; /* @@ -350,15 +391,20 @@ static u64 notrace read_hv_clock_msr(struct clocksource *arg) return current_tick; } +static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg) +{ + return read_hv_clock_msr(); +} + static u64 read_hv_sched_clock_msr(void) { - return read_hv_clock_msr(NULL) - hv_sched_clock_offset; + return read_hv_clock_msr() - hv_sched_clock_offset; } static struct clocksource hyperv_cs_msr = { .name = "hyperv_clocksource_msr", - .rating = 400, - .read = read_hv_clock_msr, + .rating = 250, + .read = read_hv_clock_msr_cs, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -371,8 +417,8 @@ static bool __init hv_init_tsc_clocksource(void) if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) return false; - hyperv_cs = &hyperv_cs_tsc; - phys_addr = virt_to_phys(&tsc_pg); + hv_read_reference_counter = read_hv_clock_tsc; + phys_addr = virt_to_phys(hv_get_tsc_page()); /* * The Hyper-V TLFS specifies to preserve the value of reserved @@ -389,7 +435,7 @@ static bool __init hv_init_tsc_clocksource(void) hv_set_clocksource_vdso(hyperv_cs_tsc); clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); - hv_sched_clock_offset = hyperv_cs->read(hyperv_cs); + hv_sched_clock_offset = hv_read_reference_counter(); hv_setup_sched_clock(read_hv_sched_clock_tsc); return true; @@ -411,10 +457,10 @@ void __init hv_init_clocksource(void) if (!(ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE)) return; - hyperv_cs = &hyperv_cs_msr; + hv_read_reference_counter = read_hv_clock_msr; clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); - hv_sched_clock_offset = hyperv_cs->read(hyperv_cs); + hv_sched_clock_offset = hv_read_reference_counter(); hv_setup_sched_clock(read_hv_sched_clock_msr); } EXPORT_SYMBOL_GPL(hv_init_clocksource); |