From 6c5e9059692567740a4ee51530dffe51a4b9584d Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Fri, 21 Oct 2016 08:58:50 -0700
Subject: timers: Fix usleep_range() in the context of wake_up_process()

Users of usleep_range() expect that it will _never_ return in less time
than the minimum passed parameter. However, nothing in the code ensures
this, when the sleeping task is woken by wake_up_process() or any other
mechanism which can wake a task from uninterruptible state.

Neither usleep_range() nor schedule_hrtimeout_range*() have any protection
against wakeups. schedule_hrtimeout_range*() is designed this way despite
the fact that the API documentation does not mention it.

msleep() already has code to handle this case since it will loop as long
as there was still time left.  usleep_range() has no such loop, add it.

Presumably this problem was not detected before because usleep_range() is
only used in a few places and the function is mostly used in contexts which
are not exposed to wakeups of any form.

An effort was made to look for users relying on the old behavior by
looking for usleep_range() in the same file as wake_up_process().
No problems were found by this search, though it is conceivable that
someone could have put the sleep and wakeup in two different files.

An effort was made to ask several upstream maintainers if they were aware
of people relying on wake_up_process() to wake up usleep_range(). No
maintainers were aware of that but they were aware of many people relying
on usleep_range() never returning before the minimum.

Reported-by: Tao Huang <huangtao@rock-chips.com>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Cc: heiko@sntech.de
Cc: broonie@kernel.org
Cc: briannorris@chromium.org
Cc: Andreas Mohr <andi@lisas.de>
Cc: linux-rockchip@lists.infradead.org
Cc: tony.xie@rock-chips.com
Cc: John Stultz <john.stultz@linaro.org>
Cc: djkurtz@chromium.org
Cc: linux@roeck-us.net
Cc: tskd08@gmail.com
Link: http://lkml.kernel.org/r/1477065531-30342-1-git-send-email-dianders@chromium.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timer.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2d47980a1bc4..12681c9a7683 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1896,16 +1896,6 @@ unsigned long msleep_interruptible(unsigned int msecs)
 
 EXPORT_SYMBOL(msleep_interruptible);
 
-static void __sched do_usleep_range(unsigned long min, unsigned long max)
-{
-	ktime_t kmin;
-	u64 delta;
-
-	kmin = ktime_set(0, min * NSEC_PER_USEC);
-	delta = (u64)(max - min) * NSEC_PER_USEC;
-	schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
-}
-
 /**
  * usleep_range - Sleep for an approximate time
  * @min: Minimum time in usecs to sleep
@@ -1919,7 +1909,14 @@ static void __sched do_usleep_range(unsigned long min, unsigned long max)
  */
 void __sched usleep_range(unsigned long min, unsigned long max)
 {
-	__set_current_state(TASK_UNINTERRUPTIBLE);
-	do_usleep_range(min, max);
+	ktime_t exp = ktime_add_us(ktime_get(), min);
+	u64 delta = (u64)(max - min) * NSEC_PER_USEC;
+
+	for (;;) {
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+		/* Do not return before the requested sleep time has elapsed */
+		if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
+			break;
+	}
 }
 EXPORT_SYMBOL(usleep_range);
-- 
cgit 


From 4b7e9cf9c84b09adc428e0433cd376b91f9c52a7 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Fri, 21 Oct 2016 08:58:51 -0700
Subject: timers: Fix documentation for schedule_timeout() and similar

The documentation for schedule_timeout(), schedule_hrtimeout(), and
schedule_hrtimeout_range() all claim that the routines couldn't possibly
return early if the task state was TASK_UNINTERRUPTIBLE. This is simply
not true since wake_up_process() will cause those routines to exit early.

We cannot make schedule_[hr]timeout() loop until the timeout expires if the
task state is uninterruptible because we have users which rely on the
existing and designed behaviour.

Make the documentation match the (correct) implementation.

schedule_hrtimeout() returns -EINTR even when a uninterruptible task was
woken up. This might look strange, but making the return code depend on the
state is too much of an effort as it would affect all the call sites. There
is no value in doing so, but we spell it out clearly in the documentation.

Suggested-by: Daniel Kurtz <djkurtz@chromium.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Cc: huangtao@rock-chips.com
Cc: heiko@sntech.de
Cc: broonie@kernel.org
Cc: briannorris@chromium.org
Cc: Andreas Mohr <andi@lisas.de>
Cc: linux-rockchip@lists.infradead.org
Cc: tony.xie@rock-chips.com
Cc: John Stultz <john.stultz@linaro.org>
Cc: linux@roeck-us.net
Cc: tskd08@gmail.com
Link: http://lkml.kernel.org/r/1477065531-30342-2-git-send-email-dianders@chromium.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/hrtimer.c | 20 ++++++++++++++------
 kernel/time/timer.c   | 11 +++++++----
 2 files changed, 21 insertions(+), 10 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index bb5ec425dfe0..08be5c99d26b 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1742,15 +1742,19 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
  * You can set the task state as follows -
  *
  * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
- * pass before the routine returns.
+ * pass before the routine returns unless the current task is explicitly
+ * woken up, (e.g. by wake_up_process()).
  *
  * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task.
+ * delivered to the current task or the current task is explicitly woken
+ * up.
  *
  * The current task state is guaranteed to be TASK_RUNNING when this
  * routine returns.
  *
- * Returns 0 when the timer has expired otherwise -EINTR
+ * Returns 0 when the timer has expired. If the task was woken before the
+ * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
+ * by an explicit wakeup, it returns -EINTR.
  */
 int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
 				     const enum hrtimer_mode mode)
@@ -1772,15 +1776,19 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
  * You can set the task state as follows -
  *
  * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
- * pass before the routine returns.
+ * pass before the routine returns unless the current task is explicitly
+ * woken up, (e.g. by wake_up_process()).
  *
  * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task.
+ * delivered to the current task or the current task is explicitly woken
+ * up.
  *
  * The current task state is guaranteed to be TASK_RUNNING when this
  * routine returns.
  *
- * Returns 0 when the timer has expired otherwise -EINTR
+ * Returns 0 when the timer has expired. If the task was woken before the
+ * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
+ * by an explicit wakeup, it returns -EINTR.
  */
 int __sched schedule_hrtimeout(ktime_t *expires,
 			       const enum hrtimer_mode mode)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 12681c9a7683..88aab86a4594 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1691,11 +1691,12 @@ static void process_timeout(unsigned long __data)
  * You can set the task state as follows -
  *
  * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
- * pass before the routine returns. The routine will return 0
+ * pass before the routine returns unless the current task is explicitly
+ * woken up, (e.g. by wake_up_process())".
  *
  * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task. In this case the remaining time
- * in jiffies will be returned, or 0 if the timer expired in time
+ * delivered to the current task or the current task is explicitly woken
+ * up.
  *
  * The current task state is guaranteed to be TASK_RUNNING when this
  * routine returns.
@@ -1704,7 +1705,9 @@ static void process_timeout(unsigned long __data)
  * the CPU away without a bound on the timeout. In this case the return
  * value will be %MAX_SCHEDULE_TIMEOUT.
  *
- * In all cases the return value is guaranteed to be non-negative.
+ * Returns 0 when the timer has expired otherwise the remaining time in
+ * jiffies will be returned.  In all cases the return value is guaranteed
+ * to be non-negative.
  */
 signed long __sched schedule_timeout(signed long timeout)
 {
-- 
cgit 


From 74ba181e61c6accf9066d6980f44588de2f854f6 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Fri, 11 Nov 2016 00:10:08 -0500
Subject: timer: Move sys_alarm from timer.c to itimer.c

Move the only user of alarm_setitimer to itimer.c where it is defined.
This allows for making alarm_setitimer static, and dropping it from the
build when __ARCH_WANT_SYS_ALARM is not defined.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: John Stultz <john.stultz@linaro.org>
Cc: Paul Bolle <pebolle@tiscali.nl>
Cc: linux-kbuild@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Michal Marek <mmarek@suse.com>
Cc: Edward Cree <ecree@solarflare.com>
Link: http://lkml.kernel.org/r/1478841010-28605-5-git-send-email-nicolas.pitre@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/itimer.c | 15 ++++++++++++++-
 kernel/time/timer.c  | 13 -------------
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 1d5c7204ddc9..2b9f45bc955d 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -238,6 +238,8 @@ again:
 	return 0;
 }
 
+#ifdef __ARCH_WANT_SYS_ALARM
+
 /**
  * alarm_setitimer - set alarm in seconds
  *
@@ -250,7 +252,7 @@ again:
  * On 32 bit machines the seconds value is limited to (INT_MAX/2) to avoid
  * negative timeval settings which would cause immediate expiry.
  */
-unsigned int alarm_setitimer(unsigned int seconds)
+static unsigned int alarm_setitimer(unsigned int seconds)
 {
 	struct itimerval it_new, it_old;
 
@@ -275,6 +277,17 @@ unsigned int alarm_setitimer(unsigned int seconds)
 	return it_old.it_value.tv_sec;
 }
 
+/*
+ * For backwards compatibility?  This can be done in libc so Alpha
+ * and all newer ports shouldn't need it.
+ */
+SYSCALL_DEFINE1(alarm, unsigned int, seconds)
+{
+	return alarm_setitimer(seconds);
+}
+
+#endif
+
 SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value,
 		struct itimerval __user *, ovalue)
 {
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 88aab86a4594..42d27aa242b9 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1662,19 +1662,6 @@ void run_local_timers(void)
 	raise_softirq(TIMER_SOFTIRQ);
 }
 
-#ifdef __ARCH_WANT_SYS_ALARM
-
-/*
- * For backwards compatibility?  This can be done in libc so Alpha
- * and all newer ports shouldn't need it.
- */
-SYSCALL_DEFINE1(alarm, unsigned int, seconds)
-{
-	return alarm_setitimer(seconds);
-}
-
-#endif
-
 static void process_timeout(unsigned long __data)
 {
 	wake_up_process((struct task_struct *)__data);
-- 
cgit 


From 53d3eaa31508222e445b489f3c3ac4c63542a4ef Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Fri, 11 Nov 2016 00:10:09 -0500
Subject: posix_cpu_timers: Move the add_device_randomness() call to a proper
 place

There is no logical relation between add_device_randomness() and
posix_cpu_timers_exit(). Let's move the former to where the later
is called. This way, when posix-cpu-timers.c is compiled out, there
is no need to worry about not losing a call to add_device_randomness().

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: John Stultz <john.stultz@linaro.org>
Cc: Paul Bolle <pebolle@tiscali.nl>
Cc: linux-kbuild@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Michal Marek <mmarek@suse.com>
Cc: Edward Cree <ecree@solarflare.com>
Link: http://lkml.kernel.org/r/1478841010-28605-6-git-send-email-nicolas.pitre@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/posix-cpu-timers.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 39008d78927a..e582f20f47a4 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -9,7 +9,6 @@
 #include <asm/uaccess.h>
 #include <linux/kernel_stat.h>
 #include <trace/events/timer.h>
-#include <linux/random.h>
 #include <linux/tick.h>
 #include <linux/workqueue.h>
 
@@ -447,10 +446,7 @@ static void cleanup_timers(struct list_head *head)
  */
 void posix_cpu_timers_exit(struct task_struct *tsk)
 {
-	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
-						sizeof(unsigned long long));
 	cleanup_timers(tsk->cpu_timers);
-
 }
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
-- 
cgit 


From baa73d9e478ff32d62f3f9422822b59dd9a95a21 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Fri, 11 Nov 2016 00:10:10 -0500
Subject: posix-timers: Make them configurable

Some embedded systems have no use for them.  This removes about
25KB from the kernel binary size when configured out.

Corresponding syscalls are routed to a stub logging the attempt to
use those syscalls which should be enough of a clue if they were
disabled without proper consideration. They are: timer_create,
timer_gettime: timer_getoverrun, timer_settime, timer_delete,
clock_adjtime, setitimer, getitimer, alarm.

The clock_settime, clock_gettime, clock_getres and clock_nanosleep
syscalls are replaced by simple wrappers compatible with CLOCK_REALTIME,
CLOCK_MONOTONIC and CLOCK_BOOTTIME only which should cover the vast
majority of use cases with very little code.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <john.stultz@linaro.org>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Cc: Paul Bolle <pebolle@tiscali.nl>
Cc: linux-kbuild@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: Michal Marek <mmarek@suse.com>
Cc: Edward Cree <ecree@solarflare.com>
Link: http://lkml.kernel.org/r/1478841010-28605-7-git-send-email-nicolas.pitre@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/Makefile      |  10 +++-
 kernel/time/alarmtimer.c  |   6 ++-
 kernel/time/posix-stubs.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++
 kernel/time/timer.c       |   3 +-
 4 files changed, 137 insertions(+), 5 deletions(-)
 create mode 100644 kernel/time/posix-stubs.c

(limited to 'kernel/time')

diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 49eca0beed32..976840d29a71 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,6 +1,12 @@
-obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o
+obj-y += time.o timer.o hrtimer.o
 obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
-obj-y += timeconv.o timecounter.o posix-clock.o alarmtimer.o
+obj-y += timeconv.o timecounter.o alarmtimer.o
+
+ifeq ($(CONFIG_POSIX_TIMERS),y)
+ obj-y += posix-timers.o posix-cpu-timers.o posix-clock.o itimer.o
+else
+ obj-y += posix-stubs.o
+endif
 
 obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= clockevents.o tick-common.o
 ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y)
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 12dd190634ab..a15caa3d1721 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -846,8 +846,10 @@ static int __init alarmtimer_init(void)
 
 	alarmtimer_rtc_timer_init();
 
-	posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock);
-	posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock);
+	if (IS_ENABLED(CONFIG_POSIX_TIMERS)) {
+		posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock);
+		posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock);
+	}
 
 	/* Initialize alarm bases */
 	alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME;
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
new file mode 100644
index 000000000000..cd6716e115e8
--- /dev/null
+++ b/kernel/time/posix-stubs.c
@@ -0,0 +1,123 @@
+/*
+ * Dummy stubs used when CONFIG_POSIX_TIMERS=n
+ *
+ * Created by:  Nicolas Pitre, July 2016
+ * Copyright:   (C) 2016 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/syscalls.h>
+#include <linux/ktime.h>
+#include <linux/timekeeping.h>
+#include <linux/posix-timers.h>
+
+asmlinkage long sys_ni_posix_timers(void)
+{
+	pr_err_once("process %d (%s) attempted a POSIX timer syscall "
+		    "while CONFIG_POSIX_TIMERS is not set\n",
+		    current->pid, current->comm);
+	return -ENOSYS;
+}
+
+#define SYS_NI(name)  SYSCALL_ALIAS(sys_##name, sys_ni_posix_timers)
+
+SYS_NI(timer_create);
+SYS_NI(timer_gettime);
+SYS_NI(timer_getoverrun);
+SYS_NI(timer_settime);
+SYS_NI(timer_delete);
+SYS_NI(clock_adjtime);
+SYS_NI(getitimer);
+SYS_NI(setitimer);
+#ifdef __ARCH_WANT_SYS_ALARM
+SYS_NI(alarm);
+#endif
+
+/*
+ * We preserve minimal support for CLOCK_REALTIME and CLOCK_MONOTONIC
+ * as it is easy to remain compatible with little code. CLOCK_BOOTTIME
+ * is also included for convenience as at least systemd uses it.
+ */
+
+SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
+		const struct timespec __user *, tp)
+{
+	struct timespec new_tp;
+
+	if (which_clock != CLOCK_REALTIME)
+		return -EINVAL;
+	if (copy_from_user(&new_tp, tp, sizeof (*tp)))
+		return -EFAULT;
+	return do_sys_settimeofday(&new_tp, NULL);
+}
+
+SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
+		struct timespec __user *,tp)
+{
+	struct timespec kernel_tp;
+
+	switch (which_clock) {
+	case CLOCK_REALTIME: ktime_get_real_ts(&kernel_tp); break;
+	case CLOCK_MONOTONIC: ktime_get_ts(&kernel_tp); break;
+	case CLOCK_BOOTTIME: get_monotonic_boottime(&kernel_tp); break;
+	default: return -EINVAL;
+	}
+	if (copy_to_user(tp, &kernel_tp, sizeof (kernel_tp)))
+		return -EFAULT;
+	return 0;
+}
+
+SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct timespec __user *, tp)
+{
+	struct timespec rtn_tp = {
+		.tv_sec = 0,
+		.tv_nsec = hrtimer_resolution,
+	};
+
+	switch (which_clock) {
+	case CLOCK_REALTIME:
+	case CLOCK_MONOTONIC:
+	case CLOCK_BOOTTIME:
+		if (copy_to_user(tp, &rtn_tp, sizeof(rtn_tp)))
+			return -EFAULT;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
+		const struct timespec __user *, rqtp,
+		struct timespec __user *, rmtp)
+{
+	struct timespec t;
+
+	switch (which_clock) {
+	case CLOCK_REALTIME:
+	case CLOCK_MONOTONIC:
+	case CLOCK_BOOTTIME:
+		if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
+			return -EFAULT;
+		if (!timespec_valid(&t))
+			return -EINVAL;
+		return hrtimer_nanosleep(&t, rmtp, flags & TIMER_ABSTIME ?
+					 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
+					 which_clock);
+	default:
+		return -EINVAL;
+	}
+}
+
+#ifdef CONFIG_COMPAT
+long clock_nanosleep_restart(struct restart_block *restart_block)
+{
+	return hrtimer_nanosleep_restart(restart_block);
+}
+#endif
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 42d27aa242b9..e2892e454fe3 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1601,7 +1601,8 @@ void update_process_times(int user_tick)
 		irq_work_tick();
 #endif
 	scheduler_tick();
-	run_posix_cpu_timers(p);
+	if (IS_ENABLED(CONFIG_POSIX_TIMERS))
+		run_posix_cpu_timers(p);
 }
 
 /**
-- 
cgit 


From 948a5312f41658f7b76a598a139ef1f4dea09ca9 Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Mon, 28 Nov 2016 14:35:22 -0800
Subject: timekeeping: Add a fast and NMI safe boot clock

This boot clock can be used as a tracing clock and will account for
suspend time.

To keep it NMI safe since we're accessing from tracing, we're not using a
separate timekeeper with updates to monotonic clock and boot offset
protected with seqlocks. This has the following minor side effects:

(1) Its possible that a timestamp be taken after the boot offset is updated
but before the timekeeper is updated. If this happens, the new boot offset
is added to the old timekeeping making the clock appear to update slightly
earlier:
   CPU 0                                        CPU 1
   timekeeping_inject_sleeptime64()
   __timekeeping_inject_sleeptime(tk, delta);
                                                timestamp();
   timekeeping_update(tk, TK_CLEAR_NTP...);

(2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
partially updated.  Since the tk->offs_boot update is a rare event, this
should be a rare occurrence which postprocessing should be able to handle.

Signed-off-by: Joel Fernandes <joelaf@google.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1480372524-15181-6-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timekeeping.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 37dec7e3db43..b2286e94c934 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -425,6 +425,35 @@ u64 ktime_get_raw_fast_ns(void)
 }
 EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
 
+/**
+ * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
+ *
+ * To keep it NMI safe since we're accessing from tracing, we're not using a
+ * separate timekeeper with updates to monotonic clock and boot offset
+ * protected with seqlocks. This has the following minor side effects:
+ *
+ * (1) Its possible that a timestamp be taken after the boot offset is updated
+ * but before the timekeeper is updated. If this happens, the new boot offset
+ * is added to the old timekeeping making the clock appear to update slightly
+ * earlier:
+ *    CPU 0                                        CPU 1
+ *    timekeeping_inject_sleeptime64()
+ *    __timekeeping_inject_sleeptime(tk, delta);
+ *                                                 timestamp();
+ *    timekeeping_update(tk, TK_CLEAR_NTP...);
+ *
+ * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
+ * partially updated.  Since the tk->offs_boot update is a rare event, this
+ * should be a rare occurrence which postprocessing should be able to handle.
+ */
+u64 notrace ktime_get_boot_fast_ns(void)
+{
+	struct timekeeper *tk = &tk_core.timekeeper;
+
+	return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
+}
+EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
+
 /* Suspend-time cycles value for halted fast timekeeper. */
 static cycle_t cycles_at_suspend;
 
-- 
cgit 


From 4a057549d6044c2dea47e80f8369a76225ec9d90 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Mon, 28 Nov 2016 14:35:21 -0800
Subject: alarmtimer: Add tracepoints for alarm timers

Alarm timers are one of the mechanisms to wake up a system from suspend,
but there exist no tracepoints to analyse which process/thread armed an
alarmtimer.

Add tracepoints for start/cancel/expire of individual alarm timers and one
for tracing the suspend time decision when to resume the system.

The following trace excerpt illustrates the new mechanism:

Binder:3292_2-3304  [000] d..2   149.981123: alarmtimer_cancel:
alarmtimer:ffffffc1319a7800 type:REALTIME
expires:1325463120000000000 now:1325376810370370245

Binder:3292_2-3304  [000] d..2   149.981136: alarmtimer_start:
alarmtimer:ffffffc1319a7800 type:REALTIME
expires:1325376840000000000 now:1325376810370384591

Binder:3292_9-3953  [000] d..2   150.212991: alarmtimer_cancel:
alarmtimer:ffffffc1319a5a00 type:BOOTTIME
expires:179552000000 now:150154008122

Binder:3292_9-3953  [000] d..2   150.213006: alarmtimer_start:
alarmtimer:ffffffc1319a5a00 type:BOOTTIME
expires:179551000000 now:150154025622

system_server-3000  [002] ...1  162.701940: alarmtimer_suspend:
alarmtimer type:REALTIME expires:1325376840000000000

The wakeup time which is selected at suspend time allows to map it back to
the task arming the timer: Binder:3292_2.

[ tglx: Store alarm timer expiry time instead of some useless RTC relative
  	information, add proper type information for wakeups which are
  	handled via the clock_nanosleep/freezer and massage the changelog. ]

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Link: http://lkml.kernel.org/r/1480372524-15181-5-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/alarmtimer.c | 53 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 10 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index a15caa3d1721..9b08ca391aed 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -26,6 +26,9 @@
 #include <linux/workqueue.h>
 #include <linux/freezer.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/alarmtimer.h>
+
 /**
  * struct alarm_base - Alarm timer bases
  * @lock:		Lock for syncrhonized access to the base
@@ -40,7 +43,9 @@ static struct alarm_base {
 	clockid_t		base_clockid;
 } alarm_bases[ALARM_NUMTYPE];
 
-/* freezer delta & lock used to handle clock_nanosleep triggered wakeups */
+/* freezer information to handle clock_nanosleep triggered wakeups */
+static enum alarmtimer_type freezer_alarmtype;
+static ktime_t freezer_expires;
 static ktime_t freezer_delta;
 static DEFINE_SPINLOCK(freezer_delta_lock);
 
@@ -194,6 +199,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
 	}
 	spin_unlock_irqrestore(&base->lock, flags);
 
+	trace_alarmtimer_fired(alarm, base->gettime());
 	return ret;
 
 }
@@ -218,15 +224,16 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining);
  */
 static int alarmtimer_suspend(struct device *dev)
 {
-	struct rtc_time tm;
-	ktime_t min, now;
-	unsigned long flags;
+	ktime_t min, now, expires;
+	int i, ret, type;
 	struct rtc_device *rtc;
-	int i;
-	int ret;
+	unsigned long flags;
+	struct rtc_time tm;
 
 	spin_lock_irqsave(&freezer_delta_lock, flags);
 	min = freezer_delta;
+	expires = freezer_expires;
+	type = freezer_alarmtype;
 	freezer_delta = ktime_set(0, 0);
 	spin_unlock_irqrestore(&freezer_delta_lock, flags);
 
@@ -247,8 +254,11 @@ static int alarmtimer_suspend(struct device *dev)
 		if (!next)
 			continue;
 		delta = ktime_sub(next->expires, base->gettime());
-		if (!min.tv64 || (delta.tv64 < min.tv64))
+		if (!min.tv64 || (delta.tv64 < min.tv64)) {
+			expires = next->expires;
 			min = delta;
+			type = i;
+		}
 	}
 	if (min.tv64 == 0)
 		return 0;
@@ -258,6 +268,8 @@ static int alarmtimer_suspend(struct device *dev)
 		return -EBUSY;
 	}
 
+	trace_alarmtimer_suspend(expires, type);
+
 	/* Setup an rtc timer to fire that far in the future */
 	rtc_timer_cancel(rtc, &rtctimer);
 	rtc_read_time(rtc, &tm);
@@ -295,15 +307,32 @@ static int alarmtimer_resume(struct device *dev)
 
 static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
 {
-	ktime_t delta;
+	struct alarm_base *base;
 	unsigned long flags;
-	struct alarm_base *base = &alarm_bases[type];
+	ktime_t delta;
+
+	switch(type) {
+	case ALARM_REALTIME:
+		base = &alarm_bases[ALARM_REALTIME];
+		type = ALARM_REALTIME_FREEZER;
+		break;
+	case ALARM_BOOTTIME:
+		base = &alarm_bases[ALARM_BOOTTIME];
+		type = ALARM_BOOTTIME_FREEZER;
+		break;
+	default:
+		WARN_ONCE(1, "Invalid alarm type: %d\n", type);
+		return;
+	}
 
 	delta = ktime_sub(absexp, base->gettime());
 
 	spin_lock_irqsave(&freezer_delta_lock, flags);
-	if (!freezer_delta.tv64 || (delta.tv64 < freezer_delta.tv64))
+	if (!freezer_delta.tv64 || (delta.tv64 < freezer_delta.tv64)) {
 		freezer_delta = delta;
+		freezer_expires = absexp;
+		freezer_alarmtype = type;
+	}
 	spin_unlock_irqrestore(&freezer_delta_lock, flags);
 }
 
@@ -342,6 +371,8 @@ void alarm_start(struct alarm *alarm, ktime_t start)
 	alarmtimer_enqueue(base, alarm);
 	hrtimer_start(&alarm->timer, alarm->node.expires, HRTIMER_MODE_ABS);
 	spin_unlock_irqrestore(&base->lock, flags);
+
+	trace_alarmtimer_start(alarm, base->gettime());
 }
 EXPORT_SYMBOL_GPL(alarm_start);
 
@@ -390,6 +421,8 @@ int alarm_try_to_cancel(struct alarm *alarm)
 	if (ret >= 0)
 		alarmtimer_dequeue(base, alarm);
 	spin_unlock_irqrestore(&base->lock, flags);
+
+	trace_alarmtimer_cancel(alarm, base->gettime());
 	return ret;
 }
 EXPORT_SYMBOL_GPL(alarm_try_to_cancel);
-- 
cgit 


From 9c1645727b8fa90d07256fdfcc45bf831242a3ab Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 8 Dec 2016 20:49:32 +0000
Subject: timekeeping_Force_unsigned_clocksource_to_nanoseconds_conversion

The clocksource delta to nanoseconds conversion is using signed math, but
the delta is unsigned. This makes the conversion space smaller than
necessary and in case of a multiplication overflow the conversion can
become negative. The conversion is done with scaled math:

    s64 nsec_delta = ((s64)clkdelta * clk->mult) >> clk->shift;

Shifting a signed integer right obvioulsy preserves the sign, which has
interesting consequences:

 - Time jumps backwards

 - __iter_div_u64_rem() which is used in one of the calling code pathes
   will take forever to piecewise calculate the seconds/nanoseconds part.

This has been reported by several people with different scenarios:

David observed that when stopping a VM with a debugger:

 "It was essentially the stopped by debugger case.  I forget exactly why,
  but the guest was being explicitly stopped from outside, it wasn't just
  scheduling lag.  I think it was something in the vicinity of 10 minutes
  stopped."

 When lifting the stop the machine went dead.

The stopped by debugger case is not really interesting, but nevertheless it
would be a good thing not to die completely.

But this was also observed on a live system by Liav:

 "When the OS is too overloaded, delta will get a high enough value for the
  msb of the sum delta * tkr->mult + tkr->xtime_nsec to be set, and so
  after the shift the nsec variable will gain a value similar to
  0xffffffffff000000."

Unfortunately this has been reintroduced recently with commit 6bd58f09e1d8
("time: Add cycles to nanoseconds translation"). It had been fixed a year
ago already in commit 35a4933a8959 ("time: Avoid signed overflow in
timekeeping_get_ns()").

Though it's not surprising that the issue has been reintroduced because the
function itself and the whole call chain uses s64 for the result and the
propagation of it. The change in this recent commit is subtle:

   s64 nsec;

-  nsec = (d * m + n) >> s:
+  nsec = d * m + n;
+  nsec >>= s;

d being type of cycle_t adds another level of obfuscation.

This wouldn't have happened if the previous change to unsigned computation
would have made the 'nsec' variable u64 right away and a follow up patch
had cleaned up the whole call chain.

There have been patches submitted which basically did a revert of the above
patch leaving everything else unchanged as signed. Back to square one. This
spawned a admittedly pointless discussion about potential users which rely
on the unsigned behaviour until someone pointed out that it had been fixed
before. The changelogs of said patches added further confusion as they made
finally false claims about the consequences for eventual users which expect
signed results.

Despite delta being cycle_t, aka. u64, it's very well possible to hand in
a signed negative value and the signed computation will happily return the
correct result. But nobody actually sat down and analyzed the code which
was added as user after the propably unintended signed conversion.

Though in sensitive code like this it's better to analyze it proper and
make sure that nothing relies on this than hunting the subtle wreckage half
a year later. After analyzing all call chains it stands that no caller can
hand in a negative value (which actually would work due to the s64 cast)
and rely on the signed math to do the right thing.

Change the conversion function to unsigned math. The conversion of all call
chains is done in a follow up patch.

This solves the starvation issue, which was caused by the negative result,
but it does not solve the underlying problem. It merily procrastinates
it. When the timekeeper update is deferred long enough that the unsigned
multiplication overflows, then time going backwards is observable again.

It does neither solve the issue of clocksources with a small counter width
which will wrap around possibly several times and cause random time stamps
to be generated. But those are usually not found on systems used for
virtualization, so this is likely a non issue.

I took the liberty to claim authorship for this simply because
analyzing all callsites and writing the changelog took substantially
more time than just making the simple s/s64/u64/ change and ignore the
rest.

Fixes: 6bd58f09e1d8 ("time: Add cycles to nanoseconds translation")
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Reported-by: Liav Rehana <liavr@mellanox.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Parit Bhargava <prarit@redhat.com>
Cc: Laurent Vivier <lvivier@redhat.com>
Cc: "Christopher S. Hall" <christopher.s.hall@intel.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20161208204228.688545601@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timekeeping.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index b2286e94c934..bfe589e929e8 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -299,10 +299,10 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
 static inline u32 arch_gettimeoffset(void) { return 0; }
 #endif
 
-static inline s64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
+static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
 					  cycle_t delta)
 {
-	s64 nsec;
+	u64 nsec;
 
 	nsec = delta * tkr->mult + tkr->xtime_nsec;
 	nsec >>= tkr->shift;
-- 
cgit 


From acc89612a70e370a5640fd77a83f15b7b94d85e4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 8 Dec 2016 20:49:34 +0000
Subject: timekeeping: Make the conversion call chain consistently unsigned

Propagating a unsigned value through signed variables and functions makes
absolutely no sense and is just prone to (re)introduce subtle signed
vs. unsigned issues as happened recently.

Clean it up.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Parit Bhargava <prarit@redhat.com>
Cc: Laurent Vivier <lvivier@redhat.com>
Cc: "Christopher S. Hall" <christopher.s.hall@intel.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Liav Rehana <liavr@mellanox.com>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/20161208204228.765843099@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timekeeping.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index bfe589e929e8..5244821643a4 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -311,7 +311,7 @@ static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
 	return nsec + arch_gettimeoffset();
 }
 
-static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
+static inline u64 timekeeping_get_ns(struct tk_read_base *tkr)
 {
 	cycle_t delta;
 
@@ -319,8 +319,8 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
 	return timekeeping_delta_to_ns(tkr, delta);
 }
 
-static inline s64 timekeeping_cycles_to_ns(struct tk_read_base *tkr,
-					    cycle_t cycles)
+static inline u64 timekeeping_cycles_to_ns(struct tk_read_base *tkr,
+					   cycle_t cycles)
 {
 	cycle_t delta;
 
@@ -652,7 +652,7 @@ static void timekeeping_forward_now(struct timekeeper *tk)
 {
 	struct clocksource *clock = tk->tkr_mono.clock;
 	cycle_t cycle_now, delta;
-	s64 nsec;
+	u64 nsec;
 
 	cycle_now = tk->tkr_mono.read(clock);
 	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
@@ -681,7 +681,7 @@ int __getnstimeofday64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned long seq;
-	s64 nsecs = 0;
+	u64 nsecs;
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
@@ -721,7 +721,7 @@ ktime_t ktime_get(void)
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned int seq;
 	ktime_t base;
-	s64 nsecs;
+	u64 nsecs;
 
 	WARN_ON(timekeeping_suspended);
 
@@ -764,7 +764,7 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs)
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned int seq;
 	ktime_t base, *offset = offsets[offs];
-	s64 nsecs;
+	u64 nsecs;
 
 	WARN_ON(timekeeping_suspended);
 
@@ -808,7 +808,7 @@ ktime_t ktime_get_raw(void)
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned int seq;
 	ktime_t base;
-	s64 nsecs;
+	u64 nsecs;
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
@@ -833,8 +833,8 @@ void ktime_get_ts64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	struct timespec64 tomono;
-	s64 nsec;
 	unsigned int seq;
+	u64 nsec;
 
 	WARN_ON(timekeeping_suspended);
 
@@ -922,8 +922,8 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 	unsigned long seq;
 	ktime_t base_raw;
 	ktime_t base_real;
-	s64 nsec_raw;
-	s64 nsec_real;
+	u64 nsec_raw;
+	u64 nsec_real;
 	cycle_t now;
 
 	WARN_ON_ONCE(timekeeping_suspended);
@@ -1081,7 +1081,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
 	cycle_t cycles, now, interval_start;
 	unsigned int clock_was_set_seq = 0;
 	ktime_t base_real, base_raw;
-	s64 nsec_real, nsec_raw;
+	u64 nsec_real, nsec_raw;
 	u8 cs_was_changed_seq;
 	unsigned long seq;
 	bool do_interp;
@@ -1394,7 +1394,7 @@ void getrawmonotonic64(struct timespec64 *ts)
 	struct timekeeper *tk = &tk_core.timekeeper;
 	struct timespec64 ts64;
 	unsigned long seq;
-	s64 nsecs;
+	u64 nsecs;
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-- 
cgit 


From cbd99e3b289e43000c29aa4aa9b94b394cdc68bd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 8 Dec 2016 20:49:36 +0000
Subject: timekeeping: Get rid of pointless typecasts

cycle_t is defined as u64, so casting it to u64 is a pointless and
confusing exercise. cycle_t should simply go away and be replaced with a
plain u64 to avoid further confusion.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Parit Bhargava <prarit@redhat.com>
Cc: Laurent Vivier <lvivier@redhat.com>
Cc: "Christopher S. Hall" <christopher.s.hall@intel.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Liav Rehana <liavr@mellanox.com>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/20161208204228.844699737@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timekeeping.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5244821643a4..82e1b5cbebbb 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -258,10 +258,9 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	tk->cycle_interval = interval;
 
 	/* Go back from cycles -> shifted ns */
-	tk->xtime_interval = (u64) interval * clock->mult;
+	tk->xtime_interval = interval * clock->mult;
 	tk->xtime_remainder = ntpinterval - tk->xtime_interval;
-	tk->raw_interval =
-		((u64) interval * clock->mult) >> clock->shift;
+	tk->raw_interval = (interval * clock->mult) >> clock->shift;
 
 	 /* if changing clocks, convert xtime_nsec shift units */
 	if (old_clock) {
-- 
cgit 


From c029a2bec66e42e57538cb65e28618baf6a4b311 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 8 Dec 2016 20:49:38 +0000
Subject: timekeeping: Use mul_u64_u32_shr() instead of open coding it

The resume code must deal with a clocksource delta which is potentially big
enough to overflow the 64bit mult.

Replace the open coded handling with the proper function.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Parit Bhargava <prarit@redhat.com>
Cc: Laurent Vivier <lvivier@redhat.com>
Cc: "Christopher S. Hall" <christopher.s.hall@intel.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Liav Rehana <liavr@mellanox.com>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/20161208204228.921674404@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/timekeeping.c | 26 +++++---------------------
 1 file changed, 5 insertions(+), 21 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 82e1b5cbebbb..da233cdf89b0 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1644,7 +1644,7 @@ void timekeeping_resume(void)
 	struct clocksource *clock = tk->tkr_mono.clock;
 	unsigned long flags;
 	struct timespec64 ts_new, ts_delta;
-	cycle_t cycle_now, cycle_delta;
+	cycle_t cycle_now;
 
 	sleeptime_injected = false;
 	read_persistent_clock64(&ts_new);
@@ -1670,27 +1670,11 @@ void timekeeping_resume(void)
 	cycle_now = tk->tkr_mono.read(clock);
 	if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
 		cycle_now > tk->tkr_mono.cycle_last) {
-		u64 num, max = ULLONG_MAX;
-		u32 mult = clock->mult;
-		u32 shift = clock->shift;
-		s64 nsec = 0;
-
-		cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
-						tk->tkr_mono.mask);
-
-		/*
-		 * "cycle_delta * mutl" may cause 64 bits overflow, if the
-		 * suspended time is too long. In that case we need do the
-		 * 64 bits math carefully
-		 */
-		do_div(max, mult);
-		if (cycle_delta > max) {
-			num = div64_u64(cycle_delta, max);
-			nsec = (((u64) max * mult) >> shift) * num;
-			cycle_delta -= num * max;
-		}
-		nsec += ((u64) cycle_delta * mult) >> shift;
+		u64 nsec, cyc_delta;
 
+		cyc_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
+					      tk->tkr_mono.mask);
+		nsec = mul_u64_u32_shr(cyc_delta, clock->mult, clock->shift);
 		ts_delta = ns_to_timespec64(nsec);
 		sleeptime_injected = true;
 	} else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
-- 
cgit