From 60a9ce57e7c5ac1df3a39fb941022bbfa40c0862 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 29 Jun 2017 19:15:09 +0200
Subject: sched/cputime: Rename vtime fields

The current "snapshot" based naming on vtime fields suggests we record
some past event but that's a low level picture of their actual purpose
which comes out blurry. The real point of these fields is to run a basic
state machine that tracks down cputime entry while switching between
contexts.

So lets reflect that with more meaningful names.

Tested-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1498756511-11714-4-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c4ca7433d9d..ff001646549e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -689,7 +689,7 @@ struct task_struct {
 	struct prev_cputime		prev_cputime;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 	seqcount_t			vtime_seqcount;
-	unsigned long long		vtime_snap;
+	unsigned long long		vtime_starttime;
 	enum {
 		/* Task is sleeping or running in a CPU with VTIME inactive: */
 		VTIME_INACTIVE = 0,
@@ -697,7 +697,7 @@ struct task_struct {
 		VTIME_USER,
 		/* Task runs in kernelspace in a CPU with VTIME active: */
 		VTIME_SYS,
-	} vtime_snap_whence;
+	} vtime_state;
 #endif
 
 #ifdef CONFIG_NO_HZ_FULL
-- 
cgit 


From bac5b6b6b11560f323e71d0ebac4061cfe5f56c0 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 29 Jun 2017 19:15:10 +0200
Subject: sched/cputime: Move the vtime task fields to their own struct

We are about to add vtime accumulation fields to the task struct. Let's
avoid more bloatification and gather vtime information to their own
struct.

Tested-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1498756511-11714-5-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ff001646549e..eeff8a024f0c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -223,6 +223,21 @@ struct task_cputime {
 #define prof_exp			stime
 #define sched_exp			sum_exec_runtime
 
+enum vtime_state {
+	/* Task is sleeping or running in a CPU with VTIME inactive: */
+	VTIME_INACTIVE = 0,
+	/* Task runs in userspace in a CPU with VTIME active: */
+	VTIME_USER,
+	/* Task runs in kernelspace in a CPU with VTIME active: */
+	VTIME_SYS,
+};
+
+struct vtime {
+	seqcount_t		seqcount;
+	unsigned long long	starttime;
+	enum vtime_state	state;
+};
+
 struct sched_info {
 #ifdef CONFIG_SCHED_INFO
 	/* Cumulative counters: */
@@ -688,16 +703,7 @@ struct task_struct {
 	u64				gtime;
 	struct prev_cputime		prev_cputime;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-	seqcount_t			vtime_seqcount;
-	unsigned long long		vtime_starttime;
-	enum {
-		/* Task is sleeping or running in a CPU with VTIME inactive: */
-		VTIME_INACTIVE = 0,
-		/* Task runs in userspace in a CPU with VTIME active: */
-		VTIME_USER,
-		/* Task runs in kernelspace in a CPU with VTIME active: */
-		VTIME_SYS,
-	} vtime_state;
+	struct vtime			vtime;
 #endif
 
 #ifdef CONFIG_NO_HZ_FULL
-- 
cgit 


From 2a42eb9594a1480b4ead9e036e06ee1290e5fa6d Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Thu, 29 Jun 2017 19:15:11 +0200
Subject: sched/cputime: Accumulate vtime on top of nsec clocksource

Currently the cputime source used by vtime is jiffies. When we cross
a context boundary and jiffies have changed since the last snapshot, the
pending cputime is accounted to the switching out context.

This system works ok if the ticks are not aligned across CPUs. If they
instead are aligned (ie: all fire at the same time) and the CPUs run in
userspace, the jiffies change is only observed on tick exit and therefore
the user cputime is accounted as system cputime. This is because the
CPU that maintains timekeeping fires its tick at the same time as the
others. It updates jiffies in the middle of the tick and the other CPUs
see that update on IRQ exit:

    CPU 0 (timekeeper)                  CPU 1
    -------------------              -------------
                      jiffies = N
    ...                              run in userspace for a jiffy
    tick entry                       tick entry (sees jiffies = N)
    set jiffies = N + 1
    tick exit                        tick exit (sees jiffies = N + 1)
                                                account 1 jiffy as stime

Fix this with using a nanosec clock source instead of jiffies. The
cputime is then accumulated and flushed everytime the pending delta
reaches a jiffy in order to mitigate the accounting overhead.

[ fweisbec: changelog, rebase on struct vtime, field renames, add delta
  on cputime readers, keep idle vtime as-is (low overhead accounting),
  harmonize clock sources. ]

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Luiz Capitulino <lcapitulino@redhat.com>
Tested-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1498756511-11714-6-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index eeff8a024f0c..4818126c5153 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -236,6 +236,9 @@ struct vtime {
 	seqcount_t		seqcount;
 	unsigned long long	starttime;
 	enum vtime_state	state;
+	u64			utime;
+	u64			stime;
+	u64			gtime;
 };
 
 struct sched_info {
-- 
cgit