author    Linus Torvalds <torvalds@linux-foundation.org>  2019-09-16 17:25:49 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2019-09-16 17:25:49 -0700
commit    7e67a859997aad47727aff9c5a32e160da079ce3
tree      96f53425c2834de5b3276d7598782ab6412e4d5e  /kernel/sched/sched.h
parent    772c1d06bd402f7ee72c61a18c2db74cd74b6758
parent    563c4f85f9f0d63b712081d5b4522152cdcb8b6b
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
- MAINTAINERS: Add Mark Rutland as perf submaintainer, Juri Lelli and
Vincent Guittot as scheduler submaintainers. Add Dietmar Eggemann,
Steven Rostedt, Ben Segall and Mel Gorman as scheduler reviewers.
As perf and the scheduler are getting bigger and more complex,
document the status quo of current responsibilities and interests,
and spread the review pain^H^H^H^H fun via an increase in the Cc:
linecount generated by scripts/get_maintainer.pl. :-)
- Add another series of patches that brings the -rt (PREEMPT_RT) tree
  closer to mainline: split the monolithic CONFIG_PREEMPT dependencies
  into a new CONFIG_PREEMPTION category that will allow the eventual
  introduction of CONFIG_PREEMPT_RT (see the first sketch after this
  list). Still a few hundred more patches to go, though.
- Extend the CPU cgroup controller with uclamp.min and uclamp.max to
  allow finer-grained shaping of CPU bandwidth usage (see the second
  sketch after this list).
- Micro-optimize energy-aware wake-ups from O(CPUS^2) to O(CPUS).
- Improve the behavior of high CPU count, high thread count
applications running under cpu.cfs_quota_us constraints.
- Improve balancing with SCHED_IDLE (SCHED_BATCH) tasks present.
- Improve the NUMA locality of housekeeping CPU allocation for CPU
  isolation.
- Fix deadline scheduler bandwidth calculations and logic when cpusets
  rebuild the topology, or when a deadline-throttled task's CPU is
  taken offline.
- Convert the cpuset_mutex to percpu_rwsem, to allow it to be used from
  setscheduler() system calls without creating global serialization
  (see the third sketch after this list). Add new synchronization
  between cpuset topology-changing events and the deadline acceptance
  tests in setscheduler(), which were broken before.
- Rework the active_mm state machine to be less confusing and more
  efficient.
- Rework (simplify) the pick_next_task() slowpath.
- Improve load-balancing on AMD EPYC systems.
- ... and misc cleanups, smaller fixes and improvements - please see
the Git log for more details.
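
The PREEMPT_RT preparation boils down to a mechanical but important
pattern, visible in the sched.h diff further below: preemption-dependent
code now tests CONFIG_PREEMPTION rather than CONFIG_PREEMPT. A minimal
sketch of that pattern, assuming (as the series intends) that plain
CONFIG_PREEMPT and a future CONFIG_PREEMPT_RT both select
CONFIG_PREEMPTION; the function here is purely hypothetical:

```c
/*
 * Illustrative only: the guard pattern mirrors the series,
 * the function itself is hypothetical.
 */
#ifdef CONFIG_PREEMPTION
/* Compiled in for CONFIG_PREEMPT today, CONFIG_PREEMPT_RT later. */
static void example_resched_path(void)
{
	/* ... work that only matters on preemptible kernels ... */
}
#else
static inline void example_resched_path(void) { }
#endif
```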
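On the uclamp side, the new cgroup attributes surface as cpu.uclamp.min
and cpu.uclamp.max files; per the task_group comment in the diff below,
they take percentage values with two-decimal precision. A minimal
user-space sketch, assuming a cgroup-v2 hierarchy mounted at
/sys/fs/cgroup and a pre-created group named "app" (both assumptions):

```c
#include <stdio.h>

/* Hypothetical path: depends on the cgroup-v2 mount point and group. */
#define UCLAMP_MIN_FILE "/sys/fs/cgroup/app/cpu.uclamp.min"

int main(void)
{
	FILE *f = fopen(UCLAMP_MIN_FILE, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Request a minimum of 25% CPU capacity for this group's tasks. */
	fprintf(f, "25.00\n");
	return fclose(f) ? 1 : 0;
}
```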
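The cpuset_mutex conversion relies on the kernel's percpu-rwsem
primitive, which trades a cheap, uncontended read side (the frequent
setscheduler() path) for an expensive write side (the rare
topology-change path). A schematic sketch using the real percpu-rwsem
API, with hypothetical function names standing in for the cpuset code:

```c
#include <linux/percpu-rwsem.h>

static DEFINE_STATIC_PERCPU_RWSEM(example_rwsem);

/* Hot path, e.g. deadline acceptance tests in setscheduler():
 * readers do not serialize against each other. */
static void example_read_side(void)
{
	percpu_down_read(&example_rwsem);
	/* ... validate against state that writers keep stable ... */
	percpu_up_read(&example_rwsem);
}

/* Rare path, e.g. cpuset topology rebuilds: the writer waits
 * until every reader has drained. */
static void example_write_side(void)
{
	percpu_down_write(&example_rwsem);
	/* ... mutate the topology state ... */
	percpu_up_write(&example_rwsem);
}
```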
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (53 commits)
sched/psi: Correct overly pessimistic size calculation
sched/fair: Speed-up energy-aware wake-ups
sched/uclamp: Always use 'enum uclamp_id' for clamp_id values
sched/uclamp: Update CPU's refcount on TG's clamp changes
sched/uclamp: Use TG's clamps to restrict TASK's clamps
sched/uclamp: Propagate system defaults to the root group
sched/uclamp: Propagate parent clamps
sched/uclamp: Extend CPU's cgroup controller
sched/topology: Improve load balancing on AMD EPYC systems
arch, ia64: Make NUMA select SMP
sched, perf: MAINTAINERS update, add submaintainers and reviewers
sched/fair: Use rq_lock/unlock in online_fair_sched_group
cpufreq: schedutil: fix equation in comment
sched: Rework pick_next_task() slow-path
sched: Allow put_prev_task() to drop rq->lock
sched/fair: Expose newidle_balance()
sched: Add task_struct pointer to sched_class::set_curr_task
sched: Rework CPU hotplug task selection
sched/{rt,deadline}: Fix set_next_task vs pick_next_task
sched: Fix kerneldoc comment for ia64_set_curr_task
...
Diffstat (limited to 'kernel/sched/sched.h')
kernel/sched/sched.h | 63
1 file changed, 39 insertions(+), 24 deletions(-)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 802b1f3405f2..b3cb895d14a2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -335,8 +335,6 @@ struct cfs_bandwidth {
 	u64			quota;
 	u64			runtime;
 	s64			hierarchical_quota;
-	u64			runtime_expires;
-	int			expires_seq;
 
 	u8			idle;
 	u8			period_active;
@@ -393,6 +391,16 @@ struct task_group {
 #endif
 
 	struct cfs_bandwidth	cfs_bandwidth;
+
+#ifdef CONFIG_UCLAMP_TASK_GROUP
+	/* The two decimal precision [%] value requested from user-space */
+	unsigned int		uclamp_pct[UCLAMP_CNT];
+	/* Clamp values requested for a task group */
+	struct uclamp_se	uclamp_req[UCLAMP_CNT];
+	/* Effective clamp values used for a task group */
+	struct uclamp_se	uclamp[UCLAMP_CNT];
+#endif
+
 };
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -483,7 +491,8 @@ struct cfs_rq {
 	struct load_weight	load;
 	unsigned long		runnable_weight;
 	unsigned int		nr_running;
-	unsigned int		h_nr_running;
+	unsigned int		h_nr_running;      /* SCHED_{NORMAL,BATCH,IDLE} */
+	unsigned int		idle_h_nr_running; /* SCHED_IDLE */
 
 	u64			exec_clock;
 	u64			min_vruntime;
@@ -556,8 +565,6 @@ struct cfs_rq {
 
 #ifdef CONFIG_CFS_BANDWIDTH
 	int			runtime_enabled;
-	int			expires_seq;
-	u64			runtime_expires;
 	s64			runtime_remaining;
 
 	u64			throttled_clock;
@@ -777,9 +784,6 @@ struct root_domain {
 	struct perf_domain __rcu *pd;
 };
 
-extern struct root_domain def_root_domain;
-extern struct mutex sched_domains_mutex;
-
 extern void init_defrootdomain(void);
 extern int sched_init_domains(const struct cpumask *cpu_map);
 extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
@@ -1261,16 +1265,18 @@ enum numa_topology_type {
 extern enum numa_topology_type sched_numa_topology_type;
 extern int sched_max_numa_distance;
 extern bool find_numa_distance(int distance);
-#endif
-
-#ifdef CONFIG_NUMA
 extern void sched_init_numa(void);
 extern void sched_domains_numa_masks_set(unsigned int cpu);
 extern void sched_domains_numa_masks_clear(unsigned int cpu);
+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
 #else
 static inline void sched_init_numa(void) { }
 static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
 static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
+{
+	return nr_cpu_ids;
+}
 #endif
@@ -1449,10 +1455,14 @@ static inline void unregister_sched_domain_sysctl(void)
 {
 }
 #endif
 
+extern int newidle_balance(struct rq *this_rq, struct rq_flags *rf);
+
 #else
 
 static inline void sched_ttwu_pending(void) { }
 
+static inline int newidle_balance(struct rq *this_rq, struct rq_flags *rf) { return 0; }
+
 #endif /* CONFIG_SMP */
 
 #include "stats.h"
@@ -1700,17 +1710,21 @@ struct sched_class {
 	void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
 
 	/*
-	 * It is the responsibility of the pick_next_task() method that will
-	 * return the next task to call put_prev_task() on the @prev task or
-	 * something equivalent.
+	 * Both @prev and @rf are optional and may be NULL, in which case the
+	 * caller must already have invoked put_prev_task(rq, prev, rf).
+	 *
+	 * Otherwise it is the responsibility of the pick_next_task() to call
+	 * put_prev_task() on the @prev task or something equivalent, IFF it
+	 * returns a next task.
 	 *
-	 * May return RETRY_TASK when it finds a higher prio class has runnable
-	 * tasks.
+	 * In that case (@rf != NULL) it may return RETRY_TASK when it finds a
+	 * higher prio class has runnable tasks.
 	 */
 	struct task_struct * (*pick_next_task)(struct rq *rq,
					       struct task_struct *prev,
					       struct rq_flags *rf);
-	void (*put_prev_task)(struct rq *rq, struct task_struct *p);
+	void (*put_prev_task)(struct rq *rq, struct task_struct *p, struct rq_flags *rf);
+	void (*set_next_task)(struct rq *rq, struct task_struct *p);
 
 #ifdef CONFIG_SMP
 	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
@@ -1725,7 +1739,6 @@ struct sched_class {
 	void (*rq_offline)(struct rq *rq);
 #endif
 
-	void (*set_curr_task)(struct rq *rq);
 	void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
 	void (*task_fork)(struct task_struct *p);
 	void (*task_dead)(struct task_struct *p);
@@ -1755,12 +1768,14 @@
 static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
-	prev->sched_class->put_prev_task(rq, prev);
+	WARN_ON_ONCE(rq->curr != prev);
+	prev->sched_class->put_prev_task(rq, prev, NULL);
 }
 
-static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
+static inline void set_next_task(struct rq *rq, struct task_struct *next)
 {
-	curr->sched_class->set_curr_task(rq);
+	WARN_ON_ONCE(rq->curr != next);
+	next->sched_class->set_next_task(rq, next);
 }
 
 #ifdef CONFIG_SMP
@@ -1943,7 +1958,7 @@ unsigned long arch_scale_freq_capacity(int cpu)
 #endif
 
 #ifdef CONFIG_SMP
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 
 static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@ -1995,7 +2010,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	return ret;
 }
 
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 /*
  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
@@ -2266,7 +2281,7 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
 #endif /* CONFIG_CPU_FREQ */
 
 #ifdef CONFIG_UCLAMP_TASK
-unsigned int uclamp_eff_value(struct task_struct *p, unsigned int clamp_id);
+enum uclamp_id uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
 
 static __always_inline
 unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
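
Tying the sched_class changes together: set_curr_task() is gone,
set_next_task() and the three-argument put_prev_task() take its place,
and the pick_next_task() comment above defines the new calling
convention. A simplified sketch of the reworked slow path that
convention enables — for_each_class() and the helpers are real sched.h
names, but the function body is illustrative rather than the literal
kernel/sched/core.c implementation:

```c
/*
 * Illustrative slow path: put @prev exactly once, then let each class
 * pick with @prev == NULL and @rf == NULL, so no class has to call
 * put_prev_task() itself.
 */
static struct task_struct *pick_next_task_slowpath(struct rq *rq,
						   struct task_struct *prev,
						   struct rq_flags *rf)
{
	const struct sched_class *class;
	struct task_struct *p;

	put_prev_task(rq, prev);

	for_each_class(class) {
		p = class->pick_next_task(rq, NULL, NULL);
		if (p)
			return p;
	}

	BUG();	/* The idle class should always have a runnable task. */
}
```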