author    Linus Torvalds <torvalds@linux-foundation.org>  2019-09-16 17:25:49 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2019-09-16 17:25:49 -0700
commit    7e67a859997aad47727aff9c5a32e160da079ce3
tree      96f53425c2834de5b3276d7598782ab6412e4d5e  /kernel/sched/sched.h
parent    772c1d06bd402f7ee72c61a18c2db74cd74b6758
parent    563c4f85f9f0d63b712081d5b4522152cdcb8b6b
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
- MAINTAINERS: Add Mark Rutland as perf submaintainer, Juri Lelli and
Vincent Guittot as scheduler submaintainers. Add Dietmar Eggemann,
Steven Rostedt, Ben Segall and Mel Gorman as scheduler reviewers.
As perf and the scheduler are getting bigger and more complex,
document the status quo of current responsibilities and interests,
and spread the review pain^H^H^H^H fun via an increase in the Cc:
linecount generated by scripts/get_maintainer.pl. :-)
- Add another series of patches that brings the -rt (PREEMPT_RT) tree
  closer to mainline: split the monolithic CONFIG_PREEMPT dependencies
  into a new CONFIG_PREEMPTION category that will allow the eventual
  introduction of CONFIG_PREEMPT_RT (see the first sketch after this
  list). Still a few hundred more patches to go, though.
- Extend the CPU cgroup controller with uclamp.min and uclamp.max to
  allow finer-grained shaping of CPU bandwidth usage (see the second
  sketch after this list).
- Micro-optimize energy-aware wake-ups from O(CPUS^2) to O(CPUS).
- Improve the behavior of high CPU count, high thread count
applications running under cpu.cfs_quota_us constraints.
- Improve balancing with SCHED_IDLE (SCHED_BATCH) tasks present.
- Improve the NUMA locality of housekeeping CPU allocation for CPU
  isolation.
- Fix deadline scheduler bandwidth calculations and logic when cpusets
  rebuild the topology, or when a deadline-throttled task's CPU is
  taken offline.
- Convert the cpuset_mutex to percpu_rwsem, to allow it to be used from
  setscheduler() system calls without creating global serialization
  (see the third sketch after this list). Add new synchronization
  between cpuset topology-changing events and the deadline acceptance
  tests in setscheduler(), which were broken before.
- Rework the active_mm state machine to be less confusing and more
  efficient.
- Rework (simplify) the pick_next_task() slowpath.
- Improve load-balancing on AMD EPYC systems.
- ... and misc cleanups, smaller fixes and improvements - please see
the Git log for more details.
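
The PREEMPT_RT preparation boils down to a mechanical but important
pattern, visible in the sched.h diff further below: preemption-dependent
code now tests CONFIG_PREEMPTION rather than CONFIG_PREEMPT. A minimal
sketch of that pattern, assuming (as the series intends) that plain
CONFIG_PREEMPT and a future CONFIG_PREEMPT_RT both select
CONFIG_PREEMPTION; the function here is purely hypothetical:

```c
/*
 * Illustrative only: the guard pattern mirrors the series,
 * the function itself is hypothetical.
 */
#ifdef CONFIG_PREEMPTION
/* Compiled in for CONFIG_PREEMPT today, CONFIG_PREEMPT_RT later. */
static void example_resched_path(void)
{
	/* ... work that only matters on preemptible kernels ... */
}
#else
static inline void example_resched_path(void) { }
#endif
```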
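On the uclamp side, the new cgroup attributes surface as cpu.uclamp.min
and cpu.uclamp.max files; per the task_group comment in the diff below,
they take percentage values with two-decimal precision. A minimal
user-space sketch, assuming a cgroup-v2 hierarchy mounted at
/sys/fs/cgroup and a pre-created group named "app" (both assumptions):

```c
#include <stdio.h>

/* Hypothetical path: depends on the cgroup-v2 mount point and group. */
#define UCLAMP_MIN_FILE "/sys/fs/cgroup/app/cpu.uclamp.min"

int main(void)
{
	FILE *f = fopen(UCLAMP_MIN_FILE, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Request a minimum of 25% CPU capacity for this group's tasks. */
	fprintf(f, "25.00\n");
	return fclose(f) ? 1 : 0;
}
```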
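The cpuset_mutex conversion relies on the kernel's percpu-rwsem
primitive, which trades a cheap, uncontended read side (the frequent
setscheduler() path) for an expensive write side (the rare
topology-change path). A schematic sketch using the real percpu-rwsem
API, with hypothetical function names standing in for the cpuset code:

```c
#include <linux/percpu-rwsem.h>

static DEFINE_STATIC_PERCPU_RWSEM(example_rwsem);

/* Hot path, e.g. deadline acceptance tests in setscheduler():
 * readers do not serialize against each other. */
static void example_read_side(void)
{
	percpu_down_read(&example_rwsem);
	/* ... validate against state that writers keep stable ... */
	percpu_up_read(&example_rwsem);
}

/* Rare path, e.g. cpuset topology rebuilds: the writer waits
 * until every reader has drained. */
static void example_write_side(void)
{
	percpu_down_write(&example_rwsem);
	/* ... mutate the topology state ... */
	percpu_up_write(&example_rwsem);
}
```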
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (53 commits)
sched/psi: Correct overly pessimistic size calculation
sched/fair: Speed-up energy-aware wake-ups
sched/uclamp: Always use 'enum uclamp_id' for clamp_id values
sched/uclamp: Update CPU's refcount on TG's clamp changes
sched/uclamp: Use TG's clamps to restrict TASK's clamps
sched/uclamp: Propagate system defaults to the root group
sched/uclamp: Propagate parent clamps
sched/uclamp: Extend CPU's cgroup controller
sched/topology: Improve load balancing on AMD EPYC systems
arch, ia64: Make NUMA select SMP
sched, perf: MAINTAINERS update, add submaintainers and reviewers
sched/fair: Use rq_lock/unlock in online_fair_sched_group
cpufreq: schedutil: fix equation in comment
sched: Rework pick_next_task() slow-path
sched: Allow put_prev_task() to drop rq->lock
sched/fair: Expose newidle_balance()
sched: Add task_struct pointer to sched_class::set_curr_task
sched: Rework CPU hotplug task selection
sched/{rt,deadline}: Fix set_next_task vs pick_next_task
sched: Fix kerneldoc comment for ia64_set_curr_task
...
Diffstat (limited to 'kernel/sched/sched.h')
kernel/sched/sched.h | 63
1 file changed, 39 insertions(+), 24 deletions(-)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 802b1f3405f2..b3cb895d14a2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -335,8 +335,6 @@ struct cfs_bandwidth {
 	u64			quota;
 	u64			runtime;
 	s64			hierarchical_quota;
-	u64			runtime_expires;
-	int			expires_seq;
 
 	u8			idle;
 	u8			period_active;
@@ -393,6 +391,16 @@ struct task_group {
 #endif
 
 	struct cfs_bandwidth	cfs_bandwidth;
+
+#ifdef CONFIG_UCLAMP_TASK_GROUP
+	/* The two decimal precision [%] value requested from user-space */
+	unsigned int		uclamp_pct[UCLAMP_CNT];
+	/* Clamp values requested for a task group */
+	struct uclamp_se	uclamp_req[UCLAMP_CNT];
+	/* Effective clamp values used for a task group */
+	struct uclamp_se	uclamp[UCLAMP_CNT];
+#endif
+
 };
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -483,7 +491,8 @@ struct cfs_rq {
 	struct load_weight	load;
 	unsigned long		runnable_weight;
 	unsigned int		nr_running;
-	unsigned int		h_nr_running;
+	unsigned int		h_nr_running;      /* SCHED_{NORMAL,BATCH,IDLE} */
+	unsigned int		idle_h_nr_running; /* SCHED_IDLE */
 
 	u64			exec_clock;
 	u64			min_vruntime;
@@ -556,8 +565,6 @@ struct cfs_rq {
 
 #ifdef CONFIG_CFS_BANDWIDTH
 	int			runtime_enabled;
-	int			expires_seq;
-	u64			runtime_expires;
 	s64			runtime_remaining;
 
 	u64			throttled_clock;
@@ -777,9 +784,6 @@ struct root_domain {
 	struct perf_domain __rcu *pd;
 };
 
-extern struct root_domain def_root_domain;
-extern struct mutex sched_domains_mutex;
-
 extern void init_defrootdomain(void);
 extern int sched_init_domains(const struct cpumask *cpu_map);
 extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
@@ -1261,16 +1265,18 @@ enum numa_topology_type {
 extern enum numa_topology_type sched_numa_topology_type;
 extern int sched_max_numa_distance;
 extern bool find_numa_distance(int distance);
-#endif
-
-#ifdef CONFIG_NUMA
 extern void sched_init_numa(void);
 extern void sched_domains_numa_masks_set(unsigned int cpu);
 extern void sched_domains_numa_masks_clear(unsigned int cpu);
+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
 #else
 static inline void sched_init_numa(void) { }
 static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
 static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
+{
+	return nr_cpu_ids;
+}
 #endif
@@ -1449,10 +1455,14 @@ static inline void unregister_sched_domain_sysctl(void)
 {
 }
 #endif
 
+extern int newidle_balance(struct rq *this_rq, struct rq_flags *rf);
+
 #else
 
 static inline void sched_ttwu_pending(void) { }
 
+static inline int newidle_balance(struct rq *this_rq, struct rq_flags *rf) { return 0; }
+
 #endif /* CONFIG_SMP */
 
 #include "stats.h"
@@ -1700,17 +1710,21 @@ struct sched_class {
 	void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
 
 	/*
-	 * It is the responsibility of the pick_next_task() method that will
-	 * return the next task to call put_prev_task() on the @prev task or
-	 * something equivalent.
+	 * Both @prev and @rf are optional and may be NULL, in which case the
+	 * caller must already have invoked put_prev_task(rq, prev, rf).
+	 *
+	 * Otherwise it is the responsibility of the pick_next_task() to call
+	 * put_prev_task() on the @prev task or something equivalent, IFF it
+	 * returns a next task.
 	 *
-	 * May return RETRY_TASK when it finds a higher prio class has runnable
-	 * tasks.
+	 * In that case (@rf != NULL) it may return RETRY_TASK when it finds a
+	 * higher prio class has runnable tasks.
 	 */
 	struct task_struct * (*pick_next_task)(struct rq *rq,
					       struct task_struct *prev,
					       struct rq_flags *rf);
-	void (*put_prev_task)(struct rq *rq, struct task_struct *p);
+	void (*put_prev_task)(struct rq *rq, struct task_struct *p, struct rq_flags *rf);
+	void (*set_next_task)(struct rq *rq, struct task_struct *p);
 
 #ifdef CONFIG_SMP
 	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
@@ -1725,7 +1739,6 @@ struct sched_class {
 	void (*rq_offline)(struct rq *rq);
 #endif
 
-	void (*set_curr_task)(struct rq *rq);
 	void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
 	void (*task_fork)(struct task_struct *p);
 	void (*task_dead)(struct task_struct *p);
@@ -1755,12 +1768,14 @@
 static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
-	prev->sched_class->put_prev_task(rq, prev);
+	WARN_ON_ONCE(rq->curr != prev);
+	prev->sched_class->put_prev_task(rq, prev, NULL);
 }
 
-static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
+static inline void set_next_task(struct rq *rq, struct task_struct *next)
 {
-	curr->sched_class->set_curr_task(rq);
+	WARN_ON_ONCE(rq->curr != next);
+	next->sched_class->set_next_task(rq, next);
 }
 
 #ifdef CONFIG_SMP
@@ -1943,7 +1958,7 @@ unsigned long arch_scale_freq_capacity(int cpu)
 #endif
 
 #ifdef CONFIG_SMP
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 
 static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@ -1995,7 +2010,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	return ret;
 }
 
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 /*
  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
@@ -2266,7 +2281,7 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
 #endif /* CONFIG_CPU_FREQ */
 
 #ifdef CONFIG_UCLAMP_TASK
-unsigned int uclamp_eff_value(struct task_struct *p, unsigned int clamp_id);
+enum uclamp_id uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
 
 static __always_inline
 unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
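
Tying the sched_class changes together: set_curr_task() is gone,
set_next_task() and the three-argument put_prev_task() take its place,
and the pick_next_task() comment above defines the new calling
convention. A simplified sketch of the reworked slow path that
convention enables — for_each_class() and the helpers are real sched.h
names, but the function body is illustrative rather than the literal
kernel/sched/core.c implementation:

```c
/*
 * Illustrative slow path: put @prev exactly once, then let each class
 * pick with @prev == NULL and @rf == NULL, so no class has to call
 * put_prev_task() itself.
 */
static struct task_struct *pick_next_task_slowpath(struct rq *rq,
						   struct task_struct *prev,
						   struct rq_flags *rf)
{
	const struct sched_class *class;
	struct task_struct *p;

	put_prev_task(rq, prev);

	for_each_class(class) {
		p = class->pick_next_task(rq, NULL, NULL);
		if (p)
			return p;
	}

	BUG();	/* The idle class should always have a runnable task. */
}
```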