author     Linus Torvalds <torvalds@linux-foundation.org>   2015-11-03 18:03:50 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-11-03 18:03:50 -0800
commit     53528695ff6d8b77011bc818407c13e30914a946
tree       04acd099c5759bf6f1d728c5415f574d572c6872 /include
parent     b831ef2cad979912850e34f82415c0c5d59de8cb
parent     e73e85f0593832aa583b252f9a16cf90ed6d30fa
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes from Ingo Molnar:
 "The main changes in this cycle were:

  - sched/fair load tracking fixes and cleanups (Byungchul Park)

  - Make load tracking frequency scale invariant (Dietmar Eggemann)

  - sched/deadline updates (Juri Lelli)

  - stop machine fixes, cleanups and enhancements for bugs triggered by
    CPU hotplug stress testing (Oleg Nesterov)

  - scheduler preemption code rework: remove PREEMPT_ACTIVE and related
    cleanups (Peter Zijlstra)

  - Rework the sched_info::run_delay code to fix races (Peter Zijlstra)

  - Optimize per entity utilization tracking (Peter Zijlstra)

  - ... misc other fixes, cleanups and smaller updates"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (57 commits)
  sched: Don't scan all-offline ->cpus_allowed twice if !CONFIG_CPUSETS
  sched: Move cpu_active() tests from stop_two_cpus() into migrate_swap_stop()
  sched: Start stopper early
  stop_machine: Kill cpu_stop_threads->setup() and cpu_stop_unpark()
  stop_machine: Kill smp_hotplug_thread->pre_unpark, introduce stop_machine_unpark()
  stop_machine: Change cpu_stop_queue_two_works() to rely on stopper->enabled
  stop_machine: Introduce __cpu_stop_queue_work() and cpu_stop_queue_two_works()
  stop_machine: Ensure that a queued callback will be called before cpu_stop_park()
  sched/x86: Fix typo in __switch_to() comments
  sched/core: Remove a parameter in the migrate_task_rq() function
  sched/core: Drop unlikely behind BUG_ON()
  sched/core: Fix task and run queue sched_info::run_delay inconsistencies
  sched/numa: Fix task_tick_fair() from disabling numa_balancing
  sched/core: Add preempt_count invariant check
  sched/core: More notrace annotations
  sched/core: Kill PREEMPT_ACTIVE
  sched/core, sched/x86: Kill thread_info::saved_preempt_count
  sched/core: Simplify preempt_count tests
  sched/core: Robustify preemption leak checks
  sched/core: Stop setting PREEMPT_ACTIVE
  ...
Diffstat (limited to 'include')
-rw-r--r--  include/asm-generic/preempt.h     2
-rw-r--r--  include/linux/preempt.h          20
-rw-r--r--  include/linux/sched.h            36
-rw-r--r--  include/linux/sched/deadline.h    5
-rw-r--r--  include/linux/smpboot.h           4
-rw-r--r--  include/linux/stop_machine.h      2
-rw-r--r--  include/trace/events/sched.h     22
7 files changed, 38 insertions(+), 53 deletions(-)
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index 0bec580a4885..5d8ffa3e6f8c 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -24,7 +24,7 @@ static __always_inline void preempt_count_set(int pc)
* must be macros to avoid header recursion hell
*/
#define init_task_preempt_count(p) do { \
- task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
+ task_thread_info(p)->preempt_count = FORK_PREEMPT_COUNT; \
} while (0)
#define init_idle_preempt_count(p, cpu) do { \
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index bea8dd8ff5e0..75e4e30677f1 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -26,7 +26,6 @@
* SOFTIRQ_MASK: 0x0000ff00
* HARDIRQ_MASK: 0x000f0000
* NMI_MASK: 0x00100000
- * PREEMPT_ACTIVE: 0x00200000
* PREEMPT_NEED_RESCHED: 0x80000000
*/
#define PREEMPT_BITS 8
@@ -53,10 +52,6 @@
#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
-#define PREEMPT_ACTIVE_BITS 1
-#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
-#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
-
/* We use the MSB mostly because its available */
#define PREEMPT_NEED_RESCHED 0x80000000
@@ -126,8 +121,7 @@
* Check whether we were atomic before we did preempt_disable():
* (used by the scheduler)
*/
-#define in_atomic_preempt_off() \
- ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_DISABLE_OFFSET)
+#define in_atomic_preempt_off() (preempt_count() != PREEMPT_DISABLE_OFFSET)
#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
extern void preempt_count_add(int val);
@@ -146,18 +140,6 @@ extern void preempt_count_sub(int val);
#define preempt_count_inc() preempt_count_add(1)
#define preempt_count_dec() preempt_count_sub(1)
-#define preempt_active_enter() \
-do { \
- preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \
- barrier(); \
-} while (0)
-
-#define preempt_active_exit() \
-do { \
- barrier(); \
- preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \
-} while (0)
-
#ifdef CONFIG_PREEMPT_COUNT
#define preempt_disable() \
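
For reference, the bit layout quoted in the header comment above can be reproduced with the usual *_BITS/*_SHIFT arithmetic. The following stand-alone sketch only illustrates that arithmetic; the SOFTIRQ/HARDIRQ/NMI bit widths and shifts are assumed to mirror the kernel's layout, and it also shows why the removed PREEMPT_ACTIVE flag used to occupy bit 21, directly above NMI.

/*
 * Sketch of the preempt_count bit layout; values are assumed to mirror the
 * kernel's definitions and are reproduced here only for illustration.
 */
#include <stdio.h>

#define PREEMPT_BITS	8
#define SOFTIRQ_BITS	8
#define HARDIRQ_BITS	4
#define NMI_BITS	1

#define PREEMPT_SHIFT	0
#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
#define NMI_SHIFT	(HARDIRQ_SHIFT + HARDIRQ_BITS)

#define __IRQ_MASK(x)	((1UL << (x)) - 1)

int main(void)
{
	printf("PREEMPT_MASK:  0x%08lx\n", __IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT);
	printf("SOFTIRQ_MASK:  0x%08lx\n", __IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT);
	printf("HARDIRQ_MASK:  0x%08lx\n", __IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT);
	printf("NMI_MASK:      0x%08lx\n", __IRQ_MASK(NMI_BITS) << NMI_SHIFT);
	/* The removed PREEMPT_ACTIVE flag sat directly above NMI: bit 21. */
	printf("PREEMPT_ACTIVE (removed): 0x%08lx\n", 1UL << (NMI_SHIFT + NMI_BITS));
	return 0;
}

With that flag gone there is no extra bit to mask out, which is why in_atomic_preempt_off() above collapses to a plain comparison of preempt_count() against PREEMPT_DISABLE_OFFSET.
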
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 56667292d1e4..9e1e06c3ce05 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -599,20 +599,26 @@ struct task_cputime_atomic {
.sum_exec_runtime = ATOMIC64_INIT(0), \
}
-#ifdef CONFIG_PREEMPT_COUNT
-#define PREEMPT_DISABLED (1 + PREEMPT_ENABLED)
-#else
-#define PREEMPT_DISABLED PREEMPT_ENABLED
-#endif
+#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
+
+/*
+ * Disable preemption until the scheduler is running -- use an unconditional
+ * value so that it also works on !PREEMPT_COUNT kernels.
+ *
+ * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count().
+ */
+#define INIT_PREEMPT_COUNT PREEMPT_OFFSET
/*
- * Disable preemption until the scheduler is running.
- * Reset by start_kernel()->sched_init()->init_idle().
+ * Initial preempt_count value; reflects the preempt_count schedule invariant
+ * which states that during context switches:
*
- * We include PREEMPT_ACTIVE to avoid cond_resched() from working
- * before the scheduler is active -- see should_resched().
+ * preempt_count() == 2*PREEMPT_DISABLE_OFFSET
+ *
+ * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels.
+ * Note: See finish_task_switch().
*/
-#define INIT_PREEMPT_COUNT (PREEMPT_DISABLED + PREEMPT_ACTIVE)
+#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
/**
* struct thread_group_cputimer - thread group interval timer counts
@@ -1142,8 +1148,6 @@ struct sched_domain_topology_level {
#endif
};
-extern struct sched_domain_topology_level *sched_domain_topology;
-
extern void set_sched_topology(struct sched_domain_topology_level *tl);
extern void wake_up_if_idle(int cpu);
@@ -1192,10 +1196,10 @@ struct load_weight {
/*
* The load_avg/util_avg accumulates an infinite geometric series.
- * 1) load_avg factors the amount of time that a sched_entity is
- * runnable on a rq into its weight. For cfs_rq, it is the aggregated
- * such weights of all runnable and blocked sched_entities.
- * 2) util_avg factors frequency scaling into the amount of time
+ * 1) load_avg factors frequency scaling into the amount of time that a
+ * sched_entity is runnable on a rq into its weight. For cfs_rq, it is the
+ * aggregated such weights of all runnable and blocked sched_entities.
+ * 2) util_avg factors frequency and cpu scaling into the amount of time
* that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE].
* For cfs_rq, it is the aggregated such times of all runnable and
* blocked sched_entities.
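
As a worked example of the INIT_PREEMPT_COUNT/FORK_PREEMPT_COUNT arithmetic introduced above: the three #defines below are taken from the hunk, while PREEMPT_OFFSET == 1, PREEMPT_ENABLED == 0 and the CONFIG_PREEMPT_COUNT conditional for PREEMPT_DISABLE_OFFSET are assumptions for the generic case, shown only to make the resulting numbers concrete.

/*
 * Minimal sketch; build with and without -DCONFIG_PREEMPT_COUNT to see the
 * two configurations described in the comments above.
 */
#include <stdio.h>

#define PREEMPT_OFFSET	1	/* assumed generic value */
#define PREEMPT_ENABLED	0	/* assumed generic value */

#ifdef CONFIG_PREEMPT_COUNT
# define PREEMPT_DISABLE_OFFSET	PREEMPT_OFFSET
#else
# define PREEMPT_DISABLE_OFFSET	0
#endif

#define PREEMPT_DISABLED	(PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
#define INIT_PREEMPT_COUNT	PREEMPT_OFFSET
#define FORK_PREEMPT_COUNT	(2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)

int main(void)
{
	/* With CONFIG_PREEMPT_COUNT: 1, 1, 2.  Without: 0, 1, 0. */
	printf("PREEMPT_DISABLED   = %d\n", PREEMPT_DISABLED);
	printf("INIT_PREEMPT_COUNT = %d\n", INIT_PREEMPT_COUNT);
	printf("FORK_PREEMPT_COUNT = %d\n", FORK_PREEMPT_COUNT);
	return 0;
}

Note that INIT_PREEMPT_COUNT stays non-zero even when PREEMPT_DISABLE_OFFSET is 0, matching the "use an unconditional value" comment, while FORK_PREEMPT_COUNT evaluates to 2*PREEMPT_DISABLE_OFFSET, matching the stated context-switch invariant.
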
diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index 9d303b8847df..9089a2ae913d 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -21,4 +21,9 @@ static inline int dl_task(struct task_struct *p)
return dl_prio(p->prio);
}
+static inline bool dl_time_before(u64 a, u64 b)
+{
+ return (s64)(a - b) < 0;
+}
+
#endif /* _SCHED_DEADLINE_H */
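
The new dl_time_before() helper uses the standard wrap-safe comparison idiom: subtract and test the sign of the difference rather than comparing the raw u64 values. A small stand-alone sketch, with the comparison taken verbatim from the hunk above and purely illustrative test values:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

typedef uint64_t u64;
typedef int64_t s64;

static inline bool dl_time_before(u64 a, u64 b)
{
	return (s64)(a - b) < 0;
}

int main(void)
{
	u64 near_wrap = UINT64_MAX - 10;	/* deadline shortly before the counter wraps */
	u64 wrapped   = 5;			/* deadline shortly after the wrap */

	/* A naive 'a < b' gets this backwards, since UINT64_MAX - 10 > 5. */
	printf("naive:          %d\n", near_wrap < wrapped);		/* 0 */
	printf("dl_time_before: %d\n", dl_time_before(near_wrap, wrapped));	/* 1 */
	return 0;
}
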
diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
index e6109a6cd8f6..12910cf19869 100644
--- a/include/linux/smpboot.h
+++ b/include/linux/smpboot.h
@@ -24,9 +24,6 @@ struct smpboot_thread_data;
* parked (cpu offline)
* @unpark: Optional unpark function, called when the thread is
* unparked (cpu online)
- * @pre_unpark: Optional unpark function, called before the thread is
- * unparked (cpu online). This is not guaranteed to be
- * called on the target cpu of the thread. Careful!
* @cpumask: Internal state. To update which threads are unparked,
* call smpboot_update_cpumask_percpu_thread().
* @selfparking: Thread is not parked by the park function.
@@ -42,7 +39,6 @@ struct smp_hotplug_thread {
void (*cleanup)(unsigned int cpu, bool online);
void (*park)(unsigned int cpu);
void (*unpark)(unsigned int cpu);
- void (*pre_unpark)(unsigned int cpu);
cpumask_var_t cpumask;
bool selfparking;
const char *thread_comm;
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 414d924318ce..0adedca24c5b 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -33,6 +33,8 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
struct cpu_stop_work *work_buf);
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
+void stop_machine_park(int cpu);
+void stop_machine_unpark(int cpu);
#else /* CONFIG_SMP */
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 539d6bc3216a..9b90c57517a9 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -104,22 +104,17 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
TP_ARGS(p));
#ifdef CREATE_TRACE_POINTS
-static inline long __trace_sched_switch_state(struct task_struct *p)
+static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
- long state = p->state;
-
-#ifdef CONFIG_PREEMPT
#ifdef CONFIG_SCHED_DEBUG
BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
+
/*
- * For all intents and purposes a preempted task is a running task.
+ * Preemption ignores task state, therefore preempted tasks are always
+ * RUNNING (we will not have dequeued if state != RUNNING).
*/
- if (preempt_count() & PREEMPT_ACTIVE)
- state = TASK_RUNNING | TASK_STATE_MAX;
-#endif /* CONFIG_PREEMPT */
-
- return state;
+ return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
}
#endif /* CREATE_TRACE_POINTS */
@@ -128,10 +123,11 @@ static inline long __trace_sched_switch_state(struct task_struct *p)
*/
TRACE_EVENT(sched_switch,
- TP_PROTO(struct task_struct *prev,
+ TP_PROTO(bool preempt,
+ struct task_struct *prev,
struct task_struct *next),
- TP_ARGS(prev, next),
+ TP_ARGS(preempt, prev, next),
TP_STRUCT__entry(
__array( char, prev_comm, TASK_COMM_LEN )
@@ -147,7 +143,7 @@ TRACE_EVENT(sched_switch,
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
__entry->prev_pid = prev->pid;
__entry->prev_prio = prev->prio;
- __entry->prev_state = __trace_sched_switch_state(prev);
+ __entry->prev_state = __trace_sched_switch_state(preempt, prev);
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
__entry->next_pid = next->pid;
__entry->next_prio = next->prio;
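
With PREEMPT_ACTIVE gone, the preemption information now travels in the tracepoint arguments instead of the preempt counter: a preempted task is reported with prev_state set to TASK_RUNNING plus the TASK_STATE_MAX marker, which the usual sched_switch format string renders with a trailing '+'. A minimal decoding sketch for a trace consumer; TASK_RUNNING == 0 is the kernel's value, but the TASK_STATE_MAX constant below is a placeholder, as its value depends on the kernel version, and a real consumer should take it from the headers it was built against.

#include <stdbool.h>
#include <stdio.h>

#define TASK_RUNNING	0x0000
#define TASK_STATE_MAX	0x1000		/* placeholder; kernel-version dependent */

static bool switched_out_by_preemption(long prev_state)
{
	/* Preempted tasks are reported as RUNNING with the marker bit set. */
	return prev_state & TASK_STATE_MAX;
}

int main(void)
{
	long preempted = TASK_RUNNING | TASK_STATE_MAX;	/* what the tracepoint now emits */
	long slept     = 0x0001;			/* TASK_INTERRUPTIBLE in the kernel's encoding */

	printf("%d %d\n", switched_out_by_preemption(preempted),
			  switched_out_by_preemption(slept));	/* prints: 1 0 */
	return 0;
}
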