summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 13:06:42 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 13:06:42 -0700
commitd479c5a1919b4e569dcd3ae9c84ed74a675d0b94 (patch)
tree99d0ad6f712b152991a5846034a184ba857beed7 /include
parentf6aee505c71bbb035dde146caf5a6abbf3ccbe47 (diff)
parent25de110d148666752dc0e0da7a0b69de31cd7098 (diff)
Merge tag 'sched-core-2020-06-02' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar: "The changes in this cycle are: - Optimize the task wakeup CPU selection logic, to improve scalability and reduce wakeup latency spikes - PELT enhancements - CFS bandwidth handling fixes - Optimize the wakeup path by remove rq->wake_list and replacing it with ->ttwu_pending - Optimize IPI cross-calls by making flush_smp_call_function_queue() process sync callbacks first. - Misc fixes and enhancements" * tag 'sched-core-2020-06-02' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (36 commits) irq_work: Define irq_work_single() on !CONFIG_IRQ_WORK too sched/headers: Split out open-coded prototypes into kernel/sched/smp.h sched: Replace rq::wake_list sched: Add rq::ttwu_pending irq_work, smp: Allow irq_work on call_single_queue smp: Optimize send_call_function_single_ipi() smp: Move irq_work_run() out of flush_smp_call_function_queue() smp: Optimize flush_smp_call_function_queue() sched: Fix smp_call_function_single_async() usage for ILB sched/core: Offload wakee task activation if it the wakee is descheduling sched/core: Optimize ttwu() spinning on p->on_cpu sched: Defend cfs and rt bandwidth quota against overflow sched/cpuacct: Fix charge cpuacct.usage_sys sched/fair: Replace zero-length array with flexible-array sched/pelt: Sync util/runnable_sum with PELT window when propagating sched/cpuacct: Use __this_cpu_add() instead of this_cpu_ptr() sched/fair: Optimize enqueue_task_fair() sched: Make scheduler_ipi inline sched: Clean up scheduler_ipi() sched/core: Simplify sched_init() ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/irq_work.h9
-rw-r--r--include/linux/sched.h11
-rw-r--r--include/linux/sched/mm.h2
-rw-r--r--include/linux/sched/topology.h29
-rw-r--r--include/linux/smp.h24
-rw-r--r--include/linux/swait.h23
6 files changed, 62 insertions, 36 deletions
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 3b752e80c017..2735da5f839e 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -13,6 +13,8 @@
* busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
*/
+/* flags share CSD_FLAG_ space */
+
#define IRQ_WORK_PENDING BIT(0)
#define IRQ_WORK_BUSY BIT(1)
@@ -23,9 +25,12 @@
#define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY)
+/*
+ * structure shares layout with single_call_data_t.
+ */
struct irq_work {
- atomic_t flags;
struct llist_node llnode;
+ atomic_t flags;
void (*func)(struct irq_work *);
};
@@ -53,9 +58,11 @@ void irq_work_sync(struct irq_work *work);
void irq_work_run(void);
bool irq_work_needs_cpu(void);
+void irq_work_single(void *arg);
#else
static inline bool irq_work_needs_cpu(void) { return false; }
static inline void irq_work_run(void) { }
+static inline void irq_work_single(void *arg) { }
#endif
#endif /* _LINUX_IRQ_WORK_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 33bb7c539246..12938d438d69 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -654,6 +654,7 @@ struct task_struct {
#ifdef CONFIG_SMP
struct llist_node wake_entry;
+ unsigned int wake_entry_type;
int on_cpu;
#ifdef CONFIG_THREAD_INFO_IN_TASK
/* Current CPU: */
@@ -1730,7 +1731,15 @@ extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
})
#ifdef CONFIG_SMP
-void scheduler_ipi(void);
+static __always_inline void scheduler_ipi(void)
+{
+ /*
+ * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
+ * TIF_NEED_RESCHED remotely (for the first time) will also send
+ * this IPI.
+ */
+ preempt_fold_need_resched();
+}
extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
#else
static inline void scheduler_ipi(void) { }
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index c49257a3b510..a132d875d351 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,6 +49,8 @@ static inline void mmdrop(struct mm_struct *mm)
__mmdrop(mm);
}
+void mmdrop(struct mm_struct *mm);
+
/*
* This has to be called after a get_task_mm()/mmget_not_zero()
* followed by taking the mmap_sem for writing before modifying the
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 95253ad792b0..fb11091129b3 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -11,21 +11,20 @@
*/
#ifdef CONFIG_SMP
-#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */
-#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */
-#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */
-#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
-#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
-#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
-#define SD_ASYM_CPUCAPACITY 0x0040 /* Domain members have different CPU capacities */
-#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share CPU capacity */
-#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */
-#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share CPU pkg resources */
-#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
-#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
-#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
-#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */
-#define SD_NUMA 0x4000 /* cross-node balancing */
+#define SD_BALANCE_NEWIDLE 0x0001 /* Balance when about to become idle */
+#define SD_BALANCE_EXEC 0x0002 /* Balance on exec */
+#define SD_BALANCE_FORK 0x0004 /* Balance on fork, clone */
+#define SD_BALANCE_WAKE 0x0008 /* Balance on wakeup */
+#define SD_WAKE_AFFINE 0x0010 /* Wake task to waking CPU */
+#define SD_ASYM_CPUCAPACITY 0x0020 /* Domain members have different CPU capacities */
+#define SD_SHARE_CPUCAPACITY 0x0040 /* Domain members share CPU capacity */
+#define SD_SHARE_POWERDOMAIN 0x0080 /* Domain members share power domain */
+#define SD_SHARE_PKG_RESOURCES 0x0100 /* Domain members share CPU pkg resources */
+#define SD_SERIALIZE 0x0200 /* Only a single load balancing instance */
+#define SD_ASYM_PACKING 0x0400 /* Place busy groups earlier in the domain */
+#define SD_PREFER_SIBLING 0x0800 /* Prefer to place tasks in a sibling domain */
+#define SD_OVERLAP 0x1000 /* sched_domains of this level overlap */
+#define SD_NUMA 0x2000 /* cross-node balancing */
#ifdef CONFIG_SCHED_SMT
static inline int cpu_smt_flags(void)
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 04019872c7bc..7ee202ad21a6 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -16,17 +16,39 @@
typedef void (*smp_call_func_t)(void *info);
typedef bool (*smp_cond_func_t)(int cpu, void *info);
+
+enum {
+ CSD_FLAG_LOCK = 0x01,
+
+ /* IRQ_WORK_flags */
+
+ CSD_TYPE_ASYNC = 0x00,
+ CSD_TYPE_SYNC = 0x10,
+ CSD_TYPE_IRQ_WORK = 0x20,
+ CSD_TYPE_TTWU = 0x30,
+ CSD_FLAG_TYPE_MASK = 0xF0,
+};
+
+/*
+ * structure shares (partial) layout with struct irq_work
+ */
struct __call_single_data {
struct llist_node llist;
+ unsigned int flags;
smp_call_func_t func;
void *info;
- unsigned int flags;
};
/* Use __aligned() to avoid to use 2 cache lines for 1 csd */
typedef struct __call_single_data call_single_data_t
__aligned(sizeof(struct __call_single_data));
+/*
+ * Enqueue a llist_node on the call_single_queue; be very careful, read
+ * flush_smp_call_function_queue() in detail.
+ */
+extern void __smp_call_single_queue(int cpu, struct llist_node *node);
+
/* total number of cpus in this system (may exceed NR_CPUS) */
extern unsigned int total_cpus;
diff --git a/include/linux/swait.h b/include/linux/swait.h
index 73e06e9986d4..6a8c22b8c2a5 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -9,23 +9,10 @@
#include <asm/current.h>
/*
- * BROKEN wait-queues.
- *
- * These "simple" wait-queues are broken garbage, and should never be
- * used. The comments below claim that they are "similar" to regular
- * wait-queues, but the semantics are actually completely different, and
- * every single user we have ever had has been buggy (or pointless).
- *
- * A "swake_up_one()" only wakes up _one_ waiter, which is not at all what
- * "wake_up()" does, and has led to problems. In other cases, it has
- * been fine, because there's only ever one waiter (kvm), but in that
- * case gthe whole "simple" wait-queue is just pointless to begin with,
- * since there is no "queue". Use "wake_up_process()" with a direct
- * pointer instead.
- *
- * While these are very similar to regular wait queues (wait.h) the most
- * important difference is that the simple waitqueue allows for deterministic
- * behaviour -- IOW it has strictly bounded IRQ and lock hold times.
+ * Simple waitqueues are semantically very different to regular wait queues
+ * (wait.h). The most important difference is that the simple waitqueue allows
+ * for deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold
+ * times.
*
* Mainly, this is accomplished by two things. Firstly not allowing swake_up_all
* from IRQ disabled, and dropping the lock upon every wakeup, giving a higher
@@ -39,7 +26,7 @@
* sleeper state.
*
* - the !exclusive mode; because that leads to O(n) wakeups, everything is
- * exclusive.
+ * exclusive. As such swake_up_one will only ever awake _one_ waiter.
*
* - custom wake callback functions; because you cannot give any guarantees
* about random code. This also allows swait to be used in RT, such that