summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit_watch.c2
-rw-r--r--kernel/bpf/cgroup.c4
-rw-r--r--kernel/bpf/core.c2
-rw-r--r--kernel/bpf/devmap.c33
-rw-r--r--kernel/bpf/syscall.c38
-rw-r--r--kernel/cgroup/cgroup.c5
-rw-r--r--kernel/cgroup/cpuset.c3
-rw-r--r--kernel/cpu.c27
-rw-r--r--kernel/fork.c33
-rw-r--r--kernel/irq/irqdomain.c2
-rw-r--r--kernel/sched/core.c23
-rw-r--r--kernel/sched/deadline.c40
-rw-r--r--kernel/sched/fair.c15
-rw-r--r--kernel/sched/idle.c9
-rw-r--r--kernel/sched/rt.c37
-rw-r--r--kernel/sched/sched.h30
-rw-r--r--kernel/sched/stop_task.c18
-rw-r--r--kernel/sched/topology.c11
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/stacktrace.c6
-rw-r--r--kernel/time/vsyscall.c9
21 files changed, 255 insertions, 94 deletions
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 1f31c2f1e6fc..4508d5e0cf69 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -351,12 +351,12 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
struct dentry *d = kern_path_locked(watch->path, parent);
if (IS_ERR(d))
return PTR_ERR(d);
- inode_unlock(d_backing_inode(parent->dentry));
if (d_is_positive(d)) {
/* update watch filter fields */
watch->dev = d->d_sb->s_dev;
watch->ino = d_backing_inode(d)->i_ino;
}
+ inode_unlock(d_backing_inode(parent->dentry));
dput(d);
return 0;
}
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index ddd8addcdb5c..a3eaf08e7dd3 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1311,12 +1311,12 @@ static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
return false;
switch (off) {
- case offsetof(struct bpf_sysctl, write):
+ case bpf_ctx_range(struct bpf_sysctl, write):
if (type != BPF_READ)
return false;
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
- case offsetof(struct bpf_sysctl, file_pos):
+ case bpf_ctx_range(struct bpf_sysctl, file_pos):
if (type == BPF_READ) {
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 66088a9e9b9e..ef0e1e3e66f4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -502,7 +502,7 @@ int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false));
}
-void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
+static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
{
int i;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index d27f3b60ff6d..3867864cdc2f 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -128,7 +128,7 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
if (!dtab->n_buckets) /* Overflow check */
return -EINVAL;
- cost += sizeof(struct hlist_head) * dtab->n_buckets;
+ cost += (u64) sizeof(struct hlist_head) * dtab->n_buckets;
}
/* if map size is larger than memlock limit, reject it */
@@ -719,6 +719,32 @@ const struct bpf_map_ops dev_map_hash_ops = {
.map_check_btf = map_check_no_btf,
};
+static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab,
+ struct net_device *netdev)
+{
+ unsigned long flags;
+ u32 i;
+
+ spin_lock_irqsave(&dtab->index_lock, flags);
+ for (i = 0; i < dtab->n_buckets; i++) {
+ struct bpf_dtab_netdev *dev;
+ struct hlist_head *head;
+ struct hlist_node *next;
+
+ head = dev_map_index_hash(dtab, i);
+
+ hlist_for_each_entry_safe(dev, next, head, index_hlist) {
+ if (netdev != dev->dev)
+ continue;
+
+ dtab->items--;
+ hlist_del_rcu(&dev->index_hlist);
+ call_rcu(&dev->rcu, __dev_map_entry_free);
+ }
+ }
+ spin_unlock_irqrestore(&dtab->index_lock, flags);
+}
+
static int dev_map_notification(struct notifier_block *notifier,
ulong event, void *ptr)
{
@@ -735,6 +761,11 @@ static int dev_map_notification(struct notifier_block *notifier,
*/
rcu_read_lock();
list_for_each_entry_rcu(dtab, &dev_map_list, list) {
+ if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+ dev_map_hash_remove_netdev(dtab, netdev);
+ continue;
+ }
+
for (i = 0; i < dtab->map.max_entries; i++) {
struct bpf_dtab_netdev *dev, *odev;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 82eabd4e38ad..ace1cfaa24b6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -126,7 +126,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
return map;
}
-void *bpf_map_area_alloc(size_t size, int numa_node)
+void *bpf_map_area_alloc(u64 size, int numa_node)
{
/* We really just want to fail instead of triggering OOM killer
* under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
@@ -141,6 +141,9 @@ void *bpf_map_area_alloc(size_t size, int numa_node)
const gfp_t flags = __GFP_NOWARN | __GFP_ZERO;
void *area;
+ if (size >= SIZE_MAX)
+ return NULL;
+
if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags,
numa_node);
@@ -197,7 +200,7 @@ static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
atomic_long_sub(pages, &user->locked_vm);
}
-int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size)
+int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size)
{
u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
struct user_struct *user;
@@ -1326,24 +1329,32 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
+ kvfree(aux->func_info);
free_used_maps(aux);
bpf_prog_uncharge_memlock(aux->prog);
security_bpf_prog_free(aux);
bpf_prog_free(aux->prog);
}
+static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
+{
+ bpf_prog_kallsyms_del_all(prog);
+ btf_put(prog->aux->btf);
+ bpf_prog_free_linfo(prog);
+
+ if (deferred)
+ call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+ else
+ __bpf_prog_put_rcu(&prog->aux->rcu);
+}
+
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
if (atomic_dec_and_test(&prog->aux->refcnt)) {
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
- bpf_prog_kallsyms_del_all(prog);
- btf_put(prog->aux->btf);
- kvfree(prog->aux->func_info);
- bpf_prog_free_linfo(prog);
-
- call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+ __bpf_prog_put_noref(prog, true);
}
}
@@ -1741,11 +1752,12 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
return err;
free_used_maps:
- bpf_prog_free_linfo(prog);
- kvfree(prog->aux->func_info);
- btf_put(prog->aux->btf);
- bpf_prog_kallsyms_del_subprogs(prog);
- free_used_maps(prog->aux);
+ /* In case we have subprogs, we need to wait for a grace
+ * period before we can tear down JIT memory since symbols
+ * are already exposed under kallsyms.
+ */
+ __bpf_prog_put_noref(prog, prog->aux->func_cnt);
+ return err;
free_prog:
bpf_prog_uncharge_memlock(prog);
free_prog_sec:
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 080561bb8a4b..ef4242e5d4bc 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2119,11 +2119,12 @@ int cgroup_do_get_tree(struct fs_context *fc)
nsdentry = kernfs_node_dentry(cgrp->kn, sb);
dput(fc->root);
- fc->root = nsdentry;
if (IS_ERR(nsdentry)) {
- ret = PTR_ERR(nsdentry);
deactivate_locked_super(sb);
+ ret = PTR_ERR(nsdentry);
+ nsdentry = NULL;
}
+ fc->root = nsdentry;
}
if (!ctx->kfc.new_sb_created)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index c52bc91f882b..c87ee6412b36 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -798,7 +798,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
cpumask_subset(cp->cpus_allowed, top_cpuset.effective_cpus))
continue;
- if (is_sched_load_balance(cp))
+ if (is_sched_load_balance(cp) &&
+ !cpumask_empty(cp->effective_cpus))
csa[csn++] = cp;
/* skip @cp's subtree if not a partition root */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index fc28e17940e0..e2cad3ee2ead 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2373,7 +2373,18 @@ void __init boot_cpu_hotplug_init(void)
this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
}
-enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
+/*
+ * These are used for a global "mitigations=" cmdline option for toggling
+ * optional CPU mitigations.
+ */
+enum cpu_mitigations {
+ CPU_MITIGATIONS_OFF,
+ CPU_MITIGATIONS_AUTO,
+ CPU_MITIGATIONS_AUTO_NOSMT,
+};
+
+static enum cpu_mitigations cpu_mitigations __ro_after_init =
+ CPU_MITIGATIONS_AUTO;
static int __init mitigations_parse_cmdline(char *arg)
{
@@ -2390,3 +2401,17 @@ static int __init mitigations_parse_cmdline(char *arg)
return 0;
}
early_param("mitigations", mitigations_parse_cmdline);
+
+/* mitigations=off */
+bool cpu_mitigations_off(void)
+{
+ return cpu_mitigations == CPU_MITIGATIONS_OFF;
+}
+EXPORT_SYMBOL_GPL(cpu_mitigations_off);
+
+/* mitigations=auto,nosmt */
+bool cpu_mitigations_auto_nosmt(void)
+{
+ return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
+}
+EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
diff --git a/kernel/fork.c b/kernel/fork.c
index bcdf53125210..55af6931c6ec 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2561,7 +2561,35 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
return 0;
}
-static bool clone3_args_valid(const struct kernel_clone_args *kargs)
+/**
+ * clone3_stack_valid - check and prepare stack
+ * @kargs: kernel clone args
+ *
+ * Verify that the stack arguments userspace gave us are sane.
+ * In addition, set the stack direction for userspace since it's easy for us to
+ * determine.
+ */
+static inline bool clone3_stack_valid(struct kernel_clone_args *kargs)
+{
+ if (kargs->stack == 0) {
+ if (kargs->stack_size > 0)
+ return false;
+ } else {
+ if (kargs->stack_size == 0)
+ return false;
+
+ if (!access_ok((void __user *)kargs->stack, kargs->stack_size))
+ return false;
+
+#if !defined(CONFIG_STACK_GROWSUP) && !defined(CONFIG_IA64)
+ kargs->stack += kargs->stack_size;
+#endif
+ }
+
+ return true;
+}
+
+static bool clone3_args_valid(struct kernel_clone_args *kargs)
{
/*
* All lower bits of the flag word are taken.
@@ -2581,6 +2609,9 @@ static bool clone3_args_valid(const struct kernel_clone_args *kargs)
kargs->exit_signal)
return false;
+ if (!clone3_stack_valid(kargs))
+ return false;
+
return true;
}
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 132672b74e4b..dd822fd8a7d5 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -51,7 +51,7 @@ EXPORT_SYMBOL_GPL(irqchip_fwnode_ops);
* @type: Type of irqchip_fwnode. See linux/irqdomain.h
* @name: Optional user provided domain name
* @id: Optional user provided id if name != NULL
- * @data: Optional user-provided data
+ * @pa: Optional user-provided physical address
*
* Allocate a struct irqchip_fwid, and return a poiner to the embedded
* fwnode_handle (or NULL on failure).
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dd05a378631a..0f2eb3629070 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1073,6 +1073,7 @@ uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
task_rq_unlock(rq, p, &rf);
}
+#ifdef CONFIG_UCLAMP_TASK_GROUP
static inline void
uclamp_update_active_tasks(struct cgroup_subsys_state *css,
unsigned int clamps)
@@ -1091,7 +1092,6 @@ uclamp_update_active_tasks(struct cgroup_subsys_state *css,
css_task_iter_end(&it);
}
-#ifdef CONFIG_UCLAMP_TASK_GROUP
static void cpu_util_update_eff(struct cgroup_subsys_state *css);
static void uclamp_update_root_tg(void)
{
@@ -3929,13 +3929,22 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
}
restart:
+#ifdef CONFIG_SMP
/*
- * Ensure that we put DL/RT tasks before the pick loop, such that they
- * can PULL higher prio tasks when we lower the RQ 'priority'.
+ * We must do the balancing pass before put_next_task(), such
+ * that when we release the rq->lock the task is in the same
+ * state as before we took rq->lock.
+ *
+ * We can terminate the balance pass as soon as we know there is
+ * a runnable task of @class priority or higher.
*/
- prev->sched_class->put_prev_task(rq, prev, rf);
- if (!rq->nr_running)
- newidle_balance(rq, rf);
+ for_class_range(class, prev->sched_class, &idle_sched_class) {
+ if (class->balance(rq, prev, rf))
+ break;
+ }
+#endif
+
+ put_prev_task(rq, prev);
for_each_class(class) {
p = class->pick_next_task(rq, NULL, NULL);
@@ -6201,7 +6210,7 @@ static struct task_struct *__pick_migrate_task(struct rq *rq)
for_each_class(class) {
next = class->pick_next_task(rq, NULL, NULL);
if (next) {
- next->sched_class->put_prev_task(rq, next, NULL);
+ next->sched_class->put_prev_task(rq, next);
return next;
}
}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 2dc48720f189..a8a08030a8f7 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1691,6 +1691,22 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
resched_curr(rq);
}
+static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+{
+ if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
+ /*
+ * This is OK, because current is on_cpu, which avoids it being
+ * picked for load-balance and preemption/IRQs are still
+ * disabled avoiding further scheduler activity on it and we've
+ * not yet started the picking loop.
+ */
+ rq_unpin_lock(rq, rf);
+ pull_dl_task(rq);
+ rq_repin_lock(rq, rf);
+ }
+
+ return sched_stop_runnable(rq) || sched_dl_runnable(rq);
+}
#endif /* CONFIG_SMP */
/*
@@ -1758,45 +1774,28 @@ static struct task_struct *
pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
struct sched_dl_entity *dl_se;
+ struct dl_rq *dl_rq = &rq->dl;
struct task_struct *p;
- struct dl_rq *dl_rq;
WARN_ON_ONCE(prev || rf);
- dl_rq = &rq->dl;
-
- if (unlikely(!dl_rq->dl_nr_running))
+ if (!sched_dl_runnable(rq))
return NULL;
dl_se = pick_next_dl_entity(rq, dl_rq);
BUG_ON(!dl_se);
-
p = dl_task_of(dl_se);
-
set_next_task_dl(rq, p);
-
return p;
}
-static void put_prev_task_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
{
update_curr_dl(rq);
update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
enqueue_pushable_dl_task(rq, p);
-
- if (rf && !on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
- /*
- * This is OK, because current is on_cpu, which avoids it being
- * picked for load-balance and preemption/IRQs are still
- * disabled avoiding further scheduler activity on it and we've
- * not yet started the picking loop.
- */
- rq_unpin_lock(rq, rf);
- pull_dl_task(rq);
- rq_repin_lock(rq, rf);
- }
}
/*
@@ -2442,6 +2441,7 @@ const struct sched_class dl_sched_class = {
.set_next_task = set_next_task_dl,
#ifdef CONFIG_SMP
+ .balance = balance_dl,
.select_task_rq = select_task_rq_dl,
.migrate_task_rq = migrate_task_rq_dl,
.set_cpus_allowed = set_cpus_allowed_dl,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 682a754ea3e1..22a2fed29054 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6570,6 +6570,15 @@ static void task_dead_fair(struct task_struct *p)
{
remove_entity_load_avg(&p->se);
}
+
+static int
+balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+ if (rq->nr_running)
+ return 1;
+
+ return newidle_balance(rq, rf) != 0;
+}
#endif /* CONFIG_SMP */
static unsigned long wakeup_gran(struct sched_entity *se)
@@ -6746,7 +6755,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
int new_tasks;
again:
- if (!cfs_rq->nr_running)
+ if (!sched_fair_runnable(rq))
goto idle;
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -6884,7 +6893,7 @@ idle:
/*
* Account for a descheduled task:
*/
-static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
{
struct sched_entity *se = &prev->se;
struct cfs_rq *cfs_rq;
@@ -10414,11 +10423,11 @@ const struct sched_class fair_sched_class = {
.check_preempt_curr = check_preempt_wakeup,
.pick_next_task = pick_next_task_fair,
-
.put_prev_task = put_prev_task_fair,
.set_next_task = set_next_task_fair,
#ifdef CONFIG_SMP
+ .balance = balance_fair,
.select_task_rq = select_task_rq_fair,
.migrate_task_rq = migrate_task_rq_fair,
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 8dad5aa600ea..f65ef1e2f204 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -365,6 +365,12 @@ select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
{
return task_cpu(p); /* IDLE tasks as never migrated */
}
+
+static int
+balance_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+ return WARN_ON_ONCE(1);
+}
#endif
/*
@@ -375,7 +381,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
resched_curr(rq);
}
-static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
}
@@ -460,6 +466,7 @@ const struct sched_class idle_sched_class = {
.set_next_task = set_next_task_idle,
#ifdef CONFIG_SMP
+ .balance = balance_idle,
.select_task_rq = select_task_rq_idle,
.set_cpus_allowed = set_cpus_allowed_common,
#endif
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index ebaa4e619684..9b8adc01be3d 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1469,6 +1469,22 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
resched_curr(rq);
}
+static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+{
+ if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
+ /*
+ * This is OK, because current is on_cpu, which avoids it being
+ * picked for load-balance and preemption/IRQs are still
+ * disabled avoiding further scheduler activity on it and we've
+ * not yet started the picking loop.
+ */
+ rq_unpin_lock(rq, rf);
+ pull_rt_task(rq);
+ rq_repin_lock(rq, rf);
+ }
+
+ return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
+}
#endif /* CONFIG_SMP */
/*
@@ -1552,21 +1568,18 @@ static struct task_struct *
pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
struct task_struct *p;
- struct rt_rq *rt_rq = &rq->rt;
WARN_ON_ONCE(prev || rf);
- if (!rt_rq->rt_queued)
+ if (!sched_rt_runnable(rq))
return NULL;
p = _pick_next_task_rt(rq);
-
set_next_task_rt(rq, p);
-
return p;
}
-static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
update_curr_rt(rq);
@@ -1578,18 +1591,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct rq_fla
*/
if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
enqueue_pushable_task(rq, p);
-
- if (rf && !on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
- /*
- * This is OK, because current is on_cpu, which avoids it being
- * picked for load-balance and preemption/IRQs are still
- * disabled avoiding further scheduler activity on it and we've
- * not yet started the picking loop.
- */
- rq_unpin_lock(rq, rf);
- pull_rt_task(rq);
- rq_repin_lock(rq, rf);
- }
}
#ifdef CONFIG_SMP
@@ -2366,8 +2367,8 @@ const struct sched_class rt_sched_class = {
.set_next_task = set_next_task_rt,
#ifdef CONFIG_SMP
+ .balance = balance_rt,
.select_task_rq = select_task_rq_rt,
-
.set_cpus_allowed = set_cpus_allowed_common,
.rq_online = rq_online_rt,
.rq_offline = rq_offline_rt,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0db2c1b3361e..c8870c5bd7df 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1727,10 +1727,11 @@ struct sched_class {
struct task_struct * (*pick_next_task)(struct rq *rq,
struct task_struct *prev,
struct rq_flags *rf);
- void (*put_prev_task)(struct rq *rq, struct task_struct *p, struct rq_flags *rf);
+ void (*put_prev_task)(struct rq *rq, struct task_struct *p);
void (*set_next_task)(struct rq *rq, struct task_struct *p);
#ifdef CONFIG_SMP
+ int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
@@ -1773,7 +1774,7 @@ struct sched_class {
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
{
WARN_ON_ONCE(rq->curr != prev);
- prev->sched_class->put_prev_task(rq, prev, NULL);
+ prev->sched_class->put_prev_task(rq, prev);
}
static inline void set_next_task(struct rq *rq, struct task_struct *next)
@@ -1787,8 +1788,12 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
#else
#define sched_class_highest (&dl_sched_class)
#endif
+
+#define for_class_range(class, _from, _to) \
+ for (class = (_from); class != (_to); class = class->next)
+
#define for_each_class(class) \
- for (class = sched_class_highest; class; class = class->next)
+ for_class_range(class, sched_class_highest, NULL)
extern const struct sched_class stop_sched_class;
extern const struct sched_class dl_sched_class;
@@ -1796,6 +1801,25 @@ extern const struct sched_class rt_sched_class;
extern const struct sched_class fair_sched_class;
extern const struct sched_class idle_sched_class;
+static inline bool sched_stop_runnable(struct rq *rq)
+{
+ return rq->stop && task_on_rq_queued(rq->stop);
+}
+
+static inline bool sched_dl_runnable(struct rq *rq)
+{
+ return rq->dl.dl_nr_running > 0;
+}
+
+static inline bool sched_rt_runnable(struct rq *rq)
+{
+ return rq->rt.rt_queued > 0;
+}
+
+static inline bool sched_fair_runnable(struct rq *rq)
+{
+ return rq->cfs.nr_running > 0;
+}
#ifdef CONFIG_SMP
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 7e1cee4e65b2..c0640739e05e 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -15,6 +15,12 @@ select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags)
{
return task_cpu(p); /* stop tasks as never migrate */
}
+
+static int
+balance_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+ return sched_stop_runnable(rq);
+}
#endif /* CONFIG_SMP */
static void
@@ -31,16 +37,13 @@ static void set_next_task_stop(struct rq *rq, struct task_struct *stop)
static struct task_struct *
pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
- struct task_struct *stop = rq->stop;
-
WARN_ON_ONCE(prev || rf);
- if (!stop || !task_on_rq_queued(stop))
+ if (!sched_stop_runnable(rq))
return NULL;
- set_next_task_stop(rq, stop);
-
- return stop;
+ set_next_task_stop(rq, rq->stop);
+ return rq->stop;
}
static void
@@ -60,7 +63,7 @@ static void yield_task_stop(struct rq *rq)
BUG(); /* the stop task should never yield, its pointless. */
}
-static void put_prev_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
{
struct task_struct *curr = rq->curr;
u64 delta_exec;
@@ -129,6 +132,7 @@ const struct sched_class stop_sched_class = {
.set_next_task = set_next_task_stop,
#ifdef CONFIG_SMP
+ .balance = balance_stop,
.select_task_rq = select_task_rq_stop,
.set_cpus_allowed = set_cpus_allowed_common,
#endif
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index b5667a273bf6..49b835f1305f 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1948,7 +1948,7 @@ next_level:
static int
build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
{
- enum s_alloc alloc_state;
+ enum s_alloc alloc_state = sa_none;
struct sched_domain *sd;
struct s_data d;
struct rq *rq = NULL;
@@ -1956,6 +1956,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
struct sched_domain_topology_level *tl_asym;
bool has_asym = false;
+ if (WARN_ON(cpumask_empty(cpu_map)))
+ goto error;
+
alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
if (alloc_state != sa_rootdomain)
goto error;
@@ -2026,7 +2029,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
rcu_read_unlock();
if (has_asym)
- static_branch_enable_cpuslocked(&sched_asym_cpucapacity);
+ static_branch_inc_cpuslocked(&sched_asym_cpucapacity);
if (rq && sched_debug_enabled) {
pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
@@ -2121,8 +2124,12 @@ int sched_init_domains(const struct cpumask *cpu_map)
*/
static void detach_destroy_domains(const struct cpumask *cpu_map)
{
+ unsigned int cpu = cpumask_any(cpu_map);
int i;
+ if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu)))
+ static_branch_dec_cpuslocked(&sched_asym_cpucapacity);
+
rcu_read_lock();
for_each_cpu(i, cpu_map)
cpu_attach_domain(NULL, &def_root_domain, i);
diff --git a/kernel/signal.c b/kernel/signal.c
index c4da1ef56fdf..bcd46f547db3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2205,8 +2205,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
*/
preempt_disable();
read_unlock(&tasklist_lock);
- preempt_enable_no_resched();
cgroup_enter_frozen();
+ preempt_enable_no_resched();
freezable_schedule();
cgroup_leave_frozen(true);
} else {
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index 6d1f68b7e528..c9ea7eb2cb1a 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -141,7 +141,8 @@ unsigned int stack_trace_save_tsk(struct task_struct *tsk, unsigned long *store,
struct stacktrace_cookie c = {
.store = store,
.size = size,
- .skip = skipnr + 1,
+ /* skip this function if they are tracing us */
+ .skip = skipnr + !!(current == tsk),
};
if (!try_get_task_stack(tsk))
@@ -298,7 +299,8 @@ unsigned int stack_trace_save_tsk(struct task_struct *task,
struct stack_trace trace = {
.entries = store,
.max_entries = size,
- .skip = skipnr + 1,
+ /* skip this function if they are tracing us */
+ .skip = skipnr + !!(current == task),
};
save_stack_trace_tsk(task, &trace);
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
index 4bc37ac3bb05..5ee0f7709410 100644
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -110,8 +110,7 @@ void update_vsyscall(struct timekeeper *tk)
nsec = nsec + tk->wall_to_monotonic.tv_nsec;
vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec);
- if (__arch_use_vsyscall(vdata))
- update_vdso_data(vdata, tk);
+ update_vdso_data(vdata, tk);
__arch_update_vsyscall(vdata, tk);
@@ -124,10 +123,8 @@ void update_vsyscall_tz(void)
{
struct vdso_data *vdata = __arch_get_k_vdso_data();
- if (__arch_use_vsyscall(vdata)) {
- vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest;
- vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime;
- }
+ vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest;
+ vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime;
__arch_sync_vdso_data(vdata);
}