Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup/cgroup-internal.h  |  7
-rw-r--r--  kernel/cgroup/cgroup-v1.c        | 20
-rw-r--r--  kernel/cgroup/cgroup.c           | 40
-rw-r--r--  kernel/cgroup/cpuset.c           | 11
-rw-r--r--  kernel/cgroup/namespace.c        |  2
-rw-r--r--  kernel/irq/affinity.c            |  2
-rw-r--r--  kernel/sched/core.c              |  4
-rw-r--r--  kernel/sched/cpufreq_schedutil.c | 82
-rw-r--r--  kernel/sched/cputime.c           | 27
-rw-r--r--  kernel/sched/sched.h             |  9
-rw-r--r--  kernel/time/tick-sched.c         | 12
-rw-r--r--  kernel/trace/blktrace.c          | 35
-rw-r--r--  kernel/trace/bpf_trace.c         |  2
-rw-r--r--  kernel/trace/ring_buffer.c       | 16
-rw-r--r--  kernel/trace/trace.c             |  8
-rw-r--r--  kernel/workqueue.c               |  5
16 files changed, 177 insertions, 105 deletions
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 9203bfb05603..00f4d6bf048f 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -5,6 +5,7 @@
#include <linux/kernfs.h>
#include <linux/workqueue.h>
#include <linux/list.h>
+#include <linux/refcount.h>
/*
* A cgroup can be associated with multiple css_sets as different tasks may
@@ -134,7 +135,7 @@ static inline void put_css_set(struct css_set *cset)
* can see it. Similar to atomic_dec_and_lock(), but for an
* rwlock
*/
- if (atomic_add_unless(&cset->refcount, -1, 1))
+ if (refcount_dec_not_one(&cset->refcount))
return;
spin_lock_irqsave(&css_set_lock, flags);
@@ -147,7 +148,7 @@ static inline void put_css_set(struct css_set *cset)
*/
static inline void get_css_set(struct css_set *cset)
{
- atomic_inc(&cset->refcount);
+ refcount_inc(&cset->refcount);
}
bool cgroup_ssid_enabled(int ssid);
@@ -163,7 +164,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
void cgroup_free_root(struct cgroup_root *root);
void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts);
-int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask);
+int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags);
int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
struct cgroup_root *root, unsigned long magic,
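Note: the put_css_set() hunk above replaces the open-coded atomic_add_unless(&cset->refcount, -1, 1) with refcount_dec_not_one(). As a rough illustration of that semantic (a minimal userspace C11 analogue, not the kernel refcount_t implementation), the fast path drops a reference only when it cannot be the final one; the final put falls through to the locked slow path:

/*
 * Minimal userspace analogue of "decrement unless this would be the final
 * reference"; refcount_dec_not_one() and the old atomic_add_unless(.., -1, 1)
 * both express this shape. Illustrative only.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool dec_not_one(atomic_uint *ref)
{
	unsigned int old = atomic_load(ref);

	while (old != 1) {
		if (atomic_compare_exchange_weak(ref, &old, old - 1))
			return true;	/* dropped a ref, object still live */
	}
	return false;	/* last reference: caller takes the lock and frees */
}

int main(void)
{
	atomic_uint ref = 3;

	while (dec_not_one(&ref))
		;
	printf("refcount left for the locked path: %u\n", atomic_load(&ref));
	return 0;
}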
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 1dc22f6b49f5..85d75152402d 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -346,7 +346,7 @@ static int cgroup_task_count(const struct cgroup *cgrp)
spin_lock_irq(&css_set_lock);
list_for_each_entry(link, &cgrp->cset_links, cset_link)
- count += atomic_read(&link->cset->refcount);
+ count += refcount_read(&link->cset->refcount);
spin_unlock_irq(&css_set_lock);
return count;
}
@@ -1072,6 +1072,7 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
struct cgroup_subsys *ss;
struct dentry *dentry;
int i, ret;
+ bool new_root = false;
cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
@@ -1181,10 +1182,11 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
ret = -ENOMEM;
goto out_unlock;
}
+ new_root = true;
init_cgroup_root(root, &opts);
- ret = cgroup_setup_root(root, opts.subsys_mask);
+ ret = cgroup_setup_root(root, opts.subsys_mask, PERCPU_REF_INIT_DEAD);
if (ret)
cgroup_free_root(root);
@@ -1201,6 +1203,18 @@ out_free:
CGROUP_SUPER_MAGIC, ns);
/*
+ * There's a race window after we release cgroup_mutex and before
+ * allocating a superblock. Make sure a concurrent process won't
+ * be able to re-use the root during this window by delaying the
+ * initialization of root refcnt.
+ */
+ if (new_root) {
+ mutex_lock(&cgroup_mutex);
+ percpu_ref_reinit(&root->cgrp.self.refcnt);
+ mutex_unlock(&cgroup_mutex);
+ }
+
+ /*
* If @pinned_sb, we're reusing an existing root and holding an
* extra ref on its sb. Mount is complete. Put the extra ref.
*/
@@ -1286,7 +1300,7 @@ static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
u64 count;
rcu_read_lock();
- count = atomic_read(&task_css_set(current)->refcount);
+ count = refcount_read(&task_css_set(current)->refcount);
rcu_read_unlock();
return count;
}
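Note: the cgroup1_mount() hunk above initializes a freshly created root's refcount dead (PERCPU_REF_INIT_DEAD) and only reinitializes it, under cgroup_mutex, once the superblock exists, so concurrent mounts cannot reuse the root in between. A toy, single-threaded model of that "create dead, publish later" shape (illustrative names, not the percpu_ref API):

/* Toy model: the object exists early, but lookups skip it until published. */
#include <stdbool.h>
#include <stdio.h>

struct root {
	bool live;	/* PERCPU_REF_INIT_DEAD analogue: starts false */
	int refs;
};

static bool root_tryget(struct root *r)
{
	if (!r->live)
		return false;	/* concurrent mounts cannot reuse it yet */
	r->refs++;
	return true;
}

int main(void)
{
	struct root r = { .live = false, .refs = 1 };

	printf("reuse before reinit: %d\n", root_tryget(&r));	/* 0 */
	r.live = true;	/* percpu_ref_reinit() analogue, done under the lock */
	printf("reuse after reinit:  %d\n", root_tryget(&r));	/* 1 */
	return 0;
}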
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 687f5e0194ef..c3c9a0e1b3c9 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -189,7 +189,7 @@ static u16 have_canfork_callback __read_mostly;
/* cgroup namespace for init task */
struct cgroup_namespace init_cgroup_ns = {
- .count = { .counter = 2, },
+ .count = REFCOUNT_INIT(2),
.user_ns = &init_user_ns,
.ns.ops = &cgroupns_operations,
.ns.inum = PROC_CGROUP_INIT_INO,
@@ -436,7 +436,12 @@ out_unlock:
return css;
}
-static void cgroup_get(struct cgroup *cgrp)
+static void __maybe_unused cgroup_get(struct cgroup *cgrp)
+{
+ css_get(&cgrp->self);
+}
+
+static void cgroup_get_live(struct cgroup *cgrp)
{
WARN_ON_ONCE(cgroup_is_dead(cgrp));
css_get(&cgrp->self);
@@ -554,7 +559,7 @@ EXPORT_SYMBOL_GPL(of_css);
* haven't been created.
*/
struct css_set init_css_set = {
- .refcount = ATOMIC_INIT(1),
+ .refcount = REFCOUNT_INIT(1),
.tasks = LIST_HEAD_INIT(init_css_set.tasks),
.mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks),
.task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
@@ -724,7 +729,7 @@ void put_css_set_locked(struct css_set *cset)
lockdep_assert_held(&css_set_lock);
- if (!atomic_dec_and_test(&cset->refcount))
+ if (!refcount_dec_and_test(&cset->refcount))
return;
/* This css_set is dead. unlink it and release cgroup and css refs */
@@ -932,7 +937,7 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
list_add_tail(&link->cgrp_link, &cset->cgrp_links);
if (cgroup_parent(cgrp))
- cgroup_get(cgrp);
+ cgroup_get_live(cgrp);
}
/**
@@ -977,7 +982,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
return NULL;
}
- atomic_set(&cset->refcount, 1);
+ refcount_set(&cset->refcount, 1);
INIT_LIST_HEAD(&cset->tasks);
INIT_LIST_HEAD(&cset->mg_tasks);
INIT_LIST_HEAD(&cset->task_iters);
@@ -1640,7 +1645,7 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
}
-int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
+int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
{
LIST_HEAD(tmp_links);
struct cgroup *root_cgrp = &root->cgrp;
@@ -1656,8 +1661,8 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
root_cgrp->id = ret;
root_cgrp->ancestor_ids[0] = ret;
- ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0,
- GFP_KERNEL);
+ ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
+ ref_flags, GFP_KERNEL);
if (ret)
goto out;
@@ -1802,7 +1807,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
return ERR_PTR(-EINVAL);
}
cgrp_dfl_visible = true;
- cgroup_get(&cgrp_dfl_root.cgrp);
+ cgroup_get_live(&cgrp_dfl_root.cgrp);
dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root,
CGROUP2_SUPER_MAGIC, ns);
@@ -2576,7 +2581,7 @@ restart:
if (!css || !percpu_ref_is_dying(&css->refcnt))
continue;
- cgroup_get(dsct);
+ cgroup_get_live(dsct);
prepare_to_wait(&dsct->offline_waitq, &wait,
TASK_UNINTERRUPTIBLE);
@@ -3947,7 +3952,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
{
lockdep_assert_held(&cgroup_mutex);
- cgroup_get(cgrp);
+ cgroup_get_live(cgrp);
memset(css, 0, sizeof(*css));
css->cgroup = cgrp;
@@ -4123,7 +4128,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
/* allocation complete, commit to creation */
list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
atomic_inc(&root->nr_cgrps);
- cgroup_get(parent);
+ cgroup_get_live(parent);
/*
* @cgrp is now fully operational. If something fails after this
@@ -4513,7 +4518,7 @@ int __init cgroup_init(void)
hash_add(css_set_table, &init_css_set.hlist,
css_set_hash(init_css_set.subsys));
- BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
+ BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0));
mutex_unlock(&cgroup_mutex);
@@ -4947,7 +4952,7 @@ struct cgroup *cgroup_get_from_path(const char *path)
if (kn) {
if (kernfs_type(kn) == KERNFS_DIR) {
cgrp = kn->priv;
- cgroup_get(cgrp);
+ cgroup_get_live(cgrp);
} else {
cgrp = ERR_PTR(-ENOTDIR);
}
@@ -5027,6 +5032,11 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
/* Socket clone path */
if (skcd->val) {
+ /*
+ * We might be cloning a socket which is left in an empty
+ * cgroup and the cgroup might have already been rmdir'd.
+ * Don't use cgroup_get_live().
+ */
cgroup_get(sock_cgroup_ptr(skcd));
return;
}
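Note: cgroup_get() is split above into a plain getter, kept for the socket-clone path where the cgroup may already have been rmdir'd, and cgroup_get_live(), which keeps the WARN_ON_ONCE(cgroup_is_dead()) check for callers that must hold a live cgroup. A toy sketch of that split (illustrative names, not the kernel structures):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
	int refs;
	bool dead;	/* set once the object has been "removed" */
};

static void obj_get(struct obj *o)
{
	o->refs++;	/* tolerates an already-dead object */
}

static void obj_get_live(struct obj *o)
{
	assert(!o->dead);	/* caller promises the object is still live */
	o->refs++;
}

int main(void)
{
	struct obj live_cgrp = { .refs = 1, .dead = false };
	struct obj sock_cgrp = { .refs = 1, .dead = true };

	obj_get_live(&live_cgrp);	/* normal callers assert liveness */
	obj_get(&sock_cgrp);		/* socket clone: dead cgroup is fine */
	printf("refs: %d %d\n", live_cgrp.refs, sock_cgrp.refs);
	return 0;
}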
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 0f41292be0fb..f6501f4f6040 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2121,10 +2121,8 @@ int __init cpuset_init(void)
{
int err = 0;
- if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
- BUG();
- if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL))
- BUG();
+ BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
+ BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
cpumask_setall(top_cpuset.cpus_allowed);
nodes_setall(top_cpuset.mems_allowed);
@@ -2139,8 +2137,7 @@ int __init cpuset_init(void)
if (err < 0)
return err;
- if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
- BUG();
+ BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));
return 0;
}
@@ -2354,7 +2351,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
rebuild_sched_domains();
}
-void cpuset_update_active_cpus(bool cpu_online)
+void cpuset_update_active_cpus(void)
{
/*
* We're inside cpu hotplug critical region which usually nests
diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c
index 96d38dab6fb2..66129eb4371d 100644
--- a/kernel/cgroup/namespace.c
+++ b/kernel/cgroup/namespace.c
@@ -31,7 +31,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
kfree(new_ns);
return ERR_PTR(ret);
}
- atomic_set(&new_ns->count, 1);
+ refcount_set(&new_ns->count, 1);
new_ns->ns.ops = &cgroupns_operations;
return new_ns;
}
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index d052947fe785..e2d356dd7581 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -98,7 +98,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
int ncpus, v, vecs_to_assign, vecs_per_node;
/* Spread the vectors per node */
- vecs_per_node = (affv - curvec) / nodes;
+ vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes;
/* Get the cpus on this node which are in the mask */
cpumask_and(nmsk, cpu_online_mask, cpumask_of_node(n));
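Note: in the affinity.c hunk, assuming (as in the surrounding function) that affv counts only the spreadable vectors while curvec starts at affd->pre_vectors, the old divisor term subtracted the pre_vectors offset a second time and under-counted the vectors per node. A throwaway calculation with made-up values, illustrative only:

#include <stdio.h>

int main(void)
{
	int pre_vectors = 2;		/* assumed reserved vectors */
	int affv = 8;			/* spreadable vectors */
	int nodes = 4;
	int curvec = pre_vectors;	/* first index handed out for spreading */

	int old_per_node = (affv - curvec) / nodes;			/* 1 */
	int new_per_node = (affv - (curvec - pre_vectors)) / nodes;	/* 2 */

	printf("per-node vectors: old=%d new=%d\n", old_per_node, new_per_node);
	return 0;
}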
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3b31fc05a0f1..430b0460db89 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5732,7 +5732,7 @@ static void cpuset_cpu_active(void)
* cpuset configurations.
*/
}
- cpuset_update_active_cpus(true);
+ cpuset_update_active_cpus();
}
static int cpuset_cpu_inactive(unsigned int cpu)
@@ -5755,7 +5755,7 @@ static int cpuset_cpu_inactive(unsigned int cpu)
if (overflow)
return -EBUSY;
- cpuset_update_active_cpus(false);
+ cpuset_update_active_cpus();
} else {
num_cpus_frozen++;
partition_sched_domains(1, NULL, NULL);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 54c577578da6..76877a62b5fa 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -61,6 +61,11 @@ struct sugov_cpu {
unsigned long util;
unsigned long max;
unsigned int flags;
+
+ /* The field below is for single-CPU policies only. */
+#ifdef CONFIG_NO_HZ_COMMON
+ unsigned long saved_idle_calls;
+#endif
};
static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
@@ -93,22 +98,23 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
{
struct cpufreq_policy *policy = sg_policy->policy;
+ if (sg_policy->next_freq == next_freq)
+ return;
+
+ if (sg_policy->next_freq > next_freq)
+ next_freq = (sg_policy->next_freq + next_freq) >> 1;
+
+ sg_policy->next_freq = next_freq;
sg_policy->last_freq_update_time = time;
if (policy->fast_switch_enabled) {
- if (sg_policy->next_freq == next_freq) {
- trace_cpu_frequency(policy->cur, smp_processor_id());
- return;
- }
- sg_policy->next_freq = next_freq;
next_freq = cpufreq_driver_fast_switch(policy, next_freq);
if (next_freq == CPUFREQ_ENTRY_INVALID)
return;
policy->cur = next_freq;
trace_cpu_frequency(next_freq, smp_processor_id());
- } else if (sg_policy->next_freq != next_freq) {
- sg_policy->next_freq = next_freq;
+ } else {
sg_policy->work_in_progress = true;
irq_work_queue(&sg_policy->irq_work);
}
@@ -192,6 +198,19 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
sg_cpu->iowait_boost >>= 1;
}
+#ifdef CONFIG_NO_HZ_COMMON
+static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
+{
+ unsigned long idle_calls = tick_nohz_get_idle_calls();
+ bool ret = idle_calls == sg_cpu->saved_idle_calls;
+
+ sg_cpu->saved_idle_calls = idle_calls;
+ return ret;
+}
+#else
+static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
+#endif /* CONFIG_NO_HZ_COMMON */
+
static void sugov_update_single(struct update_util_data *hook, u64 time,
unsigned int flags)
{
@@ -200,6 +219,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
struct cpufreq_policy *policy = sg_policy->policy;
unsigned long util, max;
unsigned int next_f;
+ bool busy;
sugov_set_iowait_boost(sg_cpu, time, flags);
sg_cpu->last_update = time;
@@ -207,40 +227,37 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
if (!sugov_should_update_freq(sg_policy, time))
return;
+ busy = sugov_cpu_is_busy(sg_cpu);
+
if (flags & SCHED_CPUFREQ_RT_DL) {
next_f = policy->cpuinfo.max_freq;
} else {
sugov_get_util(&util, &max);
sugov_iowait_boost(sg_cpu, &util, &max);
next_f = get_next_freq(sg_policy, util, max);
+ /*
+ * Do not reduce the frequency if the CPU has not been idle
+ * recently, as the reduction is likely to be premature then.
+ */
+ if (busy && next_f < sg_policy->next_freq)
+ next_f = sg_policy->next_freq;
}
sugov_update_commit(sg_policy, time, next_f);
}
-static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
- unsigned long util, unsigned long max,
- unsigned int flags)
+static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
{
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
struct cpufreq_policy *policy = sg_policy->policy;
- unsigned int max_f = policy->cpuinfo.max_freq;
u64 last_freq_update_time = sg_policy->last_freq_update_time;
+ unsigned long util = 0, max = 1;
unsigned int j;
- if (flags & SCHED_CPUFREQ_RT_DL)
- return max_f;
-
- sugov_iowait_boost(sg_cpu, &util, &max);
-
for_each_cpu(j, policy->cpus) {
- struct sugov_cpu *j_sg_cpu;
+ struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
unsigned long j_util, j_max;
s64 delta_ns;
- if (j == smp_processor_id())
- continue;
-
- j_sg_cpu = &per_cpu(sugov_cpu, j);
/*
* If the CPU utilization was last updated before the previous
* frequency update and the time elapsed between the last update
@@ -254,7 +271,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
continue;
}
if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
- return max_f;
+ return policy->cpuinfo.max_freq;
j_util = j_sg_cpu->util;
j_max = j_sg_cpu->max;
@@ -289,7 +306,11 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
sg_cpu->last_update = time;
if (sugov_should_update_freq(sg_policy, time)) {
- next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
+ if (flags & SCHED_CPUFREQ_RT_DL)
+ next_f = sg_policy->policy->cpuinfo.max_freq;
+ else
+ next_f = sugov_next_freq_shared(sg_cpu);
+
sugov_update_commit(sg_policy, time, next_f);
}
@@ -473,7 +494,6 @@ static int sugov_init(struct cpufreq_policy *policy)
{
struct sugov_policy *sg_policy;
struct sugov_tunables *tunables;
- unsigned int lat;
int ret = 0;
/* State should be equivalent to EXIT */
@@ -512,10 +532,16 @@ static int sugov_init(struct cpufreq_policy *policy)
goto stop_kthread;
}
- tunables->rate_limit_us = LATENCY_MULTIPLIER;
- lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
- if (lat)
- tunables->rate_limit_us *= lat;
+ if (policy->transition_delay_us) {
+ tunables->rate_limit_us = policy->transition_delay_us;
+ } else {
+ unsigned int lat;
+
+ tunables->rate_limit_us = LATENCY_MULTIPLIER;
+ lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
+ if (lat)
+ tunables->rate_limit_us *= lat;
+ }
policy->governor_data = sg_policy;
sg_policy->tunables = tunables;
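Note: two behaviours are added in the schedutil hunks above: sugov_update_commit() now steps only halfway toward a lower frequency, and sugov_update_single() refuses to reduce the frequency while the per-CPU idle-calls counter from tick_nohz_get_idle_calls() has not changed (the CPU has stayed busy). A standalone sketch that folds both rules into one helper, roughly matching the combined single-CPU behaviour (not the kernel code; frequencies are made up):

#include <stdbool.h>
#include <stdio.h>

static unsigned int pick_next_freq(unsigned int cur_req, unsigned int want,
				   bool busy)
{
	if (want >= cur_req)
		return want;		/* increases apply directly */
	if (busy)
		return cur_req;		/* hold: CPU has not been idle */
	return (cur_req + want) >> 1;	/* step only halfway down */
}

int main(void)
{
	/* hypothetical requests in kHz */
	printf("%u\n", pick_next_freq(2000000, 1000000, false));	/* 1500000 */
	printf("%u\n", pick_next_freq(2000000, 1000000, true));	/* 2000000 */
	printf("%u\n", pick_next_freq(1000000, 2000000, true));	/* 2000000 */
	return 0;
}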
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index f3778e2b46c8..aea3135c5d90 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -34,6 +34,18 @@ void disable_sched_clock_irqtime(void)
sched_clock_irqtime = 0;
}
+static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
+ enum cpu_usage_stat idx)
+{
+ u64 *cpustat = kcpustat_this_cpu->cpustat;
+
+ u64_stats_update_begin(&irqtime->sync);
+ cpustat[idx] += delta;
+ irqtime->total += delta;
+ irqtime->tick_delta += delta;
+ u64_stats_update_end(&irqtime->sync);
+}
+
/*
* Called before incrementing preempt_count on {soft,}irq_enter
* and before decrementing preempt_count on {soft,}irq_exit.
@@ -41,7 +53,6 @@ void disable_sched_clock_irqtime(void)
void irqtime_account_irq(struct task_struct *curr)
{
struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
- u64 *cpustat = kcpustat_this_cpu->cpustat;
s64 delta;
int cpu;
@@ -52,22 +63,16 @@ void irqtime_account_irq(struct task_struct *curr)
delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
irqtime->irq_start_time += delta;
- u64_stats_update_begin(&irqtime->sync);
/*
* We do not account for softirq time from ksoftirqd here.
* We want to continue accounting softirq time to ksoftirqd thread
* in that case, so as not to confuse the scheduler with a special task
* that does not consume any time, but still wants to run.
*/
- if (hardirq_count()) {
- cpustat[CPUTIME_IRQ] += delta;
- irqtime->tick_delta += delta;
- } else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) {
- cpustat[CPUTIME_SOFTIRQ] += delta;
- irqtime->tick_delta += delta;
- }
-
- u64_stats_update_end(&irqtime->sync);
+ if (hardirq_count())
+ irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
+ else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
+ irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 5cbf92214ad8..767aab3505a8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1869,6 +1869,7 @@ static inline void nohz_balance_exit_idle(unsigned int cpu) { }
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime {
+ u64 total;
u64 tick_delta;
u64 irq_start_time;
struct u64_stats_sync sync;
@@ -1876,16 +1877,20 @@ struct irqtime {
DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
+/*
+ * Returns the irqtime minus the softirq time computed by ksoftirqd.
+ * Otherwise ksoftirqd's sum_exec_runtime would get its own runtime
+ * subtracted and never move forward.
+ */
static inline u64 irq_time_read(int cpu)
{
struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
- u64 *cpustat = kcpustat_cpu(cpu).cpustat;
unsigned int seq;
u64 total;
do {
seq = __u64_stats_fetch_begin(&irqtime->sync);
- total = cpustat[CPUTIME_SOFTIRQ] + cpustat[CPUTIME_IRQ];
+ total = irqtime->total;
} while (__u64_stats_fetch_retry(&irqtime->sync, seq));
return total;
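Note: irq_time_read() keeps the u64_stats retry loop but now returns the accumulated irqtime->total instead of summing the cpustat buckets. A loose userspace analogue of that seqcount-style read (glossing over the exact memory-model guarantees of the kernel primitives; not the kernel API):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct sample {
	atomic_uint seq;	/* writers make this odd while updating */
	uint64_t total;
};

static uint64_t read_total(struct sample *s)
{
	unsigned int seq;
	uint64_t total;

	do {
		seq = atomic_load_explicit(&s->seq, memory_order_acquire);
		total = s->total;
		atomic_thread_fence(memory_order_acquire);	/* rmb analogue */
	} while ((seq & 1) ||
		 atomic_load_explicit(&s->seq, memory_order_relaxed) != seq);

	return total;
}

int main(void)
{
	struct sample s = { .seq = 0, .total = 123456 };

	printf("%llu\n", (unsigned long long)read_total(&s));
	return 0;
}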
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7fe53be86077..64c97fc130c4 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -993,6 +993,18 @@ ktime_t tick_nohz_get_sleep_length(void)
return ts->sleep_length;
}
+/**
+ * tick_nohz_get_idle_calls - return the current idle calls counter value
+ *
+ * Called from the schedutil frequency scaling governor in scheduler context.
+ */
+unsigned long tick_nohz_get_idle_calls(void)
+{
+ struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+ return ts->idle_calls;
+}
+
static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
{
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b2058a7f94bd..bd8ae8d5ae9c 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -690,8 +690,8 @@ void blk_trace_shutdown(struct request_queue *q)
/**
* blk_add_trace_rq - Add a trace for a request oriented action
- * @q: queue the io is for
* @rq: the source request
+ * @error: return status to log
* @nr_bytes: number of completed bytes
* @what: the action
*
@@ -699,10 +699,10 @@ void blk_trace_shutdown(struct request_queue *q)
* Records an action against a request. Will log the bio offset + size.
*
**/
-static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
+static void blk_add_trace_rq(struct request *rq, int error,
unsigned int nr_bytes, u32 what)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt = rq->q->blk_trace;
if (likely(!bt))
return;
@@ -713,40 +713,32 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
what |= BLK_TC_ACT(BLK_TC_FS);
__blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
- rq->cmd_flags, what, rq->errors, 0, NULL);
-}
-
-static void blk_add_trace_rq_abort(void *ignore,
- struct request_queue *q, struct request *rq)
-{
- blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ABORT);
+ rq->cmd_flags, what, error, 0, NULL);
}
static void blk_add_trace_rq_insert(void *ignore,
struct request_queue *q, struct request *rq)
{
- blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_INSERT);
+ blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT);
}
static void blk_add_trace_rq_issue(void *ignore,
struct request_queue *q, struct request *rq)
{
- blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ISSUE);
+ blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE);
}
static void blk_add_trace_rq_requeue(void *ignore,
struct request_queue *q,
struct request *rq)
{
- blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_REQUEUE);
+ blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE);
}
-static void blk_add_trace_rq_complete(void *ignore,
- struct request_queue *q,
- struct request *rq,
- unsigned int nr_bytes)
+static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
+ int error, unsigned int nr_bytes)
{
- blk_add_trace_rq(q, rq, nr_bytes, BLK_TA_COMPLETE);
+ blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE);
}
/**
@@ -941,7 +933,7 @@ static void blk_add_trace_rq_remap(void *ignore,
r.sector_from = cpu_to_be64(from);
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
- rq_data_dir(rq), 0, BLK_TA_REMAP, !!rq->errors,
+ rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
sizeof(r), &r);
}
@@ -966,7 +958,7 @@ void blk_add_driver_data(struct request_queue *q,
return;
__blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
- BLK_TA_DRV_DATA, rq->errors, len, data);
+ BLK_TA_DRV_DATA, 0, len, data);
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);
@@ -974,8 +966,6 @@ static void blk_register_tracepoints(void)
{
int ret;
- ret = register_trace_block_rq_abort(blk_add_trace_rq_abort, NULL);
- WARN_ON(ret);
ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
WARN_ON(ret);
ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
@@ -1028,7 +1018,6 @@ static void blk_unregister_tracepoints(void)
unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
- unregister_trace_block_rq_abort(blk_add_trace_rq_abort, NULL);
tracepoint_synchronize_unregister();
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index cee9802cf3e0..f806dbd66de9 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -96,7 +96,7 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
if (unlikely(in_interrupt() ||
current->flags & (PF_KTHREAD | PF_EXITING)))
return -EPERM;
- if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
+ if (unlikely(uaccess_kernel()))
return -EPERM;
if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
return -EPERM;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 54e7a90db848..ca47a4fa2986 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3405,11 +3405,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
{
struct ring_buffer_per_cpu *cpu_buffer;
+ struct buffer_page *reader;
+ struct buffer_page *head_page;
+ struct buffer_page *commit_page;
+ unsigned commit;
cpu_buffer = iter->cpu_buffer;
- return iter->head_page == cpu_buffer->commit_page &&
- iter->head == rb_commit_index(cpu_buffer);
+ /* Remember, trace recording is off when iterator is in use */
+ reader = cpu_buffer->reader_page;
+ head_page = cpu_buffer->head_page;
+ commit_page = cpu_buffer->commit_page;
+ commit = rb_page_commit(commit_page);
+
+ return ((iter->head_page == commit_page && iter->head == commit) ||
+ (iter->head_page == reader && commit_page == head_page &&
+ head_page->read == commit &&
+ iter->head == rb_page_commit(cpu_buffer->reader_page)));
}
EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d484452ae648..0ad75e9698f6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6733,11 +6733,13 @@ ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
return ret;
out_reg:
- ret = register_ftrace_function_probe(glob, ops, count);
+ ret = alloc_snapshot(&global_trace);
+ if (ret < 0)
+ goto out;
- if (ret >= 0)
- alloc_snapshot(&global_trace);
+ ret = register_ftrace_function_probe(glob, ops, count);
+ out:
return ret < 0 ? ret : 0;
}
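Note: the snapshot callback is reordered above so the snapshot buffer is allocated before the ftrace probe is registered, closing the window where a probe could fire with no buffer behind it. The generic shape, with purely illustrative names (not the tracing API):

#include <stdio.h>

static int alloc_buffer(void)	{ return 0; }	/* pretend allocation succeeds */
static int register_probe(void)	{ return 0; }	/* probe may fire immediately */

static int snapshot_setup(void)
{
	int ret;

	ret = alloc_buffer();		/* resource first ... */
	if (ret < 0)
		return ret;

	ret = register_probe();		/* ... then publish the callback */
	return ret < 0 ? ret : 0;
}

int main(void)
{
	printf("setup: %d\n", snapshot_setup());
	return 0;
}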
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c0168b7da1ea..bbf46da28e9a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3209,9 +3209,8 @@ static int init_worker_pool(struct worker_pool *pool)
INIT_LIST_HEAD(&pool->idle_list);
hash_init(pool->busy_hash);
- init_timer_deferrable(&pool->idle_timer);
- pool->idle_timer.function = idle_worker_timeout;
- pool->idle_timer.data = (unsigned long)pool;
+ setup_deferrable_timer(&pool->idle_timer, idle_worker_timeout,
+ (unsigned long)pool);
setup_timer(&pool->mayday_timer, pool_mayday_timeout,
(unsigned long)pool);