summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorMark Brown <broonie@kernel.org>2019-04-02 16:20:47 +0700
committerMark Brown <broonie@kernel.org>2019-04-02 16:20:47 +0700
commit6d5e2bf9d203e4d9e08ca2e9420c6ff22ad190af (patch)
treec2c20648773d3326f942e34ef73b06566f7a19ec /kernel
parentb820d52e7eed7b30b2dfef5f4213a2bc3cbea6f3 (diff)
parent257f9053c0204ea47491aa236004fd1226f75fa8 (diff)
Merge branch 'acpi-utils' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm into asoc-5.2
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/syscall.c22
-rw-r--r--kernel/bpf/verifier.c154
-rw-r--r--kernel/cpu.c20
-rw-r--r--kernel/events/core.c2
-rw-r--r--kernel/futex.c4
-rw-r--r--kernel/irq/devres.c2
-rw-r--r--kernel/irq/manage.c1
-rw-r--r--kernel/ptrace.c15
-rw-r--r--kernel/sched/core.c2
-rw-r--r--kernel/sched/cpufreq_schedutil.c59
-rw-r--r--kernel/sched/fair.c84
-rw-r--r--kernel/time/jiffies.c2
-rw-r--r--kernel/trace/ftrace.c12
-rw-r--r--kernel/trace/trace_dynevent.c2
-rw-r--r--kernel/trace/trace_events_hist.c1
-rw-r--r--kernel/watchdog.c10
-rw-r--r--kernel/workqueue.c5
17 files changed, 250 insertions, 147 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 62f6bced3a3c..afca36f53c49 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -136,21 +136,29 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
void *bpf_map_area_alloc(size_t size, int numa_node)
{
- /* We definitely need __GFP_NORETRY, so OOM killer doesn't
- * trigger under memory pressure as we really just want to
- * fail instead.
+ /* We really just want to fail instead of triggering OOM killer
+ * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
+ * which is used for lower order allocation requests.
+ *
+ * It has been observed that higher order allocation requests done by
+ * vmalloc with __GFP_NORETRY being set might fail due to not trying
+ * to reclaim memory from the page cache, thus we set
+ * __GFP_RETRY_MAYFAIL to avoid such situations.
*/
- const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
+
+ const gfp_t flags = __GFP_NOWARN | __GFP_ZERO;
void *area;
if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
- area = kmalloc_node(size, GFP_USER | flags, numa_node);
+ area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags,
+ numa_node);
if (area != NULL)
return area;
}
- return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
- __builtin_return_address(0));
+ return __vmalloc_node_flags_caller(size, numa_node,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL |
+ flags, __builtin_return_address(0));
}
void bpf_map_area_free(void *area)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ce166a002d16..fd502c1f71eb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -212,7 +212,7 @@ struct bpf_call_arg_meta {
int access_size;
s64 msize_smax_value;
u64 msize_umax_value;
- int ptr_id;
+ int ref_obj_id;
int func_id;
};
@@ -346,35 +346,23 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
type == PTR_TO_TCP_SOCK_OR_NULL;
}
-static bool type_is_refcounted(enum bpf_reg_type type)
-{
- return type == PTR_TO_SOCKET;
-}
-
-static bool type_is_refcounted_or_null(enum bpf_reg_type type)
-{
- return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
-}
-
-static bool reg_is_refcounted(const struct bpf_reg_state *reg)
-{
- return type_is_refcounted(reg->type);
-}
-
static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{
return reg->type == PTR_TO_MAP_VALUE &&
map_value_has_spin_lock(reg->map_ptr);
}
-static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
+static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
{
- return type_is_refcounted_or_null(reg->type);
+ return type == PTR_TO_SOCKET ||
+ type == PTR_TO_SOCKET_OR_NULL ||
+ type == PTR_TO_TCP_SOCK ||
+ type == PTR_TO_TCP_SOCK_OR_NULL;
}
-static bool arg_type_is_refcounted(enum bpf_arg_type type)
+static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
{
- return type == ARG_PTR_TO_SOCKET;
+ return type == ARG_PTR_TO_SOCK_COMMON;
}
/* Determine whether the function releases some resources allocated by another
@@ -392,6 +380,12 @@ static bool is_acquire_function(enum bpf_func_id func_id)
func_id == BPF_FUNC_sk_lookup_udp;
}
+static bool is_ptr_cast_function(enum bpf_func_id func_id)
+{
+ return func_id == BPF_FUNC_tcp_sock ||
+ func_id == BPF_FUNC_sk_fullsock;
+}
+
/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
[NOT_INIT] = "?",
@@ -466,6 +460,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
verbose(env, ",call_%d", func(env, reg)->callsite);
} else {
verbose(env, "(id=%d", reg->id);
+ if (reg_type_may_be_refcounted_or_null(t))
+ verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
if (t != SCALAR_VALUE)
verbose(env, ",off=%d", reg->off);
if (type_is_pkt_pointer(t))
@@ -2414,16 +2410,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
if (!type_is_sk_pointer(type))
goto err_type;
- } else if (arg_type == ARG_PTR_TO_SOCKET) {
- expected_type = PTR_TO_SOCKET;
- if (type != expected_type)
- goto err_type;
- if (meta->ptr_id || !reg->id) {
- verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n",
- meta->ptr_id, reg->id);
- return -EFAULT;
+ if (reg->ref_obj_id) {
+ if (meta->ref_obj_id) {
+ verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+ regno, reg->ref_obj_id,
+ meta->ref_obj_id);
+ return -EFAULT;
+ }
+ meta->ref_obj_id = reg->ref_obj_id;
}
- meta->ptr_id = reg->id;
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
if (meta->func_id == BPF_FUNC_spin_lock) {
if (process_spin_lock(env, regno, true))
@@ -2740,32 +2735,38 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
return true;
}
-static bool check_refcount_ok(const struct bpf_func_proto *fn)
+static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
{
int count = 0;
- if (arg_type_is_refcounted(fn->arg1_type))
+ if (arg_type_may_be_refcounted(fn->arg1_type))
count++;
- if (arg_type_is_refcounted(fn->arg2_type))
+ if (arg_type_may_be_refcounted(fn->arg2_type))
count++;
- if (arg_type_is_refcounted(fn->arg3_type))
+ if (arg_type_may_be_refcounted(fn->arg3_type))
count++;
- if (arg_type_is_refcounted(fn->arg4_type))
+ if (arg_type_may_be_refcounted(fn->arg4_type))
count++;
- if (arg_type_is_refcounted(fn->arg5_type))
+ if (arg_type_may_be_refcounted(fn->arg5_type))
count++;
+ /* A reference acquiring function cannot acquire
+ * another refcounted ptr.
+ */
+ if (is_acquire_function(func_id) && count)
+ return false;
+
/* We only support one arg being unreferenced at the moment,
* which is sufficient for the helper functions we have right now.
*/
return count <= 1;
}
-static int check_func_proto(const struct bpf_func_proto *fn)
+static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
{
return check_raw_mode_ok(fn) &&
check_arg_pair_ok(fn) &&
- check_refcount_ok(fn) ? 0 : -EINVAL;
+ check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
}
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
@@ -2799,19 +2800,20 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
}
static void release_reg_references(struct bpf_verifier_env *env,
- struct bpf_func_state *state, int id)
+ struct bpf_func_state *state,
+ int ref_obj_id)
{
struct bpf_reg_state *regs = state->regs, *reg;
int i;
for (i = 0; i < MAX_BPF_REG; i++)
- if (regs[i].id == id)
+ if (regs[i].ref_obj_id == ref_obj_id)
mark_reg_unknown(env, regs, i);
bpf_for_each_spilled_reg(i, state, reg) {
if (!reg)
continue;
- if (reg_is_refcounted(reg) && reg->id == id)
+ if (reg->ref_obj_id == ref_obj_id)
__mark_reg_unknown(reg);
}
}
@@ -2820,15 +2822,20 @@ static void release_reg_references(struct bpf_verifier_env *env,
* resources. Identify all copies of the same pointer and clear the reference.
*/
static int release_reference(struct bpf_verifier_env *env,
- struct bpf_call_arg_meta *meta)
+ int ref_obj_id)
{
struct bpf_verifier_state *vstate = env->cur_state;
+ int err;
int i;
+ err = release_reference_state(cur_func(env), ref_obj_id);
+ if (err)
+ return err;
+
for (i = 0; i <= vstate->curframe; i++)
- release_reg_references(env, vstate->frame[i], meta->ptr_id);
+ release_reg_references(env, vstate->frame[i], ref_obj_id);
- return release_reference_state(cur_func(env), meta->ptr_id);
+ return 0;
}
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -3047,7 +3054,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
memset(&meta, 0, sizeof(meta));
meta.pkt_access = fn->pkt_access;
- err = check_func_proto(fn);
+ err = check_func_proto(fn, func_id);
if (err) {
verbose(env, "kernel subsystem misconfigured func %s#%d\n",
func_id_name(func_id), func_id);
@@ -3093,7 +3100,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return err;
}
} else if (is_release_function(func_id)) {
- err = release_reference(env, &meta);
+ err = release_reference(env, meta.ref_obj_id);
if (err) {
verbose(env, "func %s#%d reference has not been acquired before\n",
func_id_name(func_id), func_id);
@@ -3154,8 +3161,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
if (id < 0)
return id;
- /* For release_reference() */
+ /* For mark_ptr_or_null_reg() */
regs[BPF_REG_0].id = id;
+ /* For release_reference() */
+ regs[BPF_REG_0].ref_obj_id = id;
} else {
/* For mark_ptr_or_null_reg() */
regs[BPF_REG_0].id = ++env->id_gen;
@@ -3170,6 +3179,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return -EINVAL;
}
+ if (is_ptr_cast_function(func_id))
+ /* For release_reference() */
+ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
+
do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
err = check_map_func_compatibility(env, meta.map_ptr, func_id);
@@ -3368,7 +3381,7 @@ do_sim:
*dst_reg = *ptr_reg;
}
ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
- if (!ptr_is_dst_reg)
+ if (!ptr_is_dst_reg && ret)
*dst_reg = tmp;
return !ret ? -EFAULT : 0;
}
@@ -4665,11 +4678,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
reg->type = PTR_TO_TCP_SOCK;
}
- if (is_null || !(reg_is_refcounted(reg) ||
- reg_may_point_to_spin_lock(reg))) {
- /* We don't need id from this point onwards anymore,
- * thus we should better reset it, so that state
- * pruning has chances to take effect.
+ if (is_null) {
+ /* We don't need id and ref_obj_id from this point
+ * onwards anymore, thus we should better reset it,
+ * so that state pruning has chances to take effect.
+ */
+ reg->id = 0;
+ reg->ref_obj_id = 0;
+ } else if (!reg_may_point_to_spin_lock(reg)) {
+ /* For not-NULL ptr, reg->ref_obj_id will be reset
+ * in release_reg_references().
+ *
+ * reg->id is still used by spin_lock ptr. Other
+ * than spin_lock ptr type, reg->id can be reset.
*/
reg->id = 0;
}
@@ -4684,11 +4705,16 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
{
struct bpf_func_state *state = vstate->frame[vstate->curframe];
struct bpf_reg_state *reg, *regs = state->regs;
+ u32 ref_obj_id = regs[regno].ref_obj_id;
u32 id = regs[regno].id;
int i, j;
- if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
- release_reference_state(state, id);
+ if (ref_obj_id && ref_obj_id == id && is_null)
+ /* regs[regno] is in the " == NULL" branch.
+ * No one could have freed the reference state before
+ * doing the NULL check.
+ */
+ WARN_ON_ONCE(release_reference_state(state, id));
for (i = 0; i < MAX_BPF_REG; i++)
mark_ptr_or_null_reg(state, &regs[i], id, is_null);
@@ -6052,15 +6078,17 @@ static int propagate_liveness(struct bpf_verifier_env *env,
}
/* Propagate read liveness of registers... */
BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
- /* We don't need to worry about FP liveness because it's read-only */
- for (i = 0; i < BPF_REG_FP; i++) {
- if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
- continue;
- if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
- err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i],
- &vparent->frame[vstate->curframe]->regs[i]);
- if (err)
- return err;
+ for (frame = 0; frame <= vstate->curframe; frame++) {
+ /* We don't need to worry about FP liveness, it's read-only */
+ for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
+ if (vparent->frame[frame]->regs[i].live & REG_LIVE_READ)
+ continue;
+ if (vstate->frame[frame]->regs[i].live & REG_LIVE_READ) {
+ err = mark_reg_read(env, &vstate->frame[frame]->regs[i],
+ &vparent->frame[frame]->regs[i]);
+ if (err)
+ return err;
+ }
}
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 025f419d16f6..6754f3ecfd94 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -564,6 +564,20 @@ static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
}
+static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
+{
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
+ return true;
+ /*
+ * When CPU hotplug is disabled, then taking the CPU down is not
+ * possible because takedown_cpu() and the architecture and
+ * subsystem specific mechanisms are not available. So the CPU
+ * which would be completely unplugged again needs to stay around
+ * in the current state.
+ */
+ return st->state <= CPUHP_BRINGUP_CPU;
+}
+
static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state target)
{
@@ -574,8 +588,10 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
st->state++;
ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
if (ret) {
- st->target = prev_state;
- undo_cpu_up(cpu, st);
+ if (can_rollback_cpu(st)) {
+ st->target = prev_state;
+ undo_cpu_up(cpu, st);
+ }
break;
}
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1032a16bd186..72d06e302e99 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7189,6 +7189,7 @@ static void perf_event_mmap_output(struct perf_event *event,
struct perf_output_handle handle;
struct perf_sample_data sample;
int size = mmap_event->event_id.header.size;
+ u32 type = mmap_event->event_id.header.type;
int ret;
if (!perf_event_mmap_match(event, data))
@@ -7232,6 +7233,7 @@ static void perf_event_mmap_output(struct perf_event *event,
perf_output_end(&handle);
out:
mmap_event->event_id.header.size = size;
+ mmap_event->event_id.header.type = type;
}
static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
diff --git a/kernel/futex.c b/kernel/futex.c
index c3b73b0311bc..9e40cf7be606 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3436,6 +3436,10 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int p
{
u32 uval, uninitialized_var(nval), mval;
+ /* Futex address must be 32bit aligned */
+ if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
+ return -1;
+
retry:
if (get_user(uval, uaddr))
return -1;
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 5d5378ea0afe..f808c6a97dcc 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -84,8 +84,6 @@ EXPORT_SYMBOL(devm_request_threaded_irq);
* @dev: device to request interrupt for
* @irq: Interrupt line to allocate
* @handler: Function to be called when the IRQ occurs
- * @thread_fn: function to be called in a threaded interrupt context. NULL
- * for devices which handle everything in @handler
* @irqflags: Interrupt type flags
* @devname: An ascii name for the claiming device, dev_name(dev) if NULL
* @dev_id: A cookie passed back to the handler function
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 9ec34a2a6638..1401afa0d58a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -196,6 +196,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
case IRQ_SET_MASK_OK:
case IRQ_SET_MASK_OK_DONE:
cpumask_copy(desc->irq_common_data.affinity, mask);
+ /* fall through */
case IRQ_SET_MASK_OK_NOCOPY:
irq_validate_effective_affinity(data);
irq_set_thread_affinity(desc);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 771e93f9c43f..6f357f4fc859 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -29,6 +29,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/cn_proc.h>
#include <linux/compat.h>
+#include <linux/sched/signal.h>
/*
* Access another process' address space via ptrace.
@@ -924,18 +925,26 @@ int ptrace_request(struct task_struct *child, long request,
ret = ptrace_setsiginfo(child, &siginfo);
break;
- case PTRACE_GETSIGMASK:
+ case PTRACE_GETSIGMASK: {
+ sigset_t *mask;
+
if (addr != sizeof(sigset_t)) {
ret = -EINVAL;
break;
}
- if (copy_to_user(datavp, &child->blocked, sizeof(sigset_t)))
+ if (test_tsk_restore_sigmask(child))
+ mask = &child->saved_sigmask;
+ else
+ mask = &child->blocked;
+
+ if (copy_to_user(datavp, mask, sizeof(sigset_t)))
ret = -EFAULT;
else
ret = 0;
break;
+ }
case PTRACE_SETSIGMASK: {
sigset_t new_set;
@@ -961,6 +970,8 @@ int ptrace_request(struct task_struct *child, long request,
child->blocked = new_set;
spin_unlock_irq(&child->sighand->siglock);
+ clear_tsk_restore_sigmask(child);
+
ret = 0;
break;
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ead464a0f2e5..4778c48a7fda 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6998,7 +6998,7 @@ static int __maybe_unused cpu_period_quota_parse(char *buf,
{
char tok[21]; /* U64_MAX */
- if (!sscanf(buf, "%s %llu", tok, periodp))
+ if (sscanf(buf, "%20s %llu", tok, periodp) < 1)
return -EINVAL;
*periodp *= NSEC_PER_USEC;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 2efe629425be..5c41ea367422 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -48,10 +48,10 @@ struct sugov_cpu {
bool iowait_boost_pending;
unsigned int iowait_boost;
- unsigned int iowait_boost_max;
u64 last_update;
unsigned long bw_dl;
+ unsigned long min;
unsigned long max;
/* The field below is for single-CPU policies only: */
@@ -303,8 +303,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
if (delta_ns <= TICK_NSEC)
return false;
- sg_cpu->iowait_boost = set_iowait_boost
- ? sg_cpu->sg_policy->policy->min : 0;
+ sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0;
sg_cpu->iowait_boost_pending = set_iowait_boost;
return true;
@@ -344,14 +343,13 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
/* Double the boost at each request */
if (sg_cpu->iowait_boost) {
- sg_cpu->iowait_boost <<= 1;
- if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
- sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+ sg_cpu->iowait_boost =
+ min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
return;
}
/* First wakeup after IO: start with minimum boost */
- sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+ sg_cpu->iowait_boost = sg_cpu->min;
}
/**
@@ -373,47 +371,38 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
* This mechanism is designed to boost high frequently IO waiting tasks, while
* being more conservative on tasks which does sporadic IO operations.
*/
-static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
- unsigned long *util, unsigned long *max)
+static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
+ unsigned long util, unsigned long max)
{
- unsigned int boost_util, boost_max;
+ unsigned long boost;
/* No boost currently required */
if (!sg_cpu->iowait_boost)
- return;
+ return util;
/* Reset boost if the CPU appears to have been idle enough */
if (sugov_iowait_reset(sg_cpu, time, false))
- return;
+ return util;
- /*
- * An IO waiting task has just woken up:
- * allow to further double the boost value
- */
- if (sg_cpu->iowait_boost_pending) {
- sg_cpu->iowait_boost_pending = false;
- } else {
+ if (!sg_cpu->iowait_boost_pending) {
/*
- * Otherwise: reduce the boost value and disable it when we
- * reach the minimum.
+ * No boost pending; reduce the boost value.
*/
sg_cpu->iowait_boost >>= 1;
- if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+ if (sg_cpu->iowait_boost < sg_cpu->min) {
sg_cpu->iowait_boost = 0;
- return;
+ return util;
}
}
+ sg_cpu->iowait_boost_pending = false;
+
/*
- * Apply the current boost value: a CPU is boosted only if its current
- * utilization is smaller then the current IO boost level.
+ * @util is already in capacity scale; convert iowait_boost
+ * into the same scale so we can compare.
*/
- boost_util = sg_cpu->iowait_boost;
- boost_max = sg_cpu->iowait_boost_max;
- if (*util * boost_max < *max * boost_util) {
- *util = boost_util;
- *max = boost_max;
- }
+ boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
+ return max(boost, util);
}
#ifdef CONFIG_NO_HZ_COMMON
@@ -460,7 +449,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
util = sugov_get_util(sg_cpu);
max = sg_cpu->max;
- sugov_iowait_apply(sg_cpu, time, &util, &max);
+ util = sugov_iowait_apply(sg_cpu, time, util, max);
next_f = get_next_freq(sg_policy, util, max);
/*
* Do not reduce the frequency if the CPU has not been idle
@@ -500,7 +489,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
j_util = sugov_get_util(j_sg_cpu);
j_max = j_sg_cpu->max;
- sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);
+ j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
if (j_util * max > j_max * util) {
util = j_util;
@@ -837,7 +826,9 @@ static int sugov_start(struct cpufreq_policy *policy)
memset(sg_cpu, 0, sizeof(*sg_cpu));
sg_cpu->cpu = cpu;
sg_cpu->sg_policy = sg_policy;
- sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+ sg_cpu->min =
+ (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
+ policy->cpuinfo.max_freq;
}
for_each_cpu(cpu, policy->cpus) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ea74d43924b2..fdab7eb6f351 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8060,6 +8060,18 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
}
/*
+ * Check whether a rq has a misfit task and if it looks like we can actually
+ * help that task: we can migrate the task to a CPU of higher capacity, or
+ * the task's current CPU is heavily pressured.
+ */
+static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
+{
+ return rq->misfit_task_load &&
+ (rq->cpu_capacity_orig < rq->rd->max_cpu_capacity ||
+ check_cpu_capacity(rq, sd));
+}
+
+/*
* Group imbalance indicates (and tries to solve) the problem where balancing
* groups is inadequate due to ->cpus_allowed constraints.
*
@@ -9586,35 +9598,21 @@ static void nohz_balancer_kick(struct rq *rq)
if (time_before(now, nohz.next_balance))
goto out;
- if (rq->nr_running >= 2 || rq->misfit_task_load) {
+ if (rq->nr_running >= 2) {
flags = NOHZ_KICK_MASK;
goto out;
}
rcu_read_lock();
- sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
- if (sds) {
- /*
- * If there is an imbalance between LLC domains (IOW we could
- * increase the overall cache use), we need some less-loaded LLC
- * domain to pull some load. Likewise, we may need to spread
- * load within the current LLC domain (e.g. packed SMT cores but
- * other CPUs are idle). We can't really know from here how busy
- * the others are - so just get a nohz balance going if it looks
- * like this LLC domain has tasks we could move.
- */
- nr_busy = atomic_read(&sds->nr_busy_cpus);
- if (nr_busy > 1) {
- flags = NOHZ_KICK_MASK;
- goto unlock;
- }
-
- }
sd = rcu_dereference(rq->sd);
if (sd) {
- if ((rq->cfs.h_nr_running >= 1) &&
- check_cpu_capacity(rq, sd)) {
+ /*
+ * If there's a CFS task and the current CPU has reduced
+ * capacity; kick the ILB to see if there's a better CPU to run
+ * on.
+ */
+ if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) {
flags = NOHZ_KICK_MASK;
goto unlock;
}
@@ -9622,6 +9620,11 @@ static void nohz_balancer_kick(struct rq *rq)
sd = rcu_dereference(per_cpu(sd_asym_packing, cpu));
if (sd) {
+ /*
+ * When ASYM_PACKING; see if there's a more preferred CPU
+ * currently idle; in which case, kick the ILB to move tasks
+ * around.
+ */
for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
if (sched_asym_prefer(i, cpu)) {
flags = NOHZ_KICK_MASK;
@@ -9629,6 +9632,45 @@ static void nohz_balancer_kick(struct rq *rq)
}
}
}
+
+ sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, cpu));
+ if (sd) {
+ /*
+ * When ASYM_CPUCAPACITY; see if there's a higher capacity CPU
+ * to run the misfit task on.
+ */
+ if (check_misfit_status(rq, sd)) {
+ flags = NOHZ_KICK_MASK;
+ goto unlock;
+ }
+
+ /*
+ * For asymmetric systems, we do not want to nicely balance
+ * cache use, instead we want to embrace asymmetry and only
+ * ensure tasks have enough CPU capacity.
+ *
+ * Skip the LLC logic because it's not relevant in that case.
+ */
+ goto unlock;
+ }
+
+ sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+ if (sds) {
+ /*
+ * If there is an imbalance between LLC domains (IOW we could
+ * increase the overall cache use), we need some less-loaded LLC
+ * domain to pull some load. Likewise, we may need to spread
+ * load within the current LLC domain (e.g. packed SMT cores but
+ * other CPUs are idle). We can't really know from here how busy
+ * the others are - so just get a nohz balance going if it looks
+ * like this LLC domain has tasks we could move.
+ */
+ nr_busy = atomic_read(&sds->nr_busy_cpus);
+ if (nr_busy > 1) {
+ flags = NOHZ_KICK_MASK;
+ goto unlock;
+ }
+ }
unlock:
rcu_read_unlock();
out:
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index dc1b6f1929f9..ac9c03dd6c7d 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -89,7 +89,7 @@ struct clocksource * __init __weak clocksource_default_clock(void)
return &clocksource_jiffies;
}
-struct clocksource refined_jiffies;
+static struct clocksource refined_jiffies;
int register_refined_jiffies(long cycles_per_second)
{
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fa79323331b2..26c8ca9bd06b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1992,7 +1992,7 @@ static void print_bug_type(void)
* modifying the code. @failed should be one of either:
* EFAULT - if the problem happens on reading the @ip address
* EINVAL - if what is read at @ip is not what was expected
- * EPERM - if the problem happens on writting to the @ip address
+ * EPERM - if the problem happens on writing to the @ip address
*/
void ftrace_bug(int failed, struct dyn_ftrace *rec)
{
@@ -2391,7 +2391,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
}
- return -1; /* unknow ftrace bug */
+ return -1; /* unknown ftrace bug */
}
void __weak ftrace_replace_code(int mod_flags)
@@ -3004,7 +3004,7 @@ ftrace_allocate_pages(unsigned long num_to_init)
int cnt;
if (!num_to_init)
- return 0;
+ return NULL;
start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL);
if (!pg)
@@ -4755,7 +4755,7 @@ static int
ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove,
int reset, int enable)
{
- return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable);
+ return ftrace_set_hash(ops, NULL, 0, ip, remove, reset, enable);
}
/**
@@ -5463,7 +5463,7 @@ void ftrace_create_filter_files(struct ftrace_ops *ops,
/*
* The name "destroy_filter_files" is really a misnomer. Although
- * in the future, it may actualy delete the files, but this is
+ * in the future, it may actually delete the files, but this is
* really intended to make sure the ops passed in are disabled
* and that when this function returns, the caller is free to
* free the ops.
@@ -5786,7 +5786,7 @@ void ftrace_module_enable(struct module *mod)
/*
* If the tracing is enabled, go ahead and enable the record.
*
- * The reason not to enable the record immediatelly is the
+ * The reason not to enable the record immediately is the
* inherent check of ftrace_make_nop/ftrace_make_call for
* correct previous instructions. Making first the NOP
* conversion puts the module to the correct state, thus
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index dd1f43588d70..fa100ed3b4de 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -74,7 +74,7 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type)
static int create_dyn_event(int argc, char **argv)
{
struct dyn_event_operations *ops;
- int ret;
+ int ret = -ENODEV;
if (argv[0][0] == '-' || argv[0][0] == '!')
return dyn_event_release(argc, argv, NULL);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index ca46339f3009..795aa2038377 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -3713,7 +3713,6 @@ static void track_data_destroy(struct hist_trigger_data *hist_data,
struct trace_event_file *file = hist_data->event_file;
destroy_hist_field(data->track_data.track_var, 0);
- destroy_hist_field(data->track_data.var_ref, 0);
if (data->action == ACTION_SNAPSHOT) {
struct track_data *track_data;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 8fbfda94a67b..6a5787233113 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -42,9 +42,9 @@ int __read_mostly watchdog_user_enabled = 1;
int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT;
int __read_mostly soft_watchdog_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
-int __read_mostly nmi_watchdog_available;
+static int __read_mostly nmi_watchdog_available;
-struct cpumask watchdog_allowed_mask __read_mostly;
+static struct cpumask watchdog_allowed_mask __read_mostly;
struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
@@ -554,13 +554,15 @@ static void softlockup_start_all(void)
int lockup_detector_online_cpu(unsigned int cpu)
{
- watchdog_enable(cpu);
+ if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
+ watchdog_enable(cpu);
return 0;
}
int lockup_detector_offline_cpu(unsigned int cpu)
{
- watchdog_disable(cpu);
+ if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
+ watchdog_disable(cpu);
return 0;
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4026d1871407..ddee541ea97a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4266,7 +4266,7 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
INIT_LIST_HEAD(&wq->list);
if (alloc_and_link_pwqs(wq) < 0)
- goto err_free_wq;
+ goto err_unreg_lockdep;
if (wq_online && init_rescuer(wq) < 0)
goto err_destroy;
@@ -4292,9 +4292,10 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
return wq;
-err_free_wq:
+err_unreg_lockdep:
wq_unregister_lockdep(wq);
wq_free_lockdep(wq);
+err_free_wq:
free_workqueue_attrs(wq->unbound_attrs);
kfree(wq);
return NULL;