summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/core.c19
-rw-r--r--kernel/bpf/disasm.c16
-rw-r--r--kernel/bpf/verifier.c148
-rw-r--r--kernel/cgroup/cgroup-v1.c4
-rw-r--r--kernel/trace/trace.c4
-rw-r--r--kernel/trace/trace_events_hist.c24
-rw-r--r--kernel/trace/trace_hwlat.c2
-rw-r--r--kernel/workqueue.c20
8 files changed, 117 insertions, 120 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 9b1577498373..b1a5fc04492b 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -32,6 +32,8 @@
#include <linux/perf_event.h>
#include <linux/extable.h>
#include <linux/log2.h>
+
+#include <asm/barrier.h>
#include <asm/unaligned.h>
/* Registers */
@@ -1377,6 +1379,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
/* Non-UAPI available opcodes. */
[BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
+ [BPF_ST | BPF_NOSPEC] = &&ST_NOSPEC,
[BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B,
[BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H,
[BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W,
@@ -1621,7 +1624,21 @@ out:
COND_JMP(s, JSGE, >=)
COND_JMP(s, JSLE, <=)
#undef COND_JMP
- /* STX and ST and LDX*/
+ /* ST, STX and LDX*/
+ ST_NOSPEC:
+ /* Speculation barrier for mitigating Speculative Store Bypass.
+ * In case of arm64, we rely on the firmware mitigation as
+ * controlled via the ssbd kernel parameter. Whenever the
+ * mitigation is enabled, it works for all of the kernel code
+ * with no need to provide any additional instructions here.
+ * In case of x86, we use 'lfence' insn for mitigation. We
+ * reuse preexisting logic from Spectre v1 mitigation that
+ * happens to produce the required code on x86 for v4 as well.
+ */
+#ifdef CONFIG_X86
+ barrier_nospec();
+#endif
+ CONT;
#define LDST(SIZEOP, SIZE) \
STX_MEM_##SIZEOP: \
*(SIZE *)(unsigned long) (DST + insn->off) = SRC; \
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index bbfc6bb79240..ca3cd9aaa6ce 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -206,15 +206,17 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
verbose(cbs->private_data, "BUG_%02x\n", insn->code);
}
} else if (class == BPF_ST) {
- if (BPF_MODE(insn->code) != BPF_MEM) {
+ if (BPF_MODE(insn->code) == BPF_MEM) {
+ verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
+ insn->code,
+ bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+ insn->dst_reg,
+ insn->off, insn->imm);
+ } else if (BPF_MODE(insn->code) == 0xc0 /* BPF_NOSPEC, no UAPI */) {
+ verbose(cbs->private_data, "(%02x) nospec\n", insn->code);
+ } else {
verbose(cbs->private_data, "BUG_st_%02x\n", insn->code);
- return;
}
- verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
- insn->code,
- bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
- insn->dst_reg,
- insn->off, insn->imm);
} else if (class == BPF_LDX) {
if (BPF_MODE(insn->code) != BPF_MEM) {
verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9de3c9c3267c..f9bda5476ea5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2610,6 +2610,19 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
cur = env->cur_state->frame[env->cur_state->curframe];
if (value_regno >= 0)
reg = &cur->regs[value_regno];
+ if (!env->bypass_spec_v4) {
+ bool sanitize = reg && is_spillable_regtype(reg->type);
+
+ for (i = 0; i < size; i++) {
+ if (state->stack[spi].slot_type[i] == STACK_INVALID) {
+ sanitize = true;
+ break;
+ }
+ }
+
+ if (sanitize)
+ env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
+ }
if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
!register_is_null(reg) && env->bpf_capable) {
@@ -2632,47 +2645,10 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
verbose(env, "invalid size of register spill\n");
return -EACCES;
}
-
if (state != cur && reg->type == PTR_TO_STACK) {
verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
return -EINVAL;
}
-
- if (!env->bypass_spec_v4) {
- bool sanitize = false;
-
- if (state->stack[spi].slot_type[0] == STACK_SPILL &&
- register_is_const(&state->stack[spi].spilled_ptr))
- sanitize = true;
- for (i = 0; i < BPF_REG_SIZE; i++)
- if (state->stack[spi].slot_type[i] == STACK_MISC) {
- sanitize = true;
- break;
- }
- if (sanitize) {
- int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
- int soff = (-spi - 1) * BPF_REG_SIZE;
-
- /* detected reuse of integer stack slot with a pointer
- * which means either llvm is reusing stack slot or
- * an attacker is trying to exploit CVE-2018-3639
- * (speculative store bypass)
- * Have to sanitize that slot with preemptive
- * store of zero.
- */
- if (*poff && *poff != soff) {
- /* disallow programs where single insn stores
- * into two different stack slots, since verifier
- * cannot sanitize them
- */
- verbose(env,
- "insn %d cannot access two stack slots fp%d and fp%d",
- insn_idx, *poff, soff);
- return -EINVAL;
- }
- *poff = soff;
- }
- }
save_register_state(state, spi, reg);
} else {
u8 type = STACK_MISC;
@@ -6561,6 +6537,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
alu_state |= ptr_is_dst_reg ?
BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
+
+ /* Limit pruning on unknown scalars to enable deep search for
+ * potential masking differences from other program paths.
+ */
+ if (!off_is_imm)
+ env->explore_alu_limits = true;
}
err = update_alu_sanitation_state(aux, alu_state, alu_limit);
@@ -9936,8 +9918,8 @@ next:
}
/* Returns true if (rold safe implies rcur safe) */
-static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
- struct bpf_id_pair *idmap)
+static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
+ struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
{
bool equal;
@@ -9963,6 +9945,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
return false;
switch (rold->type) {
case SCALAR_VALUE:
+ if (env->explore_alu_limits)
+ return false;
if (rcur->type == SCALAR_VALUE) {
if (!rold->precise && !rcur->precise)
return true;
@@ -10053,9 +10037,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
return false;
}
-static bool stacksafe(struct bpf_func_state *old,
- struct bpf_func_state *cur,
- struct bpf_id_pair *idmap)
+static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
+ struct bpf_func_state *cur, struct bpf_id_pair *idmap)
{
int i, spi;
@@ -10100,9 +10083,8 @@ static bool stacksafe(struct bpf_func_state *old,
continue;
if (old->stack[spi].slot_type[0] != STACK_SPILL)
continue;
- if (!regsafe(&old->stack[spi].spilled_ptr,
- &cur->stack[spi].spilled_ptr,
- idmap))
+ if (!regsafe(env, &old->stack[spi].spilled_ptr,
+ &cur->stack[spi].spilled_ptr, idmap))
/* when explored and current stack slot are both storing
* spilled registers, check that stored pointers types
* are the same as well.
@@ -10159,10 +10141,11 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
for (i = 0; i < MAX_BPF_REG; i++)
- if (!regsafe(&old->regs[i], &cur->regs[i], env->idmap_scratch))
+ if (!regsafe(env, &old->regs[i], &cur->regs[i],
+ env->idmap_scratch))
return false;
- if (!stacksafe(old, cur, env->idmap_scratch))
+ if (!stacksafe(env, old, cur, env->idmap_scratch))
return false;
if (!refsafe(old, cur))
@@ -11906,35 +11889,33 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
for (i = 0; i < insn_cnt; i++, insn++) {
bpf_convert_ctx_access_t convert_ctx_access;
+ bool ctx_access;
if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
- insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
+ insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
type = BPF_READ;
- else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
- insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
- insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
- insn->code == (BPF_STX | BPF_MEM | BPF_DW))
+ ctx_access = true;
+ } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
type = BPF_WRITE;
- else
+ ctx_access = BPF_CLASS(insn->code) == BPF_STX;
+ } else {
continue;
+ }
if (type == BPF_WRITE &&
- env->insn_aux_data[i + delta].sanitize_stack_off) {
+ env->insn_aux_data[i + delta].sanitize_stack_spill) {
struct bpf_insn patch[] = {
- /* Sanitize suspicious stack slot with zero.
- * There are no memory dependencies for this store,
- * since it's only using frame pointer and immediate
- * constant of zero
- */
- BPF_ST_MEM(BPF_DW, BPF_REG_FP,
- env->insn_aux_data[i + delta].sanitize_stack_off,
- 0),
- /* the original STX instruction will immediately
- * overwrite the same stack slot with appropriate value
- */
*insn,
+ BPF_ST_NOSPEC(),
};
cnt = ARRAY_SIZE(patch);
@@ -11948,6 +11929,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
continue;
}
+ if (!ctx_access)
+ continue;
+
switch (env->insn_aux_data[i + delta].ptr_type) {
case PTR_TO_CTX:
if (!ops->convert_ctx_access)
@@ -12752,37 +12736,6 @@ static void free_states(struct bpf_verifier_env *env)
}
}
-/* The verifier is using insn_aux_data[] to store temporary data during
- * verification and to store information for passes that run after the
- * verification like dead code sanitization. do_check_common() for subprogram N
- * may analyze many other subprograms. sanitize_insn_aux_data() clears all
- * temporary data after do_check_common() finds that subprogram N cannot be
- * verified independently. pass_cnt counts the number of times
- * do_check_common() was run and insn->aux->seen tells the pass number
- * insn_aux_data was touched. These variables are compared to clear temporary
- * data from failed pass. For testing and experiments do_check_common() can be
- * run multiple times even when prior attempt to verify is unsuccessful.
- *
- * Note that special handling is needed on !env->bypass_spec_v1 if this is
- * ever called outside of error path with subsequent program rejection.
- */
-static void sanitize_insn_aux_data(struct bpf_verifier_env *env)
-{
- struct bpf_insn *insn = env->prog->insnsi;
- struct bpf_insn_aux_data *aux;
- int i, class;
-
- for (i = 0; i < env->prog->len; i++) {
- class = BPF_CLASS(insn[i].code);
- if (class != BPF_LDX && class != BPF_STX)
- continue;
- aux = &env->insn_aux_data[i];
- if (aux->seen != env->pass_cnt)
- continue;
- memset(aux, 0, offsetof(typeof(*aux), orig_idx));
- }
-}
-
static int do_check_common(struct bpf_verifier_env *env, int subprog)
{
bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
@@ -12859,9 +12812,6 @@ out:
if (!ret && pop_log)
bpf_vlog_reset(&env->log, 0);
free_states(env);
- if (ret)
- /* clean aux data in case subprog was rejected */
- sanitize_insn_aux_data(env);
return ret;
}
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 8d6bf56ed77a..de2c432dee20 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1221,9 +1221,7 @@ int cgroup1_get_tree(struct fs_context *fc)
ret = cgroup_do_get_tree(fc);
if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) {
- struct super_block *sb = fc->root->d_sb;
- dput(fc->root);
- deactivate_locked_super(sb);
+ fc_drop_locked(fc);
ret = 1;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c59dd35a6da5..33899a71fdc1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9135,8 +9135,10 @@ static int trace_array_create_dir(struct trace_array *tr)
return -EINVAL;
ret = event_trace_add_tracer(tr->dir, tr);
- if (ret)
+ if (ret) {
tracefs_remove(tr->dir);
+ return ret;
+ }
init_tracer_tracefs(tr, tr->dir);
__update_tracer_options(tr);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 34325f41ebc0..949ef09dc537 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -65,7 +65,8 @@
C(INVALID_SORT_MODIFIER,"Invalid sort modifier"), \
C(EMPTY_SORT_FIELD, "Empty sort field"), \
C(TOO_MANY_SORT_FIELDS, "Too many sort fields (Max = 2)"), \
- C(INVALID_SORT_FIELD, "Sort field must be a key or a val"),
+ C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \
+ C(INVALID_STR_OPERAND, "String type can not be an operand in expression"),
#undef C
#define C(a, b) HIST_ERR_##a
@@ -2156,6 +2157,13 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
ret = PTR_ERR(operand1);
goto free;
}
+ if (operand1->flags & HIST_FIELD_FL_STRING) {
+ /* String type can not be the operand of unary operator. */
+ hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str));
+ destroy_hist_field(operand1, 0);
+ ret = -EINVAL;
+ goto free;
+ }
expr->flags |= operand1->flags &
(HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
@@ -2257,6 +2265,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
operand1 = NULL;
goto free;
}
+ if (operand1->flags & HIST_FIELD_FL_STRING) {
+ hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(operand1_str));
+ ret = -EINVAL;
+ goto free;
+ }
/* rest of string could be another expression e.g. b+c in a+b+c */
operand_flags = 0;
@@ -2266,6 +2279,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
operand2 = NULL;
goto free;
}
+ if (operand2->flags & HIST_FIELD_FL_STRING) {
+ hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str));
+ ret = -EINVAL;
+ goto free;
+ }
ret = check_expr_operands(file->tr, operand1, operand2);
if (ret)
@@ -2287,6 +2305,10 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
expr->operands[0] = operand1;
expr->operands[1] = operand2;
+
+ /* The operand sizes should be the same, so just pick one */
+ expr->size = operand1->size;
+
expr->operator = field_op;
expr->name = expr_str(expr, 0);
expr->type = kstrdup(operand1->type, GFP_KERNEL);
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index a6c0cdaf4b87..14f46aae1981 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -327,7 +327,7 @@ static void move_to_next_cpu(void)
get_online_cpus();
cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
- next_cpu = cpumask_next(smp_processor_id(), current_mask);
+ next_cpu = cpumask_next(raw_smp_processor_id(), current_mask);
put_online_cpus();
if (next_cpu >= nr_cpu_ids)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 50142fc08902..f148eacda55a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3676,15 +3676,21 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
unbound_release_work);
struct workqueue_struct *wq = pwq->wq;
struct worker_pool *pool = pwq->pool;
- bool is_last;
+ bool is_last = false;
- if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
- return;
+ /*
+ * when @pwq is not linked, it doesn't hold any reference to the
+ * @wq, and @wq is invalid to access.
+ */
+ if (!list_empty(&pwq->pwqs_node)) {
+ if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
+ return;
- mutex_lock(&wq->mutex);
- list_del_rcu(&pwq->pwqs_node);
- is_last = list_empty(&wq->pwqs);
- mutex_unlock(&wq->mutex);
+ mutex_lock(&wq->mutex);
+ list_del_rcu(&pwq->pwqs_node);
+ is_last = list_empty(&wq->pwqs);
+ mutex_unlock(&wq->mutex);
+ }
mutex_lock(&wq_pool_mutex);
put_unbound_pool(pool);