summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author    Alexei Starovoitov <ast@kernel.org> 2025-08-07 09:17:03 -0700
committer Alexei Starovoitov <ast@kernel.org> 2025-08-07 09:17:03 -0700
commit    15a3b798a0264aa160072754a3847f7585694ff9 (patch)
tree      6fd8b5c0c4cf39faa6126c5158ccff2c7705c7bc
parent    911c0359d8f6da3ffdeee52886f820fbf1c7fa5c (diff)
parent    77620d1267392b1a34bfc437d2adea3006f95865 (diff)
Merge branch 'bpf-use-vrealloc-in-bpf_patch_insn_data'
Eduard Zingerman says: ==================== bpf: use vrealloc() in bpf_patch_insn_data() Function bpf_patch_insn_data() uses vzalloc/vfree pair to allocate memory for updated insn_aux_data. These operations are expensive for big programs where a lot of rewrites happen, e.g. for pyperf180 test case. The pair can be replaced with a call to vrealloc in order to reduce the number of actual memory allocations. Perf stat w/o this patch: $ perf stat -B --all-kernel -r10 -- ./veristat -q pyperf180.bpf.o ... 2201.25 msec task-clock # 0.973 CPUs utilized ( +- 2.20% ) 188 context-switches # 85.406 /sec ( +- 9.29% ) 15 cpu-migrations # 6.814 /sec ( +- 5.64% ) 5 page-faults # 2.271 /sec ( +- 3.27% ) 4315057974 instructions # 1.28 insn per cycle # 0.33 stalled cycles per insn ( +- 0.03% ) 3366141387 cycles # 1.529 GHz ( +- 0.21% ) 1420810964 stalled-cycles-frontend # 42.21% frontend cycles idle ( +- 0.23% ) 1049956791 branches # 476.981 M/sec ( +- 0.03% ) 60591781 branch-misses # 5.77% of all branches ( +- 0.07% ) 2.2632 +- 0.0527 seconds time elapsed ( +- 2.33% ) Perf stat with this patch: 1227.15 msec task-clock # 0.963 CPUs utilized ( +- 2.27% ) 170 context-switches # 138.532 /sec ( +- 5.62% ) 2 cpu-migrations # 1.630 /sec ( +- 33.37% ) 5 page-faults # 4.074 /sec ( +- 4.47% ) 3312254304 instructions # 2.17 insn per cycle # 0.15 stalled cycles per insn ( +- 0.03% ) 1528944717 cycles # 1.246 GHz ( +- 0.31% ) 501475146 stalled-cycles-frontend # 32.80% frontend cycles idle ( +- 0.50% ) 730426891 branches # 595.222 M/sec ( +- 0.03% ) 17372363 branch-misses # 2.38% of all branches ( +- 0.16% ) 1.2744 +- 0.0301 seconds time elapsed ( +- 2.36% ) Changelog: v1: https://lore.kernel.org/bpf/20250806200928.3080531-1-eddyz87@gmail.com/T/#t v1 -> v2: - added missing memset(0) in adjust_insn_aux_data(), this fixes CI failure reported in [1]. 
[1] https://github.com/kernel-patches/bpf/actions/runs/16787563163/job/47542309875 ==================== Link: https://patch.msgid.link/20250807010205.3210608-1-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r-- kernel/bpf/verifier.c | 39
1 files changed, 20 insertions, 19 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0806295945e4..a61d57996692 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3663,7 +3663,7 @@ static int mark_irq_flag_read(struct bpf_verifier_env *env, struct bpf_reg_state
* code only. It returns TRUE if the source or destination register operates
* on 64-bit, otherwise return FALSE.
*/
-static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
+static bool is_reg64(struct bpf_insn *insn,
u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
{
u8 code, class, op;
@@ -3774,14 +3774,14 @@ static int insn_def_regno(const struct bpf_insn *insn)
}
/* Return TRUE if INSN has defined any 32-bit value explicitly. */
-static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
+static bool insn_has_def32(struct bpf_insn *insn)
{
int dst_reg = insn_def_regno(insn);
if (dst_reg == -1)
return false;
- return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
+ return !is_reg64(insn, dst_reg, NULL, DST_OP);
}
static void mark_insn_zext(struct bpf_verifier_env *env,
@@ -3812,7 +3812,7 @@ static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *r
mark_reg_scratched(env, regno);
reg = &regs[regno];
- rw64 = is_reg64(env, insn, regno, reg, t);
+ rw64 = is_reg64(insn, regno, reg, t);
if (t == SRC_OP) {
/* check whether register used as source operand can be read */
if (reg->type == NOT_INIT) {
@@ -20699,12 +20699,11 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
* [0, off) and [off, end) to new locations, so the patched range stays zero
*/
static void adjust_insn_aux_data(struct bpf_verifier_env *env,
- struct bpf_insn_aux_data *new_data,
struct bpf_prog *new_prog, u32 off, u32 cnt)
{
- struct bpf_insn_aux_data *old_data = env->insn_aux_data;
+ struct bpf_insn_aux_data *data = env->insn_aux_data;
struct bpf_insn *insn = new_prog->insnsi;
- u32 old_seen = old_data[off].seen;
+ u32 old_seen = data[off].seen;
u32 prog_len;
int i;
@@ -20712,22 +20711,20 @@ static void adjust_insn_aux_data(struct bpf_verifier_env *env,
* (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
* original insn at old prog.
*/
- old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
+ data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);
if (cnt == 1)
return;
prog_len = new_prog->len;
- memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
- memcpy(new_data + off + cnt - 1, old_data + off,
- sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+ memmove(data + off + cnt - 1, data + off,
+ sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+ memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
for (i = off; i < off + cnt - 1; i++) {
/* Expand insni[off]'s seen count to the patched range. */
- new_data[i].seen = old_seen;
- new_data[i].zext_dst = insn_has_def32(env, insn + i);
+ data[i].seen = old_seen;
+ data[i].zext_dst = insn_has_def32(insn + i);
}
- env->insn_aux_data = new_data;
- vfree(old_data);
}
static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
@@ -20765,10 +20762,14 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
struct bpf_insn_aux_data *new_data = NULL;
if (len > 1) {
- new_data = vzalloc(array_size(env->prog->len + len - 1,
- sizeof(struct bpf_insn_aux_data)));
+ new_data = vrealloc(env->insn_aux_data,
+ array_size(env->prog->len + len - 1,
+ sizeof(struct bpf_insn_aux_data)),
+ GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!new_data)
return NULL;
+
+ env->insn_aux_data = new_data;
}
new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
@@ -20780,7 +20781,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
vfree(new_data);
return NULL;
}
- adjust_insn_aux_data(env, new_data, new_prog, off, len);
+ adjust_insn_aux_data(env, new_prog, off, len);
adjust_subprog_starts(env, off, len);
adjust_poke_descs(new_prog, off, len);
return new_prog;
@@ -21131,7 +21132,7 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
* BPF_STX + SRC_OP, so it is safe to pass NULL
* here.
*/
- if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
+ if (is_reg64(&insn, load_reg, NULL, DST_OP)) {
if (class == BPF_LD &&
BPF_MODE(code) == BPF_IMM)
i++;