diff options
| author | Alexei Starovoitov <ast@kernel.org> | 2025-08-07 09:17:03 -0700 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2025-08-07 09:17:03 -0700 |
| commit | 15a3b798a0264aa160072754a3847f7585694ff9 (patch) | |
| tree | 6fd8b5c0c4cf39faa6126c5158ccff2c7705c7bc | |
| parent | 911c0359d8f6da3ffdeee52886f820fbf1c7fa5c (diff) | |
| parent | 77620d1267392b1a34bfc437d2adea3006f95865 (diff) | |
Merge branch 'bpf-use-vrealloc-in-bpf_patch_insn_data'
Eduard Zingerman says:
====================
bpf: use vrealloc() in bpf_patch_insn_data()
Function bpf_patch_insn_data() uses vzalloc/vfree pair to allocate
memory for updated insn_aux_data. These operations are expensive for
big programs where a lot of rewrites happen, e.g. for pyperf180 test
case. The pair can be replaced with a call to vrealloc in order to
reduce the number of actual memory allocations.
Perf stat w/o this patch:
$ perf stat -B --all-kernel -r10 -- ./veristat -q pyperf180.bpf.o
...
2201.25 msec task-clock # 0.973 CPUs utilized ( +- 2.20% )
188 context-switches # 85.406 /sec ( +- 9.29% )
15 cpu-migrations # 6.814 /sec ( +- 5.64% )
5 page-faults # 2.271 /sec ( +- 3.27% )
4315057974 instructions # 1.28 insn per cycle
# 0.33 stalled cycles per insn ( +- 0.03% )
3366141387 cycles # 1.529 GHz ( +- 0.21% )
1420810964 stalled-cycles-frontend # 42.21% frontend cycles idle ( +- 0.23% )
1049956791 branches # 476.981 M/sec ( +- 0.03% )
60591781 branch-misses # 5.77% of all branches ( +- 0.07% )
2.2632 +- 0.0527 seconds time elapsed ( +- 2.33% )
Perf stat with this patch:
1227.15 msec task-clock # 0.963 CPUs utilized ( +- 2.27% )
170 context-switches # 138.532 /sec ( +- 5.62% )
2 cpu-migrations # 1.630 /sec ( +- 33.37% )
5 page-faults # 4.074 /sec ( +- 4.47% )
3312254304 instructions # 2.17 insn per cycle
# 0.15 stalled cycles per insn ( +- 0.03% )
1528944717 cycles # 1.246 GHz ( +- 0.31% )
501475146 stalled-cycles-frontend # 32.80% frontend cycles idle ( +- 0.50% )
730426891 branches # 595.222 M/sec ( +- 0.03% )
17372363 branch-misses # 2.38% of all branches ( +- 0.16% )
1.2744 +- 0.0301 seconds time elapsed ( +- 2.36% )
Changelog:
v1: https://lore.kernel.org/bpf/20250806200928.3080531-1-eddyz87@gmail.com/T/#t
v1 -> v2:
- added missing memset(0) in adjust_insn_aux_data(),
this fixes CI failure reported in [1].
[1] https://github.com/kernel-patches/bpf/actions/runs/16787563163/job/47542309875
====================
Link: https://patch.msgid.link/20250807010205.3210608-1-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
 kernel/bpf/verifier.c | 39 ++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0806295945e4..a61d57996692 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3663,7 +3663,7 @@ static int mark_irq_flag_read(struct bpf_verifier_env *env, struct bpf_reg_state
  * code only. It returns TRUE if the source or destination register operates
  * on 64-bit, otherwise return FALSE.
  */
-static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
+static bool is_reg64(struct bpf_insn *insn,
 		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
 {
 	u8 code, class, op;
@@ -3774,14 +3774,14 @@ static int insn_def_regno(const struct bpf_insn *insn)
 }
 
 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
-static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
+static bool insn_has_def32(struct bpf_insn *insn)
 {
 	int dst_reg = insn_def_regno(insn);
 
 	if (dst_reg == -1)
 		return false;
 
-	return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
+	return !is_reg64(insn, dst_reg, NULL, DST_OP);
 }
 
 static void mark_insn_zext(struct bpf_verifier_env *env,
@@ -3812,7 +3812,7 @@ static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *r
 	mark_reg_scratched(env, regno);
 
 	reg = &regs[regno];
-	rw64 = is_reg64(env, insn, regno, reg, t);
+	rw64 = is_reg64(insn, regno, reg, t);
 	if (t == SRC_OP) {
 		/* check whether register used as source operand can be read */
 		if (reg->type == NOT_INIT) {
@@ -20699,12 +20699,11 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
  * [0, off) and [off, end) to new locations, so the patched range stays zero
  */
 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
-				 struct bpf_insn_aux_data *new_data,
 				 struct bpf_prog *new_prog, u32 off, u32 cnt)
 {
-	struct bpf_insn_aux_data *old_data = env->insn_aux_data;
+	struct bpf_insn_aux_data *data = env->insn_aux_data;
 	struct bpf_insn *insn = new_prog->insnsi;
-	u32 old_seen = old_data[off].seen;
+	u32 old_seen = data[off].seen;
 	u32 prog_len;
 	int i;
@@ -20712,22 +20711,20 @@ static void adjust_insn_aux_data(struct bpf_verifier_env *env,
 	 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
 	 * original insn at old prog.
 	 */
-	old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
+	data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);
 	if (cnt == 1)
 		return;
 	prog_len = new_prog->len;
-	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
-	memcpy(new_data + off + cnt - 1, old_data + off,
-	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+	memmove(data + off + cnt - 1, data + off,
+		sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+	memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
 	for (i = off; i < off + cnt - 1; i++) {
 		/* Expand insni[off]'s seen count to the patched range. */
-		new_data[i].seen = old_seen;
-		new_data[i].zext_dst = insn_has_def32(env, insn + i);
+		data[i].seen = old_seen;
+		data[i].zext_dst = insn_has_def32(insn + i);
 	}
-	env->insn_aux_data = new_data;
-	vfree(old_data);
 }
 
 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
@@ -20765,10 +20762,14 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
 	struct bpf_insn_aux_data *new_data = NULL;
 
 	if (len > 1) {
-		new_data = vzalloc(array_size(env->prog->len + len - 1,
-					      sizeof(struct bpf_insn_aux_data)));
+		new_data = vrealloc(env->insn_aux_data,
+				    array_size(env->prog->len + len - 1,
+					       sizeof(struct bpf_insn_aux_data)),
+				    GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 		if (!new_data)
 			return NULL;
+
+		env->insn_aux_data = new_data;
 	}
 
 	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
@@ -20780,7 +20781,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
 		vfree(new_data);
 		return NULL;
 	}
-	adjust_insn_aux_data(env, new_data, new_prog, off, len);
+	adjust_insn_aux_data(env, new_prog, off, len);
 	adjust_subprog_starts(env, off, len);
 	adjust_poke_descs(new_prog, off, len);
 	return new_prog;
@@ -21131,7 +21132,7 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
 	 * BPF_STX + SRC_OP, so it is safe to pass NULL
 	 * here.
 	 */
-	if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
+	if (is_reg64(&insn, load_reg, NULL, DST_OP)) {
 		if (class == BPF_LD && BPF_MODE(code) == BPF_IMM)
 			i++;
