Diffstat (limited to 'arch/arm64/net/bpf_jit_comp.c')
-rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 399
1 file changed, 306 insertions(+), 93 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 634d78422adb..97dfd5432809 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -30,6 +30,7 @@
 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
 #define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
 #define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
+#define PRIVATE_SP (MAX_BPF_JIT_REG + 4)
 #define ARENA_VM_START (MAX_BPF_JIT_REG + 5)
 
 #define check_imm(bits, imm) do { \
@@ -68,6 +69,8 @@ static const int bpf2a64[] = {
	[TCCNT_PTR] = A64_R(26),
	/* temporary register for blinding constants */
	[BPF_REG_AX] = A64_R(9),
+	/* callee saved register for private stack pointer */
+	[PRIVATE_SP] = A64_R(27),
	/* callee saved register for kern_vm_start address */
	[ARENA_VM_START] = A64_R(28),
 };
@@ -86,6 +89,7 @@ struct jit_ctx {
	u64 user_vm_start;
	u64 arena_vm_start;
	bool fp_used;
+	bool priv_sp_used;
	bool write;
 };
@@ -98,6 +102,10 @@ struct bpf_plt {
 #define PLT_TARGET_SIZE   sizeof_field(struct bpf_plt, target)
 #define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)
 
+/* Memory size/value to protect private stack overflow/underflow */
+#define PRIV_STACK_GUARD_SZ	16
+#define PRIV_STACK_GUARD_VAL	0xEB9F12345678eb9fULL
+
 static inline void emit(const u32 insn, struct jit_ctx *ctx)
 {
	if (ctx->image != NULL && ctx->write)
@@ -387,8 +395,11 @@ static void find_used_callee_regs(struct jit_ctx *ctx)
	if (reg_used & 8)
		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9];
 
-	if (reg_used & 16)
+	if (reg_used & 16) {
		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP];
+		if (ctx->priv_sp_used)
+			ctx->used_callee_reg[i++] = bpf2a64[PRIVATE_SP];
+	}
 
	if (ctx->arena_vm_start)
		ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START];
@@ -412,6 +423,7 @@ static void push_callee_regs(struct jit_ctx *ctx)
		emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
		emit(A64_PUSH(A64_R(25), A64_R(26), A64_SP), ctx);
		emit(A64_PUSH(A64_R(27), A64_R(28), A64_SP), ctx);
+		ctx->fp_used = true;
	} else {
		find_used_callee_regs(ctx);
		for (i = 0; i + 1 < ctx->nr_used_callee_reg; i += 2) {
@@ -461,6 +473,19 @@ static void pop_callee_regs(struct jit_ctx *ctx)
	}
 }
 
+static void emit_percpu_ptr(const u8 dst_reg, void __percpu *ptr,
+			    struct jit_ctx *ctx)
+{
+	const u8 tmp = bpf2a64[TMP_REG_1];
+
+	emit_a64_mov_i64(dst_reg, (__force const u64)ptr, ctx);
+	if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
+		emit(A64_MRS_TPIDR_EL2(tmp), ctx);
+	else
+		emit(A64_MRS_TPIDR_EL1(tmp), ctx);
+	emit(A64_ADD(1, dst_reg, dst_reg, tmp), ctx);
+}
+
 #define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
 #define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)
@@ -476,6 +501,8 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
	const bool is_main_prog = !bpf_is_subprog(prog);
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+	const u8 priv_sp = bpf2a64[PRIVATE_SP];
+	void __percpu *priv_stack_ptr;
	const int idx0 = ctx->idx;
	int cur_offset;
@@ -551,15 +578,23 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
		emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
	}
 
-	if (ctx->fp_used)
-		/* Set up BPF prog stack base register */
-		emit(A64_MOV(1, fp, A64_SP), ctx);
-
	/* Stack must be multiples of 16B */
	ctx->stack_size = round_up(prog->aux->stack_depth, 16);
 
+	if (ctx->fp_used) {
+		if (ctx->priv_sp_used) {
+			/* Set up private stack pointer */
+			priv_stack_ptr = prog->aux->priv_stack_ptr + PRIV_STACK_GUARD_SZ;
+			emit_percpu_ptr(priv_sp, priv_stack_ptr, ctx);
+			emit(A64_ADD_I(1, fp, priv_sp, ctx->stack_size), ctx);
+		} else {
+			/* Set up BPF prog stack base register */
+			emit(A64_MOV(1, fp, A64_SP), ctx);
+		}
+	}
+
	/* Set up function call stack */
-	if (ctx->stack_size)
+	if (ctx->stack_size && !ctx->priv_sp_used)
		emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
 
	if (ctx->arena_vm_start)
@@ -623,7 +658,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
	emit(A64_STR64I(tcc, ptr, 0), ctx);
 
	/* restore SP */
-	if (ctx->stack_size)
+	if (ctx->stack_size && !ctx->priv_sp_used)
		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
	pop_callee_regs(ctx);
@@ -991,7 +1026,7 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
	const u8 ptr = bpf2a64[TCCNT_PTR];
 
	/* We're done with BPF stack */
-	if (ctx->stack_size)
+	if (ctx->stack_size && !ctx->priv_sp_used)
		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
 
	pop_callee_regs(ctx);
@@ -1120,6 +1155,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+	const u8 priv_sp = bpf2a64[PRIVATE_SP];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	const int i = insn - ctx->prog->insnsi;
@@ -1564,7 +1600,7 @@ emit_cond_jmp:
			src = tmp2;
		}
		if (src == fp) {
-			src_adj = A64_SP;
+			src_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			src_adj = src;
@@ -1630,17 +1666,14 @@ emit_cond_jmp:
			return ret;
		break;
 
-	/* speculation barrier */
+	/* speculation barrier against v1 and v4 */
	case BPF_ST | BPF_NOSPEC:
-		/*
-		 * Nothing required here.
-		 *
-		 * In case of arm64, we rely on the firmware mitigation of
-		 * Speculative Store Bypass as controlled via the ssbd kernel
-		 * parameter. Whenever the mitigation is enabled, it works
-		 * for all of the kernel code with no need to provide any
-		 * additional instructions.
-		 */
+		if (alternative_has_cap_likely(ARM64_HAS_SB)) {
+			emit(A64_SB, ctx);
+		} else {
+			emit(A64_DSB_NSH, ctx);
+			emit(A64_ISB, ctx);
+		}
		break;
 
	/* ST: *(size *)(dst + off) = imm */
@@ -1657,7 +1690,7 @@ emit_cond_jmp:
			dst = tmp2;
		}
		if (dst == fp) {
-			dst_adj = A64_SP;
+			dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			dst_adj = dst;
@@ -1719,7 +1752,7 @@ emit_cond_jmp:
			dst = tmp2;
		}
		if (dst == fp) {
-			dst_adj = A64_SP;
+			dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			dst_adj = dst;
@@ -1862,6 +1895,39 @@ static inline void bpf_flush_icache(void *start, void *end)
	flush_icache_range((unsigned long)start, (unsigned long)end);
 }
 
+static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
+{
+	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+	u64 *stack_ptr;
+
+	for_each_possible_cpu(cpu) {
+		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+		stack_ptr[0] = PRIV_STACK_GUARD_VAL;
+		stack_ptr[1] = PRIV_STACK_GUARD_VAL;
+		stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
+		stack_ptr[underflow_idx + 1] = PRIV_STACK_GUARD_VAL;
+	}
+}
+
+static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
+				   struct bpf_prog *prog)
+{
+	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+	u64 *stack_ptr;
+
+	for_each_possible_cpu(cpu) {
+		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+		if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
+			pr_err("BPF private stack overflow/underflow detected for prog %s\n",
+			       bpf_jit_get_prog_name(prog));
+			break;
+		}
+	}
+}
+
 struct arm64_jit_data {
	struct bpf_binary_header *header;
	u8 *ro_image;
@@ -1874,9 +1940,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
	int image_size, prog_size, extable_size, extable_align, extable_offset;
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header;
-	struct bpf_binary_header *ro_header;
+	struct bpf_binary_header *ro_header = NULL;
	struct arm64_jit_data *jit_data;
+	void __percpu *priv_stack_ptr = NULL;
	bool was_classic = bpf_prog_was_classic(prog);
+	int priv_stack_alloc_sz;
	bool tmp_blinded = false;
	bool extra_pass = false;
	struct jit_ctx ctx;
@@ -1908,6 +1976,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
		}
		prog->aux->jit_data = jit_data;
	}
+	priv_stack_ptr = prog->aux->priv_stack_ptr;
+	if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
+		/* Allocate actual private stack size with verifier-calculated
+		 * stack size plus two memory guards to protect overflow and
+		 * underflow.
+		 */
+		priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+				      2 * PRIV_STACK_GUARD_SZ;
+		priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL);
+		if (!priv_stack_ptr) {
+			prog = orig_prog;
+			goto out_priv_stack;
+		}
+
+		priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
+		prog->aux->priv_stack_ptr = priv_stack_ptr;
+	}
	if (jit_data->ctx.offset) {
		ctx = jit_data->ctx;
		ro_image_ptr = jit_data->ro_image;
@@ -1931,6 +2016,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
	ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
 
+	if (priv_stack_ptr)
+		ctx.priv_sp_used = true;
+
	/* Pass 1: Estimate the maximum image size.
	 *
	 * BPF line info needs ctx->offset[i] to be the offset of
@@ -2070,7 +2158,12 @@ skip_init_ctx:
			ctx.offset[i] *= AARCH64_INSN_SIZE;
		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
 out_off:
+	if (!ro_header && priv_stack_ptr) {
+		free_percpu(priv_stack_ptr);
+		prog->aux->priv_stack_ptr = NULL;
+	}
	kvfree(ctx.offset);
+out_priv_stack:
	kfree(jit_data);
	prog->aux->jit_data = NULL;
	}
@@ -2089,6 +2182,11 @@ out_free_hdr:
	goto out_off;
 }
 
+bool bpf_jit_supports_private_stack(void)
+{
+	return true;
+}
+
 bool bpf_jit_supports_kfunc_call(void)
 {
	return true;
@@ -2113,7 +2211,7 @@ bool bpf_jit_supports_subprog_tailcalls(void)
 }
 
 static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
-			    int args_off, int retval_off, int run_ctx_off,
+			    int bargs_off, int retval_off, int run_ctx_off,
			    bool save_ret)
 {
	__le32 *branch;
@@ -2155,7 +2253,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
	branch = ctx->image + ctx->idx;
	emit(A64_NOP, ctx);
 
-	emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
+	emit(A64_ADD_I(1, A64_R(0), A64_SP, bargs_off), ctx);
	if (!p->jited)
		emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
@@ -2180,7 +2278,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
 }
 
 static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
-			       int args_off, int retval_off, int run_ctx_off,
+			       int bargs_off, int retval_off, int run_ctx_off,
			       __le32 **branches)
 {
	int i;
@@ -2190,7 +2288,7 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
	 */
	emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
	for (i = 0; i < tl->nr_links; i++) {
-		invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
+		invoke_bpf_prog(ctx, tl->links[i], bargs_off, retval_off,
				run_ctx_off, true);
		/* if (*(u64 *)(sp + retval_off) != 0)
		 *	goto do_fexit;
		 */
@@ -2204,23 +2302,120 @@
	}
 }
 
-static void save_args(struct jit_ctx *ctx, int args_off, int nregs)
+struct arg_aux {
+	/* how many args are passed through registers, the rest of the args are
+	 * passed through stack
+	 */
+	int args_in_regs;
+	/* how many registers are used to pass arguments */
+	int regs_for_args;
+	/* how much stack is used for additional args passed to bpf program
+	 * that did not fit in original function registers
+	 */
+	int bstack_for_args;
+	/* how much stack is used for additional args passed to the
+	 * original function when called from trampoline (this one needs
+	 * arguments to be properly aligned)
+	 */
+	int ostack_for_args;
+};
+
+static int calc_arg_aux(const struct btf_func_model *m,
+			struct arg_aux *a)
 {
-	int i;
+	int stack_slots, nregs, slots, i;
+
+	/* verifier ensures m->nr_args <= MAX_BPF_FUNC_ARGS */
+	for (i = 0, nregs = 0; i < m->nr_args; i++) {
+		slots = (m->arg_size[i] + 7) / 8;
+		if (nregs + slots <= 8) /* passed through register ? */
+			nregs += slots;
+		else
+			break;
+	}
 
-	for (i = 0; i < nregs; i++) {
-		emit(A64_STR64I(i, A64_SP, args_off), ctx);
-		args_off += 8;
+	a->args_in_regs = i;
+	a->regs_for_args = nregs;
+	a->ostack_for_args = 0;
+	a->bstack_for_args = 0;
+
+	/* the remaining arguments are passed through the stack */
+	for (; i < m->nr_args; i++) {
+		stack_slots = (m->arg_size[i] + 7) / 8;
+		a->bstack_for_args += stack_slots * 8;
+		a->ostack_for_args = a->ostack_for_args + stack_slots * 8;
	}
+
+	return 0;
 }
 
-static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
+static void clear_garbage(struct jit_ctx *ctx, int reg, int effective_bytes)
+{
+	if (effective_bytes) {
+		int garbage_bits = 64 - 8 * effective_bytes;
+#ifdef CONFIG_CPU_BIG_ENDIAN
+		/* garbage bits are at the right end */
+		emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
+		emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
+#else
+		/* garbage bits are at the left end */
+		emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
+		emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
+#endif
+	}
+}
+
+static void save_args(struct jit_ctx *ctx, int bargs_off, int oargs_off,
+		      const struct btf_func_model *m,
+		      const struct arg_aux *a,
+		      bool for_call_origin)
 {
	int i;
+	int reg;
+	int doff;
+	int soff;
+	int slots;
+	u8 tmp = bpf2a64[TMP_REG_1];
+
+	/* store arguments to the stack for the bpf program, or restore
+	 * arguments from stack for the original function
+	 */
+	for (reg = 0; reg < a->regs_for_args; reg++) {
+		emit(for_call_origin ?
+		     A64_LDR64I(reg, A64_SP, bargs_off) :
+		     A64_STR64I(reg, A64_SP, bargs_off),
+		     ctx);
+		bargs_off += 8;
+	}
+
+	soff = 32; /* on stack arguments start from FP + 32 */
+	doff = (for_call_origin ? oargs_off : bargs_off);
+
+	/* save on stack arguments */
+	for (i = a->args_in_regs; i < m->nr_args; i++) {
+		slots = (m->arg_size[i] + 7) / 8;
+		/* verifier ensures arg_size <= 16, so slots equals 1 or 2 */
+		while (slots-- > 0) {
+			emit(A64_LDR64I(tmp, A64_FP, soff), ctx);
+			/* if there is unused space in the last slot, clear
+			 * the garbage contained in the space.
+			 */
+			if (slots == 0 && !for_call_origin)
+				clear_garbage(ctx, tmp, m->arg_size[i] % 8);
+			emit(A64_STR64I(tmp, A64_SP, doff), ctx);
+			soff += 8;
+			doff += 8;
+		}
+	}
+}
+
+static void restore_args(struct jit_ctx *ctx, int bargs_off, int nregs)
+{
+	int reg;
 
-	for (i = 0; i < nregs; i++) {
-		emit(A64_LDR64I(i, A64_SP, args_off), ctx);
-		args_off += 8;
+	for (reg = 0; reg < nregs; reg++) {
+		emit(A64_LDR64I(reg, A64_SP, bargs_off), ctx);
+		bargs_off += 8;
	}
 }
@@ -2243,17 +2438,21 @@ static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links)
  */
 static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
			      struct bpf_tramp_links *tlinks, void *func_addr,
-			      int nregs, u32 flags)
+			      const struct btf_func_model *m,
+			      const struct arg_aux *a,
+			      u32 flags)
 {
	int i;
	int stack_size;
	int retaddr_off;
	int regs_off;
	int retval_off;
-	int args_off;
-	int nregs_off;
+	int bargs_off;
+	int nfuncargs_off;
	int ip_off;
	int run_ctx_off;
+	int oargs_off;
+	int nfuncargs;
	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -2262,31 +2461,38 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
	bool is_struct_ops = is_struct_ops_tramp(fentry);
 
	/* trampoline stack layout:
-	 *                  [ parent ip ]
-	 *                  [ FP ]
-	 * SP + retaddr_off [ self ip ]
-	 *                  [ FP ]
+	 *                     [ parent ip ]
+	 *                     [ FP ]
+	 * SP + retaddr_off    [ self ip ]
+	 *                     [ FP ]
	 *
-	 *                  [ padding ] align SP to multiples of 16
+	 *                     [ padding ] align SP to multiples of 16
	 *
-	 *                  [ x20 ] callee saved reg x20
-	 * SP + regs_off    [ x19 ] callee saved reg x19
+	 *                     [ x20 ] callee saved reg x20
+	 * SP + regs_off       [ x19 ] callee saved reg x19
	 *
-	 * SP + retval_off  [ return value ] BPF_TRAMP_F_CALL_ORIG or
-	 *                                   BPF_TRAMP_F_RET_FENTRY_RET
+	 * SP + retval_off     [ return value ] BPF_TRAMP_F_CALL_ORIG or
+	 *                                      BPF_TRAMP_F_RET_FENTRY_RET
+	 *                     [ arg reg N ]
+	 *                     [ ... ]
+	 * SP + bargs_off      [ arg reg 1 ] for bpf
	 *
-	 *                  [ arg reg N ]
-	 *                  [ ... ]
-	 * SP + args_off    [ arg reg 1 ]
+	 * SP + nfuncargs_off  [ arg regs count ]
	 *
-	 * SP + nregs_off   [ arg regs count ]
+	 * SP + ip_off         [ traced function ] BPF_TRAMP_F_IP_ARG flag
	 *
-	 * SP + ip_off      [ traced function ] BPF_TRAMP_F_IP_ARG flag
+	 * SP + run_ctx_off    [ bpf_tramp_run_ctx ]
	 *
-	 * SP + run_ctx_off [ bpf_tramp_run_ctx ]
+	 *                     [ stack arg N ]
+	 *                     [ ... ]
+	 * SP + oargs_off      [ stack arg 1 ] for original func
	 */
 
	stack_size = 0;
+	oargs_off = stack_size;
+	if (flags & BPF_TRAMP_F_CALL_ORIG)
+		stack_size += a->ostack_for_args;
+
	run_ctx_off = stack_size;
	/* room for bpf_tramp_run_ctx */
	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
@@ -2296,13 +2502,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
	if (flags & BPF_TRAMP_F_IP_ARG)
		stack_size += 8;
 
-	nregs_off = stack_size;
+	nfuncargs_off = stack_size;
	/* room for args count */
	stack_size += 8;
 
-	args_off = stack_size;
+	bargs_off = stack_size;
	/* room for args */
-	stack_size += nregs * 8;
+	nfuncargs = a->regs_for_args + a->bstack_for_args / 8;
+	stack_size += 8 * nfuncargs;
 
	/* room for return value */
	retval_off = stack_size;
@@ -2349,11 +2556,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
	}
 
	/* save arg regs count */
-	emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx);
-	emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx);
+	emit(A64_MOVZ(1, A64_R(10), nfuncargs, 0), ctx);
+	emit(A64_STR64I(A64_R(10), A64_SP, nfuncargs_off), ctx);
 
-	/* save arg regs */
-	save_args(ctx, args_off, nregs);
+	/* save args for bpf */
+	save_args(ctx, bargs_off, oargs_off, m, a, false);
 
	/* save callee saved registers */
	emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -2369,7 +2576,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
	}
 
	for (i = 0; i < fentry->nr_links; i++)
-		invoke_bpf_prog(ctx, fentry->links[i], args_off,
+		invoke_bpf_prog(ctx, fentry->links[i], bargs_off,
				retval_off, run_ctx_off,
				flags & BPF_TRAMP_F_RET_FENTRY_RET);
@@ -2379,12 +2586,13 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
		if (!branches)
			return -ENOMEM;
 
-		invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
+		invoke_bpf_mod_ret(ctx, fmod_ret, bargs_off, retval_off,
				   run_ctx_off, branches);
	}
 
	if (flags & BPF_TRAMP_F_CALL_ORIG) {
-		restore_args(ctx, args_off, nregs);
+		/* save args for original func */
+		save_args(ctx, bargs_off, oargs_off, m, a, true);
		/* call original func */
		emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
		emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
@@ -2403,7 +2611,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
	}
 
	for (i = 0; i < fexit->nr_links; i++)
-		invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
+		invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off,
				run_ctx_off, false);
 
	if (flags & BPF_TRAMP_F_CALL_ORIG) {
@@ -2417,7 +2625,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
	}
 
	if (flags & BPF_TRAMP_F_RESTORE_REGS)
-		restore_args(ctx, args_off, nregs);
+		restore_args(ctx, bargs_off, a->regs_for_args);
 
	/* restore callee saved register x19 and x20 */
	emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -2454,21 +2662,6 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
	return ctx->idx;
 }
 
-static int btf_func_model_nregs(const struct btf_func_model *m)
-{
-	int nregs = m->nr_args;
-	int i;
-
-	/* extra registers needed for struct argument */
-	for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
-		/* The arg_size is at most 16 bytes, enforced by the verifier. */
-		if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
-			nregs += (m->arg_size[i] + 7) / 8 - 1;
-	}
-
-	return nregs;
-}
-
 int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
			     struct bpf_tramp_links *tlinks, void *func_addr)
 {
@@ -2477,14 +2670,14 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
		.idx = 0,
	};
	struct bpf_tramp_image im;
-	int nregs, ret;
+	struct arg_aux aaux;
+	int ret;
 
-	nregs = btf_func_model_nregs(m);
-	/* the first 8 registers are used for arguments */
-	if (nregs > 8)
-		return -ENOTSUPP;
+	ret = calc_arg_aux(m, &aaux);
+	if (ret < 0)
+		return ret;
 
-	ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags);
+	ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, m, &aaux, flags);
	if (ret < 0)
		return ret;
@@ -2511,9 +2704,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
				u32 flags, struct bpf_tramp_links *tlinks,
				void *func_addr)
 {
-	int ret, nregs;
-	void *image, *tmp;
	u32 size = ro_image_end - ro_image;
+	struct arg_aux aaux;
+	void *image, *tmp;
+	int ret;
 
	/* image doesn't need to be in module memory range, so we can
	 * use kvmalloc.
@@ -2529,13 +2723,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
		.write = true,
	};
 
-	nregs = btf_func_model_nregs(m);
-	/* the first 8 registers are used for arguments */
-	if (nregs > 8)
-		return -ENOTSUPP;
-
	jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
-	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
+	ret = calc_arg_aux(m, &aaux);
+	if (ret)
+		goto out;
+	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, m, &aaux, flags);
 
	if (ret > 0 && validate_code(&ctx) < 0) {
		ret = -EINVAL;
@@ -2811,6 +3004,17 @@ bool bpf_jit_supports_percpu_insn(void)
	return true;
 }
 
+bool bpf_jit_bypass_spec_v4(void)
+{
+	/* In case of arm64, we rely on the firmware mitigation of Speculative
+	 * Store Bypass as controlled via the ssbd kernel parameter. Whenever
+	 * the mitigation is enabled, it works for all of the kernel code with
+	 * no need to provide any additional instructions. Therefore, skip
+	 * inserting nospec insns against Spectre v4.
+	 */
+	return true;
+}
+
 bool bpf_jit_inlines_helper_call(s32 imm)
 {
	switch (imm) {
@@ -2828,6 +3032,8 @@ void bpf_jit_free(struct bpf_prog *prog)
	if (prog->jited) {
		struct arm64_jit_data *jit_data = prog->aux->jit_data;
		struct bpf_binary_header *hdr;
+		void __percpu *priv_stack_ptr;
+		int priv_stack_alloc_sz;
 
		/*
		 * If we fail the final pass of JIT (from jit_subprogs),
		 */
@@ -2841,6 +3047,13 @@
		}
		hdr = bpf_jit_binary_pack_hdr(prog);
		bpf_jit_binary_pack_free(hdr, NULL);
+		priv_stack_ptr = prog->aux->priv_stack_ptr;
+		if (priv_stack_ptr) {
+			priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+					      2 * PRIV_STACK_GUARD_SZ;
+			priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
+			free_percpu(prog->aux->priv_stack_ptr);
+		}
		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
	}
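What emit_percpu_ptr() above computes at run time is the per-CPU address of the private stack: the percpu base address of the object plus this CPU's percpu offset, which arm64 keeps in TPIDR_EL1 (or TPIDR_EL2 with VHE). A rough userspace simulation of that address arithmetic, with the percpu offset table standing in for the TPIDR register (all names here are hypothetical, not kernel API):

    #include <stdint.h>
    #include <stdio.h>

    #define NR_CPUS    4
    #define PCPU_CHUNK 4096

    /* one per-CPU memory chunk per CPU */
    static char pcpu_area[NR_CPUS][PCPU_CHUNK];

    /* stand-in for TPIDR_ELx: this CPU's per-CPU offset */
    static uintptr_t tpidr_for_cpu(int cpu)
    {
        return (uintptr_t)pcpu_area[cpu] - (uintptr_t)pcpu_area[0];
    }

    /* the JIT-ed sequence: dst = &obj (percpu base copy); dst += TPIDR */
    static void *percpu_ptr(void *obj_in_cpu0_chunk, int cpu)
    {
        return (char *)obj_in_cpu0_chunk + tpidr_for_cpu(cpu);
    }

    int main(void)
    {
        /* a percpu object living 128 bytes into each CPU's chunk */
        void *obj = pcpu_area[0] + 128;
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
            printf("cpu%d: %p\n", cpu, percpu_ptr(obj, cpu));
        return 0;
    }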
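The guard scheme from priv_stack_init_guard()/priv_stack_check_guard() writes a 16-byte pattern at both ends of each CPU's private-stack allocation and verifies it when the program is freed. A minimal userspace sketch of the same idea, using a single buffer instead of per-CPU memory (the helper names here are made up for illustration):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define GUARD_SZ  16                      /* PRIV_STACK_GUARD_SZ  */
    #define GUARD_VAL 0xEB9F12345678eb9fULL   /* PRIV_STACK_GUARD_VAL */

    static void guard_init(uint64_t *buf, int alloc_size)
    {
        int underflow_idx = (alloc_size - GUARD_SZ) >> 3;

        buf[0] = GUARD_VAL;                 /* low guard: catches overflow   */
        buf[1] = GUARD_VAL;
        buf[underflow_idx] = GUARD_VAL;     /* high guard: catches underflow */
        buf[underflow_idx + 1] = GUARD_VAL;
    }

    static int guard_ok(const uint64_t *buf, int alloc_size)
    {
        int underflow_idx = (alloc_size - GUARD_SZ) >> 3;

        return buf[0] == GUARD_VAL && buf[1] == GUARD_VAL &&
               buf[underflow_idx] == GUARD_VAL &&
               buf[underflow_idx + 1] == GUARD_VAL;
    }

    int main(void)
    {
        int stack_depth = 24;                       /* verifier-reported depth */
        int alloc = ((stack_depth + 15) & ~15) + 2 * GUARD_SZ;
        uint64_t *buf = calloc(alloc / 8, 8);

        guard_init(buf, alloc);
        printf("intact: %d\n", guard_ok(buf, alloc));   /* 1 */
        buf[0] = 0;                                     /* simulate overflow   */
        printf("intact: %d\n", guard_ok(buf, alloc));   /* 0 */
        free(buf);
        return 0;
    }

The check runs only at bpf_jit_free() time, so it is a post-mortem diagnostic rather than a runtime trap, matching what the patch does.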
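The register/stack split that calc_arg_aux() performs for the trampoline can be exercised in isolation: the first eight 8-byte slots go in x0-x7, and any argument that no longer fits spills whole to the stack. A self-contained rendition of that logic (struct field names borrowed from the patch, everything else hypothetical):

    #include <stdio.h>

    struct arg_aux {
        int args_in_regs;    /* args passed via registers           */
        int regs_for_args;   /* register slots used                 */
        int bstack_for_args; /* stack bytes for the bpf program     */
        int ostack_for_args; /* stack bytes for the original func   */
    };

    static void split_args(const int *arg_size, int nr_args, struct arg_aux *a)
    {
        int i, slots, nregs = 0;

        for (i = 0; i < nr_args; i++) {
            slots = (arg_size[i] + 7) / 8;
            if (nregs + slots > 8)      /* no longer fits in x0-x7 */
                break;
            nregs += slots;
        }
        a->args_in_regs = i;
        a->regs_for_args = nregs;
        a->bstack_for_args = a->ostack_for_args = 0;
        for (; i < nr_args; i++) {
            slots = (arg_size[i] + 7) / 8;
            a->bstack_for_args += slots * 8;
            a->ostack_for_args += slots * 8;
        }
    }

    int main(void)
    {
        /* seven scalars plus a 16-byte struct: the struct needs two
         * slots, only one register remains, so it spills whole
         */
        int sizes[] = { 8, 8, 8, 8, 8, 8, 8, 16 };
        struct arg_aux a;

        split_args(sizes, 8, &a);
        printf("in regs: %d, reg slots: %d, stack bytes: %d\n",
               a.args_in_regs, a.regs_for_args, a.bstack_for_args);
        /* in regs: 7, reg slots: 7, stack bytes: 16 */
        return 0;
    }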
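Finally, the shift pair emitted by clear_garbage() can be shown on a host value: on a little-endian machine the valid bytes of a partially filled 8-byte slot sit at the low end, so a logical left shift followed by a logical right shift zeroes the stale high bytes, mirroring the LSL/LSR sequence the JIT emits. Userspace sketch only:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t clear_high_garbage(uint64_t v, int effective_bytes)
    {
        int garbage_bits = 64 - 8 * effective_bytes;

        if (effective_bytes == 0 || effective_bytes >= 8)
            return v;              /* slot fully used: nothing to clear */
        v <<= garbage_bits;        /* push garbage out the top          */
        v >>= garbage_bits;        /* bring valid bytes back down       */
        return v;
    }

    int main(void)
    {
        /* a 12-byte struct leaves 4 valid bytes in its second slot */
        uint64_t slot = 0xDEADBEEF11223344ULL;

        printf("%#llx\n", (unsigned long long)clear_high_garbage(slot, 4));
        /* prints 0x11223344 */
        return 0;
    }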