diff options
Diffstat (limited to 'arch/arm64/net/bpf_jit_comp.c')
-rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 2388 |
1 files changed, 2179 insertions, 209 deletions
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index cdc79de0c794..da8b89dd2910 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -7,22 +7,41 @@ #define pr_fmt(fmt) "bpf_jit: " fmt +#include <linux/arm-smccc.h> +#include <linux/bitfield.h> #include <linux/bpf.h> #include <linux/filter.h> +#include <linux/memory.h> #include <linux/printk.h> #include <linux/slab.h> +#include <asm/asm-extable.h> #include <asm/byteorder.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/debug-monitors.h> +#include <asm/insn.h> +#include <asm/text-patching.h> #include <asm/set_memory.h> #include "bpf_jit.h" #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) -#define TCALL_CNT (MAX_BPF_JIT_REG + 2) +#define TCCNT_PTR (MAX_BPF_JIT_REG + 2) #define TMP_REG_3 (MAX_BPF_JIT_REG + 3) +#define ARENA_VM_START (MAX_BPF_JIT_REG + 5) + +#define check_imm(bits, imm) do { \ + if ((((imm) > 0) && ((imm) >> (bits))) || \ + (((imm) < 0) && (~(imm) >> (bits)))) { \ + pr_info("[%2d] imm=%d(0x%x) out of range\n", \ + i, imm, imm); \ + return -EINVAL; \ + } \ +} while (0) +#define check_imm19(imm) check_imm(19, imm) +#define check_imm26(imm) check_imm(26, imm) /* Map BPF registers to A64 registers */ static const int bpf2a64[] = { @@ -41,14 +60,16 @@ static const int bpf2a64[] = { [BPF_REG_9] = A64_R(22), /* read-only frame pointer to access stack */ [BPF_REG_FP] = A64_R(25), - /* temporary registers for internal BPF JIT */ + /* temporary registers for BPF JIT */ [TMP_REG_1] = A64_R(10), [TMP_REG_2] = A64_R(11), [TMP_REG_3] = A64_R(12), - /* tail_call_cnt */ - [TCALL_CNT] = A64_R(26), + /* tail_call_cnt_ptr */ + [TCCNT_PTR] = A64_R(26), /* temporary register for blinding constants */ [BPF_REG_AX] = A64_R(9), + /* callee saved register for kern_vm_start address */ + [ARENA_VM_START] = A64_R(28), }; struct jit_ctx { @@ -56,13 +77,30 @@ struct jit_ctx { int idx; int epilogue_offset; int *offset; + int exentry_idx; + int nr_used_callee_reg; + u8 used_callee_reg[8]; /* r6~r9, fp, arena_vm_start */ __le32 *image; + __le32 *ro_image; u32 stack_size; + u64 user_vm_start; + u64 arena_vm_start; + bool fp_used; + bool write; }; +struct bpf_plt { + u32 insn_ldr; /* load target */ + u32 insn_br; /* branch to target */ + u64 target; /* target value */ +}; + +#define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target) +#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target) + static inline void emit(const u32 insn, struct jit_ctx *ctx) { - if (ctx->image != NULL) + if (ctx->image != NULL && ctx->write) ctx->image[ctx->idx] = cpu_to_le32(insn); ctx->idx++; @@ -122,6 +160,12 @@ static inline void emit_a64_mov_i64(const int reg, const u64 val, } } +static inline void emit_bti(u32 insn, struct jit_ctx *ctx) +{ + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + emit(insn, ctx); +} + /* * Kernel addresses in the vmalloc space use at most 48 bits, and the * remaining bits are guaranteed to be 0x1. So we can compose the address @@ -141,14 +185,58 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val, } } -static inline int bpf2a64_offset(int bpf_to, int bpf_from, - const struct jit_ctx *ctx) +static bool should_emit_indirect_call(long target, const struct jit_ctx *ctx) { - int to = ctx->offset[bpf_to]; - /* -1 to account for the Branch instruction */ - int from = ctx->offset[bpf_from] - 1; + long offset; - return to - from; + /* when ctx->ro_image is not allocated or the target is unknown, + * emit indirect call + */ + if (!ctx->ro_image || !target) + return true; + + offset = target - (long)&ctx->ro_image[ctx->idx]; + return offset < -SZ_128M || offset >= SZ_128M; +} + +static void emit_direct_call(u64 target, struct jit_ctx *ctx) +{ + u32 insn; + unsigned long pc; + + pc = (unsigned long)&ctx->ro_image[ctx->idx]; + insn = aarch64_insn_gen_branch_imm(pc, target, AARCH64_INSN_BRANCH_LINK); + emit(insn, ctx); +} + +static void emit_indirect_call(u64 target, struct jit_ctx *ctx) +{ + u8 tmp; + + tmp = bpf2a64[TMP_REG_1]; + emit_addr_mov_i64(tmp, target, ctx); + emit(A64_BLR(tmp), ctx); +} + +static void emit_call(u64 target, struct jit_ctx *ctx) +{ + if (should_emit_indirect_call((long)target, ctx)) + emit_indirect_call(target, ctx); + else + emit_direct_call(target, ctx); +} + +static inline int bpf2a64_offset(int bpf_insn, int off, + const struct jit_ctx *ctx) +{ + /* BPF JMP offset is relative to the next instruction */ + bpf_insn++; + /* + * Whereas arm64 branch instructions encode the offset + * from the branch itself, so we must subtract 1 from the + * instruction offset. + */ + return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1); } static void jit_fill_hole(void *area, unsigned int size) @@ -159,6 +247,14 @@ static void jit_fill_hole(void *area, unsigned int size) *ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT); } +int bpf_arch_text_invalidate(void *dst, size_t len) +{ + if (!aarch64_insn_set(dst, AARCH64_BREAK_FAULT, len)) + return -EINVAL; + + return 0; +} + static inline int epilogue_offset(const struct jit_ctx *ctx) { int to = ctx->epilogue_offset; @@ -167,21 +263,219 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) return to - from; } -/* Stack must be multiples of 16B */ -#define STACK_ALIGN(sz) (((sz) + 15) & ~15) +static bool is_addsub_imm(u32 imm) +{ + /* Either imm12 or shifted imm12. */ + return !(imm & ~0xfff) || !(imm & ~0xfff000); +} + +static inline void emit_a64_add_i(const bool is64, const int dst, const int src, + const int tmp, const s32 imm, struct jit_ctx *ctx) +{ + if (is_addsub_imm(imm)) { + emit(A64_ADD_I(is64, dst, src, imm), ctx); + } else if (is_addsub_imm(-(u32)imm)) { + emit(A64_SUB_I(is64, dst, src, -imm), ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_ADD(is64, dst, src, tmp), ctx); + } +} + +/* + * There are 3 types of AArch64 LDR/STR (immediate) instruction: + * Post-index, Pre-index, Unsigned offset. + * + * For BPF ldr/str, the "unsigned offset" type is sufficient. + * + * "Unsigned offset" type LDR(immediate) format: + * + * 3 2 1 0 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |x x|1 1 1 0 0 1 0 1| imm12 | Rn | Rt | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * scale + * + * "Unsigned offset" type STR(immediate) format: + * 3 2 1 0 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |x x|1 1 1 0 0 1 0 0| imm12 | Rn | Rt | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * scale + * + * The offset is calculated from imm12 and scale in the following way: + * + * offset = (u64)imm12 << scale + */ +static bool is_lsi_offset(int offset, int scale) +{ + if (offset < 0) + return false; + + if (offset > (0xFFF << scale)) + return false; + + if (offset & ((1 << scale) - 1)) + return false; + + return true; +} + +/* generated main prog prologue: + * bti c // if CONFIG_ARM64_BTI_KERNEL + * mov x9, lr + * nop // POKE_OFFSET + * paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL + * stp x29, lr, [sp, #-16]! + * mov x29, sp + * stp xzr, x26, [sp, #-16]! + * mov x26, sp + * // PROLOGUE_OFFSET + * // save callee-saved registers + */ +static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx) +{ + const bool is_main_prog = !bpf_is_subprog(ctx->prog); + const u8 ptr = bpf2a64[TCCNT_PTR]; + + if (is_main_prog) { + /* Initialize tail_call_cnt. */ + emit(A64_PUSH(A64_ZR, ptr, A64_SP), ctx); + emit(A64_MOV(1, ptr, A64_SP), ctx); + } else + emit(A64_PUSH(ptr, ptr, A64_SP), ctx); +} + +static void find_used_callee_regs(struct jit_ctx *ctx) +{ + int i; + const struct bpf_prog *prog = ctx->prog; + const struct bpf_insn *insn = &prog->insnsi[0]; + int reg_used = 0; + + for (i = 0; i < prog->len; i++, insn++) { + if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6) + reg_used |= 1; + + if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7) + reg_used |= 2; + + if (insn->dst_reg == BPF_REG_8 || insn->src_reg == BPF_REG_8) + reg_used |= 4; + + if (insn->dst_reg == BPF_REG_9 || insn->src_reg == BPF_REG_9) + reg_used |= 8; + + if (insn->dst_reg == BPF_REG_FP || insn->src_reg == BPF_REG_FP) { + ctx->fp_used = true; + reg_used |= 16; + } + } + + i = 0; + if (reg_used & 1) + ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_6]; + + if (reg_used & 2) + ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_7]; + + if (reg_used & 4) + ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_8]; + + if (reg_used & 8) + ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9]; + + if (reg_used & 16) + ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP]; + + if (ctx->arena_vm_start) + ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START]; + + ctx->nr_used_callee_reg = i; +} + +/* Save callee-saved registers */ +static void push_callee_regs(struct jit_ctx *ctx) +{ + int reg1, reg2, i; + + /* + * Program acting as exception boundary should save all ARM64 + * Callee-saved registers as the exception callback needs to recover + * all ARM64 Callee-saved registers in its epilogue. + */ + if (ctx->prog->aux->exception_boundary) { + emit(A64_PUSH(A64_R(19), A64_R(20), A64_SP), ctx); + emit(A64_PUSH(A64_R(21), A64_R(22), A64_SP), ctx); + emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx); + emit(A64_PUSH(A64_R(25), A64_R(26), A64_SP), ctx); + emit(A64_PUSH(A64_R(27), A64_R(28), A64_SP), ctx); + } else { + find_used_callee_regs(ctx); + for (i = 0; i + 1 < ctx->nr_used_callee_reg; i += 2) { + reg1 = ctx->used_callee_reg[i]; + reg2 = ctx->used_callee_reg[i + 1]; + emit(A64_PUSH(reg1, reg2, A64_SP), ctx); + } + if (i < ctx->nr_used_callee_reg) { + reg1 = ctx->used_callee_reg[i]; + /* keep SP 16-byte aligned */ + emit(A64_PUSH(reg1, A64_ZR, A64_SP), ctx); + } + } +} + +/* Restore callee-saved registers */ +static void pop_callee_regs(struct jit_ctx *ctx) +{ + struct bpf_prog_aux *aux = ctx->prog->aux; + int reg1, reg2, i; + + /* + * Program acting as exception boundary pushes R23 and R24 in addition + * to BPF callee-saved registers. Exception callback uses the boundary + * program's stack frame, so recover these extra registers in the above + * two cases. + */ + if (aux->exception_boundary || aux->exception_cb) { + emit(A64_POP(A64_R(27), A64_R(28), A64_SP), ctx); + emit(A64_POP(A64_R(25), A64_R(26), A64_SP), ctx); + emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx); + emit(A64_POP(A64_R(21), A64_R(22), A64_SP), ctx); + emit(A64_POP(A64_R(19), A64_R(20), A64_SP), ctx); + } else { + i = ctx->nr_used_callee_reg - 1; + if (ctx->nr_used_callee_reg % 2 != 0) { + reg1 = ctx->used_callee_reg[i]; + emit(A64_POP(reg1, A64_ZR, A64_SP), ctx); + i--; + } + while (i > 0) { + reg1 = ctx->used_callee_reg[i - 1]; + reg2 = ctx->used_callee_reg[i]; + emit(A64_POP(reg1, reg2, A64_SP), ctx); + i -= 2; + } + } +} + +#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0) +#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0) + +/* Offset of nop instruction in bpf prog entry to be poked */ +#define POKE_OFFSET (BTI_INSNS + 1) /* Tail call offset to jump into */ -#define PROLOGUE_OFFSET 7 +#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 4) static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) { const struct bpf_prog *prog = ctx->prog; - const u8 r6 = bpf2a64[BPF_REG_6]; - const u8 r7 = bpf2a64[BPF_REG_7]; - const u8 r8 = bpf2a64[BPF_REG_8]; - const u8 r9 = bpf2a64[BPF_REG_9]; + const bool is_main_prog = !bpf_is_subprog(prog); const u8 fp = bpf2a64[BPF_REG_FP]; - const u8 tcc = bpf2a64[TCALL_CNT]; + const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; const int idx0 = ctx->idx; int cur_offset; @@ -208,38 +502,72 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) * */ - /* Save FP and LR registers to stay align with ARM64 AAPCS */ - emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); - emit(A64_MOV(1, A64_FP, A64_SP), ctx); - - /* Save callee-saved registers */ - emit(A64_PUSH(r6, r7, A64_SP), ctx); - emit(A64_PUSH(r8, r9, A64_SP), ctx); - emit(A64_PUSH(fp, tcc, A64_SP), ctx); - - /* Set up BPF prog stack base register */ - emit(A64_MOV(1, fp, A64_SP), ctx); - - if (!ebpf_from_cbpf) { - /* Initialize tail_call_cnt */ - emit(A64_MOVZ(1, tcc, 0, 0), ctx); - - cur_offset = ctx->idx - idx0; - if (cur_offset != PROLOGUE_OFFSET) { - pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n", - cur_offset, PROLOGUE_OFFSET); - return -1; + /* bpf function may be invoked by 3 instruction types: + * 1. bl, attached via freplace to bpf prog via short jump + * 2. br, attached via freplace to bpf prog via long jump + * 3. blr, working as a function pointer, used by emit_call. + * So BTI_JC should used here to support both br and blr. + */ + emit_bti(A64_BTI_JC, ctx); + + emit(A64_MOV(1, A64_R(9), A64_LR), ctx); + emit(A64_NOP, ctx); + + if (!prog->aux->exception_cb) { + /* Sign lr */ + if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) + emit(A64_PACIASP, ctx); + + /* Save FP and LR registers to stay align with ARM64 AAPCS */ + emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + + prepare_bpf_tail_call_cnt(ctx); + + if (!ebpf_from_cbpf && is_main_prog) { + cur_offset = ctx->idx - idx0; + if (cur_offset != PROLOGUE_OFFSET) { + pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n", + cur_offset, PROLOGUE_OFFSET); + return -1; + } + /* BTI landing pad for the tail call, done with a BR */ + emit_bti(A64_BTI_J, ctx); } + push_callee_regs(ctx); + } else { + /* + * Exception callback receives FP of Main Program as third + * parameter + */ + emit(A64_MOV(1, A64_FP, A64_R(2)), ctx); + /* + * Main Program already pushed the frame record and the + * callee-saved registers. The exception callback will not push + * anything and re-use the main program's stack. + * + * 12 registers are on the stack + */ + emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx); } - ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth); + if (ctx->fp_used) + /* Set up BPF prog stack base register */ + emit(A64_MOV(1, fp, A64_SP), ctx); + + /* Stack must be multiples of 16B */ + ctx->stack_size = round_up(prog->aux->stack_depth, 16); /* Set up function call stack */ - emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); + if (ctx->stack_size) + emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); + + if (ctx->arena_vm_start) + emit_a64_mov_i64(arena_vm_base, ctx->arena_vm_start, ctx); + return 0; } -static int out_offset = -1; /* initialized on the first pass of build_body() */ static int emit_bpf_tail_call(struct jit_ctx *ctx) { /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ @@ -248,11 +576,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) const u8 tmp = bpf2a64[TMP_REG_1]; const u8 prg = bpf2a64[TMP_REG_2]; - const u8 tcc = bpf2a64[TCALL_CNT]; - const int idx0 = ctx->idx; -#define cur_offset (ctx->idx - idx0) -#define jmp_offset (out_offset - (cur_offset)) + const u8 tcc = bpf2a64[TMP_REG_3]; + const u8 ptr = bpf2a64[TCCNT_PTR]; size_t off; + __le32 *branch1 = NULL; + __le32 *branch2 = NULL; + __le32 *branch3 = NULL; /* if (index >= array->map.max_entries) * goto out; @@ -262,15 +591,20 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit(A64_LDR32(tmp, r2, tmp), ctx); emit(A64_MOV(0, r3, r3), ctx); emit(A64_CMP(0, r3, tmp), ctx); - emit(A64_B_(A64_COND_CS, jmp_offset), ctx); + branch1 = ctx->image + ctx->idx; + emit(A64_NOP, ctx); - /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) + /* + * if ((*tail_call_cnt_ptr) >= MAX_TAIL_CALL_CNT) * goto out; - * tail_call_cnt++; */ emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); + emit(A64_LDR64I(tcc, ptr, 0), ctx); emit(A64_CMP(1, tcc, tmp), ctx); - emit(A64_B_(A64_COND_HI, jmp_offset), ctx); + branch2 = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + + /* (*tail_call_cnt_ptr)++; */ emit(A64_ADD_I(1, tcc, tcc, 1), ctx); /* prog = array->ptrs[index]; @@ -282,57 +616,494 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit(A64_ADD(1, tmp, r2, tmp), ctx); emit(A64_LSL(1, prg, r3, 3), ctx); emit(A64_LDR64(prg, tmp, prg), ctx); - emit(A64_CBZ(1, prg, jmp_offset), ctx); + branch3 = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + + /* Update tail_call_cnt if the slot is populated. */ + emit(A64_STR64I(tcc, ptr, 0), ctx); + + /* restore SP */ + if (ctx->stack_size) + emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); + + pop_callee_regs(ctx); /* goto *(prog->bpf_func + prologue_offset); */ off = offsetof(struct bpf_prog, bpf_func); emit_a64_mov_i64(tmp, off, ctx); emit(A64_LDR64(tmp, prg, tmp), ctx); emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx); - emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); emit(A64_BR(tmp), ctx); - /* out: */ - if (out_offset == -1) - out_offset = cur_offset; - if (cur_offset != out_offset) { - pr_err_once("tail_call out_offset = %d, expected %d!\n", - cur_offset, out_offset); - return -1; + if (ctx->image) { + off = &ctx->image[ctx->idx] - branch1; + *branch1 = cpu_to_le32(A64_B_(A64_COND_CS, off)); + + off = &ctx->image[ctx->idx] - branch2; + *branch2 = cpu_to_le32(A64_B_(A64_COND_CS, off)); + + off = &ctx->image[ctx->idx] - branch3; + *branch3 = cpu_to_le32(A64_CBZ(1, prg, off)); } + return 0; -#undef cur_offset -#undef jmp_offset } -static void build_epilogue(struct jit_ctx *ctx) +static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx) +{ + const s32 imm = insn->imm; + const s16 off = insn->off; + const u8 code = insn->code; + const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC; + const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; + const u8 dst = bpf2a64[insn->dst_reg]; + const u8 src = bpf2a64[insn->src_reg]; + const u8 tmp = bpf2a64[TMP_REG_1]; + u8 reg; + + switch (imm) { + case BPF_LOAD_ACQ: + reg = src; + break; + case BPF_STORE_REL: + reg = dst; + break; + default: + pr_err_once("unknown atomic load/store op code %02x\n", imm); + return -EINVAL; + } + + if (off) { + emit_a64_add_i(1, tmp, reg, tmp, off, ctx); + reg = tmp; + } + if (arena) { + emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx); + reg = tmp; + } + + switch (imm) { + case BPF_LOAD_ACQ: + switch (BPF_SIZE(code)) { + case BPF_B: + emit(A64_LDARB(dst, reg), ctx); + break; + case BPF_H: + emit(A64_LDARH(dst, reg), ctx); + break; + case BPF_W: + emit(A64_LDAR32(dst, reg), ctx); + break; + case BPF_DW: + emit(A64_LDAR64(dst, reg), ctx); + break; + } + break; + case BPF_STORE_REL: + switch (BPF_SIZE(code)) { + case BPF_B: + emit(A64_STLRB(src, reg), ctx); + break; + case BPF_H: + emit(A64_STLRH(src, reg), ctx); + break; + case BPF_W: + emit(A64_STLR32(src, reg), ctx); + break; + case BPF_DW: + emit(A64_STLR64(src, reg), ctx); + break; + } + break; + default: + pr_err_once("unexpected atomic load/store op code %02x\n", + imm); + return -EINVAL; + } + + return 0; +} + +#ifdef CONFIG_ARM64_LSE_ATOMICS +static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) +{ + const u8 code = insn->code; + const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; + const u8 dst = bpf2a64[insn->dst_reg]; + const u8 src = bpf2a64[insn->src_reg]; + const u8 tmp = bpf2a64[TMP_REG_1]; + const u8 tmp2 = bpf2a64[TMP_REG_2]; + const bool isdw = BPF_SIZE(code) == BPF_DW; + const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC; + const s16 off = insn->off; + u8 reg = dst; + + if (off) { + emit_a64_add_i(1, tmp, reg, tmp, off, ctx); + reg = tmp; + } + if (arena) { + emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx); + reg = tmp; + } + + switch (insn->imm) { + /* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */ + case BPF_ADD: + emit(A64_STADD(isdw, reg, src), ctx); + break; + case BPF_AND: + emit(A64_MVN(isdw, tmp2, src), ctx); + emit(A64_STCLR(isdw, reg, tmp2), ctx); + break; + case BPF_OR: + emit(A64_STSET(isdw, reg, src), ctx); + break; + case BPF_XOR: + emit(A64_STEOR(isdw, reg, src), ctx); + break; + /* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */ + case BPF_ADD | BPF_FETCH: + emit(A64_LDADDAL(isdw, src, reg, src), ctx); + break; + case BPF_AND | BPF_FETCH: + emit(A64_MVN(isdw, tmp2, src), ctx); + emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx); + break; + case BPF_OR | BPF_FETCH: + emit(A64_LDSETAL(isdw, src, reg, src), ctx); + break; + case BPF_XOR | BPF_FETCH: + emit(A64_LDEORAL(isdw, src, reg, src), ctx); + break; + /* src_reg = atomic_xchg(dst_reg + off, src_reg); */ + case BPF_XCHG: + emit(A64_SWPAL(isdw, src, reg, src), ctx); + break; + /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */ + case BPF_CMPXCHG: + emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx); + break; + default: + pr_err_once("unknown atomic op code %02x\n", insn->imm); + return -EINVAL; + } + + return 0; +} +#else +static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) +{ + return -EINVAL; +} +#endif + +static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) +{ + const u8 code = insn->code; + const u8 dst = bpf2a64[insn->dst_reg]; + const u8 src = bpf2a64[insn->src_reg]; + const u8 tmp = bpf2a64[TMP_REG_1]; + const u8 tmp2 = bpf2a64[TMP_REG_2]; + const u8 tmp3 = bpf2a64[TMP_REG_3]; + const int i = insn - ctx->prog->insnsi; + const s32 imm = insn->imm; + const s16 off = insn->off; + const bool isdw = BPF_SIZE(code) == BPF_DW; + u8 reg = dst; + s32 jmp_offset; + + if (BPF_MODE(code) == BPF_PROBE_ATOMIC) { + /* ll_sc based atomics don't support unsafe pointers yet. */ + pr_err_once("unknown atomic opcode %02x\n", code); + return -EINVAL; + } + + if (off) { + emit_a64_add_i(1, tmp, reg, tmp, off, ctx); + reg = tmp; + } + + if (imm == BPF_ADD || imm == BPF_AND || + imm == BPF_OR || imm == BPF_XOR) { + /* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */ + emit(A64_LDXR(isdw, tmp2, reg), ctx); + if (imm == BPF_ADD) + emit(A64_ADD(isdw, tmp2, tmp2, src), ctx); + else if (imm == BPF_AND) + emit(A64_AND(isdw, tmp2, tmp2, src), ctx); + else if (imm == BPF_OR) + emit(A64_ORR(isdw, tmp2, tmp2, src), ctx); + else + emit(A64_EOR(isdw, tmp2, tmp2, src), ctx); + emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx); + jmp_offset = -3; + check_imm19(jmp_offset); + emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); + } else if (imm == (BPF_ADD | BPF_FETCH) || + imm == (BPF_AND | BPF_FETCH) || + imm == (BPF_OR | BPF_FETCH) || + imm == (BPF_XOR | BPF_FETCH)) { + /* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */ + const u8 ax = bpf2a64[BPF_REG_AX]; + + emit(A64_MOV(isdw, ax, src), ctx); + emit(A64_LDXR(isdw, src, reg), ctx); + if (imm == (BPF_ADD | BPF_FETCH)) + emit(A64_ADD(isdw, tmp2, src, ax), ctx); + else if (imm == (BPF_AND | BPF_FETCH)) + emit(A64_AND(isdw, tmp2, src, ax), ctx); + else if (imm == (BPF_OR | BPF_FETCH)) + emit(A64_ORR(isdw, tmp2, src, ax), ctx); + else + emit(A64_EOR(isdw, tmp2, src, ax), ctx); + emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx); + jmp_offset = -3; + check_imm19(jmp_offset); + emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); + emit(A64_DMB_ISH, ctx); + } else if (imm == BPF_XCHG) { + /* src_reg = atomic_xchg(dst_reg + off, src_reg); */ + emit(A64_MOV(isdw, tmp2, src), ctx); + emit(A64_LDXR(isdw, src, reg), ctx); + emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx); + jmp_offset = -2; + check_imm19(jmp_offset); + emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); + emit(A64_DMB_ISH, ctx); + } else if (imm == BPF_CMPXCHG) { + /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */ + const u8 r0 = bpf2a64[BPF_REG_0]; + + emit(A64_MOV(isdw, tmp2, r0), ctx); + emit(A64_LDXR(isdw, r0, reg), ctx); + emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx); + jmp_offset = 4; + check_imm19(jmp_offset); + emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx); + emit(A64_STLXR(isdw, src, reg, tmp3), ctx); + jmp_offset = -4; + check_imm19(jmp_offset); + emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); + emit(A64_DMB_ISH, ctx); + } else { + pr_err_once("unknown atomic op code %02x\n", imm); + return -EINVAL; + } + + return 0; +} + +void dummy_tramp(void); + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .global dummy_tramp\n" +" .type dummy_tramp, %function\n" +"dummy_tramp:" +#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) +" bti j\n" /* dummy_tramp is called via "br x10" */ +#endif +" mov x10, x30\n" +" mov x30, x9\n" +" ret x10\n" +" .size dummy_tramp, .-dummy_tramp\n" +" .popsection\n" +); + +/* build a plt initialized like this: + * + * plt: + * ldr tmp, target + * br tmp + * target: + * .quad dummy_tramp + * + * when a long jump trampoline is attached, target is filled with the + * trampoline address, and when the trampoline is removed, target is + * restored to dummy_tramp address. + */ +static void build_plt(struct jit_ctx *ctx) +{ + const u8 tmp = bpf2a64[TMP_REG_1]; + struct bpf_plt *plt = NULL; + + /* make sure target is 64-bit aligned */ + if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2) + emit(A64_NOP, ctx); + + plt = (struct bpf_plt *)(ctx->image + ctx->idx); + /* plt is called via bl, no BTI needed here */ + emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx); + emit(A64_BR(tmp), ctx); + + if (ctx->image) + plt->target = (u64)&dummy_tramp; +} + +/* Clobbers BPF registers 1-4, aka x0-x3 */ +static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx) +{ + const u8 r1 = bpf2a64[BPF_REG_1]; /* aka x0 */ + u8 k = get_spectre_bhb_loop_value(); + + if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) || + cpu_mitigations_off() || __nospectre_bhb || + arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) + return; + + if (capable(CAP_SYS_ADMIN)) + return; + + if (supports_clearbhb(SCOPE_SYSTEM)) { + emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx); + return; + } + + if (k) { + emit_a64_mov_i64(r1, k, ctx); + emit(A64_B(1), ctx); + emit(A64_SUBS_I(true, r1, r1, 1), ctx); + emit(A64_B_(A64_COND_NE, -2), ctx); + emit(aarch64_insn_gen_dsb(AARCH64_INSN_MB_ISH), ctx); + emit(aarch64_insn_get_isb_value(), ctx); + } + + if (is_spectre_bhb_fw_mitigated()) { + emit(A64_ORR_I(false, r1, AARCH64_INSN_REG_ZR, + ARM_SMCCC_ARCH_WORKAROUND_3), ctx); + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: + emit(aarch64_insn_get_hvc_value(), ctx); + break; + case SMCCC_CONDUIT_SMC: + emit(aarch64_insn_get_smc_value(), ctx); + break; + default: + pr_err_once("Firmware mitigation enabled with unknown conduit\n"); + } + } +} + +static void build_epilogue(struct jit_ctx *ctx, bool was_classic) { const u8 r0 = bpf2a64[BPF_REG_0]; - const u8 r6 = bpf2a64[BPF_REG_6]; - const u8 r7 = bpf2a64[BPF_REG_7]; - const u8 r8 = bpf2a64[BPF_REG_8]; - const u8 r9 = bpf2a64[BPF_REG_9]; - const u8 fp = bpf2a64[BPF_REG_FP]; + const u8 ptr = bpf2a64[TCCNT_PTR]; /* We're done with BPF stack */ - emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); + if (ctx->stack_size) + emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); + + pop_callee_regs(ctx); - /* Restore fs (x25) and x26 */ - emit(A64_POP(fp, A64_R(26), A64_SP), ctx); + emit(A64_POP(A64_ZR, ptr, A64_SP), ctx); - /* Restore callee-saved register */ - emit(A64_POP(r8, r9, A64_SP), ctx); - emit(A64_POP(r6, r7, A64_SP), ctx); + if (was_classic) + build_bhb_mitigation(ctx); /* Restore FP/LR registers */ emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); - /* Set return value */ + /* Move the return value from bpf:r0 (aka x7) to x0 */ emit(A64_MOV(1, A64_R(0), r0), ctx); + /* Authenticate lr */ + if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) + emit(A64_AUTIASP, ctx); + emit(A64_RET(A64_LR), ctx); } +#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) +#define BPF_FIXUP_REG_MASK GENMASK(31, 27) +#define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */ + +bool ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); + int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); + + if (dst_reg != DONT_CLEAR) + regs->regs[dst_reg] = 0; + regs->pc = (unsigned long)&ex->fixup - offset; + return true; +} + +/* For accesses to BTF pointers, add an entry to the exception table */ +static int add_exception_handler(const struct bpf_insn *insn, + struct jit_ctx *ctx, + int dst_reg) +{ + off_t ins_offset; + off_t fixup_offset; + unsigned long pc; + struct exception_table_entry *ex; + + if (!ctx->image) + /* First pass */ + return 0; + + if (BPF_MODE(insn->code) != BPF_PROBE_MEM && + BPF_MODE(insn->code) != BPF_PROBE_MEMSX && + BPF_MODE(insn->code) != BPF_PROBE_MEM32 && + BPF_MODE(insn->code) != BPF_PROBE_ATOMIC) + return 0; + + if (!ctx->prog->aux->extable || + WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries)) + return -EINVAL; + + ex = &ctx->prog->aux->extable[ctx->exentry_idx]; + pc = (unsigned long)&ctx->ro_image[ctx->idx - 1]; + + /* + * This is the relative offset of the instruction that may fault from + * the exception table itself. This will be written to the exception + * table and if this instruction faults, the destination register will + * be set to '0' and the execution will jump to the next instruction. + */ + ins_offset = pc - (long)&ex->insn; + if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN)) + return -ERANGE; + + /* + * Since the extable follows the program, the fixup offset is always + * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value + * to keep things simple, and put the destination register in the upper + * bits. We don't need to worry about buildtime or runtime sort + * modifying the upper bits because the table is already sorted, and + * isn't part of the main exception table. + * + * The fixup_offset is set to the next instruction from the instruction + * that may fault. The execution will jump to this after handling the + * fault. + */ + fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE); + if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset)) + return -ERANGE; + + /* + * The offsets above have been calculated using the RO buffer but we + * need to use the R/W buffer for writes. + * switch ex to rw buffer for writing. + */ + ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image); + + ex->insn = ins_offset; + + if (BPF_CLASS(insn->code) != BPF_LDX) + dst_reg = DONT_CLEAR; + + ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) | + FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); + + ex->type = EX_TYPE_BPF; + + ctx->exentry_idx++; + return 0; +} + /* JITs an eBPF instruction. * Returns: * 0 - successfully JITed an 8-byte eBPF instruction. @@ -343,36 +1114,62 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass) { const u8 code = insn->code; - const u8 dst = bpf2a64[insn->dst_reg]; - const u8 src = bpf2a64[insn->src_reg]; + u8 dst = bpf2a64[insn->dst_reg]; + u8 src = bpf2a64[insn->src_reg]; const u8 tmp = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; - const u8 tmp3 = bpf2a64[TMP_REG_3]; + const u8 fp = bpf2a64[BPF_REG_FP]; + const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; const s16 off = insn->off; const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; const bool is64 = BPF_CLASS(code) == BPF_ALU64 || BPF_CLASS(code) == BPF_JMP; - const bool isdw = BPF_SIZE(code) == BPF_DW; - u8 jmp_cond, reg; + u8 jmp_cond; s32 jmp_offset; - -#define check_imm(bits, imm) do { \ - if ((((imm) > 0) && ((imm) >> (bits))) || \ - (((imm) < 0) && (~(imm) >> (bits)))) { \ - pr_info("[%2d] imm=%d(0x%x) out of range\n", \ - i, imm, imm); \ - return -EINVAL; \ - } \ -} while (0) -#define check_imm19(imm) check_imm(19, imm) -#define check_imm26(imm) check_imm(26, imm) + u32 a64_insn; + u8 src_adj; + u8 dst_adj; + int off_adj; + int ret; + bool sign_extend; switch (code) { /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU64 | BPF_MOV | BPF_X: - emit(A64_MOV(is64, dst, src), ctx); + if (insn_is_cast_user(insn)) { + emit(A64_MOV(0, tmp, src), ctx); // 32-bit mov clears the upper 32 bits + emit_a64_mov_i(0, dst, ctx->user_vm_start >> 32, ctx); + emit(A64_LSL(1, dst, dst, 32), ctx); + emit(A64_CBZ(1, tmp, 2), ctx); + emit(A64_ORR(1, tmp, dst, tmp), ctx); + emit(A64_MOV(1, dst, tmp), ctx); + break; + } else if (insn_is_mov_percpu_addr(insn)) { + if (dst != src) + emit(A64_MOV(1, dst, src), ctx); + if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) + emit(A64_MRS_TPIDR_EL2(tmp), ctx); + else + emit(A64_MRS_TPIDR_EL1(tmp), ctx); + emit(A64_ADD(1, dst, dst, tmp), ctx); + break; + } + switch (insn->off) { + case 0: + emit(A64_MOV(is64, dst, src), ctx); + break; + case 8: + emit(A64_SXTB(is64, dst, src), ctx); + break; + case 16: + emit(A64_SXTH(is64, dst, src), ctx); + break; + case 32: + emit(A64_SXTW(is64, dst, src), ctx); + break; + } break; /* dst = dst OP src */ case BPF_ALU | BPF_ADD | BPF_X: @@ -401,17 +1198,18 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, break; case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: + if (!off) + emit(A64_UDIV(is64, dst, dst, src), ctx); + else + emit(A64_SDIV(is64, dst, dst, src), ctx); + break; case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: - switch (BPF_OP(code)) { - case BPF_DIV: - emit(A64_UDIV(is64, dst, dst, src), ctx); - break; - case BPF_MOD: + if (!off) emit(A64_UDIV(is64, tmp, dst, src), ctx); - emit(A64_MSUB(is64, dst, dst, tmp, src), ctx); - break; - } + else + emit(A64_SDIV(is64, tmp, dst, src), ctx); + emit(A64_MSUB(is64, dst, dst, tmp, src), ctx); break; case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU64 | BPF_LSH | BPF_X: @@ -433,11 +1231,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, /* dst = BSWAP##imm(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU64 | BPF_END | BPF_FROM_LE: #ifdef CONFIG_CPU_BIG_ENDIAN - if (BPF_SRC(code) == BPF_FROM_BE) + if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_BE) goto emit_bswap_uxt; #else /* !CONFIG_CPU_BIG_ENDIAN */ - if (BPF_SRC(code) == BPF_FROM_LE) + if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_LE) goto emit_bswap_uxt; #endif switch (imm) { @@ -447,7 +1246,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, emit(A64_UXTH(is64, dst, dst), ctx); break; case 32: - emit(A64_REV32(is64, dst, dst), ctx); + emit(A64_REV32(0, dst, dst), ctx); /* upper 32 bits already cleared */ break; case 64: @@ -478,28 +1277,48 @@ emit_bswap_uxt: /* dst = dst OP imm */ case BPF_ALU | BPF_ADD | BPF_K: case BPF_ALU64 | BPF_ADD | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_ADD(is64, dst, dst, tmp), ctx); + emit_a64_add_i(is64, dst, dst, tmp, imm, ctx); break; case BPF_ALU | BPF_SUB | BPF_K: case BPF_ALU64 | BPF_SUB | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_SUB(is64, dst, dst, tmp), ctx); + if (is_addsub_imm(imm)) { + emit(A64_SUB_I(is64, dst, dst, imm), ctx); + } else if (is_addsub_imm(-(u32)imm)) { + emit(A64_ADD_I(is64, dst, dst, -imm), ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_SUB(is64, dst, dst, tmp), ctx); + } break; case BPF_ALU | BPF_AND | BPF_K: case BPF_ALU64 | BPF_AND | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_AND(is64, dst, dst, tmp), ctx); + a64_insn = A64_AND_I(is64, dst, dst, imm); + if (a64_insn != AARCH64_BREAK_FAULT) { + emit(a64_insn, ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_AND(is64, dst, dst, tmp), ctx); + } break; case BPF_ALU | BPF_OR | BPF_K: case BPF_ALU64 | BPF_OR | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_ORR(is64, dst, dst, tmp), ctx); + a64_insn = A64_ORR_I(is64, dst, dst, imm); + if (a64_insn != AARCH64_BREAK_FAULT) { + emit(a64_insn, ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_ORR(is64, dst, dst, tmp), ctx); + } break; case BPF_ALU | BPF_XOR | BPF_K: case BPF_ALU64 | BPF_XOR | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_EOR(is64, dst, dst, tmp), ctx); + a64_insn = A64_EOR_I(is64, dst, dst, imm); + if (a64_insn != AARCH64_BREAK_FAULT) { + emit(a64_insn, ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_EOR(is64, dst, dst, tmp), ctx); + } break; case BPF_ALU | BPF_MUL | BPF_K: case BPF_ALU64 | BPF_MUL | BPF_K: @@ -509,12 +1328,18 @@ emit_bswap_uxt: case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_K: emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_UDIV(is64, dst, dst, tmp), ctx); + if (!off) + emit(A64_UDIV(is64, dst, dst, tmp), ctx); + else + emit(A64_SDIV(is64, dst, dst, tmp), ctx); break; case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_K: emit_a64_mov_i(is64, tmp2, imm, ctx); - emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); + if (!off) + emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); + else + emit(A64_SDIV(is64, tmp, dst, tmp2), ctx); emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx); break; case BPF_ALU | BPF_LSH | BPF_K: @@ -532,7 +1357,11 @@ emit_bswap_uxt: /* JUMP off */ case BPF_JMP | BPF_JA: - jmp_offset = bpf2a64_offset(i + off, i, ctx); + case BPF_JMP32 | BPF_JA: + if (BPF_CLASS(code) == BPF_JMP) + jmp_offset = bpf2a64_offset(i, off, ctx); + else + jmp_offset = bpf2a64_offset(i, imm, ctx); check_imm26(jmp_offset); emit(A64_B(jmp_offset), ctx); break; @@ -559,7 +1388,7 @@ emit_bswap_uxt: case BPF_JMP32 | BPF_JSLE | BPF_X: emit(A64_CMP(is64, dst, src), ctx); emit_cond_jmp: - jmp_offset = bpf2a64_offset(i + off, i, ctx); + jmp_offset = bpf2a64_offset(i, off, ctx); check_imm19(jmp_offset); switch (BPF_OP(code)) { case BPF_JEQ: @@ -623,13 +1452,24 @@ emit_cond_jmp: case BPF_JMP32 | BPF_JSLT | BPF_K: case BPF_JMP32 | BPF_JSGE | BPF_K: case BPF_JMP32 | BPF_JSLE | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_CMP(is64, dst, tmp), ctx); + if (is_addsub_imm(imm)) { + emit(A64_CMP_I(is64, dst, imm), ctx); + } else if (is_addsub_imm(-(u32)imm)) { + emit(A64_CMN_I(is64, dst, -imm), ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_CMP(is64, dst, tmp), ctx); + } goto emit_cond_jmp; case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP32 | BPF_JSET | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_TST(is64, dst, tmp), ctx); + a64_insn = A64_TST_I(is64, dst, imm); + if (a64_insn != AARCH64_BREAK_FAULT) { + emit(a64_insn, ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_TST(is64, dst, tmp), ctx); + } goto emit_cond_jmp; /* function call */ case BPF_JMP | BPF_CALL: @@ -637,14 +1477,34 @@ emit_cond_jmp: const u8 r0 = bpf2a64[BPF_REG_0]; bool func_addr_fixed; u64 func_addr; - int ret; + u32 cpu_offset; + + /* Implement helper call to bpf_get_smp_processor_id() inline */ + if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) { + cpu_offset = offsetof(struct thread_info, cpu); + + emit(A64_MRS_SP_EL0(tmp), ctx); + if (is_lsi_offset(cpu_offset, 2)) { + emit(A64_LDR32I(r0, tmp, cpu_offset), ctx); + } else { + emit_a64_mov_i(1, tmp2, cpu_offset, ctx); + emit(A64_LDR32(r0, tmp, tmp2), ctx); + } + break; + } + + /* Implement helper call to bpf_get_current_task/_btf() inline */ + if (insn->src_reg == 0 && (insn->imm == BPF_FUNC_get_current_task || + insn->imm == BPF_FUNC_get_current_task_btf)) { + emit(A64_MRS_SP_EL0(r0), ctx); + break; + } ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &func_addr, &func_addr_fixed); if (ret < 0) return ret; - emit_addr_mov_i64(tmp, func_addr, ctx); - emit(A64_BLR(tmp), ctx); + emit_call(func_addr, ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx); break; } @@ -671,31 +1531,116 @@ emit_cond_jmp: u64 imm64; imm64 = (u64)insn1.imm << 32 | (u32)imm; - emit_a64_mov_i64(dst, imm64, ctx); + if (bpf_pseudo_func(insn)) + emit_addr_mov_i64(dst, imm64, ctx); + else + emit_a64_mov_i64(dst, imm64, ctx); return 1; } - /* LDX: dst = *(size *)(src + off) */ + /* LDX: dst = (u64)*(unsigned size *)(src + off) */ case BPF_LDX | BPF_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_DW: - emit_a64_mov_i(1, tmp, off, ctx); + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_W: + case BPF_LDX | BPF_PROBE_MEM | BPF_H: + case BPF_LDX | BPF_PROBE_MEM | BPF_B: + /* LDXS: dst_reg = (s64)*(signed size *)(src_reg + off) */ + case BPF_LDX | BPF_MEMSX | BPF_B: + case BPF_LDX | BPF_MEMSX | BPF_H: + case BPF_LDX | BPF_MEMSX | BPF_W: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_B: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: + if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { + emit(A64_ADD(1, tmp2, src, arena_vm_base), ctx); + src = tmp2; + } + if (src == fp) { + src_adj = A64_SP; + off_adj = off + ctx->stack_size; + } else { + src_adj = src; + off_adj = off; + } + sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX); switch (BPF_SIZE(code)) { case BPF_W: - emit(A64_LDR32(dst, src, tmp), ctx); + if (is_lsi_offset(off_adj, 2)) { + if (sign_extend) + emit(A64_LDRSWI(dst, src_adj, off_adj), ctx); + else + emit(A64_LDR32I(dst, src_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + if (sign_extend) + emit(A64_LDRSW(dst, src, tmp), ctx); + else + emit(A64_LDR32(dst, src, tmp), ctx); + } break; case BPF_H: - emit(A64_LDRH(dst, src, tmp), ctx); + if (is_lsi_offset(off_adj, 1)) { + if (sign_extend) + emit(A64_LDRSHI(dst, src_adj, off_adj), ctx); + else + emit(A64_LDRHI(dst, src_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + if (sign_extend) + emit(A64_LDRSH(dst, src, tmp), ctx); + else + emit(A64_LDRH(dst, src, tmp), ctx); + } break; case BPF_B: - emit(A64_LDRB(dst, src, tmp), ctx); + if (is_lsi_offset(off_adj, 0)) { + if (sign_extend) + emit(A64_LDRSBI(dst, src_adj, off_adj), ctx); + else + emit(A64_LDRBI(dst, src_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + if (sign_extend) + emit(A64_LDRSB(dst, src, tmp), ctx); + else + emit(A64_LDRB(dst, src, tmp), ctx); + } break; case BPF_DW: - emit(A64_LDR64(dst, src, tmp), ctx); + if (is_lsi_offset(off_adj, 3)) { + emit(A64_LDR64I(dst, src_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_LDR64(dst, src, tmp), ctx); + } break; } + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; + break; + + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + /* + * Nothing required here. + * + * In case of arm64, we rely on the firmware mitigation of + * Speculative Store Bypass as controlled via the ssbd kernel + * parameter. Whenever the mitigation is enabled, it works + * for all of the kernel code with no need to provide any + * additional instructions. + */ break; /* ST: *(size *)(dst + off) = imm */ @@ -703,23 +1648,61 @@ emit_cond_jmp: case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_DW: + case BPF_ST | BPF_PROBE_MEM32 | BPF_B: + case BPF_ST | BPF_PROBE_MEM32 | BPF_H: + case BPF_ST | BPF_PROBE_MEM32 | BPF_W: + case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: + if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { + emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx); + dst = tmp2; + } + if (dst == fp) { + dst_adj = A64_SP; + off_adj = off + ctx->stack_size; + } else { + dst_adj = dst; + off_adj = off; + } /* Load imm to a register then store it */ - emit_a64_mov_i(1, tmp2, off, ctx); emit_a64_mov_i(1, tmp, imm, ctx); switch (BPF_SIZE(code)) { case BPF_W: - emit(A64_STR32(tmp, dst, tmp2), ctx); + if (is_lsi_offset(off_adj, 2)) { + emit(A64_STR32I(tmp, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STR32(tmp, dst, tmp2), ctx); + } break; case BPF_H: - emit(A64_STRH(tmp, dst, tmp2), ctx); + if (is_lsi_offset(off_adj, 1)) { + emit(A64_STRHI(tmp, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STRH(tmp, dst, tmp2), ctx); + } break; case BPF_B: - emit(A64_STRB(tmp, dst, tmp2), ctx); + if (is_lsi_offset(off_adj, 0)) { + emit(A64_STRBI(tmp, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STRB(tmp, dst, tmp2), ctx); + } break; case BPF_DW: - emit(A64_STR64(tmp, dst, tmp2), ctx); + if (is_lsi_offset(off_adj, 3)) { + emit(A64_STR64I(tmp, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STR64(tmp, dst, tmp2), ctx); + } break; } + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; break; /* STX: *(size *)(dst + off) = src */ @@ -727,44 +1710,81 @@ emit_cond_jmp: case BPF_STX | BPF_MEM | BPF_H: case BPF_STX | BPF_MEM | BPF_B: case BPF_STX | BPF_MEM | BPF_DW: - emit_a64_mov_i(1, tmp, off, ctx); + case BPF_STX | BPF_PROBE_MEM32 | BPF_B: + case BPF_STX | BPF_PROBE_MEM32 | BPF_H: + case BPF_STX | BPF_PROBE_MEM32 | BPF_W: + case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: + if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { + emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx); + dst = tmp2; + } + if (dst == fp) { + dst_adj = A64_SP; + off_adj = off + ctx->stack_size; + } else { + dst_adj = dst; + off_adj = off; + } switch (BPF_SIZE(code)) { case BPF_W: - emit(A64_STR32(src, dst, tmp), ctx); + if (is_lsi_offset(off_adj, 2)) { + emit(A64_STR32I(src, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STR32(src, dst, tmp), ctx); + } break; case BPF_H: - emit(A64_STRH(src, dst, tmp), ctx); + if (is_lsi_offset(off_adj, 1)) { + emit(A64_STRHI(src, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STRH(src, dst, tmp), ctx); + } break; case BPF_B: - emit(A64_STRB(src, dst, tmp), ctx); + if (is_lsi_offset(off_adj, 0)) { + emit(A64_STRBI(src, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STRB(src, dst, tmp), ctx); + } break; case BPF_DW: - emit(A64_STR64(src, dst, tmp), ctx); + if (is_lsi_offset(off_adj, 3)) { + emit(A64_STR64I(src, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STR64(src, dst, tmp), ctx); + } break; } + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; break; - /* STX XADD: lock *(u32 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_W: - /* STX XADD: lock *(u64 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_DW: - if (!off) { - reg = dst; - } else { - emit_a64_mov_i(1, tmp, off, ctx); - emit(A64_ADD(1, tmp, tmp, dst), ctx); - reg = tmp; - } - if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) { - emit(A64_STADD(isdw, reg, src), ctx); - } else { - emit(A64_LDXR(isdw, tmp2, reg), ctx); - emit(A64_ADD(isdw, tmp2, tmp2, src), ctx); - emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx); - jmp_offset = -3; - check_imm19(jmp_offset); - emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); - } + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + case BPF_STX | BPF_PROBE_ATOMIC | BPF_B: + case BPF_STX | BPF_PROBE_ATOMIC | BPF_H: + case BPF_STX | BPF_PROBE_ATOMIC | BPF_W: + case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW: + if (bpf_atomic_is_load_store(insn)) + ret = emit_atomic_ld_st(insn, ctx); + else if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) + ret = emit_lse_atomic(insn, ctx); + else + ret = emit_ll_sc_atomic(insn, ctx); + if (ret) + return ret; + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; break; default: @@ -780,22 +1800,35 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass) const struct bpf_prog *prog = ctx->prog; int i; + /* + * - offset[0] offset of the end of prologue, + * start of the 1st instruction. + * - offset[1] - offset of the end of 1st instruction, + * start of the 2nd instruction + * [....] + * - offset[3] - offset of the end of 3rd instruction, + * start of 4th instruction + */ for (i = 0; i < prog->len; i++) { const struct bpf_insn *insn = &prog->insnsi[i]; int ret; + ctx->offset[i] = ctx->idx; ret = build_insn(insn, ctx, extra_pass); if (ret > 0) { i++; - if (ctx->image == NULL) - ctx->offset[i] = ctx->idx; + ctx->offset[i] = ctx->idx; continue; } - if (ctx->image == NULL) - ctx->offset[i] = ctx->idx; if (ret) return ret; } + /* + * offset is allocated with prog->len + 1 so fill in + * the last element with the offset after the last + * instruction (end of program) + */ + ctx->offset[i] = ctx->idx; return 0; } @@ -810,6 +1843,16 @@ static int validate_code(struct jit_ctx *ctx) if (a64_insn == AARCH64_BREAK_FAULT) return -1; } + return 0; +} + +static int validate_ctx(struct jit_ctx *ctx) +{ + if (validate_code(ctx)) + return -1; + + if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries)) + return -1; return 0; } @@ -821,21 +1864,26 @@ static inline void bpf_flush_icache(void *start, void *end) struct arm64_jit_data { struct bpf_binary_header *header; - u8 *image; + u8 *ro_image; + struct bpf_binary_header *ro_header; struct jit_ctx ctx; }; struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { + int image_size, prog_size, extable_size, extable_align, extable_offset; struct bpf_prog *tmp, *orig_prog = prog; struct bpf_binary_header *header; + struct bpf_binary_header *ro_header; struct arm64_jit_data *jit_data; bool was_classic = bpf_prog_was_classic(prog); bool tmp_blinded = false; bool extra_pass = false; struct jit_ctx ctx; - int image_size; u8 *image_ptr; + u8 *ro_image_ptr; + int body_idx; + int exentry_idx; if (!prog->jit_requested) return orig_prog; @@ -862,98 +1910,167 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } if (jit_data->ctx.offset) { ctx = jit_data->ctx; - image_ptr = jit_data->image; + ro_image_ptr = jit_data->ro_image; + ro_header = jit_data->ro_header; header = jit_data->header; + image_ptr = (void *)header + ((void *)ro_image_ptr + - (void *)ro_header); extra_pass = true; - image_size = sizeof(u32) * ctx.idx; + prog_size = sizeof(u32) * ctx.idx; goto skip_init_ctx; } memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; - ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); + ctx.offset = kvcalloc(prog->len + 1, sizeof(int), GFP_KERNEL); if (ctx.offset == NULL) { prog = orig_prog; goto out_off; } - /* 1. Initial fake pass to compute ctx->idx. */ + ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); + ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); - /* Fake pass to fill in ctx->offset. */ - if (build_body(&ctx, extra_pass)) { + /* Pass 1: Estimate the maximum image size. + * + * BPF line info needs ctx->offset[i] to be the offset of + * instruction[i] in jited image, so build prologue first. + */ + if (build_prologue(&ctx, was_classic)) { prog = orig_prog; goto out_off; } - if (build_prologue(&ctx, was_classic)) { + if (build_body(&ctx, extra_pass)) { prog = orig_prog; goto out_off; } ctx.epilogue_offset = ctx.idx; - build_epilogue(&ctx); - - /* Now we know the actual image size. */ - image_size = sizeof(u32) * ctx.idx; - header = bpf_jit_binary_alloc(image_size, &image_ptr, - sizeof(u32), jit_fill_hole); - if (header == NULL) { + build_epilogue(&ctx, was_classic); + build_plt(&ctx); + + extable_align = __alignof__(struct exception_table_entry); + extable_size = prog->aux->num_exentries * + sizeof(struct exception_table_entry); + + /* Now we know the maximum image size. */ + prog_size = sizeof(u32) * ctx.idx; + /* also allocate space for plt target */ + extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align); + image_size = extable_offset + extable_size; + ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, + sizeof(u32), &header, &image_ptr, + jit_fill_hole); + if (!ro_header) { prog = orig_prog; goto out_off; } - /* 2. Now, the actual pass. */ + /* Pass 2: Determine jited position and result for each instruction */ + /* + * Use the image(RW) for writing the JITed instructions. But also save + * the ro_image(RX) for calculating the offsets in the image. The RW + * image will be later copied to the RX image from where the program + * will run. The bpf_jit_binary_pack_finalize() will do this copy in the + * final step. + */ ctx.image = (__le32 *)image_ptr; + ctx.ro_image = (__le32 *)ro_image_ptr; + if (extable_size) + prog->aux->extable = (void *)ro_image_ptr + extable_offset; skip_init_ctx: ctx.idx = 0; + ctx.exentry_idx = 0; + ctx.write = true; build_prologue(&ctx, was_classic); + /* Record exentry_idx and body_idx before first build_body */ + exentry_idx = ctx.exentry_idx; + body_idx = ctx.idx; + /* Dont write body instructions to memory for now */ + ctx.write = false; + if (build_body(&ctx, extra_pass)) { - bpf_jit_binary_free(header); prog = orig_prog; - goto out_off; + goto out_free_hdr; } - build_epilogue(&ctx); + ctx.epilogue_offset = ctx.idx; + ctx.exentry_idx = exentry_idx; + ctx.idx = body_idx; + ctx.write = true; - /* 3. Extra pass to validate JITed code. */ - if (validate_code(&ctx)) { - bpf_jit_binary_free(header); + /* Pass 3: Adjust jump offset and write final image */ + if (build_body(&ctx, extra_pass) || + WARN_ON_ONCE(ctx.idx != ctx.epilogue_offset)) { prog = orig_prog; - goto out_off; + goto out_free_hdr; } + build_epilogue(&ctx, was_classic); + build_plt(&ctx); + + /* Extra pass to validate JITed code. */ + if (validate_ctx(&ctx)) { + prog = orig_prog; + goto out_free_hdr; + } + + /* update the real prog size */ + prog_size = sizeof(u32) * ctx.idx; + /* And we're done. */ if (bpf_jit_enable > 1) - bpf_jit_dump(prog->len, image_size, 2, ctx.image); - - bpf_flush_icache(header, ctx.image + ctx.idx); + bpf_jit_dump(prog->len, prog_size, 2, ctx.image); if (!prog->is_func || extra_pass) { - if (extra_pass && ctx.idx != jit_data->ctx.idx) { - pr_err_once("multi-func JIT bug %d != %d\n", + /* The jited image may shrink since the jited result for + * BPF_CALL to subprog may be changed from indirect call + * to direct call. + */ + if (extra_pass && ctx.idx > jit_data->ctx.idx) { + pr_err_once("multi-func JIT bug %d > %d\n", ctx.idx, jit_data->ctx.idx); - bpf_jit_binary_free(header); prog->bpf_func = NULL; prog->jited = 0; + prog->jited_len = 0; + goto out_free_hdr; + } + if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) { + /* ro_header has been freed */ + ro_header = NULL; + prog = orig_prog; goto out_off; } - bpf_jit_binary_lock_ro(header); + /* + * The instructions have now been copied to the ROX region from + * where they will execute. Now the data cache has to be cleaned to + * the PoU and the I-cache has to be invalidated for the VAs. + */ + bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx); } else { jit_data->ctx = ctx; - jit_data->image = image_ptr; + jit_data->ro_image = ro_image_ptr; jit_data->header = header; + jit_data->ro_header = ro_header; } - prog->bpf_func = (void *)ctx.image; + + prog->bpf_func = (void *)ctx.ro_image; prog->jited = 1; - prog->jited_len = image_size; + prog->jited_len = prog_size; if (!prog->is_func || extra_pass) { - bpf_prog_fill_jited_linfo(prog, ctx.offset); + int i; + + /* offset[prog->len] is the size of program */ + for (i = 0; i <= prog->len; i++) + ctx.offset[i] *= AARCH64_INSN_SIZE; + bpf_prog_fill_jited_linfo(prog, ctx.offset + 1); out_off: - kfree(ctx.offset); + kvfree(ctx.offset); kfree(jit_data); prog->aux->jit_data = NULL; } @@ -962,17 +2079,870 @@ out: bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog); return prog; + +out_free_hdr: + if (header) { + bpf_arch_text_copy(&ro_header->size, &header->size, + sizeof(header->size)); + bpf_jit_binary_pack_free(ro_header, header); + } + goto out_off; } -void *bpf_jit_alloc_exec(unsigned long size) +bool bpf_jit_supports_kfunc_call(void) { - return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, - BPF_JIT_REGION_END, GFP_KERNEL, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + return true; } -void bpf_jit_free_exec(void *addr) +void *bpf_arch_text_copy(void *dst, void *src, size_t len) { - return vfree(addr); + if (!aarch64_insn_copy(dst, src, len)) + return ERR_PTR(-EINVAL); + return dst; +} + +u64 bpf_jit_alloc_exec_limit(void) +{ + return VMALLOC_END - VMALLOC_START; +} + +/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ +bool bpf_jit_supports_subprog_tailcalls(void) +{ + return true; +} + +static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, + int bargs_off, int retval_off, int run_ctx_off, + bool save_ret) +{ + __le32 *branch; + u64 enter_prog; + u64 exit_prog; + struct bpf_prog *p = l->link.prog; + int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); + + enter_prog = (u64)bpf_trampoline_enter(p); + exit_prog = (u64)bpf_trampoline_exit(p); + + if (l->cookie == 0) { + /* if cookie is zero, one instruction is enough to store it */ + emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx); + } else { + emit_a64_mov_i64(A64_R(10), l->cookie, ctx); + emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off), + ctx); + } + + /* save p to callee saved register x19 to avoid loading p with mov_i64 + * each time. + */ + emit_addr_mov_i64(A64_R(19), (const u64)p, ctx); + + /* arg1: prog */ + emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx); + /* arg2: &run_ctx */ + emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx); + + emit_call(enter_prog, ctx); + + /* save return value to callee saved register x20 */ + emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx); + + /* if (__bpf_prog_enter(prog) == 0) + * goto skip_exec_of_prog; + */ + branch = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + + emit(A64_ADD_I(1, A64_R(0), A64_SP, bargs_off), ctx); + if (!p->jited) + emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx); + + emit_call((const u64)p->bpf_func, ctx); + + if (save_ret) + emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); + + if (ctx->image) { + int offset = &ctx->image[ctx->idx] - branch; + *branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset)); + } + + /* arg1: prog */ + emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx); + /* arg2: start time */ + emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx); + /* arg3: &run_ctx */ + emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx); + + emit_call(exit_prog, ctx); +} + +static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, + int bargs_off, int retval_off, int run_ctx_off, + __le32 **branches) +{ + int i; + + /* The first fmod_ret program will receive a garbage return value. + * Set this to 0 to avoid confusing the program. + */ + emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx); + for (i = 0; i < tl->nr_links; i++) { + invoke_bpf_prog(ctx, tl->links[i], bargs_off, retval_off, + run_ctx_off, true); + /* if (*(u64 *)(sp + retval_off) != 0) + * goto do_fexit; + */ + emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx); + /* Save the location of branch, and generate a nop. + * This nop will be replaced with a cbnz later. + */ + branches[i] = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + } +} + +struct arg_aux { + /* how many args are passed through registers, the rest of the args are + * passed through stack + */ + int args_in_regs; + /* how many registers are used to pass arguments */ + int regs_for_args; + /* how much stack is used for additional args passed to bpf program + * that did not fit in original function registers + */ + int bstack_for_args; + /* home much stack is used for additional args passed to the + * original function when called from trampoline (this one needs + * arguments to be properly aligned) + */ + int ostack_for_args; +}; + +static int calc_arg_aux(const struct btf_func_model *m, + struct arg_aux *a) +{ + int stack_slots, nregs, slots, i; + + /* verifier ensures m->nr_args <= MAX_BPF_FUNC_ARGS */ + for (i = 0, nregs = 0; i < m->nr_args; i++) { + slots = (m->arg_size[i] + 7) / 8; + if (nregs + slots <= 8) /* passed through register ? */ + nregs += slots; + else + break; + } + + a->args_in_regs = i; + a->regs_for_args = nregs; + a->ostack_for_args = 0; + a->bstack_for_args = 0; + + /* the rest arguments are passed through stack */ + for (; i < m->nr_args; i++) { + /* We can not know for sure about exact alignment needs for + * struct passed on stack, so deny those + */ + if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) + return -ENOTSUPP; + stack_slots = (m->arg_size[i] + 7) / 8; + a->bstack_for_args += stack_slots * 8; + a->ostack_for_args = a->ostack_for_args + stack_slots * 8; + } + + return 0; +} + +static void clear_garbage(struct jit_ctx *ctx, int reg, int effective_bytes) +{ + if (effective_bytes) { + int garbage_bits = 64 - 8 * effective_bytes; +#ifdef CONFIG_CPU_BIG_ENDIAN + /* garbage bits are at the right end */ + emit(A64_LSR(1, reg, reg, garbage_bits), ctx); + emit(A64_LSL(1, reg, reg, garbage_bits), ctx); +#else + /* garbage bits are at the left end */ + emit(A64_LSL(1, reg, reg, garbage_bits), ctx); + emit(A64_LSR(1, reg, reg, garbage_bits), ctx); +#endif + } +} + +static void save_args(struct jit_ctx *ctx, int bargs_off, int oargs_off, + const struct btf_func_model *m, + const struct arg_aux *a, + bool for_call_origin) +{ + int i; + int reg; + int doff; + int soff; + int slots; + u8 tmp = bpf2a64[TMP_REG_1]; + + /* store arguments to the stack for the bpf program, or restore + * arguments from stack for the original function + */ + for (reg = 0; reg < a->regs_for_args; reg++) { + emit(for_call_origin ? + A64_LDR64I(reg, A64_SP, bargs_off) : + A64_STR64I(reg, A64_SP, bargs_off), + ctx); + bargs_off += 8; + } + + soff = 32; /* on stack arguments start from FP + 32 */ + doff = (for_call_origin ? oargs_off : bargs_off); + + /* save on stack arguments */ + for (i = a->args_in_regs; i < m->nr_args; i++) { + slots = (m->arg_size[i] + 7) / 8; + /* verifier ensures arg_size <= 16, so slots equals 1 or 2 */ + while (slots-- > 0) { + emit(A64_LDR64I(tmp, A64_FP, soff), ctx); + /* if there is unused space in the last slot, clear + * the garbage contained in the space. + */ + if (slots == 0 && !for_call_origin) + clear_garbage(ctx, tmp, m->arg_size[i] % 8); + emit(A64_STR64I(tmp, A64_SP, doff), ctx); + soff += 8; + doff += 8; + } + } +} + +static void restore_args(struct jit_ctx *ctx, int bargs_off, int nregs) +{ + int reg; + + for (reg = 0; reg < nregs; reg++) { + emit(A64_LDR64I(reg, A64_SP, bargs_off), ctx); + bargs_off += 8; + } +} + +static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links) +{ + return fentry_links->nr_links == 1 && + fentry_links->links[0]->link.type == BPF_LINK_TYPE_STRUCT_OPS; +} + +/* Based on the x86's implementation of arch_prepare_bpf_trampoline(). + * + * bpf prog and function entry before bpf trampoline hooked: + * mov x9, lr + * nop + * + * bpf prog and function entry after bpf trampoline hooked: + * mov x9, lr + * bl <bpf_trampoline or plt> + * + */ +static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, + struct bpf_tramp_links *tlinks, void *func_addr, + const struct btf_func_model *m, + const struct arg_aux *a, + u32 flags) +{ + int i; + int stack_size; + int retaddr_off; + int regs_off; + int retval_off; + int bargs_off; + int nfuncargs_off; + int ip_off; + int run_ctx_off; + int oargs_off; + int nfuncargs; + struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; + struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + bool save_ret; + __le32 **branches = NULL; + bool is_struct_ops = is_struct_ops_tramp(fentry); + + /* trampoline stack layout: + * [ parent ip ] + * [ FP ] + * SP + retaddr_off [ self ip ] + * [ FP ] + * + * [ padding ] align SP to multiples of 16 + * + * [ x20 ] callee saved reg x20 + * SP + regs_off [ x19 ] callee saved reg x19 + * + * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or + * BPF_TRAMP_F_RET_FENTRY_RET + * [ arg reg N ] + * [ ... ] + * SP + bargs_off [ arg reg 1 ] for bpf + * + * SP + nfuncargs_off [ arg regs count ] + * + * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag + * + * SP + run_ctx_off [ bpf_tramp_run_ctx ] + * + * [ stack arg N ] + * [ ... ] + * SP + oargs_off [ stack arg 1 ] for original func + */ + + stack_size = 0; + oargs_off = stack_size; + if (flags & BPF_TRAMP_F_CALL_ORIG) + stack_size += a->ostack_for_args; + + run_ctx_off = stack_size; + /* room for bpf_tramp_run_ctx */ + stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8); + + ip_off = stack_size; + /* room for IP address argument */ + if (flags & BPF_TRAMP_F_IP_ARG) + stack_size += 8; + + nfuncargs_off = stack_size; + /* room for args count */ + stack_size += 8; + + bargs_off = stack_size; + /* room for args */ + nfuncargs = a->regs_for_args + a->bstack_for_args / 8; + stack_size += 8 * nfuncargs; + + /* room for return value */ + retval_off = stack_size; + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) + stack_size += 8; + + /* room for callee saved registers, currently x19 and x20 are used */ + regs_off = stack_size; + stack_size += 16; + + /* round up to multiples of 16 to avoid SPAlignmentFault */ + stack_size = round_up(stack_size, 16); + + /* return address locates above FP */ + retaddr_off = stack_size + 8; + + /* bpf trampoline may be invoked by 3 instruction types: + * 1. bl, attached to bpf prog or kernel function via short jump + * 2. br, attached to bpf prog or kernel function via long jump + * 3. blr, working as a function pointer, used by struct_ops. + * So BTI_JC should used here to support both br and blr. + */ + emit_bti(A64_BTI_JC, ctx); + + /* x9 is not set for struct_ops */ + if (!is_struct_ops) { + /* frame for parent function */ + emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + } + + /* frame for patched function for tracing, or caller for struct_ops */ + emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + + /* allocate stack space */ + emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx); + + if (flags & BPF_TRAMP_F_IP_ARG) { + /* save ip address of the traced function */ + emit_addr_mov_i64(A64_R(10), (const u64)func_addr, ctx); + emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx); + } + + /* save arg regs count*/ + emit(A64_MOVZ(1, A64_R(10), nfuncargs, 0), ctx); + emit(A64_STR64I(A64_R(10), A64_SP, nfuncargs_off), ctx); + + /* save args for bpf */ + save_args(ctx, bargs_off, oargs_off, m, a, false); + + /* save callee saved registers */ + emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx); + emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + /* for the first pass, assume the worst case */ + if (!ctx->image) + ctx->idx += 4; + else + emit_a64_mov_i64(A64_R(0), (const u64)im, ctx); + emit_call((const u64)__bpf_tramp_enter, ctx); + } + + for (i = 0; i < fentry->nr_links; i++) + invoke_bpf_prog(ctx, fentry->links[i], bargs_off, + retval_off, run_ctx_off, + flags & BPF_TRAMP_F_RET_FENTRY_RET); + + if (fmod_ret->nr_links) { + branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *), + GFP_KERNEL); + if (!branches) + return -ENOMEM; + + invoke_bpf_mod_ret(ctx, fmod_ret, bargs_off, retval_off, + run_ctx_off, branches); + } + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + /* save args for original func */ + save_args(ctx, bargs_off, oargs_off, m, a, true); + /* call original func */ + emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx); + emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx); + emit(A64_RET(A64_R(10)), ctx); + /* store return value */ + emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); + /* reserve a nop for bpf_tramp_image_put */ + im->ip_after_call = ctx->ro_image + ctx->idx; + emit(A64_NOP, ctx); + } + + /* update the branches saved in invoke_bpf_mod_ret with cbnz */ + for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) { + int offset = &ctx->image[ctx->idx] - branches[i]; + *branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset)); + } + + for (i = 0; i < fexit->nr_links; i++) + invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off, + run_ctx_off, false); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + im->ip_epilogue = ctx->ro_image + ctx->idx; + /* for the first pass, assume the worst case */ + if (!ctx->image) + ctx->idx += 4; + else + emit_a64_mov_i64(A64_R(0), (const u64)im, ctx); + emit_call((const u64)__bpf_tramp_exit, ctx); + } + + if (flags & BPF_TRAMP_F_RESTORE_REGS) + restore_args(ctx, bargs_off, a->regs_for_args); + + /* restore callee saved register x19 and x20 */ + emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx); + emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx); + + if (save_ret) + emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx); + + /* reset SP */ + emit(A64_MOV(1, A64_SP, A64_FP), ctx); + + if (is_struct_ops) { + emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_RET(A64_LR), ctx); + } else { + /* pop frames */ + emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx); + + if (flags & BPF_TRAMP_F_SKIP_FRAME) { + /* skip patched function, return to parent */ + emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); + emit(A64_RET(A64_R(9)), ctx); + } else { + /* return to patched function */ + emit(A64_MOV(1, A64_R(10), A64_LR), ctx); + emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); + emit(A64_RET(A64_R(10)), ctx); + } + } + + kfree(branches); + + return ctx->idx; +} + +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *func_addr) +{ + struct jit_ctx ctx = { + .image = NULL, + .idx = 0, + }; + struct bpf_tramp_image im; + struct arg_aux aaux; + int ret; + + ret = calc_arg_aux(m, &aaux); + if (ret < 0) + return ret; + + ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, m, &aaux, flags); + if (ret < 0) + return ret; + + return ret < 0 ? ret : ret * AARCH64_INSN_SIZE; +} + +void *arch_alloc_bpf_trampoline(unsigned int size) +{ + return bpf_prog_pack_alloc(size, jit_fill_hole); +} + +void arch_free_bpf_trampoline(void *image, unsigned int size) +{ + bpf_prog_pack_free(image, size); +} + +int arch_protect_bpf_trampoline(void *image, unsigned int size) +{ + return 0; +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, + void *ro_image_end, const struct btf_func_model *m, + u32 flags, struct bpf_tramp_links *tlinks, + void *func_addr) +{ + u32 size = ro_image_end - ro_image; + struct arg_aux aaux; + void *image, *tmp; + int ret; + + /* image doesn't need to be in module memory range, so we can + * use kvmalloc. + */ + image = kvmalloc(size, GFP_KERNEL); + if (!image) + return -ENOMEM; + + struct jit_ctx ctx = { + .image = image, + .ro_image = ro_image, + .idx = 0, + .write = true, + }; + + + jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image)); + ret = calc_arg_aux(m, &aaux); + if (ret) + goto out; + ret = prepare_trampoline(&ctx, im, tlinks, func_addr, m, &aaux, flags); + + if (ret > 0 && validate_code(&ctx) < 0) { + ret = -EINVAL; + goto out; + } + + if (ret > 0) + ret *= AARCH64_INSN_SIZE; + + tmp = bpf_arch_text_copy(ro_image, image, size); + if (IS_ERR(tmp)) { + ret = PTR_ERR(tmp); + goto out; + } + + bpf_flush_icache(ro_image, ro_image + size); +out: + kvfree(image); + return ret; +} + +static bool is_long_jump(void *ip, void *target) +{ + long offset; + + /* NULL target means this is a NOP */ + if (!target) + return false; + + offset = (long)target - (long)ip; + return offset < -SZ_128M || offset >= SZ_128M; +} + +static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip, + void *addr, void *plt, u32 *insn) +{ + void *target; + + if (!addr) { + *insn = aarch64_insn_gen_nop(); + return 0; + } + + if (is_long_jump(ip, addr)) + target = plt; + else + target = addr; + + *insn = aarch64_insn_gen_branch_imm((unsigned long)ip, + (unsigned long)target, + type); + + return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT; +} + +/* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf + * trampoline with the branch instruction from @ip to @new_addr. If @old_addr + * or @new_addr is NULL, the old or new instruction is NOP. + * + * When @ip is the bpf prog entry, a bpf trampoline is being attached or + * detached. Since bpf trampoline and bpf prog are allocated separately with + * vmalloc, the address distance may exceed 128MB, the maximum branch range. + * So long jump should be handled. + * + * When a bpf prog is constructed, a plt pointing to empty trampoline + * dummy_tramp is placed at the end: + * + * bpf_prog: + * mov x9, lr + * nop // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad dummy_tramp // plt target + * + * This is also the state when no trampoline is attached. + * + * When a short-jump bpf trampoline is attached, the patchsite is patched + * to a bl instruction to the trampoline directly: + * + * bpf_prog: + * mov x9, lr + * bl <short-jump bpf trampoline address> // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad dummy_tramp // plt target + * + * When a long-jump bpf trampoline is attached, the plt target is filled with + * the trampoline address and the patchsite is patched to a bl instruction to + * the plt: + * + * bpf_prog: + * mov x9, lr + * bl plt // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad <long-jump bpf trampoline address> // plt target + * + * The dummy_tramp is used to prevent another CPU from jumping to unknown + * locations during the patching process, making the patching process easier. + */ +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, + void *old_addr, void *new_addr) +{ + int ret; + u32 old_insn; + u32 new_insn; + u32 replaced; + struct bpf_plt *plt = NULL; + unsigned long size = 0UL; + unsigned long offset = ~0UL; + enum aarch64_insn_branch_type branch_type; + char namebuf[KSYM_NAME_LEN]; + void *image = NULL; + u64 plt_target = 0ULL; + bool poking_bpf_entry; + + if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) + /* Only poking bpf text is supported. Since kernel function + * entry is set up by ftrace, we reply on ftrace to poke kernel + * functions. + */ + return -ENOTSUPP; + + image = ip - offset; + /* zero offset means we're poking bpf prog entry */ + poking_bpf_entry = (offset == 0UL); + + /* bpf prog entry, find plt and the real patchsite */ + if (poking_bpf_entry) { + /* plt locates at the end of bpf prog */ + plt = image + size - PLT_TARGET_OFFSET; + + /* skip to the nop instruction in bpf prog entry: + * bti c // if BTI enabled + * mov x9, x30 + * nop + */ + ip = image + POKE_OFFSET * AARCH64_INSN_SIZE; + } + + /* long jump is only possible at bpf prog entry */ + if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) && + !poking_bpf_entry)) + return -EINVAL; + + if (poke_type == BPF_MOD_CALL) + branch_type = AARCH64_INSN_BRANCH_LINK; + else + branch_type = AARCH64_INSN_BRANCH_NOLINK; + + if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0) + return -EFAULT; + + if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0) + return -EFAULT; + + if (is_long_jump(ip, new_addr)) + plt_target = (u64)new_addr; + else if (is_long_jump(ip, old_addr)) + /* if the old target is a long jump and the new target is not, + * restore the plt target to dummy_tramp, so there is always a + * legal and harmless address stored in plt target, and we'll + * never jump from plt to an unknown place. + */ + plt_target = (u64)&dummy_tramp; + + if (plt_target) { + /* non-zero plt_target indicates we're patching a bpf prog, + * which is read only. + */ + if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1)) + return -EFAULT; + WRITE_ONCE(plt->target, plt_target); + set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1); + /* since plt target points to either the new trampoline + * or dummy_tramp, even if another CPU reads the old plt + * target value before fetching the bl instruction to plt, + * it will be brought back by dummy_tramp, so no barrier is + * required here. + */ + } + + /* if the old target and the new target are both long jumps, no + * patching is required + */ + if (old_insn == new_insn) + return 0; + + mutex_lock(&text_mutex); + if (aarch64_insn_read(ip, &replaced)) { + ret = -EFAULT; + goto out; + } + + if (replaced != old_insn) { + ret = -EFAULT; + goto out; + } + + /* We call aarch64_insn_patch_text_nosync() to replace instruction + * atomically, so no other CPUs will fetch a half-new and half-old + * instruction. But there is chance that another CPU executes the + * old instruction after the patching operation finishes (e.g., + * pipeline not flushed, or icache not synchronized yet). + * + * 1. when a new trampoline is attached, it is not a problem for + * different CPUs to jump to different trampolines temporarily. + * + * 2. when an old trampoline is freed, we should wait for all other + * CPUs to exit the trampoline and make sure the trampoline is no + * longer reachable, since bpf_tramp_image_put() function already + * uses percpu_ref and task-based rcu to do the sync, no need to call + * the sync version here, see bpf_tramp_image_put() for details. + */ + ret = aarch64_insn_patch_text_nosync(ip, new_insn); +out: + mutex_unlock(&text_mutex); + + return ret; +} + +bool bpf_jit_supports_ptr_xchg(void) +{ + return true; +} + +bool bpf_jit_supports_exceptions(void) +{ + /* We unwind through both kernel frames starting from within bpf_throw + * call and BPF frames. Therefore we require FP unwinder to be enabled + * to walk kernel frames and reach BPF frames in the stack trace. + * ARM64 kernel is aways compiled with CONFIG_FRAME_POINTER=y + */ + return true; +} + +bool bpf_jit_supports_arena(void) +{ + return true; +} + +bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) +{ + if (!in_arena) + return true; + switch (insn->code) { + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + if (!bpf_atomic_is_load_store(insn) && + !cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) + return false; + } + return true; +} + +bool bpf_jit_supports_percpu_insn(void) +{ + return true; +} + +bool bpf_jit_inlines_helper_call(s32 imm) +{ + switch (imm) { + case BPF_FUNC_get_smp_processor_id: + case BPF_FUNC_get_current_task: + case BPF_FUNC_get_current_task_btf: + return true; + default: + return false; + } +} + +void bpf_jit_free(struct bpf_prog *prog) +{ + if (prog->jited) { + struct arm64_jit_data *jit_data = prog->aux->jit_data; + struct bpf_binary_header *hdr; + + /* + * If we fail the final pass of JIT (from jit_subprogs), + * the program may not be finalized yet. Call finalize here + * before freeing it. + */ + if (jit_data) { + bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size, + sizeof(jit_data->header->size)); + kfree(jit_data); + } + hdr = bpf_jit_binary_pack_hdr(prog); + bpf_jit_binary_pack_free(hdr, NULL); + WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog)); + } + + bpf_prog_unlock_free(prog); } |