Diffstat (limited to 'arch/x86/net/bpf_jit_comp.c')
-rw-r--r--	arch/x86/net/bpf_jit_comp.c	| 741
1 file changed, 645 insertions(+), 96 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 919f647c740f..a43fc5af973d 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -64,6 +64,56 @@ static bool is_imm8(int value)
 	return value <= 127 && value >= -128;
 }
 
+/*
+ * Let us limit the positive offset to be <= 123.
+ * This is to ensure eventual jit convergence for the following patterns:
+ * ...
+ * pass4, final_proglen=4391:
+ *   ...
+ *   20e:  48 85 ff             test   rdi,rdi
+ *   211:  74 7d                je     0x290
+ *   213:  48 8b 77 00          mov    rsi,QWORD PTR [rdi+0x0]
+ *   ...
+ *   289:  48 85 ff             test   rdi,rdi
+ *   28c:  74 17                je     0x2a5
+ *   28e:  e9 7f ff ff ff       jmp    0x212
+ *   293:  bf 03 00 00 00       mov    edi,0x3
+ * Note that the insn at 0x211 is a 2-byte cond jump insn for offset 0x7d (125)
+ * and the insn at 0x28e is a 5-byte jmp insn with offset -129.
+ *
+ * pass5, final_proglen=4392:
+ *   ...
+ *   20e:  48 85 ff             test   rdi,rdi
+ *   211:  0f 84 80 00 00 00    je     0x297
+ *   217:  48 8b 77 00          mov    rsi,QWORD PTR [rdi+0x0]
+ *   ...
+ *   28d:  48 85 ff             test   rdi,rdi
+ *   290:  74 1a                je     0x2ac
+ *   292:  eb 84                jmp    0x218
+ *   294:  bf 03 00 00 00       mov    edi,0x3
+ * Note that the insn at 0x211 is now a 6-byte cond jump insn since its offset
+ * became 0x80 based on the previous round (0x293 - 0x213 = 0x80).
+ * At the same time, the insn at 0x292 is a 2-byte insn since its offset is
+ * -124.
+ *
+ * pass6 will repeat the same code as in pass4, and this will prevent
+ * eventual convergence.
+ *
+ * To fix this issue, we need to break the je (2->6 bytes) <-> jmp
+ * (5->2 bytes) cycle in the above. In the above example, a je offset
+ * <= 0x7c would work.
+ *
+ * For other cases, je <-> je needs offset <= 0x7b to avoid the
+ * non-convergence issue, and for the jmp <-> je and jmp <-> jmp cases,
+ * jmp offset <= 0x7c avoids it.
+ *
+ * Overall, let us limit the positive offset for 8-bit cond/uncond jmp
+ * insns to a maximum of 123 (0x7b). This way, the jit passes can
+ * eventually converge.
+ */
+static bool is_imm8_jmp_offset(int value)
+{
+	return value <= 123 && value >= -128;
+}
+
 static bool is_simm32(s64 value)
 {
 	return value == (s64)(s32)value;
@@ -113,6 +163,7 @@ static int bpf_size_to_x86_bytes(int bpf_size)
 /* Pick a register outside of BPF range for JIT internal work */
 #define AUX_REG	(MAX_BPF_JIT_REG + 1)
 #define X86_REG_R9	(MAX_BPF_JIT_REG + 2)
+#define X86_REG_R12	(MAX_BPF_JIT_REG + 3)
 
 /*
  * The following table maps BPF registers to x86-64 registers.
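The new helper only narrows the positive side of the imm8 range. A minimal user-space sketch (standalone, not kernel code) of the boundary it introduces: offsets 124..127 are exactly the forward distances that a 4-byte growth of another instruction (je going 2->6 bytes) can push past 127, which is what fed the resize cycle above.

	#include <assert.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Same predicates as in the JIT. */
	static bool is_imm8(int value)            { return value <= 127 && value >= -128; }
	static bool is_imm8_jmp_offset(int value) { return value <= 123 && value >= -128; }

	int main(void)
	{
		/* 124..127 pass is_imm8() but overflow imm8 after a 4-byte
		 * growth elsewhere; 123 + 4 = 127 still fits. */
		for (int off = 120; off <= 130; off++)
			printf("off=%3d  is_imm8=%d  is_imm8_jmp_offset=%d\n",
			       off, is_imm8(off), is_imm8_jmp_offset(off));
		assert(is_imm8_jmp_offset(-128)); /* negative side unchanged */
		return 0;
	}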
@@ -139,6 +190,7 @@ static const int reg2hex[] = {
 	[BPF_REG_AX] = 2, /* R10 temp register */
 	[AUX_REG] = 3,    /* R11 temp register */
 	[X86_REG_R9] = 1, /* R9 register, 6th function argument */
+	[X86_REG_R12] = 4, /* R12 callee saved */
 };
 
 static const int reg2pt_regs[] = {
@@ -167,6 +219,7 @@ static bool is_ereg(u32 reg)
 			     BIT(BPF_REG_8) |
 			     BIT(BPF_REG_9) |
 			     BIT(X86_REG_R9) |
+			     BIT(X86_REG_R12) |
 			     BIT(BPF_REG_AX));
 }
 
@@ -205,6 +258,17 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2)
 	return byte;
 }
 
+static u8 add_3mod(u8 byte, u32 r1, u32 r2, u32 index)
+{
+	if (is_ereg(r1))
+		byte |= 1;
+	if (is_ereg(index))
+		byte |= 2;
+	if (is_ereg(r2))
+		byte |= 4;
+	return byte;
+}
+
 /* Encode 'dst_reg' register into x86-64 opcode 'byte' */
 static u8 add_1reg(u8 byte, u32 dst_reg)
 {
@@ -259,7 +323,23 @@ struct jit_context {
 /* Number of bytes emit_patch() needs to generate instructions */
 #define X86_PATCH_SIZE		5
 /* Number of bytes that will be skipped on tailcall */
-#define X86_TAIL_CALL_OFFSET	(11 + ENDBR_INSN_SIZE)
+#define X86_TAIL_CALL_OFFSET	(12 + ENDBR_INSN_SIZE)
+
+static void push_r9(u8 **pprog)
+{
+	u8 *prog = *pprog;
+
+	EMIT2(0x41, 0x51);   /* push r9 */
+	*pprog = prog;
+}
+
+static void pop_r9(u8 **pprog)
+{
+	u8 *prog = *pprog;
+
+	EMIT2(0x41, 0x59);   /* pop r9 */
+	*pprog = prog;
+}
 
 static void push_r12(u8 **pprog)
 {
@@ -389,6 +469,37 @@ static void emit_cfi(u8 **pprog, u32 hash)
 	*pprog = prog;
 }
 
+static void emit_prologue_tail_call(u8 **pprog, bool is_subprog)
+{
+	u8 *prog = *pprog;
+
+	if (!is_subprog) {
+		/* cmp rax, MAX_TAIL_CALL_CNT */
+		EMIT4(0x48, 0x83, 0xF8, MAX_TAIL_CALL_CNT);
+		EMIT2(X86_JA, 6);        /* ja 6 */
+		/* rax is tail_call_cnt if <= MAX_TAIL_CALL_CNT.
+		 * case1: entry of main prog.
+		 * case2: tail callee of main prog.
+		 */
+		EMIT1(0x50);             /* push rax */
+		/* Make rax as tail_call_cnt_ptr. */
+		EMIT3(0x48, 0x89, 0xE0); /* mov rax, rsp */
+		EMIT2(0xEB, 1);          /* jmp 1 */
+		/* rax is tail_call_cnt_ptr if > MAX_TAIL_CALL_CNT.
+		 * case: tail callee of subprog.
+		 */
+		EMIT1(0x50);             /* push rax */
+		/* push tail_call_cnt_ptr */
+		EMIT1(0x50);             /* push rax */
+	} else { /* is_subprog */
+		/* rax is tail_call_cnt_ptr. */
+		EMIT1(0x50);             /* push rax */
+		EMIT1(0x50);             /* push rax */
+	}
+
+	*pprog = prog;
+}
+
 /*
  * Emit x86-64 prologue code for BPF program.
  * bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
@@ -410,10 +521,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
 			/* When it's the entry of the whole tailcall context,
 			 * zeroing rax means initialising tail_call_cnt.
 			 */
-			EMIT2(0x31, 0xC0); /* xor eax, eax */
+			EMIT3(0x48, 0x31, 0xC0); /* xor rax, rax */
 		else
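add_3mod() extends the existing add_2mod() REX helper with the SIB index register. A small host-side model (same bit layout, illustrative values) of which REX bits the three registers land in:

	#include <stdint.h>
	#include <stdio.h>

	/* REX is 0100WRXB. Mirroring add_3mod(): bit 0 (B) extends the
	 * base, bit 1 (X) the SIB index, bit 2 (R) the other operand. */
	static uint8_t add_3mod(uint8_t byte, int base_ereg, int reg_ereg,
				int index_ereg)
	{
		if (base_ereg)
			byte |= 1;
		if (index_ereg)
			byte |= 2;
		if (reg_ereg)
			byte |= 4;
		return byte;
	}

	int main(void)
	{
		/* mov rax, qword ptr [rdi + r12 + off]: only the index (r12)
		 * is an extended register, so REX.W|REX.X = 0x48|0x02 = 0x4a. */
		printf("rex=0x%02x\n", add_3mod(0x48, 0, 0, 1));
		return 0;
	}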
 			/* Keep the same instruction layout. */
-			EMIT2(0x66, 0x90); /* nop2 */
+			emit_nops(&prog, 3); /* nop3 */
 	}
 	/* Exception callback receives FP as third parameter */
 	if (is_exception_cb) {
@@ -439,7 +550,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
 	if (stack_depth)
 		EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
 	if (tail_call_reachable)
-		EMIT1(0x50);         /* push rax */
+		emit_prologue_tail_call(&prog, is_subprog);
 	*pprog = prog;
 }
@@ -466,7 +577,7 @@ static int emit_call(u8 **pprog, void *func, void *ip)
 static int emit_rsb_call(u8 **pprog, void *func, void *ip)
 {
 	OPTIMIZER_HIDE_VAR(func);
-	x86_call_depth_emit_accounting(pprog, func);
+	ip += x86_call_depth_emit_accounting(pprog, func, ip);
 	return emit_patch(pprog, func, ip, 0xE8);
 }
@@ -553,7 +664,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
 		emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
 	} else {
 		EMIT2(0xFF, 0xE0 + reg);	/* jmp *%\reg */
-		if (IS_ENABLED(CONFIG_RETPOLINE) || IS_ENABLED(CONFIG_SLS))
+		if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) || IS_ENABLED(CONFIG_MITIGATION_SLS))
 			EMIT1(0xCC);		/* int3 */
 	}
 
@@ -568,20 +679,22 @@ static void emit_return(u8 **pprog, u8 *ip)
 		emit_jump(&prog, x86_return_thunk, ip);
 	} else {
 		EMIT1(0xC3);		/* ret */
-		if (IS_ENABLED(CONFIG_SLS))
+		if (IS_ENABLED(CONFIG_MITIGATION_SLS))
 			EMIT1(0xCC);	/* int3 */
 	}
 
 	*pprog = prog;
 }
 
+#define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack)	(-16 - round_up(stack, 8))
+
 /*
  * Generate the following code:
  *
  * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
  *   if (index >= array->map.max_entries)
  *     goto out;
- *   if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
+ *   if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
 *     goto out;
 *   prog = array->ptrs[index];
 *   if (prog == NULL)
@@ -594,7 +707,7 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
 					u32 stack_depth, u8 *ip,
 					struct jit_context *ctx)
 {
-	int tcc_off = -4 - round_up(stack_depth, 8);
+	int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth);
 	u8 *prog = *pprog, *start = *pprog;
 	int offset;
 
@@ -616,16 +729,14 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
 	EMIT2(X86_JBE, offset);                   /* jbe out */
 
 	/*
-	 * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
+	 * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
 	 *	goto out;
 	 */
-	EMIT2_off32(0x8B, 0x85, tcc_off);           /* mov eax, dword ptr [rbp - tcc_off] */
-	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);       /* cmp eax, MAX_TAIL_CALL_CNT */
+	EMIT3_off32(0x48, 0x8B, 0x85, tcc_ptr_off); /* mov rax, qword ptr [rbp - tcc_ptr_off] */
+	EMIT4(0x48, 0x83, 0x38, MAX_TAIL_CALL_CNT); /* cmp qword ptr [rax], MAX_TAIL_CALL_CNT */
 
 	offset = ctx->tail_call_indirect_label - (prog + 2 - start);
 	EMIT2(X86_JAE, offset);                   /* jae out */
-	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
-	EMIT2_off32(0x89, 0x85, tcc_off);         /* mov dword ptr [rbp - tcc_off], eax */
 
 	/* prog = array->ptrs[index]; */
 	EMIT4_off32(0x48, 0x8B, 0x8C, 0xD6,       /* mov rcx, [rsi + rdx * 8 + offsetof(...)] */
@@ -640,13 +751,23 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
 	offset = ctx->tail_call_indirect_label - (prog + 2 - start);
 	EMIT2(X86_JE, offset);                    /* je out */
 
+	/* Inc tail_call_cnt if the slot is populated. */
+	EMIT4(0x48, 0x83, 0x00, 0x01);            /* add qword ptr [rax], 1 */
+
 	if (bpf_prog->aux->exception_boundary) {
 		pop_callee_regs(&prog, all_callee_regs_used);
 		pop_r12(&prog);
 	} else {
 		pop_callee_regs(&prog, callee_regs_used);
+		if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
+			pop_r12(&prog);
 	}
 
+	/* Pop tail_call_cnt_ptr. */
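The switch from an inline counter to a counter pointer is what makes the count global across a tailcall hierarchy. A user-space model of the emitted logic, with recursion standing in for tail calls (MAX_TAIL_CALL_CNT assumed to be 33 as in include/linux/bpf.h):

	#include <stdio.h>

	#define MAX_TAIL_CALL_CNT 33 /* assumed value */

	/* The entry prog owns the counter on its own stack; every callee
	 * only sees a pointer to it, so the bound applies to the whole
	 * chain rather than per frame. */
	static void bpf_prog_sim(int *tcc_ptr)
	{
		if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT) /* cmp [rax]; jae out */
			return;
		bpf_prog_sim(tcc_ptr); /* tail call: same tcc_ptr travels in rax */
	}

	int main(void)
	{
		int tail_call_cnt = 0; /* push rax; mov rax, rsp in the prologue */

		bpf_prog_sim(&tail_call_cnt);
		printf("tail_call_cnt reached %d\n", tail_call_cnt);
		return 0;
	}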
+	EMIT1(0x58);                              /* pop rax */
+	/* Pop tail_call_cnt, if it's main prog.
+	 * Pop tail_call_cnt_ptr, if it's subprog.
+	 */
 	EMIT1(0x58);                              /* pop rax */
 	if (stack_depth)
 		EMIT3_off32(0x48, 0x81, 0xC4,     /* add rsp, sd */
@@ -675,21 +796,19 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
 				      bool *callee_regs_used, u32 stack_depth,
 				      struct jit_context *ctx)
 {
-	int tcc_off = -4 - round_up(stack_depth, 8);
+	int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth);
 	u8 *prog = *pprog, *start = *pprog;
 	int offset;
 
 	/*
-	 * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
+	 * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
 	 *	goto out;
 	 */
-	EMIT2_off32(0x8B, 0x85, tcc_off);           /* mov eax, dword ptr [rbp - tcc_off] */
-	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);       /* cmp eax, MAX_TAIL_CALL_CNT */
+	EMIT3_off32(0x48, 0x8B, 0x85, tcc_ptr_off); /* mov rax, qword ptr [rbp - tcc_ptr_off] */
+	EMIT4(0x48, 0x83, 0x38, MAX_TAIL_CALL_CNT); /* cmp qword ptr [rax], MAX_TAIL_CALL_CNT */
 
 	offset = ctx->tail_call_direct_label - (prog + 2 - start);
 	EMIT2(X86_JAE, offset);                     /* jae out */
-	EMIT3(0x83, 0xC0, 0x01);                    /* add eax, 1 */
-	EMIT2_off32(0x89, 0x85, tcc_off);           /* mov dword ptr [rbp - tcc_off], eax */
 
 	poke->tailcall_bypass = ip + (prog - start);
 	poke->adj_off = X86_TAIL_CALL_OFFSET;
@@ -699,13 +818,23 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
 	emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
 		  poke->tailcall_bypass);
 
+	/* Inc tail_call_cnt if the slot is populated. */
+	EMIT4(0x48, 0x83, 0x00, 0x01);              /* add qword ptr [rax], 1 */
+
 	if (bpf_prog->aux->exception_boundary) {
 		pop_callee_regs(&prog, all_callee_regs_used);
 		pop_r12(&prog);
 	} else {
 		pop_callee_regs(&prog, callee_regs_used);
+		if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
+			pop_r12(&prog);
 	}
 
+	/* Pop tail_call_cnt_ptr. */
+	EMIT1(0x58);                                /* pop rax */
+	/* Pop tail_call_cnt, if it's main prog.
+	 * Pop tail_call_cnt_ptr, if it's subprog.
+	 */
 	EMIT1(0x58);                                /* pop rax */
 	if (stack_depth)
 		EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
@@ -798,9 +927,10 @@ done:
 static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
 			   const u32 imm32_hi, const u32 imm32_lo)
 {
+	u64 imm64 = ((u64)imm32_hi << 32) | (u32)imm32_lo;
 	u8 *prog = *pprog;
 
-	if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
+	if (is_uimm32(imm64)) {
 		/*
 		 * For emitting plain u32, where sign bit must not be
 		 * propagated LLVM tends to load imm64 over mov32
 		 * directly, so save couple of bytes by just doing
 		 * 'mov %eax, imm32' instead.
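With the new is_simm32() arm, emit_mov_imm64() now picks between three encodings. A host-side sketch of the same selection logic:

	#include <stdint.h>
	#include <stdio.h>

	/* Shortest encoding that still materialises the full 64-bit value. */
	static const char *mov_imm64_form(uint64_t imm64)
	{
		if (imm64 == (uint32_t)imm64)
			return "mov r32, imm32 (zero-extended)";
		if ((int64_t)imm64 == (int64_t)(int32_t)imm64)
			return "mov r64, imm32 (sign-extended, new is_simm32 arm)";
		return "movabs r64, imm64 (10 bytes)";
	}

	int main(void)
	{
		printf("%s\n", mov_imm64_form(0xdeadbeefULL));         /* low 32 only */
		printf("%s\n", mov_imm64_form((uint64_t)-256));        /* all-ones top */
		printf("%s\n", mov_imm64_form(0x0000123400000000ULL)); /* needs movabs */
		return 0;
	}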
 		 */
 		emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
+	} else if (is_simm32(imm64)) {
+		emit_mov_imm32(&prog, true, dst_reg, imm32_lo);
 	} else {
 		/* movabsq rax, imm64 */
 		EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
@@ -887,6 +1019,18 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
 	*pprog = prog;
 }
 
+static void emit_insn_suffix_SIB(u8 **pprog, u32 ptr_reg, u32 val_reg,
+				 u32 index_reg, int off)
+{
+	u8 *prog = *pprog;
+
+	if (is_imm8(off)) {
+		EMIT3(add_2reg(0x44, BPF_REG_0, val_reg),
+		      add_2reg(0, ptr_reg, index_reg) /* SIB */, off);
+	} else {
+		EMIT2_off32(add_2reg(0x84, BPF_REG_0, val_reg),
+			    add_2reg(0, ptr_reg, index_reg) /* SIB */, off);
+	}
+	*pprog = prog;
+}
+
 /*
  * Emit a REX byte if it will be necessary to address these registers
 */
@@ -968,6 +1112,37 @@ static void emit_ldsx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 	*pprog = prog;
 }
 
+static void emit_ldx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg,
+			   u32 index_reg, int off)
+{
+	u8 *prog = *pprog;
+
+	switch (size) {
+	case BPF_B:
+		/* movzx rax, byte ptr [rax + r12 + off] */
+		EMIT3(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x0F, 0xB6);
+		break;
+	case BPF_H:
+		/* movzx rax, word ptr [rax + r12 + off] */
+		EMIT3(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x0F, 0xB7);
+		break;
+	case BPF_W:
+		/* mov eax, dword ptr [rax + r12 + off] */
+		EMIT2(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x8B);
+		break;
+	case BPF_DW:
+		/* mov rax, qword ptr [rax + r12 + off] */
+		EMIT2(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x8B);
+		break;
+	}
+	emit_insn_suffix_SIB(&prog, src_reg, dst_reg, index_reg, off);
+	*pprog = prog;
+}
+
+static void emit_ldx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+	emit_ldx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
+}
+
 /* STX: *(u8*)(dst_reg + off) = src_reg */
 static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
@@ -1002,6 +1177,71 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 	*pprog = prog;
 }
 
+/* STX: *(u8*)(dst_reg + index_reg + off) = src_reg */
+static void emit_stx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg,
+			   u32 index_reg, int off)
+{
+	u8 *prog = *pprog;
+
+	switch (size) {
+	case BPF_B:
+		/* mov byte ptr [rax + r12 + off], al */
+		EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x88);
+		break;
+	case BPF_H:
+		/* mov word ptr [rax + r12 + off], ax */
+		EMIT3(0x66, add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89);
+		break;
+	case BPF_W:
+		/* mov dword ptr [rax + r12 + off], eax */
+		EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89);
+		break;
+	case BPF_DW:
+		/* mov qword ptr [rax + r12 + off], rax */
+		EMIT2(add_3mod(0x48, dst_reg, src_reg, index_reg), 0x89);
+		break;
+	}
+	emit_insn_suffix_SIB(&prog, dst_reg, src_reg, index_reg, off);
+	*pprog = prog;
+}
+
+static void emit_stx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+	emit_stx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
+}
+
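emit_insn_suffix_SIB() produces the ModRM/SIB/displacement tail for the new [base + index + off] forms. A standalone model of the disp8 case (register numbers are the low 3 bits; the REX prefix from add_3mod() supplies the 4th bit). Note that a SIB index of 0b100 means "no index" without REX.X, so r12 as index is only addressable because REX.X is set:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint8_t reg   = 0; /* rax: ModRM.reg (value register)   */
		uint8_t base  = 7; /* rdi: SIB.base                     */
		uint8_t index = 4; /* r12 & 7: SIB.index, needs REX.X   */
		uint8_t disp  = 16;

		uint8_t modrm = 0x40 | (reg << 3) | 0x04; /* mod=01 disp8, rm=100 -> SIB */
		uint8_t sib   = (0 << 6) | (index << 3) | base; /* scale = 1 */

		/* Prints 0x44 0x27 0x10, matching add_2reg(0x44, ...) above. */
		printf("modrm=0x%02x sib=0x%02x disp8=0x%02x\n", modrm, sib, disp);
		return 0;
	}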
+/* ST: *(u8*)(dst_reg + index_reg + off) = imm32 */
+static void emit_st_index(u8 **pprog, u32 size, u32 dst_reg, u32 index_reg,
+			  int off, int imm)
+{
+	u8 *prog = *pprog;
+
+	switch (size) {
+	case BPF_B:
+		/* mov byte ptr [rax + r12 + off], imm8 */
+		EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC6);
+		break;
+	case BPF_H:
+		/* mov word ptr [rax + r12 + off], imm16 */
+		EMIT3(0x66, add_3mod(0x40, dst_reg, 0, index_reg), 0xC7);
+		break;
+	case BPF_W:
+		/* mov dword ptr [rax + r12 + off], imm32 */
+		EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC7);
+		break;
+	case BPF_DW:
+		/* mov qword ptr [rax + r12 + off], imm32 */
+		EMIT2(add_3mod(0x48, dst_reg, 0, index_reg), 0xC7);
+		break;
+	}
+	emit_insn_suffix_SIB(&prog, dst_reg, 0, index_reg, off);
+	EMIT(imm, bpf_size_to_x86_bytes(size));
+	*pprog = prog;
+}
+
+static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm)
+{
+	emit_st_index(pprog, size, dst_reg, X86_REG_R12, off, imm);
+}
+
 static int emit_atomic(u8 **pprog, u8 atomic_op,
 		       u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
 {
@@ -1043,24 +1283,73 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
 	return 0;
 }
 
+static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size,
+			     u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+{
+	u8 *prog = *pprog;
+
+	EMIT1(0xF0); /* lock prefix */
+	switch (size) {
+	case BPF_W:
+		EMIT1(add_3mod(0x40, dst_reg, src_reg, index_reg));
+		break;
+	case BPF_DW:
+		EMIT1(add_3mod(0x48, dst_reg, src_reg, index_reg));
+		break;
+	default:
+		pr_err("bpf_jit: 1 and 2 byte atomics are not supported\n");
+		return -EFAULT;
+	}
+
+	/* emit opcode */
+	switch (atomic_op) {
+	case BPF_ADD:
+	case BPF_AND:
+	case BPF_OR:
+	case BPF_XOR:
+		/* lock *(u32/u64*)(dst_reg + idx_reg + off) <op>= src_reg */
+		EMIT1(simple_alu_opcodes[atomic_op]);
+		break;
+	case BPF_ADD | BPF_FETCH:
+		/* src_reg = atomic_fetch_add(dst_reg + idx_reg + off, src_reg); */
+		EMIT2(0x0F, 0xC1);
+		break;
+	case BPF_XCHG:
+		/* src_reg = atomic_xchg(dst_reg + idx_reg + off, src_reg); */
+		EMIT1(0x87);
+		break;
+	case BPF_CMPXCHG:
+		/* r0 = atomic_cmpxchg(dst_reg + idx_reg + off, r0, src_reg); */
+		EMIT2(0x0F, 0xB1);
+		break;
+	default:
+		pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op);
+		return -EFAULT;
+	}
+	emit_insn_suffix_SIB(&prog, dst_reg, src_reg, index_reg, off);
+	*pprog = prog;
+	return 0;
+}
+
+#define DONT_CLEAR 1
+
 bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
 {
 	u32 reg = x->fixup >> 8;
 
 	/* jump over faulting load and clear dest register */
-	*(unsigned long *)((void *)regs + reg) = 0;
+	if (reg != DONT_CLEAR)
+		*(unsigned long *)((void *)regs + reg) = 0;
 	regs->ip += x->fixup & 0xff;
 
 	return true;
 }
 
 static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt,
-			     bool *regs_used, bool *tail_call_seen)
+			     bool *regs_used)
 {
 	int i;
 
 	for (i = 1; i <= insn_cnt; i++, insn++) {
-		if (insn->code == (BPF_JMP | BPF_TAIL_CALL))
-			*tail_call_seen = true;
 		if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6)
 			regs_used[0] = true;
 		if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7)
@@ -1131,11 +1420,35 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
 	*pprog = prog;
 }
 
+static void emit_priv_frame_ptr(u8 **pprog, void __percpu *priv_frame_ptr)
+{
+	u8 *prog = *pprog;
+
+	/* movabs r9, priv_frame_ptr */
+	emit_mov_imm64(&prog, X86_REG_R9, (__force long) priv_frame_ptr >> 32,
+		       (u32) (__force long) priv_frame_ptr);
+
+#ifdef CONFIG_SMP
+	/* add <r9>, gs:[<off>] */
+	EMIT2(0x65, 0x4c);
+	EMIT3(0x03, 0x0c, 0x25);
+	EMIT((u32)(unsigned long)&this_cpu_off, 4);
+#endif
+
+	*pprog = prog;
+}
+
 #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
 
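DONT_CLEAR overloads the fixup's register-offset field: offset 1 can never be a valid pt_regs slot, so it safely flags "nothing to zero" for the new store and atomic cases. A user-space model of the updated handler (fake pt_regs layout, illustrative offsets):

	#include <stdint.h>
	#include <stdio.h>

	#define DONT_CLEAR 1 /* offset 1 is never a real pt_regs slot */

	struct fake_pt_regs { unsigned long r15, r14, r13, r12; };

	/* Low byte of fixup = insn length to skip; the rest = pt_regs byte
	 * offset of the dest register, or DONT_CLEAR for stores/atomics. */
	static void handle_fault(uint32_t fixup, struct fake_pt_regs *regs,
				 unsigned long *ip)
	{
		uint32_t reg = fixup >> 8;

		if (reg != DONT_CLEAR)
			*(unsigned long *)((char *)regs + reg) = 0;
		*ip += fixup & 0xff;
	}

	int main(void)
	{
		struct fake_pt_regs regs = { .r13 = 0xdead };
		unsigned long ip = 0x1000;

		handle_fault((16 << 8) | 4, &regs, &ip);         /* load into "r13" */
		handle_fault((DONT_CLEAR << 8) | 4, &regs, &ip); /* a store */
		printf("r13=%lu ip=0x%lx\n", regs.r13, ip);      /* r13=0 ip=0x1008 */
		return 0;
	}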
-/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
-#define RESTORE_TAIL_CALL_CNT(stack)	\
-	EMIT3_off32(0x48, 0x8B, 0x85, -round_up(stack, 8) - 8)
+#define __LOAD_TCC_PTR(off)		\
+	EMIT3_off32(0x48, 0x8B, 0x85, off)
+/* mov rax, qword ptr [rbp - rounded_stack_depth - 16] */
+#define LOAD_TAIL_CALL_CNT_PTR(stack)	\
+	__LOAD_TCC_PTR(BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack))
+
+/* Memory size/value to protect private stack overflow/underflow */
+#define PRIV_STACK_GUARD_SZ	8
+#define PRIV_STACK_GUARD_VAL	0xEB9F12345678eb9fULL
 
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
 		  int oldproglen, struct jit_context *ctx, bool jmp_padding)
 {
@@ -1144,21 +1457,30 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 	struct bpf_insn *insn = bpf_prog->insnsi;
 	bool callee_regs_used[4] = {};
 	int insn_cnt = bpf_prog->len;
-	bool tail_call_seen = false;
 	bool seen_exit = false;
 	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
+	void __percpu *priv_frame_ptr = NULL;
+	u64 arena_vm_start, user_vm_start;
+	void __percpu *priv_stack_ptr;
 	int i, excnt = 0;
 	int ilen, proglen = 0;
 	u8 *prog = temp;
+	u32 stack_depth;
 	int err;
 
-	detect_reg_usage(insn, insn_cnt, callee_regs_used,
-			 &tail_call_seen);
+	stack_depth = bpf_prog->aux->stack_depth;
+	priv_stack_ptr = bpf_prog->aux->priv_stack_ptr;
+	if (priv_stack_ptr) {
+		priv_frame_ptr = priv_stack_ptr + PRIV_STACK_GUARD_SZ +
+				 round_up(stack_depth, 8);
+		stack_depth = 0;
+	}
+
+	arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
+	user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
 
-	/* tail call's presence in current prog implies it is reachable */
-	tail_call_reachable |= tail_call_seen;
+	detect_reg_usage(insn, insn_cnt, callee_regs_used);
 
-	emit_prologue(&prog, bpf_prog->aux->stack_depth,
+	emit_prologue(&prog, stack_depth,
 		      bpf_prog_was_classic(bpf_prog), tail_call_reachable,
 		      bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
 	/* Exception callback will clobber callee regs for its own use, and
@@ -1172,8 +1494,16 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 		push_r12(&prog);
 		push_callee_regs(&prog, all_callee_regs_used);
 	} else {
+		if (arena_vm_start)
+			push_r12(&prog);
 		push_callee_regs(&prog, callee_regs_used);
 	}
+	if (arena_vm_start)
+		emit_mov_imm64(&prog, X86_REG_R12,
+			       arena_vm_start >> 32, (u32) arena_vm_start);
+
+	if (priv_frame_ptr)
+		emit_priv_frame_ptr(&prog, priv_frame_ptr);
 
 	ilen = prog - temp;
 	if (rw_image)
@@ -1194,6 +1524,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 		u8 *func;
 		int nops;
 
+		if (priv_frame_ptr) {
+			if (src_reg == BPF_REG_FP)
+				src_reg = X86_REG_R9;
+
+			if (dst_reg == BPF_REG_FP)
+				dst_reg = X86_REG_R9;
+		}
+
 		switch (insn->code) {
 			/* ALU */
 		case BPF_ALU | BPF_ADD | BPF_X:
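How the private frame pointer that replaces BPF_REG_FP is derived: a guard word precedes the stack area, frames grow down from the top of that area, and the emitted gs-relative add folds in this CPU's this_cpu_off at run time. A host-side sketch of the address arithmetic (plain pointer and a stand-in offset instead of a real percpu address):

	#include <stdint.h>
	#include <stdio.h>

	#define PRIV_STACK_GUARD_SZ 8

	/* Layout per CPU: [guard][stack area][guard]; R9 points at the top
	 * of the stack area since BPF frames grow downward from FP. */
	static char *priv_frame_ptr(char *priv_stack_ptr, unsigned int stack_depth,
				    unsigned long this_cpu_off /* stand-in */)
	{
		unsigned int rounded = (stack_depth + 7) & ~7u; /* round_up(, 8) */

		return priv_stack_ptr + PRIV_STACK_GUARD_SZ + rounded + this_cpu_off;
	}

	int main(void)
	{
		static char area[8 + 64 + 8]; /* guards around a 64-byte stack */

		printf("frame top at offset %td\n",
		       priv_frame_ptr(area, 64, 0) - area); /* prints 72 */
		return 0;
	}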
@@ -1213,6 +1551,49 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 			break;
 
 		case BPF_ALU64 | BPF_MOV | BPF_X:
+			if (insn_is_cast_user(insn)) {
+				if (dst_reg != src_reg)
+					/* 32-bit mov */
+					emit_mov_reg(&prog, false, dst_reg, src_reg);
+				/* shl dst_reg, 32 */
+				maybe_emit_1mod(&prog, dst_reg, true);
+				EMIT3(0xC1, add_1reg(0xE0, dst_reg), 32);
+
+				/* or dst_reg, user_vm_start */
+				maybe_emit_1mod(&prog, dst_reg, true);
+				if (is_axreg(dst_reg))
+					EMIT1_off32(0x0D, user_vm_start >> 32);
+				else
+					EMIT2_off32(0x81, add_1reg(0xC8, dst_reg),
+						    user_vm_start >> 32);
+
+				/* rol dst_reg, 32 */
+				maybe_emit_1mod(&prog, dst_reg, true);
+				EMIT3(0xC1, add_1reg(0xC0, dst_reg), 32);
+
+				/* xor r11, r11 */
+				EMIT3(0x4D, 0x31, 0xDB);
+
+				/* test dst_reg32, dst_reg32; check if lower 32-bit are zero */
+				maybe_emit_mod(&prog, dst_reg, dst_reg, false);
+				EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
+
+				/* cmove r11, dst_reg; if so, set dst_reg to zero */
+				/* WARNING: Intel swapped src/dst register encoding in CMOVcc !!! */
+				maybe_emit_mod(&prog, AUX_REG, dst_reg, true);
+				EMIT3(0x0F, 0x44, add_2reg(0xC0, AUX_REG, dst_reg));
+				break;
+			} else if (insn_is_mov_percpu_addr(insn)) {
+				/* mov <dst>, <src> (if necessary) */
+				EMIT_mov(dst_reg, src_reg);
+#ifdef CONFIG_SMP
+				/* add <dst>, gs:[<off>] */
+				EMIT2(0x65, add_1mod(0x48, dst_reg));
+				EMIT3(0x03, add_2reg(0x04, 0, dst_reg), 0x25);
+				EMIT((u32)(unsigned long)&this_cpu_off, 4);
+#endif
+				break;
+			}
+			fallthrough;
 		case BPF_ALU | BPF_MOV | BPF_X:
 			if (insn->off == 0)
 				emit_mov_reg(&prog,
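A C model of the emitted cast_user sequence (shl/or/rol plus test/cmove): keep the arena pointer's low 32 bits, splice in the upper half of user_vm_start, and map a pointer whose low 32 bits are zero to NULL rather than to a fake user address. The user_vm_start value below is illustrative:

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t cast_user(uint64_t arena_ptr, uint64_t user_vm_start)
	{
		uint64_t dst = (uint32_t)arena_ptr;  /* 32-bit mov zero-extends */

		dst <<= 32;                          /* shl dst, 32             */
		dst |= user_vm_start >> 32;          /* or dst, uvs >> 32       */
		dst = (dst << 32) | (dst >> 32);     /* rol dst, 32             */
		return (uint32_t)arena_ptr ? dst : 0; /* xor/test/cmove         */
	}

	int main(void)
	{
		uint64_t uvs = 0x7f0000000000ULL; /* illustrative user_vm_start */

		printf("%#llx\n", (unsigned long long)cast_user(0x1234, uvs));
		printf("%#llx\n", (unsigned long long)cast_user(0, uvs)); /* 0 */
		return 0;
	}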
@@ -1564,6 +1945,56 @@ st:			if (is_imm8(insn->off))
 				emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
 			break;
 
+		case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
+		case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
+		case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
+		case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
+			start_of_ldx = prog;
+			emit_st_r12(&prog, BPF_SIZE(insn->code), dst_reg, insn->off, insn->imm);
+			goto populate_extable;
+
+			/* LDX: dst_reg = *(u8*)(src_reg + r12 + off) */
+		case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+		case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+		case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+		case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+		case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+		case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+		case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+		case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+			start_of_ldx = prog;
+			if (BPF_CLASS(insn->code) == BPF_LDX)
+				emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+			else
+				emit_stx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+populate_extable:
+			{
+				struct exception_table_entry *ex;
+				u8 *_insn = image + proglen + (start_of_ldx - temp);
+				s64 delta;
+
+				if (!bpf_prog->aux->extable)
+					break;
+
+				if (excnt >= bpf_prog->aux->num_exentries) {
+					pr_err("mem32 extable bug\n");
+					return -EFAULT;
+				}
+				ex = &bpf_prog->aux->extable[excnt++];
+
+				delta = _insn - (u8 *)&ex->insn;
+				/* switch ex to rw buffer for writes */
+				ex = (void *)rw_image + ((void *)ex - (void *)image);
+
+				ex->insn = delta;
+
+				ex->data = EX_TYPE_BPF;
+
+				ex->fixup = (prog - start_of_ldx) |
+					((BPF_CLASS(insn->code) == BPF_LDX ?
+					  reg2pt_regs[dst_reg] : DONT_CLEAR) << 8);
+			}
+			break;
+
 			/* LDX: dst_reg = *(u8*)(src_reg + off) */
 		case BPF_LDX | BPF_MEM | BPF_B:
 		case BPF_LDX | BPF_PROBE_MEM | BPF_B:
@@ -1585,36 +2016,41 @@ st:			if (is_imm8(insn->off))
 			if (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
 			    BPF_MODE(insn->code) == BPF_PROBE_MEMSX) {
 				/* Conservatively check that src_reg + insn->off is a kernel address:
-				 * src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE
-				 * src_reg is used as scratch for src_reg += insn->off and restored
-				 * after emit_ldx if necessary
+				 * src_reg + insn->off > TASK_SIZE_MAX + PAGE_SIZE
+				 * and
+				 * src_reg + insn->off < VSYSCALL_ADDR
 				 */
-				u64 limit = TASK_SIZE_MAX + PAGE_SIZE;
+				u64 limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR;
 				u8 *end_of_jmp;
 
-				/* At end of these emitted checks, insn->off will have been added
-				 * to src_reg, so no need to do relative load with insn->off offset
-				 */
-				insn_off = 0;
+				/* movabsq r10, VSYSCALL_ADDR */
+				emit_mov_imm64(&prog, BPF_REG_AX, (long)VSYSCALL_ADDR >> 32,
+					       (u32)(long)VSYSCALL_ADDR);
 
-				/* movabsq r11, limit */
-				EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG));
-				EMIT((u32)limit, 4);
-				EMIT(limit >> 32, 4);
+				/* mov src_reg, r11 */
+				EMIT_mov(AUX_REG, src_reg);
 
 				if (insn->off) {
-					/* add src_reg, insn->off */
-					maybe_emit_1mod(&prog, src_reg, true);
-					EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off);
+					/* add r11, insn->off */
+					maybe_emit_1mod(&prog, AUX_REG, true);
+					EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off);
 				}
 
-				/* cmp src_reg, r11 */
-				maybe_emit_mod(&prog, src_reg, AUX_REG, true);
-				EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG));
+				/* sub r11, r10 */
+				maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+				EMIT2(0x29, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
+
+				/* movabsq r10, limit */
+				emit_mov_imm64(&prog, BPF_REG_AX, (long)limit >> 32,
+					       (u32)(long)limit);
+
+				/* cmp r10, r11 */
+				maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+				EMIT2(0x39, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
 
-				/* if unsigned '>=', goto load */
-				EMIT2(X86_JAE, 0);
+				/* if unsigned '>', goto load */
+				EMIT2(X86_JA, 0);
 				end_of_jmp = prog;
 
 				/* xor dst_reg, dst_reg */
@@ -1640,18 +2076,6 @@ st:			if (is_imm8(insn->off))
 				/* populate jmp_offset for JMP above */
 				start_of_ldx[-1] = prog - start_of_ldx;
 
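The rewritten guard folds both bounds into one unsigned compare, the same wrap-around trick the emitted sub/cmp/ja pair performs. A user-space model (constants are illustrative values for x86-64 4-level paging):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define TASK_SIZE_MAX 0x00007ffffffff000ULL /* illustrative */
	#define PAGE_SIZE     0x1000ULL
	#define VSYSCALL_ADDR 0xffffffffff600000ULL

	/* Both subtractions wrap mod 2^64, so one unsigned '>' implements
	 * "addr > TASK_SIZE_MAX + PAGE_SIZE && addr < VSYSCALL_ADDR". */
	static bool probe_mem_allowed(uint64_t addr)
	{
		uint64_t limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR;

		return addr - VSYSCALL_ADDR > limit; /* the 'ja load' condition */
	}

	int main(void)
	{
		printf("user     -> %d\n", probe_mem_allowed(0x00007f0000000000ULL)); /* 0 */
		printf("kernel   -> %d\n", probe_mem_allowed(0xffff888000000000ULL)); /* 1 */
		printf("vsyscall -> %d\n", probe_mem_allowed(VSYSCALL_ADDR));         /* 0 */
		return 0;
	}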
-				if (insn->off && src_reg != dst_reg) {
-					/* sub src_reg, insn->off
-					 * Restore src_reg after "add src_reg, insn->off" in prev
-					 * if statement. But if src_reg == dst_reg, emit_ldx
-					 * above already clobbered src_reg, so no need to restore.
-					 * If add src_reg, insn->off was unnecessary, no need to
-					 * restore either.
-					 */
-					maybe_emit_1mod(&prog, src_reg, true);
-					EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off);
-				}
-
 				if (!bpf_prog->aux->extable)
 					break;
 
@@ -1748,23 +2172,35 @@ st:			if (is_imm8(insn->off))
 				return err;
 			break;
 
+		case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
+		case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
+			start_of_ldx = prog;
+			err = emit_atomic_index(&prog, insn->imm, BPF_SIZE(insn->code),
+						dst_reg, src_reg, X86_REG_R12, insn->off);
+			if (err)
+				return err;
+			goto populate_extable;
+
 			/* call */
 		case BPF_JMP | BPF_CALL: {
-			int offs;
+			u8 *ip = image + addrs[i - 1];
 
 			func = (u8 *) __bpf_call_base + imm32;
-			if (tail_call_reachable) {
-				RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
-				if (!imm32)
-					return -EINVAL;
-				offs = 7 + x86_call_depth_emit_accounting(&prog, func);
-			} else {
-				if (!imm32)
-					return -EINVAL;
-				offs = x86_call_depth_emit_accounting(&prog, func);
+			if (src_reg == BPF_PSEUDO_CALL && tail_call_reachable) {
+				LOAD_TAIL_CALL_CNT_PTR(stack_depth);
+				ip += 7;
+			}
+			if (!imm32)
+				return -EINVAL;
+			if (priv_frame_ptr) {
+				push_r9(&prog);
+				ip += 2;
 			}
-			if (emit_call(&prog, func, image + addrs[i - 1] + offs))
+			ip += x86_call_depth_emit_accounting(&prog, func, ip);
+			if (emit_call(&prog, func, ip))
 				return -EINVAL;
+			if (priv_frame_ptr)
+				pop_r9(&prog);
 			break;
 		}
 
@@ -1774,13 +2210,13 @@ st:			if (is_imm8(insn->off))
 							  &bpf_prog->aux->poke_tab[imm32 - 1],
 							  &prog, image + addrs[i - 1],
 							  callee_regs_used,
-							  bpf_prog->aux->stack_depth,
+							  stack_depth,
 							  ctx);
 			else
 				emit_bpf_tail_call_indirect(bpf_prog,
 							    &prog,
 							    callee_regs_used,
-							    bpf_prog->aux->stack_depth,
+							    stack_depth,
 							    image + addrs[i - 1],
 							    ctx);
 			break;
@@ -1910,7 +2346,7 @@ emit_cond_jmp:		/* Convert BPF opcode to x86 */
 				return -EFAULT;
 			}
 			jmp_offset = addrs[i + insn->off] - addrs[i];
-			if (is_imm8(jmp_offset)) {
+			if (is_imm8_jmp_offset(jmp_offset)) {
 				if (jmp_padding) {
 					/* To keep the jmp_offset valid, the extra bytes are
 					 * padded before the jump insn, so we subtract the
@@ -1992,7 +2428,7 @@ emit_cond_jmp:		/* Convert BPF opcode to x86 */
 				break;
 			}
 emit_jmp:
-			if (is_imm8(jmp_offset)) {
+			if (is_imm8_jmp_offset(jmp_offset)) {
 				if (jmp_padding) {
 					/* To avoid breaking jmp_offset, the extra bytes
 					 * are padded before the actual jmp insn, so
@@ -2036,6 +2472,8 @@ emit_jmp:
 				pop_r12(&prog);
 			} else {
 				pop_callee_regs(&prog, callee_regs_used);
+				if (arena_vm_start)
+					pop_r12(&prog);
 			}
 			EMIT1(0xC9);         /* leave */
 			emit_return(&prog, image + addrs[i - 1] + (prog - temp));
@@ -2430,6 +2868,10 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
 	return 0;
 }
 
+/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
+#define LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack)	\
+	__LOAD_TCC_PTR(-round_up(stack, 8) - 8)
+
 /* Example:
  * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
  * its 'struct btf_func_model' will be nr_args=2
@@ -2550,7 +2992,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	 *                     [ ...        ]
 	 *                     [ stack_arg2 ]
 	 * RBP - arg_stack_off [ stack_arg1 ]
-	 * RSP                 [ tail_call_cnt ] BPF_TRAMP_F_TAIL_CALL_CTX
+	 * RSP                 [ tail_call_cnt_ptr ] BPF_TRAMP_F_TAIL_CALL_CTX
 	 */
 
 	/* room for return value of orig_call or fentry prog */
@@ -2611,7 +3053,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		 * Direct-call fentry stub, as such it needs accounting for the
 		 * __fentry__ call.
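Why the call emission now tracks ip explicitly: a near call's rel32 is measured from the end of the 5-byte call instruction, so everything emitted before it in the same sequence shifts the displacement base. This mirrors the "+7" (tcc-pointer load) and "+2" (push r9) adjustments above; a minimal sketch of the arithmetic:

	#include <stdint.h>
	#include <stdio.h>

	/* rel32 = target - (address of call + 5). */
	static int32_t call_rel32(uint64_t call_addr, uint64_t target)
	{
		return (int32_t)(target - (call_addr + 5));
	}

	int main(void)
	{
		uint64_t ip = 0x1000; /* image + addrs[i - 1] */

		ip += 7;              /* mov rax, qword ptr [rbp - off] emitted first */
		ip += 2;              /* push r9 emitted first (private stack)        */
		printf("rel32 = %#x\n", (uint32_t)call_rel32(ip, 0x2000));
		return 0;
	}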
 		 */
-		x86_call_depth_emit_accounting(&prog, NULL);
+		x86_call_depth_emit_accounting(&prog, NULL, image);
 	}
 	EMIT1(0x55);		 /* push rbp */
 	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
@@ -2679,10 +3121,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		save_args(m, &prog, arg_stack_off, true);
 
 		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
-			/* Before calling the original function, restore the
-			 * tail_call_cnt from stack to rax.
+			/* Before calling the original function, load the
+			 * tail_call_cnt_ptr from stack to rax.
 			 */
-			RESTORE_TAIL_CALL_CNT(stack_size);
+			LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack_size);
 		}
 
 		if (flags & BPF_TRAMP_F_ORIG_STACK) {
@@ -2741,10 +3183,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 			goto cleanup;
 		}
 	} else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
-		/* Before running the original function, restore the
-		 * tail_call_cnt from stack to rax.
+		/* Before running the original function, load the
+		 * tail_call_cnt_ptr from stack to rax.
 		 */
-		RESTORE_TAIL_CALL_CNT(stack_size);
+		LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack_size);
 	}
 
 	/* restore return value of orig_call or fentry prog back into RAX */
@@ -2780,12 +3222,9 @@ void arch_free_bpf_trampoline(void *image, unsigned int size)
 	bpf_prog_pack_free(image, size);
 }
 
-void arch_protect_bpf_trampoline(void *image, unsigned int size)
-{
-}
-
-void arch_unprotect_bpf_trampoline(void *image, unsigned int size)
+int arch_protect_bpf_trampoline(void *image, unsigned int size)
 {
+	return 0;
 }
 
 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
@@ -2929,6 +3368,42 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
 	return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs, image, buf);
 }
 
+static const char *bpf_get_prog_name(struct bpf_prog *prog)
+{
+	if (prog->aux->ksym.prog)
+		return prog->aux->ksym.name;
+	return prog->aux->name;
+}
+
+static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
+{
+	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+	u64 *stack_ptr;
+
+	for_each_possible_cpu(cpu) {
+		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+		stack_ptr[0] = PRIV_STACK_GUARD_VAL;
+		stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
+	}
+}
+
+static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
+				   struct bpf_prog *prog)
+{
+	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+	u64 *stack_ptr;
+
+	for_each_possible_cpu(cpu) {
+		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+		if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL) {
+			pr_err("BPF private stack overflow/underflow detected for prog %s\n",
+			       bpf_get_prog_name(prog));
+			break;
+		}
+	}
+}
+
 struct x64_jit_data {
 	struct bpf_binary_header *rw_header;
 	struct bpf_binary_header *header;
@@ -2946,7 +3421,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	struct bpf_binary_header *rw_header = NULL;
 	struct bpf_binary_header *header = NULL;
 	struct bpf_prog *tmp, *orig_prog = prog;
+	void __percpu *priv_stack_ptr = NULL;
 	struct x64_jit_data *jit_data;
+	int priv_stack_alloc_sz;
 	int proglen, oldproglen = 0;
 	struct jit_context ctx = {};
 	bool tmp_blinded = false;
@@ -2982,6 +3459,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		}
 		prog->aux->jit_data = jit_data;
 	}
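A user-space model of the guard scheme on a single CPU (the kernel versions loop over all possible CPUs via per_cpu_ptr()): magic words bracket the usable area, and any overwrite is reported after the fact, matching the underflow_idx computation in the patch:

	#include <stdint.h>
	#include <stdio.h>

	#define PRIV_STACK_GUARD_SZ  8
	#define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL

	int main(void)
	{
		enum { STACK_DEPTH = 64, ALLOC = STACK_DEPTH + 2 * PRIV_STACK_GUARD_SZ };
		uint64_t area[ALLOC / 8];
		int underflow_idx = (ALLOC - PRIV_STACK_GUARD_SZ) >> 3; /* as in the patch */

		area[0] = PRIV_STACK_GUARD_VAL;             /* overflow guard  */
		area[underflow_idx] = PRIV_STACK_GUARD_VAL; /* underflow guard */

		/* ... the program would run with R9 = (char *)area + 8 + 64 ... */

		if (area[0] != PRIV_STACK_GUARD_VAL ||
		    area[underflow_idx] != PRIV_STACK_GUARD_VAL)
			printf("BPF private stack overflow/underflow detected\n");
		else
			printf("guards intact\n");
		return 0;
	}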
+	priv_stack_ptr = prog->aux->priv_stack_ptr;
+	if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
+		/* Allocate actual private stack size with verifier-calculated
+		 * stack size plus two memory guards to protect overflow and
+		 * underflow.
+		 */
+		priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 8) +
+				      2 * PRIV_STACK_GUARD_SZ;
+		priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 8, GFP_KERNEL);
+		if (!priv_stack_ptr) {
+			prog = orig_prog;
+			goto out_priv_stack;
+		}
+
+		priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
+		prog->aux->priv_stack_ptr = priv_stack_ptr;
+	}
 	addrs = jit_data->addrs;
 	if (addrs) {
 		ctx = jit_data->ctx;
@@ -3083,7 +3577,7 @@ out_image:
 		 *
 		 * Both cases are serious bugs and justify WARN_ON.
 		 */
-		if (WARN_ON(bpf_jit_binary_pack_finalize(prog, header, rw_header))) {
+		if (WARN_ON(bpf_jit_binary_pack_finalize(header, rw_header))) {
 			/* header has been freed */
 			header = NULL;
 			goto out_image;
@@ -3117,6 +3611,11 @@ out_image:
 			bpf_prog_fill_jited_linfo(prog, addrs + 1);
 out_addrs:
 		kvfree(addrs);
+		if (!image && priv_stack_ptr) {
+			free_percpu(priv_stack_ptr);
+			prog->aux->priv_stack_ptr = NULL;
+		}
+out_priv_stack:
 		kfree(jit_data);
 		prog->aux->jit_data = NULL;
 	}
@@ -3145,11 +3644,18 @@ bool bpf_jit_supports_subprog_tailcalls(void)
 	return true;
 }
 
+bool bpf_jit_supports_percpu_insn(void)
+{
+	return true;
+}
+
 void bpf_jit_free(struct bpf_prog *prog)
 {
 	if (prog->jited) {
 		struct x64_jit_data *jit_data = prog->aux->jit_data;
 		struct bpf_binary_header *hdr;
+		void __percpu *priv_stack_ptr;
+		int priv_stack_alloc_sz;
 
 		/*
 		 * If we fail the final pass of JIT (from jit_subprogs),
@@ -3157,7 +3663,7 @@ void bpf_jit_free(struct bpf_prog *prog)
 		 * before freeing it.
 		 */
 		if (jit_data) {
-			bpf_jit_binary_pack_finalize(prog, jit_data->header,
+			bpf_jit_binary_pack_finalize(jit_data->header,
 						     jit_data->rw_header);
 			kvfree(jit_data->addrs);
 			kfree(jit_data);
@@ -3165,6 +3671,13 @@ void bpf_jit_free(struct bpf_prog *prog)
 		prog->bpf_func = (void *)prog->bpf_func - cfi_get_offset();
 		hdr = bpf_jit_binary_pack_hdr(prog);
 		bpf_jit_binary_pack_free(hdr, NULL);
+		priv_stack_ptr = prog->aux->priv_stack_ptr;
+		if (priv_stack_ptr) {
+			priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 8) +
+					      2 * PRIV_STACK_GUARD_SZ;
+			priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
+			free_percpu(prog->aux->priv_stack_ptr);
+		}
 		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
 	}
 
@@ -3180,6 +3693,11 @@ bool bpf_jit_supports_exceptions(void)
 	return IS_ENABLED(CONFIG_UNWINDER_ORC);
 }
 
+bool bpf_jit_supports_private_stack(void)
+{
+	return true;
+}
+
 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
 {
 #if defined(CONFIG_UNWINDER_ORC)
@@ -3242,3 +3760,34 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
 		BUG_ON(ret < 0);
 	}
 }
+
+bool bpf_jit_supports_arena(void)
+{
+	return true;
+}
+
+bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
+{
+	if (!in_arena)
+		return true;
+	switch (insn->code) {
+	case BPF_STX | BPF_ATOMIC | BPF_W:
+	case BPF_STX | BPF_ATOMIC | BPF_DW:
+		if (insn->imm == (BPF_AND | BPF_FETCH) ||
+		    insn->imm == (BPF_OR | BPF_FETCH) ||
+		    insn->imm == (BPF_XOR | BPF_FETCH))
+			return false;
+	}
+	return true;
+}
+
+bool bpf_jit_supports_ptr_xchg(void)
+{
+	return true;
+}
+
+/* x86-64 JIT emits its own code to filter user addresses so return 0 here */
+u64 bpf_arch_uaddress_limit(void)
+{
+	return 0;
+}
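A host-side sketch of the arena restriction in bpf_jit_supports_insn(), using the UAPI opcode values: x86 has lock-prefixed and/or/xor plus xadd/xchg/cmpxchg, but no single instruction that also returns the old value for and/or/xor, and the JIT does not emit a cmpxchg loop for arena addresses, so those fetch forms are rejected:

	#include <stdbool.h>
	#include <stdio.h>

	/* Values from the BPF UAPI header. */
	#define BPF_OR    0x40
	#define BPF_AND   0x50
	#define BPF_XOR   0xa0
	#define BPF_FETCH 0x01

	static bool arena_atomic_supported(int imm)
	{
		return imm != (BPF_AND | BPF_FETCH) &&
		       imm != (BPF_OR  | BPF_FETCH) &&
		       imm != (BPF_XOR | BPF_FETCH);
	}

	int main(void)
	{
		printf("and|fetch: %d\n", arena_atomic_supported(BPF_AND | BPF_FETCH)); /* 0 */
		printf("or:        %d\n", arena_atomic_supported(BPF_OR));              /* 1 */
		return 0;
	}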