Diffstat (limited to 'arch/x86/net/bpf_jit_comp.c')
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 426
1 file changed, 351 insertions(+), 75 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a43fc5af973d..b69dc7194e2c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -8,6 +8,7 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
+#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/memory.h>
#include <linux/sort.h>
@@ -41,6 +42,8 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
+#define EMIT5(b1, b2, b3, b4, b5) \
+ do { EMIT1(b1); EMIT4(b2, b3, b4, b5); } while (0)
#define EMIT1_off32(b1, off) \
do { EMIT1(b1); EMIT(off, 4); } while (0)
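The new EMIT5() composes the existing one- and four-byte emitters, mirroring how EMIT1_off32() is built. As a minimal usage sketch, the Spectre-BHB hunk further down uses it to emit the 5-byte IBHF sequence in a single statement:

	EMIT5(0xF3, 0x48, 0x0F, 0x1E, 0xF8);	/* ibhf */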
@@ -410,16 +413,20 @@ static void emit_nops(u8 **pprog, int len)
* Emit the various CFI preambles, see asm/cfi.h and the comments about FineIBT
* in arch/x86/kernel/alternative.c
*/
+static int emit_call(u8 **prog, void *func, void *ip);
-static void emit_fineibt(u8 **pprog, u32 hash)
+static void emit_fineibt(u8 **pprog, u8 *ip, u32 hash, int arity)
{
u8 *prog = *pprog;
EMIT_ENDBR();
- EMIT3_off32(0x41, 0x81, 0xea, hash); /* subl $hash, %r10d */
- EMIT2(0x74, 0x07); /* jz.d8 +7 */
- EMIT2(0x0f, 0x0b); /* ud2 */
- EMIT1(0x90); /* nop */
+ EMIT1_off32(0x2d, hash); /* subl $hash, %eax */
+ if (cfi_bhi) {
+ EMIT2(0x2e, 0x2e); /* cs cs */
+ emit_call(&prog, __bhi_args[arity], ip + 11);
+ } else {
+ EMIT3_off32(0x2e, 0x0f, 0x85, 3); /* jne.d32,pn 3 */
+ }
EMIT_ENDBR_POISON();
*pprog = prog;
@@ -448,13 +455,13 @@ static void emit_kcfi(u8 **pprog, u32 hash)
*pprog = prog;
}
-static void emit_cfi(u8 **pprog, u32 hash)
+static void emit_cfi(u8 **pprog, u8 *ip, u32 hash, int arity)
{
u8 *prog = *pprog;
switch (cfi_mode) {
case CFI_FINEIBT:
- emit_fineibt(&prog, hash);
+ emit_fineibt(&prog, ip, hash, arity);
break;
case CFI_KCFI:
@@ -505,13 +512,17 @@ static void emit_prologue_tail_call(u8 **pprog, bool is_subprog)
* bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
* while jumping to another program
*/
-static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
+static void emit_prologue(u8 **pprog, u8 *ip, u32 stack_depth, bool ebpf_from_cbpf,
bool tail_call_reachable, bool is_subprog,
bool is_exception_cb)
{
u8 *prog = *pprog;
- emit_cfi(&prog, is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash);
+ if (is_subprog) {
+ emit_cfi(&prog, ip, cfi_bpf_subprog_hash, 5);
+ } else {
+ emit_cfi(&prog, ip, cfi_bpf_hash, 1);
+ }
/* BPF trampoline can be made to work without these nops,
* but let's waste 5 bytes for now and optimize later
*/
@@ -586,7 +597,8 @@ static int emit_jump(u8 **pprog, void *func, void *ip)
return emit_patch(pprog, func, ip, 0xE9);
}
-static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
+ enum bpf_text_poke_type new_t,
void *old_addr, void *new_addr)
{
const u8 *nop_insn = x86_nops[5];
@@ -596,9 +608,9 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
int ret;
memcpy(old_insn, nop_insn, X86_PATCH_SIZE);
- if (old_addr) {
+ if (old_t != BPF_MOD_NOP && old_addr) {
prog = old_insn;
- ret = t == BPF_MOD_CALL ?
+ ret = old_t == BPF_MOD_CALL ?
emit_call(&prog, old_addr, ip) :
emit_jump(&prog, old_addr, ip);
if (ret)
@@ -606,9 +618,9 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
}
memcpy(new_insn, nop_insn, X86_PATCH_SIZE);
- if (new_addr) {
+ if (new_t != BPF_MOD_NOP && new_addr) {
prog = new_insn;
- ret = t == BPF_MOD_CALL ?
+ ret = new_t == BPF_MOD_CALL ?
emit_call(&prog, new_addr, ip) :
emit_jump(&prog, new_addr, ip);
if (ret)
@@ -621,7 +633,7 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
goto out;
ret = 1;
if (memcmp(ip, new_insn, X86_PATCH_SIZE)) {
- text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
+ smp_text_poke_single(ip, new_insn, X86_PATCH_SIZE, NULL);
ret = 0;
}
out:
@@ -629,8 +641,9 @@ out:
return ret;
}
-int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
- void *old_addr, void *new_addr)
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
+ enum bpf_text_poke_type new_t, void *old_addr,
+ void *new_addr)
{
if (!is_kernel_text((long)ip) &&
!is_bpf_text_address((long)ip))
@@ -641,29 +654,46 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
* See emit_prologue(), for IBT builds the trampoline hook is preceded
* with an ENDBR instruction.
*/
- if (is_endbr(*(u32 *)ip))
+ if (is_endbr(ip))
ip += ENDBR_INSN_SIZE;
- return __bpf_arch_text_poke(ip, t, old_addr, new_addr);
+ return __bpf_arch_text_poke(ip, old_t, new_t, old_addr, new_addr);
}
#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
-static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
+static void __emit_indirect_jump(u8 **pprog, int reg, bool ereg)
{
u8 *prog = *pprog;
- if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
+ if (ereg)
+ EMIT1(0x41);
+
+ EMIT2(0xFF, 0xE0 + reg);
+
+ *pprog = prog;
+}
+
+static void emit_indirect_jump(u8 **pprog, int bpf_reg, u8 *ip)
+{
+ u8 *prog = *pprog;
+ int reg = reg2hex[bpf_reg];
+ bool ereg = is_ereg(bpf_reg);
+
+ if (cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) {
+ OPTIMIZER_HIDE_VAR(reg);
+ emit_jump(&prog, its_static_thunk(reg + 8*ereg), ip);
+ } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
EMIT_LFENCE();
- EMIT2(0xFF, 0xE0 + reg);
+ __emit_indirect_jump(&prog, reg, ereg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
OPTIMIZER_HIDE_VAR(reg);
if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
- emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg], ip);
+ emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg + 8*ereg], ip);
else
- emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
+ emit_jump(&prog, &__x86_indirect_thunk_array[reg + 8*ereg], ip);
} else {
- EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */
+ __emit_indirect_jump(&prog, reg, ereg);
if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) || IS_ENABLED(CONFIG_MITIGATION_SLS))
EMIT1(0xCC); /* int3 */
}
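Since emit_indirect_jump() now takes a BPF register rather than a raw x86 register number, the thunk index becomes reg2hex[bpf_reg] + 8 * is_ereg(bpf_reg). A rough sketch of the mapping, assuming the usual x86-64 JIT register assignment (R4 -> rcx and R3 -> rdx are confirmed by the call sites below; R9 -> r15 is the conventional mapping):

	/* Illustrative only:
	 *   BPF_REG_4: reg2hex = 1, ereg = 0  ->  jmp *%rcx   (FF E1)
	 *   BPF_REG_3: reg2hex = 2, ereg = 0  ->  jmp *%rdx   (FF E2)
	 *   BPF_REG_9: reg2hex = 7, ereg = 1  ->  jmp *%r15   (41 FF E7)
	 */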
@@ -675,7 +705,7 @@ static void emit_return(u8 **pprog, u8 *ip)
{
u8 *prog = *pprog;
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+ if (cpu_wants_rethunk()) {
emit_jump(&prog, x86_return_thunk, ip);
} else {
EMIT1(0xC3); /* ret */
@@ -783,7 +813,7 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
* rdi == ctx (1st arg)
* rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
*/
- emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
+ emit_indirect_jump(&prog, BPF_REG_4 /* R4 -> rcx */, ip + (prog - start));
/* out: */
ctx->tail_call_indirect_label = prog - start;
@@ -869,12 +899,13 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
target = array->ptrs[poke->tail_call.key];
if (target) {
ret = __bpf_arch_text_poke(poke->tailcall_target,
- BPF_MOD_JUMP, NULL,
+ BPF_MOD_NOP, BPF_MOD_JUMP,
+ NULL,
(u8 *)target->bpf_func +
poke->adj_off);
BUG_ON(ret < 0);
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
- BPF_MOD_JUMP,
+ BPF_MOD_JUMP, BPF_MOD_NOP,
(u8 *)poke->tailcall_target +
X86_PATCH_SIZE, NULL);
BUG_ON(ret < 0);
@@ -1138,11 +1169,38 @@ static void emit_ldx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 i
*pprog = prog;
}
+static void emit_ldsx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+{
+ u8 *prog = *pprog;
+
+ switch (size) {
+ case BPF_B:
+ /* movsx rax, byte ptr [rax + r12 + off] */
+ EMIT3(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x0F, 0xBE);
+ break;
+ case BPF_H:
+ /* movsx rax, word ptr [rax + r12 + off] */
+ EMIT3(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x0F, 0xBF);
+ break;
+ case BPF_W:
+ /* movsx rax, dword ptr [rax + r12 + off] */
+ EMIT2(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x63);
+ break;
+ }
+ emit_insn_suffix_SIB(&prog, src_reg, dst_reg, index_reg, off);
+ *pprog = prog;
+}
+
static void emit_ldx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
emit_ldx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
}
+static void emit_ldsx_r12(u8 **prog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ emit_ldsx_index(prog, size, dst_reg, src_reg, X86_REG_R12, off);
+}
+
/* STX: *(u8*)(dst_reg + off) = src_reg */
static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
@@ -1242,8 +1300,8 @@ static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm)
emit_st_index(pprog, size, dst_reg, X86_REG_R12, off, imm);
}
-static int emit_atomic(u8 **pprog, u8 atomic_op,
- u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
+static int emit_atomic_rmw(u8 **pprog, u32 atomic_op,
+ u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
{
u8 *prog = *pprog;
@@ -1283,8 +1341,9 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
return 0;
}
-static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size,
- u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+static int emit_atomic_rmw_index(u8 **pprog, u32 atomic_op, u32 size,
+ u32 dst_reg, u32 src_reg, u32 index_reg,
+ int off)
{
u8 *prog = *pprog;
@@ -1297,7 +1356,7 @@ static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size,
EMIT1(add_3mod(0x48, dst_reg, src_reg, index_reg));
break;
default:
- pr_err("bpf_jit: 1 and 2 byte atomics are not supported\n");
+ pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n");
return -EFAULT;
}
@@ -1331,16 +1390,110 @@ static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size,
return 0;
}
+static int emit_atomic_ld_st(u8 **pprog, u32 atomic_op, u32 dst_reg,
+ u32 src_reg, s16 off, u8 bpf_size)
+{
+ switch (atomic_op) {
+ case BPF_LOAD_ACQ:
+ /* dst_reg = smp_load_acquire(src_reg + off16) */
+ emit_ldx(pprog, bpf_size, dst_reg, src_reg, off);
+ break;
+ case BPF_STORE_REL:
+ /* smp_store_release(dst_reg + off16, src_reg) */
+ emit_stx(pprog, bpf_size, dst_reg, src_reg, off);
+ break;
+ default:
+ pr_err("bpf_jit: unknown atomic load/store opcode %02x\n",
+ atomic_op);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int emit_atomic_ld_st_index(u8 **pprog, u32 atomic_op, u32 size,
+ u32 dst_reg, u32 src_reg, u32 index_reg,
+ int off)
+{
+ switch (atomic_op) {
+ case BPF_LOAD_ACQ:
+ /* dst_reg = smp_load_acquire(src_reg + idx_reg + off16) */
+ emit_ldx_index(pprog, size, dst_reg, src_reg, index_reg, off);
+ break;
+ case BPF_STORE_REL:
+ /* smp_store_release(dst_reg + idx_reg + off16, src_reg) */
+ emit_stx_index(pprog, size, dst_reg, src_reg, index_reg, off);
+ break;
+ default:
+ pr_err("bpf_jit: unknown atomic load/store opcode %02x\n",
+ atomic_op);
+ return -EFAULT;
+ }
+
+ return 0;
+}
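/*
 * Background note (not from the patch): plain emit_ldx()/emit_stx() moves
 * implement BPF_LOAD_ACQ/BPF_STORE_REL correctly because x86-64 is TSO:
 * the only reordering the hardware permits is store-then-later-load, which
 * acquire/release semantics do not forbid, so no LOCK prefix or fence is
 * needed on these paths; only the RMW atomics above need the LOCK prefix.
 */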
+
+/*
+ * Metadata encoding for exception handling in JITed code.
+ *
+ * Format of `fixup` and `data` fields in `struct exception_table_entry`:
+ *
+ * Bit layout of `fixup` (32-bit):
+ *
+ * +-----------+--------+-----------+---------+----------+
+ * | 31 | 30-24 | 23-16 | 15-8 | 7-0 |
+ * | | | | | |
+ * | ARENA_ACC | Unused | ARENA_REG | DST_REG | INSN_LEN |
+ * +-----------+--------+-----------+---------+----------+
+ *
+ * - INSN_LEN (8 bits): Length of faulting insn (max x86 insn = 15 bytes (fits in 8 bits)).
+ * - DST_REG (8 bits): Offset of dst_reg from reg2pt_regs[] (max offset = 112 (fits in 8 bits)).
+ * This is set to DONT_CLEAR if the insn is a store.
+ * - ARENA_REG (8 bits): Offset of the register that is used to calculate the
+ * address for load/store when accessing the arena region.
+ * - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region.
+ *
+ * Bit layout of `data` (32-bit):
+ *
+ * +--------------+--------+--------------+
+ * | 31-16 | 15-8 | 7-0 |
+ * | | | |
+ * | ARENA_OFFSET | Unused | EX_TYPE_BPF |
+ * +--------------+--------+--------------+
+ *
+ * - ARENA_OFFSET (16 bits): Offset used to calculate the address for load/store when
+ * accessing the arena region.
+ */
+
#define DONT_CLEAR 1
+#define FIXUP_INSN_LEN_MASK GENMASK(7, 0)
+#define FIXUP_REG_MASK GENMASK(15, 8)
+#define FIXUP_ARENA_REG_MASK GENMASK(23, 16)
+#define FIXUP_ARENA_ACCESS BIT(31)
+#define DATA_ARENA_OFFSET_MASK GENMASK(31, 16)
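/*
 * Worked example with made-up values (not emitted anywhere in this patch):
 * a 4-byte arena load whose dst_reg lives at pt_regs offset 80, whose
 * 32-bit arena address register lives at offset 112, and whose insn->off
 * is 0x10 would be packed and unpacked with the <linux/bitfield.h> helpers
 * like so:
 */
	u32 fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, 4) |
		    FIELD_PREP(FIXUP_REG_MASK, 80) |
		    FIELD_PREP(FIXUP_ARENA_REG_MASK, 112) |
		    FIXUP_ARENA_ACCESS;
	u32 data = EX_TYPE_BPF | FIELD_PREP(DATA_ARENA_OFFSET_MASK, 0x10);

	/* ex_handler_bpf() recovers the same fields: */
	u32 insn_len  = FIELD_GET(FIXUP_INSN_LEN_MASK, fixup);		/* 4    */
	u32 reg       = FIELD_GET(FIXUP_REG_MASK, fixup);		/* 80   */
	u32 arena_reg = FIELD_GET(FIXUP_ARENA_REG_MASK, fixup);	/* 112  */
	s16 off       = FIELD_GET(DATA_ARENA_OFFSET_MASK, data);	/* 0x10 */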
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
- u32 reg = x->fixup >> 8;
+ u32 reg = FIELD_GET(FIXUP_REG_MASK, x->fixup);
+ u32 insn_len = FIELD_GET(FIXUP_INSN_LEN_MASK, x->fixup);
+ bool is_arena = !!(x->fixup & FIXUP_ARENA_ACCESS);
+ bool is_write = (reg == DONT_CLEAR);
+ unsigned long addr;
+ s16 off;
+ u32 arena_reg;
+
+ if (is_arena) {
+ arena_reg = FIELD_GET(FIXUP_ARENA_REG_MASK, x->fixup);
+ off = FIELD_GET(DATA_ARENA_OFFSET_MASK, x->data);
+ addr = *(unsigned long *)((void *)regs + arena_reg) + off;
+ bpf_prog_report_arena_violation(is_write, addr, regs->ip);
+ }
/* jump over faulting load and clear dest register */
if (reg != DONT_CLEAR)
*(unsigned long *)((void *)regs + reg) = 0;
- regs->ip += x->fixup & 0xff;
+ regs->ip += insn_len;
+
return true;
}
@@ -1450,6 +1603,48 @@ static void emit_priv_frame_ptr(u8 **pprog, void __percpu *priv_frame_ptr)
#define PRIV_STACK_GUARD_SZ 8
#define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL
+static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip,
+ struct bpf_prog *bpf_prog)
+{
+ u8 *prog = *pprog;
+ u8 *func;
+
+ if (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP)) {
+ /* The clearing sequence clobbers eax and ecx. */
+ EMIT1(0x50); /* push rax */
+ EMIT1(0x51); /* push rcx */
+ ip += 2;
+
+ func = (u8 *)clear_bhb_loop;
+ ip += x86_call_depth_emit_accounting(&prog, func, ip);
+
+ if (emit_call(&prog, func, ip))
+ return -EINVAL;
+ EMIT1(0x59); /* pop rcx */
+ EMIT1(0x58); /* pop rax */
+ }
+ /* Insert IBHF instruction */
+ if ((cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP) &&
+ cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) ||
+ cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_HW)) {
+ /*
+ * Add an Indirect Branch History Fence (IBHF). IBHF acts as a
+ * fence preventing branch history from before the fence from
+ * affecting indirect branches after the fence. This is
+ * specifically used in cBPF jitted code to prevent Intra-mode
+ * BHI attacks. The IBHF instruction is designed to be a NOP on
+ * hardware that doesn't need or support it. The REP and REX.W
+ * prefixes are required by the microcode, and they also ensure
+ * that the NOP is unlikely to be used in existing code.
+ *
+ * IBHF is not a valid instruction in 32-bit mode.
+ */
+ EMIT5(0xF3, 0x48, 0x0F, 0x1E, 0xF8); /* ibhf */
+ }
+ *pprog = prog;
+ return 0;
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
int oldproglen, struct jit_context *ctx, bool jmp_padding)
{
@@ -1480,7 +1675,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
detect_reg_usage(insn, insn_cnt, callee_regs_used);
- emit_prologue(&prog, stack_depth,
+ emit_prologue(&prog, image, stack_depth,
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
/* Exception callback will clobber callee regs for its own use, and
@@ -1958,19 +2153,27 @@ st: if (is_imm8(insn->off))
case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+ case BPF_LDX | BPF_PROBE_MEM32SX | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM32SX | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM32SX | BPF_W:
case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
start_of_ldx = prog;
- if (BPF_CLASS(insn->code) == BPF_LDX)
- emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
- else
+ if (BPF_CLASS(insn->code) == BPF_LDX) {
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM32SX)
+ emit_ldsx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ else
+ emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ } else {
emit_stx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ }
populate_extable:
{
struct exception_table_entry *ex;
u8 *_insn = image + proglen + (start_of_ldx - temp);
+ u32 arena_reg, fixup_reg;
s64 delta;
if (!bpf_prog->aux->extable)
@@ -1990,8 +2193,29 @@ populate_extable:
ex->data = EX_TYPE_BPF;
- ex->fixup = (prog - start_of_ldx) |
- ((BPF_CLASS(insn->code) == BPF_LDX ? reg2pt_regs[dst_reg] : DONT_CLEAR) << 8);
+ /*
+ * src_reg/dst_reg holds the address in the arena region with upper
+ * 32-bits being zero because of a preceding addr_space_cast(r<n>,
+ * 0x0, 0x1) instruction. This address is adjusted with the addition
+ * of arena_vm_start (see the implementation of BPF_PROBE_MEM32 and
+ * BPF_PROBE_ATOMIC) before being used for the memory access. Pass
+ * the reg holding the unmodified 32-bit address to
+ * ex_handler_bpf().
+ */
+ if (BPF_CLASS(insn->code) == BPF_LDX) {
+ arena_reg = reg2pt_regs[src_reg];
+ fixup_reg = reg2pt_regs[dst_reg];
+ } else {
+ arena_reg = reg2pt_regs[dst_reg];
+ fixup_reg = DONT_CLEAR;
+ }
+
+ ex->fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, prog - start_of_ldx) |
+ FIELD_PREP(FIXUP_ARENA_REG_MASK, arena_reg) |
+ FIELD_PREP(FIXUP_REG_MASK, fixup_reg);
+ ex->fixup |= FIXUP_ARENA_ACCESS;
+
+ ex->data |= FIELD_PREP(DATA_ARENA_OFFSET_MASK, insn->off);
}
break;
@@ -2109,10 +2333,18 @@ populate_extable:
* End result: x86 insn "mov rbx, qword ptr [rax+0x14]"
* of 4 bytes will be ignored and rbx will be zero inited.
*/
- ex->fixup = (prog - start_of_ldx) | (reg2pt_regs[dst_reg] << 8);
+ ex->fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, prog - start_of_ldx) |
+ FIELD_PREP(FIXUP_REG_MASK, reg2pt_regs[dst_reg]);
}
break;
+ case BPF_STX | BPF_ATOMIC | BPF_B:
+ case BPF_STX | BPF_ATOMIC | BPF_H:
+ if (!bpf_atomic_is_load_store(insn)) {
+ pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n");
+ return -EFAULT;
+ }
+ fallthrough;
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
if (insn->imm == (BPF_AND | BPF_FETCH) ||
@@ -2148,10 +2380,10 @@ populate_extable:
EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)],
add_2reg(0xC0, AUX_REG, real_src_reg));
/* Attempt to swap in new value */
- err = emit_atomic(&prog, BPF_CMPXCHG,
- real_dst_reg, AUX_REG,
- insn->off,
- BPF_SIZE(insn->code));
+ err = emit_atomic_rmw(&prog, BPF_CMPXCHG,
+ real_dst_reg, AUX_REG,
+ insn->off,
+ BPF_SIZE(insn->code));
if (WARN_ON(err))
return err;
/*
@@ -2166,17 +2398,35 @@ populate_extable:
break;
}
- err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
- insn->off, BPF_SIZE(insn->code));
+ if (bpf_atomic_is_load_store(insn))
+ err = emit_atomic_ld_st(&prog, insn->imm, dst_reg, src_reg,
+ insn->off, BPF_SIZE(insn->code));
+ else
+ err = emit_atomic_rmw(&prog, insn->imm, dst_reg, src_reg,
+ insn->off, BPF_SIZE(insn->code));
if (err)
return err;
break;
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
+ if (!bpf_atomic_is_load_store(insn)) {
+ pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n");
+ return -EFAULT;
+ }
+ fallthrough;
case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
start_of_ldx = prog;
- err = emit_atomic_index(&prog, insn->imm, BPF_SIZE(insn->code),
- dst_reg, src_reg, X86_REG_R12, insn->off);
+
+ if (bpf_atomic_is_load_store(insn))
+ err = emit_atomic_ld_st_index(&prog, insn->imm,
+ BPF_SIZE(insn->code), dst_reg,
+ src_reg, X86_REG_R12, insn->off);
+ else
+ err = emit_atomic_rmw_index(&prog, insn->imm, BPF_SIZE(insn->code),
+ dst_reg, src_reg, X86_REG_R12,
+ insn->off);
if (err)
return err;
goto populate_extable;
@@ -2381,6 +2631,9 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */
break;
+ case BPF_JMP | BPF_JA | BPF_X:
+ emit_indirect_jump(&prog, insn->dst_reg, image + addrs[i - 1]);
+ break;
case BPF_JMP | BPF_JA:
case BPF_JMP32 | BPF_JA:
if (BPF_CLASS(insn->code) == BPF_JMP) {
@@ -2467,6 +2720,13 @@ emit_jmp:
seen_exit = true;
/* Update cleanup_addr */
ctx->cleanup_addr = proglen;
+ if (bpf_prog_was_classic(bpf_prog) &&
+ !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) {
+ u8 *ip = image + addrs[i - 1];
+
+ if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog))
+ return -EINVAL;
+ }
if (bpf_prog->aux->exception_boundary) {
pop_callee_regs(&prog, all_callee_regs_used);
pop_r12(&prog);
@@ -2590,9 +2850,10 @@ static int get_nr_used_regs(const struct btf_func_model *m)
}
static void save_args(const struct btf_func_model *m, u8 **prog,
- int stack_size, bool for_call_origin)
+ int stack_size, bool for_call_origin, u32 flags)
{
int arg_regs, first_off = 0, nr_regs = 0, nr_stack_slots = 0;
+ bool use_jmp = bpf_trampoline_use_jmp(flags);
int i, j;
/* Store function arguments to stack.
@@ -2633,7 +2894,7 @@ static void save_args(const struct btf_func_model *m, u8 **prog,
*/
for (j = 0; j < arg_regs; j++) {
emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP,
- nr_stack_slots * 8 + 0x18);
+ nr_stack_slots * 8 + 16 + (!use_jmp) * 8);
emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0,
-stack_size);
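Worked out, the new offset preserves the old behaviour for call-attached trampolines: with use_jmp == false the expression is nr_stack_slots * 8 + 16 + 8 = nr_stack_slots * 8 + 0x18, exactly the previous hard-coded constant, while a jmp-attached trampoline has no extra return address on the stack (the "no rip in the 'jmp' case" note further down), so its stack-passed arguments start 8 bytes closer to the frame pointer.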
@@ -3027,16 +3288,21 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* should be 16-byte aligned. Following code depend on
* that stack_size is already 8-byte aligned.
*/
- stack_size += (stack_size % 16) ? 0 : 8;
+ if (bpf_trampoline_use_jmp(flags)) {
+ /* no rip in the "jmp" case */
+ stack_size += (stack_size % 16) ? 8 : 0;
+ } else {
+ stack_size += (stack_size % 16) ? 0 : 8;
+ }
}
arg_stack_off = stack_size;
- if (flags & BPF_TRAMP_F_SKIP_FRAME) {
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
/* skip patched call instruction and point orig_call to actual
* body of the kernel function.
*/
- if (is_endbr(*(u32 *)orig_call))
+ if (is_endbr(orig_call))
orig_call += ENDBR_INSN_SIZE;
orig_call += X86_PATCH_SIZE;
}
@@ -3047,7 +3313,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
/*
* Indirect call for bpf_struct_ops
*/
- emit_cfi(&prog, cfi_get_func_hash(func_addr));
+ emit_cfi(&prog, image,
+ cfi_get_func_hash(func_addr),
+ cfi_get_func_arity(func_addr));
} else {
/*
* Direct-call fentry stub, as such it needs accounting for the
@@ -3085,7 +3353,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off);
}
- save_args(m, &prog, regs_off, false);
+ save_args(m, &prog, regs_off, false, flags);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
/* arg1: mov rdi, im */
@@ -3118,7 +3386,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
if (flags & BPF_TRAMP_F_CALL_ORIG) {
restore_regs(m, &prog, regs_off);
- save_args(m, &prog, arg_stack_off, true);
+ save_args(m, &prog, arg_stack_off, true, flags);
if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
/* Before calling the original function, load the
@@ -3301,7 +3569,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs, u8 *image,
if (err)
return err;
- emit_indirect_jump(&prog, 2 /* rdx */, image + (prog - buf));
+ emit_indirect_jump(&prog, BPF_REG_3 /* R3 -> rdx */, image + (prog - buf));
*pprog = prog;
return 0;
@@ -3368,13 +3636,6 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs, image, buf);
}
-static const char *bpf_get_prog_name(struct bpf_prog *prog)
-{
- if (prog->aux->ksym.prog)
- return prog->aux->ksym.name;
- return prog->aux->name;
-}
-
static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
{
int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
@@ -3398,7 +3659,7 @@ static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size
if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL) {
pr_err("BPF private stack overflow/underflow detected for prog %sx\n",
- bpf_get_prog_name(prog));
+ bpf_jit_get_prog_name(prog));
break;
}
}
@@ -3592,6 +3853,15 @@ out_image:
jit_data->header = header;
jit_data->rw_header = rw_header;
}
+
+ /*
+ * The bpf_prog_update_insn_ptrs function expects addrs to
+ * point to the first byte of the jitted instruction (unlike
+ * the bpf_prog_fill_jited_linfo below, which, for historical
+ * reasons, expects to point to the next instruction)
+ */
+ bpf_prog_update_insn_ptrs(prog, addrs, image);
+
/*
* ctx.prog_offset is used when CFI preambles put code *before*
* the function. See emit_cfi(). For FineIBT specifically this code
@@ -3712,13 +3982,13 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp
}
return;
#endif
- WARN(1, "verification of programs using bpf_throw should have failed\n");
}
void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
struct bpf_prog *new, struct bpf_prog *old)
{
u8 *old_addr, *new_addr, *old_bypass_addr;
+ enum bpf_text_poke_type t;
int ret;
old_bypass_addr = old ? NULL : poke->bypass_addr;
@@ -3731,21 +4001,22 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
* the kallsyms check.
*/
if (new) {
+ t = old_addr ? BPF_MOD_JUMP : BPF_MOD_NOP;
ret = __bpf_arch_text_poke(poke->tailcall_target,
- BPF_MOD_JUMP,
+ t, BPF_MOD_JUMP,
old_addr, new_addr);
BUG_ON(ret < 0);
if (!old) {
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
- BPF_MOD_JUMP,
+ BPF_MOD_JUMP, BPF_MOD_NOP,
poke->bypass_addr,
NULL);
BUG_ON(ret < 0);
}
} else {
+ t = old_bypass_addr ? BPF_MOD_JUMP : BPF_MOD_NOP;
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
- BPF_MOD_JUMP,
- old_bypass_addr,
+ t, BPF_MOD_JUMP, old_bypass_addr,
poke->bypass_addr);
BUG_ON(ret < 0);
/* let other CPUs finish the execution of program
@@ -3754,9 +4025,9 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
*/
if (!ret)
synchronize_rcu();
+ t = old_addr ? BPF_MOD_JUMP : BPF_MOD_NOP;
ret = __bpf_arch_text_poke(poke->tailcall_target,
- BPF_MOD_JUMP,
- old_addr, NULL);
+ t, BPF_MOD_NOP, old_addr, NULL);
BUG_ON(ret < 0);
}
}
@@ -3791,3 +4062,8 @@ u64 bpf_arch_uaddress_limit(void)
{
return 0;
}
+
+bool bpf_jit_supports_timed_may_goto(void)
+{
+ return true;
+}