diff options
Diffstat (limited to 'arch/riscv/net')
-rw-r--r-- | arch/riscv/net/bpf_jit.h | 191 | ||||
-rw-r--r-- | arch/riscv/net/bpf_jit_comp32.c | 5 | ||||
-rw-r--r-- | arch/riscv/net/bpf_jit_comp64.c | 677 | ||||
-rw-r--r-- | arch/riscv/net/bpf_jit_core.c | 31 |
4 files changed, 673 insertions, 231 deletions
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index a5ce1ab76ece..1d1c78d4cff1 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -18,6 +18,16 @@ static inline bool rvc_enabled(void) return IS_ENABLED(CONFIG_RISCV_ISA_C); } +static inline bool rvzba_enabled(void) +{ + return IS_ENABLED(CONFIG_RISCV_ISA_ZBA) && riscv_has_extension_likely(RISCV_ISA_EXT_ZBA); +} + +static inline bool rvzbb_enabled(void) +{ + return IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && riscv_has_extension_likely(RISCV_ISA_EXT_ZBB); +} + enum { RV_REG_ZERO = 0, /* The constant value 0 */ RV_REG_RA = 1, /* Return address */ @@ -76,6 +86,8 @@ struct rv_jit_context { int nexentries; unsigned long flags; int stack_size; + u64 arena_vm_start; + u64 user_vm_start; }; /* Convert from ninsns to bytes. */ @@ -601,7 +613,7 @@ static inline u32 rv_nop(void) return rv_i_insn(0, 0, 0, 0, 0x13); } -/* RVC instrutions. */ +/* RVC instructions. */ static inline u16 rvc_addi4spn(u8 rd, u32 imm10) { @@ -730,6 +742,44 @@ static inline u16 rvc_swsp(u32 imm8, u8 rs2) return rv_css_insn(0x6, imm, rs2, 0x2); } +/* RVZBA instructions. */ +static inline u32 rvzba_sh2add(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0x10, rs2, rs1, 0x4, rd, 0x33); +} + +static inline u32 rvzba_sh3add(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0x10, rs2, rs1, 0x6, rd, 0x33); +} + +/* RVZBB instructions. */ +static inline u32 rvzbb_sextb(u8 rd, u8 rs1) +{ + return rv_i_insn(0x604, rs1, 1, rd, 0x13); +} + +static inline u32 rvzbb_sexth(u8 rd, u8 rs1) +{ + return rv_i_insn(0x605, rs1, 1, rd, 0x13); +} + +static inline u32 rvzbb_zexth(u8 rd, u8 rs) +{ + if (IS_ENABLED(CONFIG_64BIT)) + return rv_i_insn(0x80, rs, 4, rd, 0x3b); + + return rv_i_insn(0x80, rs, 4, rd, 0x33); +} + +static inline u32 rvzbb_rev8(u8 rd, u8 rs) +{ + if (IS_ENABLED(CONFIG_64BIT)) + return rv_i_insn(0x6b8, rs, 5, rd, 0x13); + + return rv_i_insn(0x698, rs, 5, rd, 0x13); +} + /* * RV64-only instructions. * @@ -905,6 +955,14 @@ static inline u16 rvc_sdsp(u32 imm9, u8 rs2) return rv_css_insn(0x7, imm, rs2, 0x2); } +/* RV64-only ZBA instructions. */ + +static inline u32 rvzba_zextw(u8 rd, u8 rs1) +{ + /* add.uw rd, rs1, ZERO */ + return rv_r_insn(0x04, RV_REG_ZERO, rs1, 0, rd, 0x3b); +} + #endif /* __riscv_xlen == 64 */ /* Helper functions that emit RVC instructions when possible. */ @@ -1048,6 +1106,28 @@ static inline void emit_sw(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx) emit(rv_sw(rs1, off, rs2), ctx); } +static inline void emit_sh2add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) +{ + if (rvzba_enabled()) { + emit(rvzba_sh2add(rd, rs1, rs2), ctx); + return; + } + + emit_slli(rd, rs1, 2, ctx); + emit_add(rd, rd, rs2, ctx); +} + +static inline void emit_sh3add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) +{ + if (rvzba_enabled()) { + emit(rvzba_sh3add(rd, rs1, rs2), ctx); + return; + } + + emit_slli(rd, rs1, 3, ctx); + emit_add(rd, rd, rs2, ctx); +} + /* RV64-only helper functions. */ #if __riscv_xlen == 64 @@ -1087,9 +1167,116 @@ static inline void emit_subw(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) emit(rv_subw(rd, rs1, rs2), ctx); } +static inline void emit_sextb(u8 rd, u8 rs, struct rv_jit_context *ctx) +{ + if (rvzbb_enabled()) { + emit(rvzbb_sextb(rd, rs), ctx); + return; + } + + emit_slli(rd, rs, 56, ctx); + emit_srai(rd, rd, 56, ctx); +} + +static inline void emit_sexth(u8 rd, u8 rs, struct rv_jit_context *ctx) +{ + if (rvzbb_enabled()) { + emit(rvzbb_sexth(rd, rs), ctx); + return; + } + + emit_slli(rd, rs, 48, ctx); + emit_srai(rd, rd, 48, ctx); +} + +static inline void emit_sextw(u8 rd, u8 rs, struct rv_jit_context *ctx) +{ + emit_addiw(rd, rs, 0, ctx); +} + +static inline void emit_zexth(u8 rd, u8 rs, struct rv_jit_context *ctx) +{ + if (rvzbb_enabled()) { + emit(rvzbb_zexth(rd, rs), ctx); + return; + } + + emit_slli(rd, rs, 48, ctx); + emit_srli(rd, rd, 48, ctx); +} + +static inline void emit_zextw(u8 rd, u8 rs, struct rv_jit_context *ctx) +{ + if (rvzba_enabled()) { + emit(rvzba_zextw(rd, rs), ctx); + return; + } + + emit_slli(rd, rs, 32, ctx); + emit_srli(rd, rd, 32, ctx); +} + +static inline void emit_bswap(u8 rd, s32 imm, struct rv_jit_context *ctx) +{ + if (rvzbb_enabled()) { + int bits = 64 - imm; + + emit(rvzbb_rev8(rd, rd), ctx); + if (bits) + emit_srli(rd, rd, bits, ctx); + return; + } + + emit_li(RV_REG_T2, 0, ctx); + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); + if (imm == 16) + goto out_be; + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); + if (imm == 32) + goto out_be; + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); +out_be: + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + + emit_mv(rd, RV_REG_T2, ctx); +} + #endif /* __riscv_xlen == 64 */ -void bpf_jit_build_prologue(struct rv_jit_context *ctx); +void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog); void bpf_jit_build_epilogue(struct rv_jit_context *ctx); int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c index 529a83b85c1c..592dd86fbf81 100644 --- a/arch/riscv/net/bpf_jit_comp32.c +++ b/arch/riscv/net/bpf_jit_comp32.c @@ -811,8 +811,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) * if (!prog) * goto out; */ - emit(rv_slli(RV_REG_T0, lo(idx_reg), 2), ctx); - emit(rv_add(RV_REG_T0, RV_REG_T0, lo(arr_reg)), ctx); + emit_sh2add(RV_REG_T0, lo(idx_reg), lo(arr_reg), ctx); off = offsetof(struct bpf_array, ptrs); if (is_12b_check(off, insn)) return -1; @@ -1301,7 +1300,7 @@ notsupported: return 0; } -void bpf_jit_build_prologue(struct rv_jit_context *ctx) +void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog) { const s8 *fp = bpf2rv32[BPF_REG_FP]; const s8 *r1 = bpf2rv32[BPF_REG_1]; diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 719a97e7edb2..ca60db75199d 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -10,13 +10,21 @@ #include <linux/filter.h> #include <linux/memory.h> #include <linux/stop_machine.h> -#include <asm/patch.h> +#include <asm/text-patching.h> +#include <asm/cfi.h> +#include <asm/percpu.h> #include "bpf_jit.h" +#define RV_MAX_REG_ARGS 8 #define RV_FENTRY_NINSNS 2 +#define RV_FENTRY_NBYTES (RV_FENTRY_NINSNS * 4) +#define RV_KCFI_NINSNS (IS_ENABLED(CONFIG_CFI_CLANG) ? 1 : 0) +/* imm that allows emit_imm to emit max count insns */ +#define RV_MAX_COUNT_IMM 0x7FFF7FF7FF7FF7FF #define RV_REG_TCC RV_REG_A6 #define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */ +#define RV_REG_ARENA RV_REG_S7 /* For storing arena_vm_start */ static const int regmap[] = { [BPF_REG_0] = RV_REG_A5, @@ -141,6 +149,19 @@ static bool in_auipc_jalr_range(s64 val) val < ((1L << 31) - (1L << 11)); } +/* Modify rd pointer to alternate reg to avoid corrupting original reg */ +static void emit_sextw_alt(u8 *rd, u8 ra, struct rv_jit_context *ctx) +{ + emit_sextw(ra, *rd, ctx); + *rd = ra; +} + +static void emit_zextw_alt(u8 *rd, u8 ra, struct rv_jit_context *ctx) +{ + emit_zextw(ra, *rd, ctx); + *rd = ra; +} + /* Emit fixed-length instructions for address */ static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx) { @@ -241,13 +262,18 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx); store_offset -= 8; } + if (ctx->arena_vm_start) { + emit_ld(RV_REG_ARENA, store_offset, RV_REG_SP, ctx); + store_offset -= 8; + } emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx); /* Set return value. */ if (!is_tail_call) emit_addiw(RV_REG_A0, RV_REG_A5, 0, ctx); emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, - is_tail_call ? (RV_FENTRY_NINSNS + 1) * 4 : 0, /* skip reserved nops and TCC init */ + /* kcfi, fentry and TCC init insns will be skipped on tailcall */ + is_tail_call ? (RV_KCFI_NINSNS + RV_FENTRY_NINSNS + 1) * 4 : 0, ctx); } @@ -326,12 +352,6 @@ static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff, emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx); } -static void emit_zext_32(u8 reg, struct rv_jit_context *ctx) -{ - emit_slli(reg, reg, 32, ctx); - emit_srli(reg, reg, 32, ctx); -} - static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) { int tc_ninsn, off, start_insn = ctx->ninsns; @@ -346,7 +366,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) */ tc_ninsn = insn ? ctx->offset[insn] - ctx->offset[insn - 1] : ctx->offset[0]; - emit_zext_32(RV_REG_A2, ctx); + emit_zextw(RV_REG_A2, RV_REG_A2, ctx); off = offsetof(struct bpf_array, map.max_entries); if (is_12b_check(off, insn)) @@ -366,8 +386,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) * if (!prog) * goto out; */ - emit_slli(RV_REG_T2, RV_REG_A2, 3, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_A1, ctx); + emit_sh3add(RV_REG_T2, RV_REG_A2, RV_REG_A1, ctx); off = offsetof(struct bpf_array, ptrs); if (is_12b_check(off, insn)) return -1; @@ -405,38 +424,6 @@ static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn, *rs = bpf_to_rv_reg(insn->src_reg, ctx); } -static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx) -{ - emit_mv(RV_REG_T2, *rd, ctx); - emit_zext_32(RV_REG_T2, ctx); - emit_mv(RV_REG_T1, *rs, ctx); - emit_zext_32(RV_REG_T1, ctx); - *rd = RV_REG_T2; - *rs = RV_REG_T1; -} - -static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx) -{ - emit_addiw(RV_REG_T2, *rd, 0, ctx); - emit_addiw(RV_REG_T1, *rs, 0, ctx); - *rd = RV_REG_T2; - *rs = RV_REG_T1; -} - -static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx) -{ - emit_mv(RV_REG_T2, *rd, ctx); - emit_zext_32(RV_REG_T2, ctx); - emit_zext_32(RV_REG_T1, ctx); - *rd = RV_REG_T2; -} - -static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx) -{ - emit_addiw(RV_REG_T2, *rd, 0, ctx); - *rd = RV_REG_T2; -} - static int emit_jump_and_link(u8 rd, s64 rvoff, bool fixed_addr, struct rv_jit_context *ctx) { @@ -480,6 +467,12 @@ static int emit_call(u64 addr, bool fixed_addr, struct rv_jit_context *ctx) return emit_jump_and_link(RV_REG_RA, off, fixed_addr, ctx); } +static inline void emit_kcfi(u32 hash, struct rv_jit_context *ctx) +{ + if (IS_ENABLED(CONFIG_CFI_CLANG)) + emit(hash, ctx); +} + static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64, struct rv_jit_context *ctx) { @@ -516,47 +509,49 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64, break; /* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */ case BPF_ADD | BPF_FETCH: - emit(is64 ? rv_amoadd_d(rs, rs, rd, 0, 0) : - rv_amoadd_w(rs, rs, rd, 0, 0), ctx); + emit(is64 ? rv_amoadd_d(rs, rs, rd, 1, 1) : + rv_amoadd_w(rs, rs, rd, 1, 1), ctx); if (!is64) - emit_zext_32(rs, ctx); + emit_zextw(rs, rs, ctx); break; case BPF_AND | BPF_FETCH: - emit(is64 ? rv_amoand_d(rs, rs, rd, 0, 0) : - rv_amoand_w(rs, rs, rd, 0, 0), ctx); + emit(is64 ? rv_amoand_d(rs, rs, rd, 1, 1) : + rv_amoand_w(rs, rs, rd, 1, 1), ctx); if (!is64) - emit_zext_32(rs, ctx); + emit_zextw(rs, rs, ctx); break; case BPF_OR | BPF_FETCH: - emit(is64 ? rv_amoor_d(rs, rs, rd, 0, 0) : - rv_amoor_w(rs, rs, rd, 0, 0), ctx); + emit(is64 ? rv_amoor_d(rs, rs, rd, 1, 1) : + rv_amoor_w(rs, rs, rd, 1, 1), ctx); if (!is64) - emit_zext_32(rs, ctx); + emit_zextw(rs, rs, ctx); break; case BPF_XOR | BPF_FETCH: - emit(is64 ? rv_amoxor_d(rs, rs, rd, 0, 0) : - rv_amoxor_w(rs, rs, rd, 0, 0), ctx); + emit(is64 ? rv_amoxor_d(rs, rs, rd, 1, 1) : + rv_amoxor_w(rs, rs, rd, 1, 1), ctx); if (!is64) - emit_zext_32(rs, ctx); + emit_zextw(rs, rs, ctx); break; /* src_reg = atomic_xchg(dst_reg + off16, src_reg); */ case BPF_XCHG: - emit(is64 ? rv_amoswap_d(rs, rs, rd, 0, 0) : - rv_amoswap_w(rs, rs, rd, 0, 0), ctx); + emit(is64 ? rv_amoswap_d(rs, rs, rd, 1, 1) : + rv_amoswap_w(rs, rs, rd, 1, 1), ctx); if (!is64) - emit_zext_32(rs, ctx); + emit_zextw(rs, rs, ctx); break; /* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */ case BPF_CMPXCHG: r0 = bpf_to_rv_reg(BPF_REG_0, ctx); - emit(is64 ? rv_addi(RV_REG_T2, r0, 0) : - rv_addiw(RV_REG_T2, r0, 0), ctx); + if (is64) + emit_mv(RV_REG_T2, r0, ctx); + else + emit_addiw(RV_REG_T2, r0, 0, ctx); emit(is64 ? rv_lr_d(r0, 0, rd, 0, 0) : rv_lr_w(r0, 0, rd, 0, 0), ctx); jmp_offset = ninsns_rvoff(8); emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx); - emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 0) : - rv_sc_w(RV_REG_T3, rs, rd, 0, 0), ctx); + emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 1) : + rv_sc_w(RV_REG_T3, rs, rd, 0, 1), ctx); jmp_offset = ninsns_rvoff(-6); emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx); emit(rv_fence(0x3, 0x3), ctx); @@ -566,6 +561,7 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64, #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) #define BPF_FIXUP_REG_MASK GENMASK(31, 27) +#define REG_DONT_CLEAR_MARKER 0 /* RV_REG_ZERO unused in pt_regmap */ bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs) @@ -573,7 +569,8 @@ bool ex_handler_bpf(const struct exception_table_entry *ex, off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); - *(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0; + if (regs_offset != REG_DONT_CLEAR_MARKER) + *(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0; regs->epc = (unsigned long)&ex->fixup - offset; return true; @@ -590,7 +587,8 @@ static int add_exception_handler(const struct bpf_insn *insn, off_t fixup_offset; if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable || - (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX)) + (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX && + BPF_MODE(insn->code) != BPF_PROBE_MEM32)) return 0; if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries)) @@ -681,7 +679,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, if (ret) return ret; - if (memcmp(ip, old_insns, RV_FENTRY_NINSNS * 4)) + if (memcmp(ip, old_insns, RV_FENTRY_NBYTES)) return -EFAULT; ret = gen_jump_or_nops(new_addr, ip, new_insns, is_call); @@ -690,34 +688,53 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, cpus_read_lock(); mutex_lock(&text_mutex); - if (memcmp(ip, new_insns, RV_FENTRY_NINSNS * 4)) - ret = patch_text(ip, new_insns, RV_FENTRY_NINSNS); + if (memcmp(ip, new_insns, RV_FENTRY_NBYTES)) + ret = patch_text(ip, new_insns, RV_FENTRY_NBYTES); mutex_unlock(&text_mutex); cpus_read_unlock(); return ret; } -static void store_args(int nregs, int args_off, struct rv_jit_context *ctx) +static void store_args(int nr_arg_slots, int args_off, struct rv_jit_context *ctx) { int i; - for (i = 0; i < nregs; i++) { - emit_sd(RV_REG_FP, -args_off, RV_REG_A0 + i, ctx); + for (i = 0; i < nr_arg_slots; i++) { + if (i < RV_MAX_REG_ARGS) { + emit_sd(RV_REG_FP, -args_off, RV_REG_A0 + i, ctx); + } else { + /* skip slots for T0 and FP of traced function */ + emit_ld(RV_REG_T1, 16 + (i - RV_MAX_REG_ARGS) * 8, RV_REG_FP, ctx); + emit_sd(RV_REG_FP, -args_off, RV_REG_T1, ctx); + } args_off -= 8; } } -static void restore_args(int nregs, int args_off, struct rv_jit_context *ctx) +static void restore_args(int nr_reg_args, int args_off, struct rv_jit_context *ctx) { int i; - for (i = 0; i < nregs; i++) { + for (i = 0; i < nr_reg_args; i++) { emit_ld(RV_REG_A0 + i, -args_off, RV_REG_FP, ctx); args_off -= 8; } } +static void restore_stack_args(int nr_stack_args, int args_off, int stk_arg_off, + struct rv_jit_context *ctx) +{ + int i; + + for (i = 0; i < nr_stack_args; i++) { + emit_ld(RV_REG_T1, -(args_off - RV_MAX_REG_ARGS * 8), RV_REG_FP, ctx); + emit_sd(RV_REG_FP, -stk_arg_off, RV_REG_T1, ctx); + args_off -= 8; + stk_arg_off -= 8; + } +} + static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_off, int run_ctx_off, bool save_ret, struct rv_jit_context *ctx) { @@ -740,6 +757,9 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of if (ret) return ret; + /* store prog start time */ + emit_mv(RV_REG_S1, RV_REG_A0, ctx); + /* if (__bpf_prog_enter(prog) == 0) * goto skip_exec_of_prog; */ @@ -747,9 +767,6 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of /* nop reserved for conditional jump */ emit(rv_nop(), ctx); - /* store prog start time */ - emit_mv(RV_REG_S1, RV_REG_A0, ctx); - /* arg1: &args_off */ emit_addi(RV_REG_A0, RV_REG_FP, -args_off, ctx); if (!p->jited) @@ -790,8 +807,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, { int i, ret, offset; int *branches_off = NULL; - int stack_size = 0, nregs = m->nr_args; - int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off; + int stack_size = 0, nr_arg_slots = 0; + int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off, stk_arg_off; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; @@ -837,20 +854,21 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, * FP - sreg_off [ callee saved reg ] * * [ pads ] pads for 16 bytes alignment + * + * [ stack_argN ] + * [ ... ] + * FP - stk_arg_off [ stack_arg1 ] BPF_TRAMP_F_CALL_ORIG */ if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY)) return -ENOTSUPP; - /* extra regiters for struct arguments */ - for (i = 0; i < m->nr_args; i++) - if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) - nregs += round_up(m->arg_size[i], 8) / 8 - 1; - - /* 8 arguments passed by registers */ - if (nregs > 8) + if (m->nr_args > MAX_BPF_FUNC_ARGS) return -ENOTSUPP; + for (i = 0; i < m->nr_args; i++) + nr_arg_slots += round_up(m->arg_size[i], 8) / 8; + /* room of trampoline frame to store return address and frame pointer */ stack_size += 16; @@ -860,7 +878,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, retval_off = stack_size; } - stack_size += nregs * 8; + stack_size += nr_arg_slots * 8; args_off = stack_size; stack_size += 8; @@ -877,7 +895,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, stack_size += 8; sreg_off = stack_size; - stack_size = round_up(stack_size, 16); + if ((flags & BPF_TRAMP_F_CALL_ORIG) && (nr_arg_slots - RV_MAX_REG_ARGS > 0)) + stack_size += (nr_arg_slots - RV_MAX_REG_ARGS) * 8; + + stack_size = round_up(stack_size, STACK_ALIGN); + + /* room for args on stack must be at the top of stack */ + stk_arg_off = stack_size; if (!is_struct_ops) { /* For the trampoline called from function entry, @@ -894,6 +918,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx); emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx); } else { + /* emit kcfi hash */ + emit_kcfi(cfi_get_func_hash(func_addr), ctx); /* For the trampoline called directly, just handle * the frame of trampoline. */ @@ -912,17 +938,17 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, emit_sd(RV_REG_FP, -ip_off, RV_REG_T1, ctx); } - emit_li(RV_REG_T1, nregs, ctx); + emit_li(RV_REG_T1, nr_arg_slots, ctx); emit_sd(RV_REG_FP, -nregs_off, RV_REG_T1, ctx); - store_args(nregs, args_off, ctx); + store_args(nr_arg_slots, args_off, ctx); /* skip to actual body of traced function */ if (flags & BPF_TRAMP_F_SKIP_FRAME) orig_call += RV_FENTRY_NINSNS * 4; if (flags & BPF_TRAMP_F_CALL_ORIG) { - emit_imm(RV_REG_A0, (const s64)im, ctx); + emit_imm(RV_REG_A0, ctx->insns ? (const s64)im : RV_MAX_COUNT_IMM, ctx); ret = emit_call((const u64)__bpf_tramp_enter, true, ctx); if (ret) return ret; @@ -955,13 +981,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, } if (flags & BPF_TRAMP_F_CALL_ORIG) { - restore_args(nregs, args_off, ctx); + restore_args(min_t(int, nr_arg_slots, RV_MAX_REG_ARGS), args_off, ctx); + restore_stack_args(nr_arg_slots - RV_MAX_REG_ARGS, args_off, stk_arg_off, ctx); ret = emit_call((const u64)orig_call, true, ctx); if (ret) goto out; emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx); emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx); - im->ip_after_call = ctx->insns + ctx->ninsns; + im->ip_after_call = ctx->ro_insns + ctx->ninsns; /* 2 nops reserved for auipc+jalr pair */ emit(rv_nop(), ctx); emit(rv_nop(), ctx); @@ -982,15 +1009,15 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, } if (flags & BPF_TRAMP_F_CALL_ORIG) { - im->ip_epilogue = ctx->insns + ctx->ninsns; - emit_imm(RV_REG_A0, (const s64)im, ctx); + im->ip_epilogue = ctx->ro_insns + ctx->ninsns; + emit_imm(RV_REG_A0, ctx->insns ? (const s64)im : RV_MAX_COUNT_IMM, ctx); ret = emit_call((const u64)__bpf_tramp_exit, true, ctx); if (ret) goto out; } if (flags & BPF_TRAMP_F_RESTORE_REGS) - restore_args(nregs, args_off, ctx); + restore_args(min_t(int, nr_arg_slots, RV_MAX_REG_ARGS), args_off, ctx); if (save_ret) { emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx); @@ -1045,31 +1072,52 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, return ret < 0 ? ret : ninsns_rvoff(ctx.ninsns); } -int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, - void *image_end, const struct btf_func_model *m, +void *arch_alloc_bpf_trampoline(unsigned int size) +{ + return bpf_prog_pack_alloc(size, bpf_fill_ill_insns); +} + +void arch_free_bpf_trampoline(void *image, unsigned int size) +{ + bpf_prog_pack_free(image, size); +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, + void *ro_image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *func_addr) { int ret; + void *image, *res; struct rv_jit_context ctx; + u32 size = ro_image_end - ro_image; + + image = kvmalloc(size, GFP_KERNEL); + if (!image) + return -ENOMEM; ctx.ninsns = 0; - /* - * The bpf_int_jit_compile() uses a RW buffer (ctx.insns) to write the - * JITed instructions and later copies it to a RX region (ctx.ro_insns). - * It also uses ctx.ro_insns to calculate offsets for jumps etc. As the - * trampoline image uses the same memory area for writing and execution, - * both ctx.insns and ctx.ro_insns can be set to image. - */ ctx.insns = image; - ctx.ro_insns = image; + ctx.ro_insns = ro_image; ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx); if (ret < 0) - return ret; + goto out; + + if (WARN_ON(size < ninsns_rvoff(ctx.ninsns))) { + ret = -E2BIG; + goto out; + } - bpf_flush_icache(ctx.insns, ctx.insns + ctx.ninsns); + res = bpf_arch_text_copy(ro_image, image, size); + if (IS_ERR(res)) { + ret = PTR_ERR(res); + goto out; + } - return ninsns_rvoff(ret); + bpf_flush_icache(ro_image, ro_image_end); +out: + kvfree(image); + return ret < 0 ? ret : size; } int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, @@ -1089,9 +1137,34 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU64 | BPF_MOV | BPF_X: + if (insn_is_cast_user(insn)) { + emit_mv(RV_REG_T1, rs, ctx); + emit_zextw(RV_REG_T1, RV_REG_T1, ctx); + emit_imm(rd, (ctx->user_vm_start >> 32) << 32, ctx); + emit(rv_beq(RV_REG_T1, RV_REG_ZERO, 4), ctx); + emit_or(RV_REG_T1, rd, RV_REG_T1, ctx); + emit_mv(rd, RV_REG_T1, ctx); + break; + } else if (insn_is_mov_percpu_addr(insn)) { + if (rd != rs) + emit_mv(rd, rs, ctx); +#ifdef CONFIG_SMP + /* Load current CPU number in T1 */ + emit_ld(RV_REG_T1, offsetof(struct thread_info, cpu), + RV_REG_TP, ctx); + /* Load address of __per_cpu_offset array in T2 */ + emit_addr(RV_REG_T2, (u64)&__per_cpu_offset, extra_pass, ctx); + /* Get address of __per_cpu_offset[cpu] in T1 */ + emit_sh3add(RV_REG_T1, RV_REG_T1, RV_REG_T2, ctx); + /* Load __per_cpu_offset[cpu] in T1 */ + emit_ld(RV_REG_T1, 0, RV_REG_T1, ctx); + /* Add the offset to Rd */ + emit_add(rd, rd, RV_REG_T1, ctx); +#endif + } if (imm == 1) { /* Special mov32 for zext */ - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; } switch (insn->off) { @@ -1099,16 +1172,17 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit_mv(rd, rs, ctx); break; case 8: + emit_sextb(rd, rs, ctx); + break; case 16: - emit_slli(RV_REG_T1, rs, 64 - insn->off, ctx); - emit_srai(rd, RV_REG_T1, 64 - insn->off, ctx); + emit_sexth(rd, rs, ctx); break; case 32: - emit_addiw(rd, rs, 0, ctx); + emit_sextw(rd, rs, ctx); break; } if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; /* dst = dst OP src */ @@ -1116,7 +1190,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU64 | BPF_ADD | BPF_X: emit_add(rd, rd, rs, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_SUB | BPF_X: case BPF_ALU64 | BPF_SUB | BPF_X: @@ -1126,31 +1200,31 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit_subw(rd, rd, rs, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_AND | BPF_X: case BPF_ALU64 | BPF_AND | BPF_X: emit_and(rd, rd, rs, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_OR | BPF_X: case BPF_ALU64 | BPF_OR | BPF_X: emit_or(rd, rd, rs, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_XOR | BPF_X: case BPF_ALU64 | BPF_XOR | BPF_X: emit_xor(rd, rd, rs, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU64 | BPF_MUL | BPF_X: emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: @@ -1159,7 +1233,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, else emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: @@ -1168,25 +1242,25 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, else emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU64 | BPF_LSH | BPF_X: emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_RSH | BPF_X: case BPF_ALU64 | BPF_RSH | BPF_X: emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_ARSH | BPF_X: case BPF_ALU64 | BPF_ARSH | BPF_X: emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; /* dst = -dst */ @@ -1194,73 +1268,27 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU64 | BPF_NEG: emit_sub(rd, RV_REG_ZERO, rd, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; /* dst = BSWAP##imm(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: switch (imm) { case 16: - emit_slli(rd, rd, 48, ctx); - emit_srli(rd, rd, 48, ctx); + emit_zexth(rd, rd, ctx); break; case 32: if (!aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case 64: /* Do nothing */ break; } break; - case BPF_ALU | BPF_END | BPF_FROM_BE: case BPF_ALU64 | BPF_END | BPF_FROM_LE: - emit_li(RV_REG_T2, 0, ctx); - - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); - emit_srli(rd, rd, 8, ctx); - if (imm == 16) - goto out_be; - - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); - emit_srli(rd, rd, 8, ctx); - - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); - emit_srli(rd, rd, 8, ctx); - if (imm == 32) - goto out_be; - - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); - emit_srli(rd, rd, 8, ctx); - - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); - emit_srli(rd, rd, 8, ctx); - - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); - emit_srli(rd, rd, 8, ctx); - - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); - emit_srli(rd, rd, 8, ctx); -out_be: - emit_andi(RV_REG_T1, rd, 0xff, ctx); - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - - emit_mv(rd, RV_REG_T2, ctx); + emit_bswap(rd, imm, ctx); break; /* dst = imm */ @@ -1268,7 +1296,7 @@ out_be: case BPF_ALU64 | BPF_MOV | BPF_K: emit_imm(rd, imm, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; /* dst = dst OP imm */ @@ -1281,7 +1309,7 @@ out_be: emit_add(rd, rd, RV_REG_T1, ctx); } if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_SUB | BPF_K: case BPF_ALU64 | BPF_SUB | BPF_K: @@ -1292,7 +1320,7 @@ out_be: emit_sub(rd, rd, RV_REG_T1, ctx); } if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_AND | BPF_K: case BPF_ALU64 | BPF_AND | BPF_K: @@ -1303,7 +1331,7 @@ out_be: emit_and(rd, rd, RV_REG_T1, ctx); } if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_OR | BPF_K: case BPF_ALU64 | BPF_OR | BPF_K: @@ -1314,7 +1342,7 @@ out_be: emit_or(rd, rd, RV_REG_T1, ctx); } if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_XOR | BPF_K: case BPF_ALU64 | BPF_XOR | BPF_K: @@ -1325,7 +1353,7 @@ out_be: emit_xor(rd, rd, RV_REG_T1, ctx); } if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_MUL | BPF_K: case BPF_ALU64 | BPF_MUL | BPF_K: @@ -1333,7 +1361,7 @@ out_be: emit(is64 ? rv_mul(rd, rd, RV_REG_T1) : rv_mulw(rd, rd, RV_REG_T1), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_K: @@ -1345,7 +1373,7 @@ out_be: emit(is64 ? rv_divu(rd, rd, RV_REG_T1) : rv_divuw(rd, rd, RV_REG_T1), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_K: @@ -1357,14 +1385,14 @@ out_be: emit(is64 ? rv_remu(rd, rd, RV_REG_T1) : rv_remuw(rd, rd, RV_REG_T1), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU64 | BPF_LSH | BPF_K: emit_slli(rd, rd, imm, ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_RSH | BPF_K: case BPF_ALU64 | BPF_RSH | BPF_K: @@ -1374,7 +1402,7 @@ out_be: emit(rv_srliw(rd, rd, imm), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; case BPF_ALU | BPF_ARSH | BPF_K: case BPF_ALU64 | BPF_ARSH | BPF_K: @@ -1384,7 +1412,7 @@ out_be: emit(rv_sraiw(rd, rd, imm), ctx); if (!is64 && !aux->verifier_zext) - emit_zext_32(rd, ctx); + emit_zextw(rd, rd, ctx); break; /* JUMP off */ @@ -1425,10 +1453,13 @@ out_be: rvoff = rv_offset(i, off, ctx); if (!is64) { s = ctx->ninsns; - if (is_signed_bpf_cond(BPF_OP(code))) - emit_sext_32_rd_rs(&rd, &rs, ctx); - else - emit_zext_32_rd_rs(&rd, &rs, ctx); + if (is_signed_bpf_cond(BPF_OP(code))) { + emit_sextw_alt(&rs, RV_REG_T1, ctx); + emit_sextw_alt(&rd, RV_REG_T2, ctx); + } else { + emit_zextw_alt(&rs, RV_REG_T1, ctx); + emit_zextw_alt(&rd, RV_REG_T2, ctx); + } e = ctx->ninsns; /* Adjust for extra insns */ @@ -1439,8 +1470,7 @@ out_be: /* Adjust for and */ rvoff -= 4; emit_and(RV_REG_T1, rd, rs, ctx); - emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, - ctx); + emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx); } else { emit_branch(BPF_OP(code), rd, rs, rvoff, ctx); } @@ -1469,18 +1499,18 @@ out_be: case BPF_JMP32 | BPF_JSLE | BPF_K: rvoff = rv_offset(i, off, ctx); s = ctx->ninsns; - if (imm) { + if (imm) emit_imm(RV_REG_T1, imm, ctx); - rs = RV_REG_T1; - } else { - /* If imm is 0, simply use zero register. */ - rs = RV_REG_ZERO; - } + rs = imm ? RV_REG_T1 : RV_REG_ZERO; if (!is64) { - if (is_signed_bpf_cond(BPF_OP(code))) - emit_sext_32_rd(&rd, ctx); - else - emit_zext_32_rd_t1(&rd, ctx); + if (is_signed_bpf_cond(BPF_OP(code))) { + emit_sextw_alt(&rd, RV_REG_T2, ctx); + /* rs has been sign extended */ + } else { + emit_zextw_alt(&rd, RV_REG_T2, ctx); + if (imm) + emit_zextw(rs, rs, ctx); + } } e = ctx->ninsns; @@ -1504,7 +1534,7 @@ out_be: * as t1 is used only in comparison against zero. */ if (!is64 && imm < 0) - emit_addiw(RV_REG_T1, RV_REG_T1, 0, ctx); + emit_sextw(RV_REG_T1, RV_REG_T1, ctx); e = ctx->ninsns; rvoff -= ninsns_rvoff(e - s); emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx); @@ -1516,12 +1546,44 @@ out_be: bool fixed_addr; u64 addr; + /* Inline calls to bpf_get_smp_processor_id() + * + * RV_REG_TP holds the address of the current CPU's task_struct and thread_info is + * at offset 0 in task_struct. + * Load cpu from thread_info: + * Set R0 to ((struct thread_info *)(RV_REG_TP))->cpu + * + * This replicates the implementation of raw_smp_processor_id() on RISCV + */ + if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) { + /* Load current CPU number in R0 */ + emit_ld(bpf_to_rv_reg(BPF_REG_0, ctx), offsetof(struct thread_info, cpu), + RV_REG_TP, ctx); + break; + } + mark_call(ctx); ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr, &fixed_addr); if (ret < 0) return ret; + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { + const struct btf_func_model *fm; + int idx; + + fm = bpf_jit_find_kfunc_model(ctx->prog, insn); + if (!fm) + return -EINVAL; + + for (idx = 0; idx < fm->nr_args; idx++) { + u8 reg = bpf_to_rv_reg(BPF_REG_1 + idx, ctx); + + if (fm->arg_size[idx] == sizeof(int)) + emit_sextw(reg, reg, ctx); + } + } + ret = emit_call(addr, fixed_addr, ctx); if (ret) return ret; @@ -1582,6 +1644,11 @@ out_be: case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: + /* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + RV_REG_ARENA + off) */ + case BPF_LDX | BPF_PROBE_MEM32 | BPF_B: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: { int insn_len, insns_start; bool sign_ext; @@ -1589,6 +1656,11 @@ out_be: sign_ext = BPF_MODE(insn->code) == BPF_MEMSX || BPF_MODE(insn->code) == BPF_PROBE_MEMSX; + if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { + emit_add(RV_REG_T2, rs, RV_REG_ARENA, ctx); + rs = RV_REG_T2; + } + switch (BPF_SIZE(code)) { case BPF_B: if (is_12b_int(off)) { @@ -1725,6 +1797,86 @@ out_be: emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx); break; + case BPF_ST | BPF_PROBE_MEM32 | BPF_B: + case BPF_ST | BPF_PROBE_MEM32 | BPF_H: + case BPF_ST | BPF_PROBE_MEM32 | BPF_W: + case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: + { + int insn_len, insns_start; + + emit_add(RV_REG_T3, rd, RV_REG_ARENA, ctx); + rd = RV_REG_T3; + + /* Load imm to a register then store it */ + emit_imm(RV_REG_T1, imm, ctx); + + switch (BPF_SIZE(code)) { + case BPF_B: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_sb(rd, off, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); + insns_start = ctx->ninsns; + emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_H: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_sh(rd, off, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); + insns_start = ctx->ninsns; + emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_W: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit_sw(rd, off, RV_REG_T1, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); + insns_start = ctx->ninsns; + emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_DW: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit_sd(rd, off, RV_REG_T1, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); + insns_start = ctx->ninsns; + emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER, + insn_len); + if (ret) + return ret; + + break; + } + /* STX: *(size *)(dst + off) = src */ case BPF_STX | BPF_MEM | BPF_B: if (is_12b_int(off)) { @@ -1771,6 +1923,84 @@ out_be: emit_atomic(rd, rs, off, imm, BPF_SIZE(code) == BPF_DW, ctx); break; + + case BPF_STX | BPF_PROBE_MEM32 | BPF_B: + case BPF_STX | BPF_PROBE_MEM32 | BPF_H: + case BPF_STX | BPF_PROBE_MEM32 | BPF_W: + case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: + { + int insn_len, insns_start; + + emit_add(RV_REG_T2, rd, RV_REG_ARENA, ctx); + rd = RV_REG_T2; + + switch (BPF_SIZE(code)) { + case BPF_B: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_sb(rd, off, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); + insns_start = ctx->ninsns; + emit(rv_sb(RV_REG_T1, 0, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_H: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_sh(rd, off, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); + insns_start = ctx->ninsns; + emit(rv_sh(RV_REG_T1, 0, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_W: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit_sw(rd, off, rs, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); + insns_start = ctx->ninsns; + emit_sw(RV_REG_T1, 0, rs, ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_DW: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit_sd(rd, off, rs, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); + insns_start = ctx->ninsns; + emit_sd(RV_REG_T1, 0, rs, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER, + insn_len); + if (ret) + return ret; + + break; + } + default: pr_err("bpf-jit: unknown opcode %02x\n", code); return -EINVAL; @@ -1779,11 +2009,11 @@ out_be: return 0; } -void bpf_jit_build_prologue(struct rv_jit_context *ctx) +void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog) { int i, stack_adjust = 0, store_offset, bpf_stack_adjust; - bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, STACK_ALIGN); if (bpf_stack_adjust) mark_fp(ctx); @@ -1802,12 +2032,17 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx) stack_adjust += 8; if (seen_reg(RV_REG_S6, ctx)) stack_adjust += 8; + if (ctx->arena_vm_start) + stack_adjust += 8; - stack_adjust = round_up(stack_adjust, 16); + stack_adjust = round_up(stack_adjust, STACK_ALIGN); stack_adjust += bpf_stack_adjust; store_offset = stack_adjust - 8; + /* emit kcfi type preamble immediately before the first insn */ + emit_kcfi(is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash, ctx); + /* nops reserved for auipc+jalr pair */ for (i = 0; i < RV_FENTRY_NINSNS; i++) emit(rv_nop(), ctx); @@ -1850,6 +2085,10 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx) emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx); store_offset -= 8; } + if (ctx->arena_vm_start) { + emit_sd(RV_REG_SP, store_offset, RV_REG_ARENA, ctx); + store_offset -= 8; + } emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx); @@ -1863,6 +2102,9 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx) emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx); ctx->stack_size = stack_adjust; + + if (ctx->arena_vm_start) + emit_imm(RV_REG_ARENA, ctx->arena_vm_start, ctx); } void bpf_jit_build_epilogue(struct rv_jit_context *ctx) @@ -1874,3 +2116,28 @@ bool bpf_jit_supports_kfunc_call(void) { return true; } + +bool bpf_jit_supports_ptr_xchg(void) +{ + return true; +} + +bool bpf_jit_supports_arena(void) +{ + return true; +} + +bool bpf_jit_supports_percpu_insn(void) +{ + return true; +} + +bool bpf_jit_inlines_helper_call(s32 imm) +{ + switch (imm) { + case BPF_FUNC_get_smp_processor_id: + return true; + default: + return false; + } +} diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 7b70ccb7fec3..f8cd2f70a7fb 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -9,7 +9,8 @@ #include <linux/bpf.h> #include <linux/filter.h> #include <linux/memory.h> -#include <asm/patch.h> +#include <asm/text-patching.h> +#include <asm/cfi.h> #include "bpf_jit.h" /* Number of iterations to try until offsets converge. */ @@ -79,6 +80,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto skip_init_ctx; } + ctx->arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); + ctx->user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); ctx->prog = prog; ctx->offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); if (!ctx->offset) { @@ -100,7 +103,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) pass++; ctx->ninsns = 0; - bpf_jit_build_prologue(ctx); + bpf_jit_build_prologue(ctx, bpf_is_subprog(prog)); ctx->prologue_len = ctx->ninsns; if (build_body(ctx, extra_pass, ctx->offset)) { @@ -160,7 +163,7 @@ skip_init_ctx: ctx->ninsns = 0; ctx->nexentries = 0; - bpf_jit_build_prologue(ctx); + bpf_jit_build_prologue(ctx, bpf_is_subprog(prog)); if (build_body(ctx, extra_pass, NULL)) { prog = orig_prog; goto out_free_hdr; @@ -170,13 +173,12 @@ skip_init_ctx: if (bpf_jit_enable > 1) bpf_jit_dump(prog->len, prog_size, pass, ctx->insns); - prog->bpf_func = (void *)ctx->ro_insns; + prog->bpf_func = (void *)ctx->ro_insns + cfi_get_offset(); prog->jited = 1; - prog->jited_len = prog_size; + prog->jited_len = prog_size - cfi_get_offset(); if (!prog->is_func || extra_pass) { - if (WARN_ON(bpf_jit_binary_pack_finalize(prog, jit_data->ro_header, - jit_data->header))) { + if (WARN_ON(bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header))) { /* ro_header has been freed */ jit_data->ro_header = NULL; prog = orig_prog; @@ -218,19 +220,6 @@ u64 bpf_jit_alloc_exec_limit(void) return BPF_JIT_REGION_SIZE; } -void *bpf_jit_alloc_exec(unsigned long size) -{ - return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, - BPF_JIT_REGION_END, GFP_KERNEL, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); -} - -void bpf_jit_free_exec(void *addr) -{ - return vfree(addr); -} - void *bpf_arch_text_copy(void *dst, void *src, size_t len) { int ret; @@ -268,7 +257,7 @@ void bpf_jit_free(struct bpf_prog *prog) * before freeing it. */ if (jit_data) { - bpf_jit_binary_pack_finalize(prog, jit_data->ro_header, jit_data->header); + bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header); kfree(jit_data); } hdr = bpf_jit_binary_pack_hdr(prog); |