author     Alexei Starovoitov <ast@kernel.org>   2023-09-15 17:11:47 -0700
committer  Alexei Starovoitov <ast@kernel.org>   2023-09-15 17:16:57 -0700
commit     c4ab64e6da42030c866f0589d1bfc13a037432dd (patch)
tree       fa805ad59dae6cc655f9c3e30db637e982f63dca /arch
parent     9b2b86332a9b9932d9022a0c004251d5d6437020 (diff)
parent     9b31b4f1d4ae30627e9a75967e974c766f80e92f (diff)
Merge branch 'arm32-bpf-add-support-for-cpuv4-insns'
Puranjay Mohan says:
====================
arm32, bpf: add support for cpuv4 insns
Changes in V2 -> V3:
- Added comments at places where there could be confusion.
- In the patch for DIV64, fix the if-else case that would never run.
- In the same patch use a single instruction to POP caller saved regs.
- Add a patch to change maintainership of ARM32 BPF JIT.
Changes in V1 -> V2:
- Fix coding style issues.
- Don't use tmp variable for src in emit_ldsx_r() as it is redundant.
- Optimize emit_ldsx_r() when offset can fit in immediate.
Add support for cpuv4 instructions to the ARM32 BPF JIT. 64-bit division
was not supported earlier, so this series adds the 64-bit DIV, SDIV, MOD,
and SMOD instructions as well.
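ARMv7's UDIV/SDIV instructions only divide 32-bit values, so the JIT lowers
the new 64-bit operations to calls into small C wrappers around the kernel's
math64 helpers (jit_sdiv64()/jit_smod64() in the diff below). A minimal
sketch of the semantics those wrappers provide; the *_sketch function names
are illustrative only, not part of the series:

#include <linux/math64.h>

/* Truncating 64-bit signed division, as emitted for BPF_ALU64 SDIV. */
static s64 sdiv64_sketch(s64 dividend, s64 divisor)
{
	return div64_s64(dividend, divisor);
}

/* Signed remainder reconstructed from the quotient, as for BPF_ALU64 SMOD. */
static s64 smod64_sketch(s64 dividend, s64 divisor)
{
	return dividend - div64_s64(dividend, divisor) * divisor;
}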
This series needs any one of the patches from [1] to disable zero-extension
for BPF_MEMSX to support ldsx.
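For background, a BPF_MEMSX (ldsx) load reads 1, 2, or 4 bytes and
sign-extends the result to 64 bits, so a verifier-inserted zero-extension
afterwards would clobber the sign bits, which is why the series depends on
[1]. A minimal sketch of the expected load semantics; the helper name is
illustrative only, while the JIT itself emits LDRSB/LDRSH for the low word
and an ASR #31 for the high word (see emit_ldsx_r() in the diff below):

#include <linux/types.h>

/* Illustrative only: what a sign-extending byte load must return. */
static s64 ldsx_byte_sketch(const void *src)
{
	s8 v = *(const s8 *)src;	/* sign-extending 8-bit load (LDRSB) */

	return (s64)v;			/* upper 32 bits replicate the sign bit */
}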
The relevant selftests have passed except ldsx_insn, which needs fentry:
Tested on BeagleBone Black (ARMv7-A):
[root@alarm del]# echo 1 > /proc/sys/net/core/bpf_jit_enable
[root@alarm del]# ./test_progs -a verifier_sdiv,verifier_movsx,verifier_ldsx,verifier_gotol,verifier_bswap
#337/1 verifier_bswap/BSWAP, 16:OK
#337/2 verifier_bswap/BSWAP, 16 @unpriv:OK
#337/3 verifier_bswap/BSWAP, 32:OK
#337/4 verifier_bswap/BSWAP, 32 @unpriv:OK
#337/5 verifier_bswap/BSWAP, 64:OK
#337/6 verifier_bswap/BSWAP, 64 @unpriv:OK
#337 verifier_bswap:OK
#351/1 verifier_gotol/gotol, small_imm:OK
#351/2 verifier_gotol/gotol, small_imm @unpriv:OK
#351 verifier_gotol:OK
#359/1 verifier_ldsx/LDSX, S8:OK
#359/2 verifier_ldsx/LDSX, S8 @unpriv:OK
#359/3 verifier_ldsx/LDSX, S16:OK
#359/4 verifier_ldsx/LDSX, S16 @unpriv:OK
#359/5 verifier_ldsx/LDSX, S32:OK
#359/6 verifier_ldsx/LDSX, S32 @unpriv:OK
#359/7 verifier_ldsx/LDSX, S8 range checking, privileged:OK
#359/8 verifier_ldsx/LDSX, S16 range checking:OK
#359/9 verifier_ldsx/LDSX, S16 range checking @unpriv:OK
#359/10 verifier_ldsx/LDSX, S32 range checking:OK
#359/11 verifier_ldsx/LDSX, S32 range checking @unpriv:OK
#359 verifier_ldsx:OK
#370/1 verifier_movsx/MOV32SX, S8:OK
#370/2 verifier_movsx/MOV32SX, S8 @unpriv:OK
#370/3 verifier_movsx/MOV32SX, S16:OK
#370/4 verifier_movsx/MOV32SX, S16 @unpriv:OK
#370/5 verifier_movsx/MOV64SX, S8:OK
#370/6 verifier_movsx/MOV64SX, S8 @unpriv:OK
#370/7 verifier_movsx/MOV64SX, S16:OK
#370/8 verifier_movsx/MOV64SX, S16 @unpriv:OK
#370/9 verifier_movsx/MOV64SX, S32:OK
#370/10 verifier_movsx/MOV64SX, S32 @unpriv:OK
#370/11 verifier_movsx/MOV32SX, S8, range_check:OK
#370/12 verifier_movsx/MOV32SX, S8, range_check @unpriv:OK
#370/13 verifier_movsx/MOV32SX, S16, range_check:OK
#370/14 verifier_movsx/MOV32SX, S16, range_check @unpriv:OK
#370/15 verifier_movsx/MOV32SX, S16, range_check 2:OK
#370/16 verifier_movsx/MOV32SX, S16, range_check 2 @unpriv:OK
#370/17 verifier_movsx/MOV64SX, S8, range_check:OK
#370/18 verifier_movsx/MOV64SX, S8, range_check @unpriv:OK
#370/19 verifier_movsx/MOV64SX, S16, range_check:OK
#370/20 verifier_movsx/MOV64SX, S16, range_check @unpriv:OK
#370/21 verifier_movsx/MOV64SX, S32, range_check:OK
#370/22 verifier_movsx/MOV64SX, S32, range_check @unpriv:OK
#370/23 verifier_movsx/MOV64SX, S16, R10 Sign Extension:OK
#370/24 verifier_movsx/MOV64SX, S16, R10 Sign Extension @unpriv:OK
#370 verifier_movsx:OK
#382/1 verifier_sdiv/SDIV32, non-zero imm divisor, check 1:OK
#382/2 verifier_sdiv/SDIV32, non-zero imm divisor, check 1 @unpriv:OK
#382/3 verifier_sdiv/SDIV32, non-zero imm divisor, check 2:OK
#382/4 verifier_sdiv/SDIV32, non-zero imm divisor, check 2 @unpriv:OK
#382/5 verifier_sdiv/SDIV32, non-zero imm divisor, check 3:OK
#382/6 verifier_sdiv/SDIV32, non-zero imm divisor, check 3 @unpriv:OK
#382/7 verifier_sdiv/SDIV32, non-zero imm divisor, check 4:OK
#382/8 verifier_sdiv/SDIV32, non-zero imm divisor, check 4 @unpriv:OK
#382/9 verifier_sdiv/SDIV32, non-zero imm divisor, check 5:OK
#382/10 verifier_sdiv/SDIV32, non-zero imm divisor, check 5 @unpriv:OK
#382/11 verifier_sdiv/SDIV32, non-zero imm divisor, check 6:OK
#382/12 verifier_sdiv/SDIV32, non-zero imm divisor, check 6 @unpriv:OK
#382/13 verifier_sdiv/SDIV32, non-zero imm divisor, check 7:OK
#382/14 verifier_sdiv/SDIV32, non-zero imm divisor, check 7 @unpriv:OK
#382/15 verifier_sdiv/SDIV32, non-zero imm divisor, check 8:OK
#382/16 verifier_sdiv/SDIV32, non-zero imm divisor, check 8 @unpriv:OK
#382/17 verifier_sdiv/SDIV32, non-zero reg divisor, check 1:OK
#382/18 verifier_sdiv/SDIV32, non-zero reg divisor, check 1 @unpriv:OK
#382/19 verifier_sdiv/SDIV32, non-zero reg divisor, check 2:OK
#382/20 verifier_sdiv/SDIV32, non-zero reg divisor, check 2 @unpriv:OK
#382/21 verifier_sdiv/SDIV32, non-zero reg divisor, check 3:OK
#382/22 verifier_sdiv/SDIV32, non-zero reg divisor, check 3 @unpriv:OK
#382/23 verifier_sdiv/SDIV32, non-zero reg divisor, check 4:OK
#382/24 verifier_sdiv/SDIV32, non-zero reg divisor, check 4 @unpriv:OK
#382/25 verifier_sdiv/SDIV32, non-zero reg divisor, check 5:OK
#382/26 verifier_sdiv/SDIV32, non-zero reg divisor, check 5 @unpriv:OK
#382/27 verifier_sdiv/SDIV32, non-zero reg divisor, check 6:OK
#382/28 verifier_sdiv/SDIV32, non-zero reg divisor, check 6 @unpriv:OK
#382/29 verifier_sdiv/SDIV32, non-zero reg divisor, check 7:OK
#382/30 verifier_sdiv/SDIV32, non-zero reg divisor, check 7 @unpriv:OK
#382/31 verifier_sdiv/SDIV32, non-zero reg divisor, check 8:OK
#382/32 verifier_sdiv/SDIV32, non-zero reg divisor, check 8 @unpriv:OK
#382/33 verifier_sdiv/SDIV64, non-zero imm divisor, check 1:OK
#382/34 verifier_sdiv/SDIV64, non-zero imm divisor, check 1 @unpriv:OK
#382/35 verifier_sdiv/SDIV64, non-zero imm divisor, check 2:OK
#382/36 verifier_sdiv/SDIV64, non-zero imm divisor, check 2 @unpriv:OK
#382/37 verifier_sdiv/SDIV64, non-zero imm divisor, check 3:OK
#382/38 verifier_sdiv/SDIV64, non-zero imm divisor, check 3 @unpriv:OK
#382/39 verifier_sdiv/SDIV64, non-zero imm divisor, check 4:OK
#382/40 verifier_sdiv/SDIV64, non-zero imm divisor, check 4 @unpriv:OK
#382/41 verifier_sdiv/SDIV64, non-zero imm divisor, check 5:OK
#382/42 verifier_sdiv/SDIV64, non-zero imm divisor, check 5 @unpriv:OK
#382/43 verifier_sdiv/SDIV64, non-zero imm divisor, check 6:OK
#382/44 verifier_sdiv/SDIV64, non-zero imm divisor, check 6 @unpriv:OK
#382/45 verifier_sdiv/SDIV64, non-zero reg divisor, check 1:OK
#382/46 verifier_sdiv/SDIV64, non-zero reg divisor, check 1 @unpriv:OK
#382/47 verifier_sdiv/SDIV64, non-zero reg divisor, check 2:OK
#382/48 verifier_sdiv/SDIV64, non-zero reg divisor, check 2 @unpriv:OK
#382/49 verifier_sdiv/SDIV64, non-zero reg divisor, check 3:OK
#382/50 verifier_sdiv/SDIV64, non-zero reg divisor, check 3 @unpriv:OK
#382/51 verifier_sdiv/SDIV64, non-zero reg divisor, check 4:OK
#382/52 verifier_sdiv/SDIV64, non-zero reg divisor, check 4 @unpriv:OK
#382/53 verifier_sdiv/SDIV64, non-zero reg divisor, check 5:OK
#382/54 verifier_sdiv/SDIV64, non-zero reg divisor, check 5 @unpriv:OK
#382/55 verifier_sdiv/SDIV64, non-zero reg divisor, check 6:OK
#382/56 verifier_sdiv/SDIV64, non-zero reg divisor, check 6 @unpriv:OK
#382/57 verifier_sdiv/SMOD32, non-zero imm divisor, check 1:OK
#382/58 verifier_sdiv/SMOD32, non-zero imm divisor, check 1 @unpriv:OK
#382/59 verifier_sdiv/SMOD32, non-zero imm divisor, check 2:OK
#382/60 verifier_sdiv/SMOD32, non-zero imm divisor, check 2 @unpriv:OK
#382/61 verifier_sdiv/SMOD32, non-zero imm divisor, check 3:OK
#382/62 verifier_sdiv/SMOD32, non-zero imm divisor, check 3 @unpriv:OK
#382/63 verifier_sdiv/SMOD32, non-zero imm divisor, check 4:OK
#382/64 verifier_sdiv/SMOD32, non-zero imm divisor, check 4 @unpriv:OK
#382/65 verifier_sdiv/SMOD32, non-zero imm divisor, check 5:OK
#382/66 verifier_sdiv/SMOD32, non-zero imm divisor, check 5 @unpriv:OK
#382/67 verifier_sdiv/SMOD32, non-zero imm divisor, check 6:OK
#382/68 verifier_sdiv/SMOD32, non-zero imm divisor, check 6 @unpriv:OK
#382/69 verifier_sdiv/SMOD32, non-zero reg divisor, check 1:OK
#382/70 verifier_sdiv/SMOD32, non-zero reg divisor, check 1 @unpriv:OK
#382/71 verifier_sdiv/SMOD32, non-zero reg divisor, check 2:OK
#382/72 verifier_sdiv/SMOD32, non-zero reg divisor, check 2 @unpriv:OK
#382/73 verifier_sdiv/SMOD32, non-zero reg divisor, check 3:OK
#382/74 verifier_sdiv/SMOD32, non-zero reg divisor, check 3 @unpriv:OK
#382/75 verifier_sdiv/SMOD32, non-zero reg divisor, check 4:OK
#382/76 verifier_sdiv/SMOD32, non-zero reg divisor, check 4 @unpriv:OK
#382/77 verifier_sdiv/SMOD32, non-zero reg divisor, check 5:OK
#382/78 verifier_sdiv/SMOD32, non-zero reg divisor, check 5 @unpriv:OK
#382/79 verifier_sdiv/SMOD32, non-zero reg divisor, check 6:OK
#382/80 verifier_sdiv/SMOD32, non-zero reg divisor, check 6 @unpriv:OK
#382/81 verifier_sdiv/SMOD64, non-zero imm divisor, check 1:OK
#382/82 verifier_sdiv/SMOD64, non-zero imm divisor, check 1 @unpriv:OK
#382/83 verifier_sdiv/SMOD64, non-zero imm divisor, check 2:OK
#382/84 verifier_sdiv/SMOD64, non-zero imm divisor, check 2 @unpriv:OK
#382/85 verifier_sdiv/SMOD64, non-zero imm divisor, check 3:OK
#382/86 verifier_sdiv/SMOD64, non-zero imm divisor, check 3 @unpriv:OK
#382/87 verifier_sdiv/SMOD64, non-zero imm divisor, check 4:OK
#382/88 verifier_sdiv/SMOD64, non-zero imm divisor, check 4 @unpriv:OK
#382/89 verifier_sdiv/SMOD64, non-zero imm divisor, check 5:OK
#382/90 verifier_sdiv/SMOD64, non-zero imm divisor, check 5 @unpriv:OK
#382/91 verifier_sdiv/SMOD64, non-zero imm divisor, check 6:OK
#382/92 verifier_sdiv/SMOD64, non-zero imm divisor, check 6 @unpriv:OK
#382/93 verifier_sdiv/SMOD64, non-zero imm divisor, check 7:OK
#382/94 verifier_sdiv/SMOD64, non-zero imm divisor, check 7 @unpriv:OK
#382/95 verifier_sdiv/SMOD64, non-zero imm divisor, check 8:OK
#382/96 verifier_sdiv/SMOD64, non-zero imm divisor, check 8 @unpriv:OK
#382/97 verifier_sdiv/SMOD64, non-zero reg divisor, check 1:OK
#382/98 verifier_sdiv/SMOD64, non-zero reg divisor, check 1 @unpriv:OK
#382/99 verifier_sdiv/SMOD64, non-zero reg divisor, check 2:OK
#382/100 verifier_sdiv/SMOD64, non-zero reg divisor, check 2 @unpriv:OK
#382/101 verifier_sdiv/SMOD64, non-zero reg divisor, check 3:OK
#382/102 verifier_sdiv/SMOD64, non-zero reg divisor, check 3 @unpriv:OK
#382/103 verifier_sdiv/SMOD64, non-zero reg divisor, check 4:OK
#382/104 verifier_sdiv/SMOD64, non-zero reg divisor, check 4 @unpriv:OK
#382/105 verifier_sdiv/SMOD64, non-zero reg divisor, check 5:OK
#382/106 verifier_sdiv/SMOD64, non-zero reg divisor, check 5 @unpriv:OK
#382/107 verifier_sdiv/SMOD64, non-zero reg divisor, check 6:OK
#382/108 verifier_sdiv/SMOD64, non-zero reg divisor, check 6 @unpriv:OK
#382/109 verifier_sdiv/SMOD64, non-zero reg divisor, check 7:OK
#382/110 verifier_sdiv/SMOD64, non-zero reg divisor, check 7 @unpriv:OK
#382/111 verifier_sdiv/SMOD64, non-zero reg divisor, check 8:OK
#382/112 verifier_sdiv/SMOD64, non-zero reg divisor, check 8 @unpriv:OK
#382/113 verifier_sdiv/SDIV32, zero divisor:OK
#382/114 verifier_sdiv/SDIV32, zero divisor @unpriv:OK
#382/115 verifier_sdiv/SDIV64, zero divisor:OK
#382/116 verifier_sdiv/SDIV64, zero divisor @unpriv:OK
#382/117 verifier_sdiv/SMOD32, zero divisor:OK
#382/118 verifier_sdiv/SMOD32, zero divisor @unpriv:OK
#382/119 verifier_sdiv/SMOD64, zero divisor:OK
#382/120 verifier_sdiv/SMOD64, zero divisor @unpriv:OK
#382 verifier_sdiv:OK
Summary: 5/163 PASSED, 0 SKIPPED, 0 FAILED
As the selftests don't compile for 32-bit architectures without
modifications (because long is 32-bit there), I have added new tests to
lib/test_bpf.c for the cpuv4 insns; all of them pass:
test_bpf: Summary: 1052 PASSED, 0 FAILED, [891/1040 JIT'ed]
test_bpf: test_tail_calls: Summary: 10 PASSED, 0 FAILED, [10/10 JIT'ed]
test_bpf: test_skb_segment: Summary: 2 PASSED, 0 FAILED
[1] https://lore.kernel.org/all/mb61p5y4u3ptd.fsf@amazon.com/
====================
Link: https://lore.kernel.org/r/20230907230550.1417590-1-puranjay12@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/net/bpf_jit_32.c  280
-rw-r--r--  arch/arm/net/bpf_jit_32.h    4
2 files changed, 264 insertions, 20 deletions
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 6a1c9fca5260..1d672457d02f 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -2,6 +2,7 @@
 /*
  * Just-In-Time compiler for eBPF filters on 32bit ARM
  *
+ * Copyright (c) 2023 Puranjay Mohan <puranjay12@gmail.com>
  * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
  * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
  */
@@ -15,6 +16,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/if_vlan.h>
+#include <linux/math64.h>
 
 #include <asm/cacheflush.h>
 #include <asm/hwcap.h>
@@ -228,6 +230,44 @@ static u32 jit_mod32(u32 dividend, u32 divisor)
 	return dividend % divisor;
 }
 
+static s32 jit_sdiv32(s32 dividend, s32 divisor)
+{
+	return dividend / divisor;
+}
+
+static s32 jit_smod32(s32 dividend, s32 divisor)
+{
+	return dividend % divisor;
+}
+
+/* Wrappers for 64-bit div/mod */
+static u64 jit_udiv64(u64 dividend, u64 divisor)
+{
+	return div64_u64(dividend, divisor);
+}
+
+static u64 jit_mod64(u64 dividend, u64 divisor)
+{
+	u64 rem;
+
+	div64_u64_rem(dividend, divisor, &rem);
+	return rem;
+}
+
+static s64 jit_sdiv64(s64 dividend, s64 divisor)
+{
+	return div64_s64(dividend, divisor);
+}
+
+static s64 jit_smod64(s64 dividend, s64 divisor)
+{
+	u64 q;
+
+	q = div64_s64(dividend, divisor);
+
+	return dividend - q * divisor;
+}
+
 static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
 {
 	inst |= (cond << 28);
@@ -333,6 +373,9 @@ static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8)
 #define ARM_LDRD_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off)
 #define ARM_LDRH_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off)
 
+#define ARM_LDRSH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSH_I, rt, rn, off)
+#define ARM_LDRSB_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSB_I, rt, rn, off)
+
 #define ARM_STR_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off)
 #define ARM_STRB_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off)
 #define ARM_STRD_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off)
@@ -474,17 +517,18 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
 	return to - from - 2;
 }
 
-static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
+static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign)
 {
 	const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);
 	const s8 *tmp = bpf2a32[TMP_REG_1];
+	u32 dst;
 
 #if __LINUX_ARM_ARCH__ == 7
 	if (elf_hwcap & HWCAP_IDIVA) {
-		if (op == BPF_DIV)
-			emit(ARM_UDIV(rd, rm, rn), ctx);
-		else {
-			emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
+		if (op == BPF_DIV) {
+			emit(sign ? ARM_SDIV(rd, rm, rn) : ARM_UDIV(rd, rm, rn), ctx);
+		} else {
+			emit(sign ? ARM_SDIV(ARM_IP, rm, rn) : ARM_UDIV(ARM_IP, rm, rn), ctx);
 			emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
 		}
 		return;
@@ -512,8 +556,19 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
 	emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx);
 
 	/* Call appropriate function */
-	emit_mov_i(ARM_IP, op == BPF_DIV ?
-		   (u32)jit_udiv32 : (u32)jit_mod32, ctx);
+	if (sign) {
+		if (op == BPF_DIV)
+			dst = (u32)jit_sdiv32;
+		else
+			dst = (u32)jit_smod32;
+	} else {
+		if (op == BPF_DIV)
+			dst = (u32)jit_udiv32;
+		else
+			dst = (u32)jit_mod32;
+	}
+
+	emit_mov_i(ARM_IP, dst, ctx);
 	emit_blx_r(ARM_IP, ctx);
 
 	/* Restore caller-saved registers from stack */
@@ -530,6 +585,78 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
 		emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
 }
 
+static inline void emit_udivmod64(const s8 *rd, const s8 *rm, const s8 *rn, struct jit_ctx *ctx,
+				  u8 op, u8 sign)
+{
+	u32 dst;
+
+	/* Push caller-saved registers on stack */
+	emit(ARM_PUSH(CALLER_MASK), ctx);
+
+	/*
+	 * As we are implementing 64-bit div/mod as function calls, We need to put the dividend in
+	 * R0-R1 and the divisor in R2-R3. As we have already pushed these registers on the stack,
+	 * we can recover them later after returning from the function call.
+	 */
+	if (rm[1] != ARM_R0 || rn[1] != ARM_R2) {
+		/*
+		 * Move Rm to {R1, R0} if it is not already there.
+		 */
+		if (rm[1] != ARM_R0) {
+			if (rn[1] == ARM_R0)
+				emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
+			emit(ARM_MOV_R(ARM_R1, rm[0]), ctx);
+			emit(ARM_MOV_R(ARM_R0, rm[1]), ctx);
+			if (rn[1] == ARM_R0) {
+				emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
+				goto cont;
+			}
+		}
+		/*
+		 * Move Rn to {R3, R2} if it is not already there.
+		 */
+		if (rn[1] != ARM_R2) {
+			emit(ARM_MOV_R(ARM_R3, rn[0]), ctx);
+			emit(ARM_MOV_R(ARM_R2, rn[1]), ctx);
+		}
+	}
+
+cont:
+
+	/* Call appropriate function */
+	if (sign) {
+		if (op == BPF_DIV)
+			dst = (u32)jit_sdiv64;
+		else
+			dst = (u32)jit_smod64;
+	} else {
+		if (op == BPF_DIV)
+			dst = (u32)jit_udiv64;
+		else
+			dst = (u32)jit_mod64;
+	}
+
+	emit_mov_i(ARM_IP, dst, ctx);
+	emit_blx_r(ARM_IP, ctx);
+
+	/* Save return value */
+	if (rd[1] != ARM_R0) {
+		emit(ARM_MOV_R(rd[0], ARM_R1), ctx);
+		emit(ARM_MOV_R(rd[1], ARM_R0), ctx);
+	}
+
+	/* Recover {R3, R2} and {R1, R0} from stack if they are not Rd */
+	if (rd[1] != ARM_R0 && rd[1] != ARM_R2) {
+		emit(ARM_POP(CALLER_MASK), ctx);
+	} else if (rd[1] != ARM_R0) {
+		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
+		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
+	} else {
+		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
+		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
+	}
+}
+
 /* Is the translated BPF register on stack? */
 static bool is_stacked(s8 reg)
 {
@@ -744,12 +871,16 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
 }
 
 /* dst = src (4 bytes)*/
-static inline void emit_a32_mov_r(const s8 dst, const s8 src,
+static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off,
 				  struct jit_ctx *ctx)
 {
 	const s8 *tmp = bpf2a32[TMP_REG_1];
 	s8 rt;
 
 	rt = arm_bpf_get_reg32(src, tmp[0], ctx);
+	if (off && off != 32) {
+		emit(ARM_LSL_I(rt, rt, 32 - off), ctx);
+		emit(ARM_ASR_I(rt, rt, 32 - off), ctx);
+	}
 	arm_bpf_put_reg32(dst, rt, ctx);
 }
 
@@ -758,15 +889,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
 				  const s8 src[],
 				  struct jit_ctx *ctx) {
 	if (!is64) {
-		emit_a32_mov_r(dst_lo, src_lo, ctx);
+		emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
 		if (!ctx->prog->aux->verifier_zext)
 			/* Zero out high 4 bytes */
 			emit_a32_mov_i(dst_hi, 0, ctx);
 	} else if (__LINUX_ARM_ARCH__ < 6 &&
 		   ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
 		/* complete 8 byte move */
-		emit_a32_mov_r(dst_lo, src_lo, ctx);
-		emit_a32_mov_r(dst_hi, src_hi, ctx);
+		emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
+		emit_a32_mov_r(dst_hi, src_hi, 0, ctx);
 	} else if (is_stacked(src_lo) && is_stacked(dst_lo)) {
 		const u8 *tmp = bpf2a32[TMP_REG_1];
 
@@ -782,6 +913,24 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
 	}
 }
 
+/* dst = (signed)src */
+static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[],
+				      struct jit_ctx *ctx) {
+	const s8 *tmp = bpf2a32[TMP_REG_1];
+	const s8 *rt;
+
+	rt = arm_bpf_get_reg64(dst, tmp, ctx);
+
+	emit_a32_mov_r(dst_lo, src_lo, off, ctx);
+	if (!is64) {
+		if (!ctx->prog->aux->verifier_zext)
+			/* Zero out high 4 bytes */
+			emit_a32_mov_i(dst_hi, 0, ctx);
+	} else {
+		emit(ARM_ASR_I(rt[0], rt[1], 31), ctx);
+	}
+}
+
 /* Shift operations */
 static inline void emit_a32_alu_i(const s8 dst, const u32 val,
 				struct jit_ctx *ctx, const u8 op) {
@@ -1026,6 +1175,24 @@ static bool is_ldst_imm(s16 off, const u8 size)
 	return -off_max <= off && off <= off_max;
 }
 
+static bool is_ldst_imm8(s16 off, const u8 size)
+{
+	s16 off_max = 0;
+
+	switch (size) {
+	case BPF_B:
+		off_max = 0xff;
+		break;
+	case BPF_W:
+		off_max = 0xfff;
+		break;
+	case BPF_H:
+		off_max = 0xff;
+		break;
+	}
+	return -off_max <= off && off <= off_max;
+}
+
 /* *(size *)(dst + off) = src */
 static inline void emit_str_r(const s8 dst, const s8 src[],
 			      s16 off, struct jit_ctx *ctx, const u8 sz){
@@ -1105,6 +1272,50 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
 	arm_bpf_put_reg64(dst, rd, ctx);
 }
 
+/* dst = *(signed size*)(src + off) */
+static inline void emit_ldsx_r(const s8 dst[], const s8 src,
+			       s16 off, struct jit_ctx *ctx, const u8 sz){
+	const s8 *tmp = bpf2a32[TMP_REG_1];
+	const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
+	s8 rm = src;
+	int add_off;
+
+	if (!is_ldst_imm8(off, sz)) {
+		/*
+		 * offset does not fit in the load/store immediate,
+		 * construct an ADD instruction to apply the offset.
+		 */
+		add_off = imm8m(off);
+		if (add_off > 0) {
+			emit(ARM_ADD_I(tmp[0], src, add_off), ctx);
+			rm = tmp[0];
+		} else {
+			emit_a32_mov_i(tmp[0], off, ctx);
+			emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
+			rm = tmp[0];
+		}
+		off = 0;
+	}
+
+	switch (sz) {
+	case BPF_B:
+		/* Load a Byte with sign extension*/
+		emit(ARM_LDRSB_I(rd[1], rm, off), ctx);
+		break;
+	case BPF_H:
+		/* Load a HalfWord with sign extension*/
+		emit(ARM_LDRSH_I(rd[1], rm, off), ctx);
+		break;
+	case BPF_W:
+		/* Load a Word*/
+		emit(ARM_LDR_I(rd[1], rm, off), ctx);
+		break;
+	}
+	/* Carry the sign extension to upper 32 bits */
+	emit(ARM_ASR_I(rd[0], rd[1], 31), ctx);
+	arm_bpf_put_reg64(dst, rd, ctx);
+}
+
 /* Arithmatic Operation */
 static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
 			     const u8 rn, struct jit_ctx *ctx, u8 op,
@@ -1385,7 +1596,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 				emit_a32_mov_i(dst_hi, 0, ctx);
 				break;
 			}
-			emit_a32_mov_r64(is64, dst, src, ctx);
+			if (insn->off)
+				emit_a32_movsx_r64(is64, insn->off, dst, src, ctx);
+			else
+				emit_a32_mov_r64(is64, dst, src, ctx);
 			break;
 		case BPF_K:
 			/* Sign-extend immediate value to destination reg */
@@ -1461,7 +1675,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 			rt = src_lo;
 			break;
 		}
-		emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code));
+		emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code), off);
 		arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
 		if (!ctx->prog->aux->verifier_zext)
 			emit_a32_mov_i(dst_hi, 0, ctx);
@@ -1470,7 +1684,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	case BPF_ALU64 | BPF_DIV | BPF_X:
 	case BPF_ALU64 | BPF_MOD | BPF_K:
 	case BPF_ALU64 | BPF_MOD | BPF_X:
-		goto notyet;
+		rd = arm_bpf_get_reg64(dst, tmp2, ctx);
+		switch (BPF_SRC(code)) {
+		case BPF_X:
+			rs = arm_bpf_get_reg64(src, tmp, ctx);
+			break;
+		case BPF_K:
+			rs = tmp;
+			emit_a32_mov_se_i64(is64, rs, imm, ctx);
+			break;
+		}
+		emit_udivmod64(rd, rd, rs, ctx, BPF_OP(code), off);
+		arm_bpf_put_reg64(dst, rd, ctx);
+		break;
 	/* dst = dst << imm */
 	/* dst = dst >> imm */
 	/* dst = dst >> imm (signed) */
@@ -1545,10 +1771,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		break;
 	/* dst = htole(dst) */
 	/* dst = htobe(dst) */
-	case BPF_ALU | BPF_END | BPF_FROM_LE:
-	case BPF_ALU | BPF_END | BPF_FROM_BE:
+	case BPF_ALU | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
+	case BPF_ALU | BPF_END | BPF_FROM_BE: /* also BPF_TO_BE */
+	/* dst = bswap(dst) */
+	case BPF_ALU64 | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
 		rd = arm_bpf_get_reg64(dst, tmp, ctx);
-		if (BPF_SRC(code) == BPF_FROM_LE)
+		if (BPF_SRC(code) == BPF_FROM_LE && BPF_CLASS(code) != BPF_ALU64)
 			goto emit_bswap_uxt;
 		switch (imm) {
 		case 16:
@@ -1603,8 +1831,15 @@ exit:
 	case BPF_LDX | BPF_MEM | BPF_H:
 	case BPF_LDX | BPF_MEM | BPF_B:
 	case BPF_LDX | BPF_MEM | BPF_DW:
+	/* LDSX: dst = *(signed size *)(src + off) */
+	case BPF_LDX | BPF_MEMSX | BPF_B:
+	case BPF_LDX | BPF_MEMSX | BPF_H:
+	case BPF_LDX | BPF_MEMSX | BPF_W:
 		rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
-		emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
+		if (BPF_MODE(insn->code) == BPF_MEMSX)
+			emit_ldsx_r(dst, rn, off, ctx, BPF_SIZE(code));
+		else
+			emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
 		break;
 	/* speculation barrier */
 	case BPF_ST | BPF_NOSPEC:
@@ -1761,10 +1996,15 @@ go_jmp:
 		break;
 	/* JMP OFF */
 	case BPF_JMP | BPF_JA:
+	case BPF_JMP32 | BPF_JA:
 	{
-		if (off == 0)
+		if (BPF_CLASS(code) == BPF_JMP32 && imm != 0)
+			jmp_offset = bpf2a32_offset(i + imm, i, ctx);
+		else if (BPF_CLASS(code) == BPF_JMP && off != 0)
+			jmp_offset = bpf2a32_offset(i + off, i, ctx);
+		else
 			break;
-		jmp_offset = bpf2a32_offset(i+off, i, ctx);
+
 		check_imm24(jmp_offset);
 		emit(ARM_B(jmp_offset), ctx);
 		break;
diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h
index e0b593a1498d..438f0e1f91a0 100644
--- a/arch/arm/net/bpf_jit_32.h
+++ b/arch/arm/net/bpf_jit_32.h
@@ -79,9 +79,11 @@
 #define ARM_INST_LDST__IMM12	0x00000fff
 #define ARM_INST_LDRB_I		0x05500000
 #define ARM_INST_LDRB_R		0x07d00000
+#define ARM_INST_LDRSB_I	0x015000d0
 #define ARM_INST_LDRD_I		0x014000d0
 #define ARM_INST_LDRH_I		0x015000b0
 #define ARM_INST_LDRH_R		0x019000b0
+#define ARM_INST_LDRSH_I	0x015000f0
 #define ARM_INST_LDR_I		0x05100000
 #define ARM_INST_LDR_R		0x07900000
 
@@ -137,6 +139,7 @@
 #define ARM_INST_TST_I		0x03100000
 
 #define ARM_INST_UDIV		0x0730f010
+#define ARM_INST_SDIV		0x0710f010
 
 #define ARM_INST_UMULL		0x00800090
 
@@ -265,6 +268,7 @@
 #define ARM_TST_I(rn, imm)	_AL3_I(ARM_INST_TST, 0, rn, imm)
 
 #define ARM_UDIV(rd, rn, rm)	(ARM_INST_UDIV | (rd) << 16 | (rn) | (rm) << 8)
+#define ARM_SDIV(rd, rn, rm)	(ARM_INST_SDIV | (rd) << 16 | (rn) | (rm) << 8)
 
 #define ARM_UMULL(rd_lo, rd_hi, rn, rm)	(ARM_INST_UMULL | (rd_hi) << 16 \
 					 | (rd_lo) << 12 | (rm) << 8 | rn)