diff options
| author | Luke Nelson <lukenels@cs.washington.edu> | 2020-05-08 11:15:45 -0700 | 
|---|---|---|
| committer | Will Deacon <will@kernel.org> | 2020-05-11 12:21:39 +0100 | 
| commit | fd49591cb49b72abd1b665222a635ccb17df7923 (patch) | |
| tree | bfc1029abeee3aff6580e3346a49f81436682794 | |
| parent | 579d1b3faa3735e781ff74aac0afd598515dbc63 (diff) | |
bpf, arm64: Optimize AND,OR,XOR,JSET BPF_K using arm64 logical immediates
The current code for BPF_{AND,OR,XOR,JSET} BPF_K loads the immediate to
a temporary register before use.
This patch changes the code to avoid using a temporary register
when the BPF immediate is encodable as an arm64 logical immediate.
If the encoding fails because the immediate is not encodable (the
encoder returns AARCH64_BREAK_FAULT), the JIT falls back to loading
the immediate into a temporary register.
Example of generated code for BPF_ALU32_IMM(BPF_AND, R0, 0x80000001):
without optimization:
  24: mov  w10, #0x8000ffff
  28: movk w10, #0x1
  2c: and  w7, w7, w10
with optimization:
  24: and  w7, w7, #0x80000001
Since the encoding process is quite complex, the JIT reuses the existing
functionality in arch/arm64/kernel/insn.c for encoding logical immediates
(aarch64_insn_gen_logical_immediate) rather than duplicating it in the JIT.
Co-developed-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/r/20200508181547.24783-3-luke.r.nels@gmail.com
Signed-off-by: Will Deacon <will@kernel.org>
| -rw-r--r-- | arch/arm64/net/bpf_jit.h | 14 | ||||
| -rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 37 | 
2 files changed, 43 insertions, 8 deletions
| diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index eb73f9f72c46..f36a779949e6 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -189,4 +189,18 @@  /* Rn & Rm; set condition flags */  #define A64_TST(sf, Rn, Rm) A64_ANDS(sf, A64_ZR, Rn, Rm) +/* Logical (immediate) */ +#define A64_LOGIC_IMM(sf, Rd, Rn, imm, type) ({ \ +	u64 imm64 = (sf) ? (u64)imm : (u64)(u32)imm; \ +	aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_##type, \ +		A64_VARIANT(sf), Rn, Rd, imm64); \ +}) +/* Rd = Rn OP imm */ +#define A64_AND_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, AND) +#define A64_ORR_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, ORR) +#define A64_EOR_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, EOR) +#define A64_ANDS_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, AND_SETFLAGS) +/* Rn & imm; set condition flags */ +#define A64_TST_I(sf, Rn, imm) A64_ANDS_I(sf, A64_ZR, Rn, imm) +  #endif /* _BPF_JIT_H */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index cdc79de0c794..083e5d8a5e2c 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -356,6 +356,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,  	const bool isdw = BPF_SIZE(code) == BPF_DW;  	u8 jmp_cond, reg;  	s32 jmp_offset; +	u32 a64_insn;  #define check_imm(bits, imm) do {				\  	if ((((imm) > 0) && ((imm) >> (bits))) ||		\ @@ -488,18 +489,33 @@ emit_bswap_uxt:  		break;  	case BPF_ALU | BPF_AND | BPF_K:  	case BPF_ALU64 | BPF_AND | BPF_K: -		emit_a64_mov_i(is64, tmp, imm, ctx); -		emit(A64_AND(is64, dst, dst, tmp), ctx); +		a64_insn = A64_AND_I(is64, dst, dst, imm); +		if (a64_insn != AARCH64_BREAK_FAULT) { +			emit(a64_insn, ctx); +		} else { +			emit_a64_mov_i(is64, tmp, imm, ctx); +			emit(A64_AND(is64, dst, dst, tmp), ctx); +		}  		break;  	case BPF_ALU | BPF_OR | BPF_K:  	case BPF_ALU64 | BPF_OR | BPF_K: -		emit_a64_mov_i(is64, tmp, imm, ctx); -		emit(A64_ORR(is64, dst, 
dst, tmp), ctx); +		a64_insn = A64_ORR_I(is64, dst, dst, imm); +		if (a64_insn != AARCH64_BREAK_FAULT) { +			emit(a64_insn, ctx); +		} else { +			emit_a64_mov_i(is64, tmp, imm, ctx); +			emit(A64_ORR(is64, dst, dst, tmp), ctx); +		}  		break;  	case BPF_ALU | BPF_XOR | BPF_K:  	case BPF_ALU64 | BPF_XOR | BPF_K: -		emit_a64_mov_i(is64, tmp, imm, ctx); -		emit(A64_EOR(is64, dst, dst, tmp), ctx); +		a64_insn = A64_EOR_I(is64, dst, dst, imm); +		if (a64_insn != AARCH64_BREAK_FAULT) { +			emit(a64_insn, ctx); +		} else { +			emit_a64_mov_i(is64, tmp, imm, ctx); +			emit(A64_EOR(is64, dst, dst, tmp), ctx); +		}  		break;  	case BPF_ALU | BPF_MUL | BPF_K:  	case BPF_ALU64 | BPF_MUL | BPF_K: @@ -628,8 +644,13 @@ emit_cond_jmp:  		goto emit_cond_jmp;  	case BPF_JMP | BPF_JSET | BPF_K:  	case BPF_JMP32 | BPF_JSET | BPF_K: -		emit_a64_mov_i(is64, tmp, imm, ctx); -		emit(A64_TST(is64, dst, tmp), ctx); +		a64_insn = A64_TST_I(is64, dst, imm); +		if (a64_insn != AARCH64_BREAK_FAULT) { +			emit(a64_insn, ctx); +		} else { +			emit_a64_mov_i(is64, tmp, imm, ctx); +			emit(A64_TST(is64, dst, tmp), ctx); +		}  		goto emit_cond_jmp;  	/* function call */  	case BPF_JMP | BPF_CALL: | 
