diff options
Diffstat (limited to 'arch/powerpc/net')
-rw-r--r-- | arch/powerpc/net/Makefile | 6 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit.h | 64 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit32.h | 139 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit64.h | 21 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit_asm.S | 226 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit_comp.c | 782 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit_comp32.c | 1100 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit_comp64.c | 295 |
8 files changed, 1366 insertions, 1267 deletions
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile index c2dec3a68d4c..8e60af32e51e 100644 --- a/arch/powerpc/net/Makefile +++ b/arch/powerpc/net/Makefile @@ -2,8 +2,4 @@ # # Arch-specific network modules # -ifdef CONFIG_PPC64 -obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o -else -obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o -endif +obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp$(BITS).o diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index d0a67a1bbaf1..99fad093f43e 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -26,6 +26,9 @@ /* Long jump; (unconditional 'branch') */ #define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \ (((dest) - (ctx->idx * 4)) & 0x03fffffc)) +/* blr; (unconditional 'branch' with link) to absolute address */ +#define PPC_BL_ABS(dest) EMIT(PPC_INST_BL | \ + (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc)) /* "cond" here covers BO:BI fields. */ #define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \ (((cond) & 0x3ff) << 16) | \ @@ -42,6 +45,10 @@ EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \ } } while(0) +#ifdef CONFIG_PPC32 +#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0)) +#endif + #define PPC_LI64(d, i) do { \ if ((long)(i) >= -2147483648 && \ (long)(i) < 2147483648) \ @@ -108,6 +115,63 @@ static inline bool is_nearbranch(int offset) #define COND_LT (CR0_LT | COND_CMP_TRUE) #define COND_LE (CR0_GT | COND_CMP_FALSE) +#define SEEN_FUNC 0x20000000 /* might call external helpers */ +#define SEEN_STACK 0x40000000 /* uses BPF stack */ +#define SEEN_TAILCALL 0x80000000 /* uses tail calls */ + +#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ +#define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */ + +#ifdef CONFIG_PPC64 +extern const int b2p[MAX_BPF_JIT_REG + 2]; +#else +extern const int b2p[MAX_BPF_JIT_REG + 1]; +#endif + +struct codegen_context { + /* + * This is used to track register usage as well + * as calls to external helpers. + * - register usage is tracked with corresponding + * bits (r3-r31) + * - rest of the bits can be used to track other + * things -- for now, we use bits 0 to 2 + * encoded in SEEN_* macros above + */ + unsigned int seen; + unsigned int idx; + unsigned int stack_size; + int b2p[ARRAY_SIZE(b2p)]; +}; + +static inline void bpf_flush_icache(void *start, void *end) +{ + smp_wmb(); /* smp write barrier */ + flush_icache_range((unsigned long)start, (unsigned long)end); +} + +static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i) +{ + return ctx->seen & (1 << (31 - i)); +} + +static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) +{ + ctx->seen |= 1 << (31 - i); +} + +static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) +{ + ctx->seen &= ~(1 << (31 - i)); +} + +void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, + u32 *addrs, bool extra_pass); +void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); +void bpf_jit_realloc_regs(struct codegen_context *ctx); + #endif #endif diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h deleted file mode 100644 index 448dfd4d98e1..000000000000 --- a/arch/powerpc/net/bpf_jit32.h +++ /dev/null @@ -1,139 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * bpf_jit32.h: BPF JIT compiler for PPC - * - * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation - * - * Split from bpf_jit.h - */ -#ifndef _BPF_JIT32_H -#define _BPF_JIT32_H - -#include <asm/asm-compat.h> -#include "bpf_jit.h" - -#ifdef CONFIG_PPC64 -#define BPF_PPC_STACK_R3_OFF 48 -#define BPF_PPC_STACK_LOCALS 32 -#define BPF_PPC_STACK_BASIC (48+64) -#define BPF_PPC_STACK_SAVE (18*8) -#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \ - BPF_PPC_STACK_SAVE) -#define BPF_PPC_SLOWPATH_FRAME (48+64) -#else -#define BPF_PPC_STACK_R3_OFF 24 -#define BPF_PPC_STACK_LOCALS 16 -#define BPF_PPC_STACK_BASIC (24+32) -#define BPF_PPC_STACK_SAVE (18*4) -#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \ - BPF_PPC_STACK_SAVE) -#define BPF_PPC_SLOWPATH_FRAME (24+32) -#endif - -#define REG_SZ (BITS_PER_LONG/8) - -/* - * Generated code register usage: - * - * As normal PPC C ABI (e.g. r1=sp, r2=TOC), with: - * - * skb r3 (Entry parameter) - * A register r4 - * X register r5 - * addr param r6 - * r7-r10 scratch - * skb->data r14 - * skb headlen r15 (skb->len - skb->data_len) - * m[0] r16 - * m[...] ... - * m[15] r31 - */ -#define r_skb 3 -#define r_ret 3 -#define r_A 4 -#define r_X 5 -#define r_addr 6 -#define r_scratch1 7 -#define r_scratch2 8 -#define r_D 14 -#define r_HL 15 -#define r_M 16 - -#ifndef __ASSEMBLY__ - -/* - * Assembly helpers from arch/powerpc/net/bpf_jit.S: - */ -#define DECLARE_LOAD_FUNC(func) \ - extern u8 func[], func##_negative_offset[], func##_positive_offset[] - -DECLARE_LOAD_FUNC(sk_load_word); -DECLARE_LOAD_FUNC(sk_load_half); -DECLARE_LOAD_FUNC(sk_load_byte); -DECLARE_LOAD_FUNC(sk_load_byte_msh); - -#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LBZ(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LBZ(r, r, IMM_L(i))); } } while(0) - -#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LD(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LD(r, r, IMM_L(i))); } } while(0) - -#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LWZ(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LWZ(r, r, IMM_L(i))); } } while(0) - -#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LHZ(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LHZ(r, r, IMM_L(i))); } } while(0) - -#ifdef CONFIG_PPC64 -#define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0) -#else -#define PPC_LL_OFFS(r, base, i) do { PPC_LWZ_OFFS(r, base, i); } while(0) -#endif - -#ifdef CONFIG_SMP -#ifdef CONFIG_PPC64 -#define PPC_BPF_LOAD_CPU(r) \ - do { BUILD_BUG_ON(sizeof_field(struct paca_struct, paca_index) != 2); \ - PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index)); \ - } while (0) -#else -#define PPC_BPF_LOAD_CPU(r) \ - do { BUILD_BUG_ON(sizeof_field(struct task_struct, cpu) != 4); \ - PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu)); \ - } while(0) -#endif -#else -#define PPC_BPF_LOAD_CPU(r) do { EMIT(PPC_RAW_LI(r, 0)); } while(0) -#endif - -#define PPC_LHBRX_OFFS(r, base, i) \ - do { PPC_LI32(r, i); EMIT(PPC_RAW_LHBRX(r, r, base)); } while(0) -#ifdef __LITTLE_ENDIAN__ -#define PPC_NTOHS_OFFS(r, base, i) PPC_LHBRX_OFFS(r, base, i) -#else -#define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i) -#endif - -#define PPC_BPF_LL(r, base, i) do { EMIT(PPC_RAW_LWZ(r, base, i)); } while(0) -#define PPC_BPF_STL(r, base, i) do { EMIT(PPC_RAW_STW(r, base, i)); } while(0) -#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STWU(r, base, i)); } while(0) - -#define SEEN_DATAREF 0x10000 /* might call external helpers */ -#define SEEN_XREG 0x20000 /* X reg is used */ -#define SEEN_MEM 0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary - * storage */ -#define SEEN_MEM_MSK 0x0ffff - -struct codegen_context { - unsigned int seen; - unsigned int idx; - int pc_ret0; /* bpf index of first RET #0 instruction (if any) */ -}; - -#endif - -#endif diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index 2e33c6673ff9..7b713edfa7e2 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -39,7 +39,7 @@ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* BPF to ppc register mappings */ -static const int b2p[] = { +const int b2p[MAX_BPF_JIT_REG + 2] = { /* function return value */ [BPF_REG_0] = 8, /* function arguments */ @@ -86,25 +86,6 @@ static const int b2p[] = { } while(0) #define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0) -#define SEEN_FUNC 0x1000 /* might call external helpers */ -#define SEEN_STACK 0x2000 /* uses BPF stack */ -#define SEEN_TAILCALL 0x4000 /* uses tail calls */ - -struct codegen_context { - /* - * This is used to track register usage as well - * as calls to external helpers. - * - register usage is tracked with corresponding - * bits (r3-r10 and r27-r31) - * - rest of the bits can be used to track other - * things -- for now, we use bits 16 to 23 - * encoded in SEEN_* macros above - */ - unsigned int seen; - unsigned int idx; - unsigned int stack_size; -}; - #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/powerpc/net/bpf_jit_asm.S b/arch/powerpc/net/bpf_jit_asm.S deleted file mode 100644 index 2f5030d8383f..000000000000 --- a/arch/powerpc/net/bpf_jit_asm.S +++ /dev/null @@ -1,226 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* bpf_jit.S: Packet/header access helper functions - * for PPC64 BPF compiler. - * - * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation - */ - -#include <asm/ppc_asm.h> -#include <asm/asm-compat.h> -#include "bpf_jit32.h" - -/* - * All of these routines are called directly from generated code, - * whose register usage is: - * - * r3 skb - * r4,r5 A,X - * r6 *** address parameter to helper *** - * r7-r10 scratch - * r14 skb->data - * r15 skb headlen - * r16-31 M[] - */ - -/* - * To consider: These helpers are so small it could be better to just - * generate them inline. Inline code can do the simple headlen check - * then branch directly to slow_path_XXX if required. (In fact, could - * load a spare GPR with the address of slow_path_generic and pass size - * as an argument, making the call site a mtlr, li and bllr.) - */ - .globl sk_load_word -sk_load_word: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_word_neg - .globl sk_load_word_positive_offset -sk_load_word_positive_offset: - /* Are we accessing past headlen? */ - subi r_scratch1, r_HL, 4 - PPC_LCMP r_scratch1, r_addr - blt bpf_slow_path_word - /* Nope, just hitting the header. cr0 here is eq or gt! */ -#ifdef __LITTLE_ENDIAN__ - lwbrx r_A, r_D, r_addr -#else - lwzx r_A, r_D, r_addr -#endif - blr /* Return success, cr0 != LT */ - - .globl sk_load_half -sk_load_half: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_half_neg - .globl sk_load_half_positive_offset -sk_load_half_positive_offset: - subi r_scratch1, r_HL, 2 - PPC_LCMP r_scratch1, r_addr - blt bpf_slow_path_half -#ifdef __LITTLE_ENDIAN__ - lhbrx r_A, r_D, r_addr -#else - lhzx r_A, r_D, r_addr -#endif - blr - - .globl sk_load_byte -sk_load_byte: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_byte_neg - .globl sk_load_byte_positive_offset -sk_load_byte_positive_offset: - PPC_LCMP r_HL, r_addr - ble bpf_slow_path_byte - lbzx r_A, r_D, r_addr - blr - -/* - * BPF_LDX | BPF_B | BPF_MSH: ldxb 4*([offset]&0xf) - * r_addr is the offset value - */ - .globl sk_load_byte_msh -sk_load_byte_msh: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_byte_msh_neg - .globl sk_load_byte_msh_positive_offset -sk_load_byte_msh_positive_offset: - PPC_LCMP r_HL, r_addr - ble bpf_slow_path_byte_msh - lbzx r_X, r_D, r_addr - rlwinm r_X, r_X, 2, 32-4-2, 31-2 - blr - -/* Call out to skb_copy_bits: - * We'll need to back up our volatile regs first; we have - * local variable space at r1+(BPF_PPC_STACK_BASIC). - * Allocate a new stack frame here to remain ABI-compliant in - * stashing LR. - */ -#define bpf_slow_path_common(SIZE) \ - mflr r0; \ - PPC_STL r0, PPC_LR_STKOFF(r1); \ - /* R3 goes in parameter space of caller's frame */ \ - PPC_STL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - PPC_STL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_STL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - addi r5, r1, BPF_PPC_STACK_BASIC+(2*REG_SZ); \ - PPC_STLU r1, -BPF_PPC_SLOWPATH_FRAME(r1); \ - /* R3 = r_skb, as passed */ \ - mr r4, r_addr; \ - li r6, SIZE; \ - bl skb_copy_bits; \ - nop; \ - /* R3 = 0 on success */ \ - addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \ - PPC_LL r0, PPC_LR_STKOFF(r1); \ - PPC_LL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_LL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - mtlr r0; \ - PPC_LCMPI r3, 0; \ - blt bpf_error; /* cr0 = LT */ \ - PPC_LL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - /* Great success! */ - -bpf_slow_path_word: - bpf_slow_path_common(4) - /* Data value is on stack, and cr0 != LT */ - lwz r_A, BPF_PPC_STACK_BASIC+(2*REG_SZ)(r1) - blr - -bpf_slow_path_half: - bpf_slow_path_common(2) - lhz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1) - blr - -bpf_slow_path_byte: - bpf_slow_path_common(1) - lbz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1) - blr - -bpf_slow_path_byte_msh: - bpf_slow_path_common(1) - lbz r_X, BPF_PPC_STACK_BASIC+(2*8)(r1) - rlwinm r_X, r_X, 2, 32-4-2, 31-2 - blr - -/* Call out to bpf_internal_load_pointer_neg_helper: - * We'll need to back up our volatile regs first; we have - * local variable space at r1+(BPF_PPC_STACK_BASIC). - * Allocate a new stack frame here to remain ABI-compliant in - * stashing LR. - */ -#define sk_negative_common(SIZE) \ - mflr r0; \ - PPC_STL r0, PPC_LR_STKOFF(r1); \ - /* R3 goes in parameter space of caller's frame */ \ - PPC_STL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - PPC_STL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_STL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - PPC_STLU r1, -BPF_PPC_SLOWPATH_FRAME(r1); \ - /* R3 = r_skb, as passed */ \ - mr r4, r_addr; \ - li r5, SIZE; \ - bl bpf_internal_load_pointer_neg_helper; \ - nop; \ - /* R3 != 0 on success */ \ - addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \ - PPC_LL r0, PPC_LR_STKOFF(r1); \ - PPC_LL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_LL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - mtlr r0; \ - PPC_LCMPLI r3, 0; \ - beq bpf_error_slow; /* cr0 = EQ */ \ - mr r_addr, r3; \ - PPC_LL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - /* Great success! */ - -bpf_slow_path_word_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_word_negative_offset -sk_load_word_negative_offset: - sk_negative_common(4) - lwz r_A, 0(r_addr) - blr - -bpf_slow_path_half_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_half_negative_offset -sk_load_half_negative_offset: - sk_negative_common(2) - lhz r_A, 0(r_addr) - blr - -bpf_slow_path_byte_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_byte_negative_offset -sk_load_byte_negative_offset: - sk_negative_common(1) - lbz r_A, 0(r_addr) - blr - -bpf_slow_path_byte_msh_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_byte_msh_negative_offset -sk_load_byte_msh_negative_offset: - sk_negative_common(1) - lbz r_X, 0(r_addr) - rlwinm r_X, r_X, 2, 32-4-2, 31-2 - blr - -bpf_error_slow: - /* fabricate a cr0 = lt */ - li r_scratch1, -1 - PPC_LCMPI r_scratch1, 0 -bpf_error: - /* Entered with cr0 = lt */ - li r3, 0 - /* Generated code will 'blt epilogue', returning 0. */ - blr diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index e809cb5a1631..798ac4350a82 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -1,10 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-only -/* bpf_jit_comp.c: BPF JIT compiler +/* + * eBPF JIT compiler * - * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation + * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> + * IBM Corporation * - * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com) - * Ported to ppc32 by Denis Kirjanov <kda@linux-powerpc.org> + * Based on the powerpc classic BPF JIT compiler by Matt Evans */ #include <linux/moduleloader.h> #include <asm/cacheflush.h> @@ -12,639 +13,204 @@ #include <linux/netdevice.h> #include <linux/filter.h> #include <linux/if_vlan.h> +#include <asm/kprobes.h> +#include <linux/bpf.h> -#include "bpf_jit32.h" +#include "bpf_jit.h" -static inline void bpf_flush_icache(void *start, void *end) +static void bpf_jit_fill_ill_insns(void *area, unsigned int size) { - smp_wmb(); - flush_icache_range((unsigned long)start, (unsigned long)end); + memset32(area, BREAKPOINT_INSTRUCTION, size / 4); } -static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx) +/* Fix the branch target addresses for subprog calls */ +static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, u32 *addrs) { - int i; - const struct sock_filter *filter = fp->insns; - - if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) { - /* Make stackframe */ - if (ctx->seen & SEEN_DATAREF) { - /* If we call any helpers (for loads), save LR */ - EMIT(PPC_INST_MFLR | __PPC_RT(R0)); - PPC_BPF_STL(0, 1, PPC_LR_STKOFF); - - /* Back up non-volatile regs. */ - PPC_BPF_STL(r_D, 1, -(REG_SZ*(32-r_D))); - PPC_BPF_STL(r_HL, 1, -(REG_SZ*(32-r_HL))); - } - if (ctx->seen & SEEN_MEM) { - /* - * Conditionally save regs r15-r31 as some will be used - * for M[] data. - */ - for (i = r_M; i < (r_M+16); i++) { - if (ctx->seen & (1 << (i-r_M))) - PPC_BPF_STL(i, 1, -(REG_SZ*(32-i))); - } - } - PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME); - } - - if (ctx->seen & SEEN_DATAREF) { - /* - * If this filter needs to access skb data, - * prepare r_D and r_HL: - * r_HL = skb->len - skb->data_len - * r_D = skb->data - */ - PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, - data_len)); - PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len)); - EMIT(PPC_RAW_SUB(r_HL, r_HL, r_scratch1)); - PPC_LL_OFFS(r_D, r_skb, offsetof(struct sk_buff, data)); - } + const struct bpf_insn *insn = fp->insnsi; + bool func_addr_fixed; + u64 func_addr; + u32 tmp_idx; + int i, ret; - if (ctx->seen & SEEN_XREG) { + for (i = 0; i < fp->len; i++) { /* - * TODO: Could also detect whether first instr. sets X and - * avoid this (as below, with A). + * During the extra pass, only the branch target addresses for + * the subprog calls need to be fixed. All other instructions + * can left untouched. + * + * The JITed image length does not change because we already + * ensure that the JITed instruction sequence for these calls + * are of fixed length by padding them with NOPs. */ - EMIT(PPC_RAW_LI(r_X, 0)); - } - - /* make sure we dont leak kernel information to user */ - if (bpf_needs_clear_a(&filter[0])) - EMIT(PPC_RAW_LI(r_A, 0)); -} - -static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) -{ - int i; - - if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) { - EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME)); - if (ctx->seen & SEEN_DATAREF) { - PPC_BPF_LL(0, 1, PPC_LR_STKOFF); - EMIT(PPC_RAW_MTLR(0)); - PPC_BPF_LL(r_D, 1, -(REG_SZ*(32-r_D))); - PPC_BPF_LL(r_HL, 1, -(REG_SZ*(32-r_HL))); - } - if (ctx->seen & SEEN_MEM) { - /* Restore any saved non-vol registers */ - for (i = r_M; i < (r_M+16); i++) { - if (ctx->seen & (1 << (i-r_M))) - PPC_BPF_LL(i, 1, -(REG_SZ*(32-i))); - } - } - } - /* The RETs have left a return value in R3. */ - - EMIT(PPC_RAW_BLR()); -} - -#define CHOOSE_LOAD_FUNC(K, func) \ - ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) - -/* Assemble the body code between the prologue & epilogue. */ -static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, - unsigned int *addrs) -{ - const struct sock_filter *filter = fp->insns; - int flen = fp->len; - u8 *func; - unsigned int true_cond; - int i; - - /* Start of epilogue code */ - unsigned int exit_addr = addrs[flen]; - - for (i = 0; i < flen; i++) { - unsigned int K = filter[i].k; - u16 code = bpf_anc_helper(&filter[i]); + if (insn[i].code == (BPF_JMP | BPF_CALL) && + insn[i].src_reg == BPF_PSEUDO_CALL) { + ret = bpf_jit_get_func_addr(fp, &insn[i], true, + &func_addr, + &func_addr_fixed); + if (ret < 0) + return ret; - /* - * addrs[] maps a BPF bytecode address into a real offset from - * the start of the body code. - */ - addrs[i] = ctx->idx * 4; - - switch (code) { - /*** ALU ops ***/ - case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_ADD(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */ - if (!K) - break; - EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(K))); - if (K >= 32768) - EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(K))); - break; - case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_SUB(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ - if (!K) - break; - EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(-K))); - if (K >= 32768) - EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(-K))); - break; - case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_MULW(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ - if (K < 32768) - EMIT(PPC_RAW_MULI(r_A, r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_MULW(r_A, r_A, r_scratch1)); - } - break; - case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */ - case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_CMPWI(r_X, 0)); - if (ctx->pc_ret0 != -1) { - PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); - } else { - PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12); - EMIT(PPC_RAW_LI(r_ret, 0)); - PPC_JMP(exit_addr); - } - if (code == (BPF_ALU | BPF_MOD | BPF_X)) { - EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_X)); - EMIT(PPC_RAW_MULW(r_scratch1, r_X, r_scratch1)); - EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1)); - } else { - EMIT(PPC_RAW_DIVWU(r_A, r_A, r_X)); - } - break; - case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */ - PPC_LI32(r_scratch2, K); - EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_scratch2)); - EMIT(PPC_RAW_MULW(r_scratch1, r_scratch2, r_scratch1)); - EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1)); - break; - case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ - if (K == 1) - break; - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_DIVWU(r_A, r_A, r_scratch1)); - break; - case BPF_ALU | BPF_AND | BPF_X: - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_AND(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_AND | BPF_K: - if (!IMM_H(K)) - EMIT(PPC_RAW_ANDI(r_A, r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_AND(r_A, r_A, r_scratch1)); - } - break; - case BPF_ALU | BPF_OR | BPF_X: - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_OR(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_OR | BPF_K: - if (IMM_L(K)) - EMIT(PPC_RAW_ORI(r_A, r_A, IMM_L(K))); - if (K >= 65536) - EMIT(PPC_RAW_ORIS(r_A, r_A, IMM_H(K))); - break; - case BPF_ANC | SKF_AD_ALU_XOR_X: - case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_XOR(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ - if (IMM_L(K)) - EMIT(PPC_RAW_XORI(r_A, r_A, IMM_L(K))); - if (K >= 65536) - EMIT(PPC_RAW_XORIS(r_A, r_A, IMM_H(K))); - break; - case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_SLW(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_LSH | BPF_K: - if (K == 0) - break; - else - EMIT(PPC_RAW_SLWI(r_A, r_A, K)); - break; - case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_SRW(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ - if (K == 0) - break; - else - EMIT(PPC_RAW_SRWI(r_A, r_A, K)); - break; - case BPF_ALU | BPF_NEG: - EMIT(PPC_RAW_NEG(r_A, r_A)); - break; - case BPF_RET | BPF_K: - PPC_LI32(r_ret, K); - if (!K) { - if (ctx->pc_ret0 == -1) - ctx->pc_ret0 = i; - } - /* - * If this isn't the very last instruction, branch to - * the epilogue if we've stuff to clean up. Otherwise, - * if there's nothing to tidy, just return. If we /are/ - * the last instruction, we're about to fall through to - * the epilogue to return. - */ - if (i != flen - 1) { - /* - * Note: 'seen' is properly valid only on pass - * #2. Both parts of this conditional are the - * same instruction size though, meaning the - * first pass will still correctly determine the - * code size/addresses. - */ - if (ctx->seen) - PPC_JMP(exit_addr); - else - EMIT(PPC_RAW_BLR()); - } - break; - case BPF_RET | BPF_A: - EMIT(PPC_RAW_MR(r_ret, r_A)); - if (i != flen - 1) { - if (ctx->seen) - PPC_JMP(exit_addr); - else - EMIT(PPC_RAW_BLR()); - } - break; - case BPF_MISC | BPF_TAX: /* X = A */ - EMIT(PPC_RAW_MR(r_X, r_A)); - break; - case BPF_MISC | BPF_TXA: /* A = X */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_MR(r_A, r_X)); - break; - - /*** Constant loads/M[] access ***/ - case BPF_LD | BPF_IMM: /* A = K */ - PPC_LI32(r_A, K); - break; - case BPF_LDX | BPF_IMM: /* X = K */ - PPC_LI32(r_X, K); - break; - case BPF_LD | BPF_MEM: /* A = mem[K] */ - EMIT(PPC_RAW_MR(r_A, r_M + (K & 0xf))); - ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_LDX | BPF_MEM: /* X = mem[K] */ - EMIT(PPC_RAW_MR(r_X, r_M + (K & 0xf))); - ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_ST: /* mem[K] = A */ - EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_A)); - ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_STX: /* mem[K] = X */ - EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_X)); - ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ - BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4); - PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len)); - break; - case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */ - PPC_LWZ_OFFS(r_A, r_skb, K); - break; - case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ - PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len)); - break; - - /*** Ancillary info loads ***/ - case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ - BUILD_BUG_ON(sizeof_field(struct sk_buff, - protocol) != 2); - PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff, - protocol)); - break; - case BPF_ANC | SKF_AD_IFINDEX: - case BPF_ANC | SKF_AD_HATYPE: - BUILD_BUG_ON(sizeof_field(struct net_device, - ifindex) != 4); - BUILD_BUG_ON(sizeof_field(struct net_device, - type) != 2); - PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, - dev)); - EMIT(PPC_RAW_CMPDI(r_scratch1, 0)); - if (ctx->pc_ret0 != -1) { - PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); - } else { - /* Exit, returning 0; first pass hits here. */ - PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12); - EMIT(PPC_RAW_LI(r_ret, 0)); - PPC_JMP(exit_addr); - } - if (code == (BPF_ANC | SKF_AD_IFINDEX)) { - PPC_LWZ_OFFS(r_A, r_scratch1, - offsetof(struct net_device, ifindex)); - } else { - PPC_LHZ_OFFS(r_A, r_scratch1, - offsetof(struct net_device, type)); - } - - break; - case BPF_ANC | SKF_AD_MARK: - BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4); - PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - mark)); - break; - case BPF_ANC | SKF_AD_RXHASH: - BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4); - PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - hash)); - break; - case BPF_ANC | SKF_AD_VLAN_TAG: - BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2); - - PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - vlan_tci)); - break; - case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: - PPC_LBZ_OFFS(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET()); - if (PKT_VLAN_PRESENT_BIT) - EMIT(PPC_RAW_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT)); - if (PKT_VLAN_PRESENT_BIT < 7) - EMIT(PPC_RAW_ANDI(r_A, r_A, 1)); - break; - case BPF_ANC | SKF_AD_QUEUE: - BUILD_BUG_ON(sizeof_field(struct sk_buff, - queue_mapping) != 2); - PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - queue_mapping)); - break; - case BPF_ANC | SKF_AD_PKTTYPE: - PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET()); - EMIT(PPC_RAW_ANDI(r_A, r_A, PKT_TYPE_MAX)); - EMIT(PPC_RAW_SRWI(r_A, r_A, 5)); - break; - case BPF_ANC | SKF_AD_CPU: - PPC_BPF_LOAD_CPU(r_A); - break; - /*** Absolute loads from packet header/data ***/ - case BPF_LD | BPF_W | BPF_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_word); - goto common_load; - case BPF_LD | BPF_H | BPF_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_half); - goto common_load; - case BPF_LD | BPF_B | BPF_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_byte); - common_load: - /* Load from [K]. */ - ctx->seen |= SEEN_DATAREF; - PPC_FUNC_ADDR(r_scratch1, func); - EMIT(PPC_RAW_MTLR(r_scratch1)); - PPC_LI32(r_addr, K); - EMIT(PPC_RAW_BLRL()); /* - * Helper returns 'lt' condition on error, and an - * appropriate return value in r3 + * Save ctx->idx as this would currently point to the + * end of the JITed image and set it to the offset of + * the instruction sequence corresponding to the + * subprog call temporarily. */ - PPC_BCC(COND_LT, exit_addr); - break; - - /*** Indirect loads from packet header/data ***/ - case BPF_LD | BPF_W | BPF_IND: - func = sk_load_word; - goto common_load_ind; - case BPF_LD | BPF_H | BPF_IND: - func = sk_load_half; - goto common_load_ind; - case BPF_LD | BPF_B | BPF_IND: - func = sk_load_byte; - common_load_ind: + tmp_idx = ctx->idx; + ctx->idx = addrs[i] / 4; + bpf_jit_emit_func_call_rel(image, ctx, func_addr); + /* - * Load from [X + K]. Negative offsets are tested for - * in the helper functions. - */ - ctx->seen |= SEEN_DATAREF | SEEN_XREG; - PPC_FUNC_ADDR(r_scratch1, func); - EMIT(PPC_RAW_MTLR(r_scratch1)); - EMIT(PPC_RAW_ADDI(r_addr, r_X, IMM_L(K))); - if (K >= 32768) - EMIT(PPC_RAW_ADDIS(r_addr, r_addr, IMM_HA(K))); - EMIT(PPC_RAW_BLRL()); - /* If error, cr0.LT set */ - PPC_BCC(COND_LT, exit_addr); - break; - - case BPF_LDX | BPF_B | BPF_MSH: - func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); - goto common_load; - break; - - /*** Jump and branches ***/ - case BPF_JMP | BPF_JA: - if (K != 0) - PPC_JMP(addrs[i + 1 + K]); - break; - - case BPF_JMP | BPF_JGT | BPF_K: - case BPF_JMP | BPF_JGT | BPF_X: - true_cond = COND_GT; - goto cond_branch; - case BPF_JMP | BPF_JGE | BPF_K: - case BPF_JMP | BPF_JGE | BPF_X: - true_cond = COND_GE; - goto cond_branch; - case BPF_JMP | BPF_JEQ | BPF_K: - case BPF_JMP | BPF_JEQ | BPF_X: - true_cond = COND_EQ; - goto cond_branch; - case BPF_JMP | BPF_JSET | BPF_K: - case BPF_JMP | BPF_JSET | BPF_X: - true_cond = COND_NE; - cond_branch: - /* same targets, can avoid doing the test :) */ - if (filter[i].jt == filter[i].jf) { - if (filter[i].jt > 0) - PPC_JMP(addrs[i + 1 + filter[i].jt]); - break; - } - - switch (code) { - case BPF_JMP | BPF_JGT | BPF_X: - case BPF_JMP | BPF_JGE | BPF_X: - case BPF_JMP | BPF_JEQ | BPF_X: - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_CMPLW(r_A, r_X)); - break; - case BPF_JMP | BPF_JSET | BPF_X: - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, r_X)); - break; - case BPF_JMP | BPF_JEQ | BPF_K: - case BPF_JMP | BPF_JGT | BPF_K: - case BPF_JMP | BPF_JGE | BPF_K: - if (K < 32768) - EMIT(PPC_RAW_CMPLWI(r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_CMPLW(r_A, r_scratch1)); - } - break; - case BPF_JMP | BPF_JSET | BPF_K: - if (K < 32768) - /* PPC_ANDI is /only/ dot-form */ - EMIT(PPC_RAW_ANDI(r_scratch1, r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, - r_scratch1)); - } - break; - } - /* Sometimes branches are constructed "backward", with - * the false path being the branch and true path being - * a fallthrough to the next instruction. + * Restore ctx->idx here. This is safe as the length + * of the JITed sequence remains unchanged. */ - if (filter[i].jt == 0) - /* Swap the sense of the branch */ - PPC_BCC(true_cond ^ COND_CMP_TRUE, - addrs[i + 1 + filter[i].jf]); - else { - PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]); - if (filter[i].jf != 0) - PPC_JMP(addrs[i + 1 + filter[i].jf]); - } - break; - default: - /* The filter contains something cruel & unusual. - * We don't handle it, but also there shouldn't be - * anything missing from our list. - */ - if (printk_ratelimit()) - pr_err("BPF filter opcode %04x (@%d) unsupported\n", - filter[i].code, i); - return -ENOTSUPP; + ctx->idx = tmp_idx; } - } - /* Set end-of-body-code address for exit. */ - addrs[i] = ctx->idx * 4; return 0; } -void bpf_jit_compile(struct bpf_prog *fp) +struct powerpc64_jit_data { + struct bpf_binary_header *header; + u32 *addrs; + u8 *image; + u32 proglen; + struct codegen_context ctx; +}; + +bool bpf_jit_needs_zext(void) +{ + return true; +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) { - unsigned int proglen; - unsigned int alloclen; - u32 *image = NULL; + u32 proglen; + u32 alloclen; + u8 *image = NULL; u32 *code_base; - unsigned int *addrs; + u32 *addrs; + struct powerpc64_jit_data *jit_data; struct codegen_context cgctx; int pass; - int flen = fp->len; + int flen; + struct bpf_binary_header *bpf_hdr; + struct bpf_prog *org_fp = fp; + struct bpf_prog *tmp_fp; + bool bpf_blinded = false; + bool extra_pass = false; + + if (!fp->jit_requested) + return org_fp; + + tmp_fp = bpf_jit_blind_constants(org_fp); + if (IS_ERR(tmp_fp)) + return org_fp; + + if (tmp_fp != org_fp) { + bpf_blinded = true; + fp = tmp_fp; + } + + jit_data = fp->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + fp = org_fp; + goto out; + } + fp->aux->jit_data = jit_data; + } - if (!bpf_jit_enable) - return; + flen = fp->len; + addrs = jit_data->addrs; + if (addrs) { + cgctx = jit_data->ctx; + image = jit_data->image; + bpf_hdr = jit_data->header; + proglen = jit_data->proglen; + alloclen = proglen + FUNCTION_DESCR_SIZE; + extra_pass = true; + goto skip_init_ctx; + } addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL); - if (addrs == NULL) - return; + if (addrs == NULL) { + fp = org_fp; + goto out_addrs; + } - /* - * There are multiple assembly passes as the generated code will change - * size as it settles down, figuring out the max branch offsets/exit - * paths required. - * - * The range of standard conditional branches is +/- 32Kbytes. Since - * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to - * finish with 8 bytes/instruction. Not feasible, so long jumps are - * used, distinct from short branches. - * - * Current: - * - * For now, both branch types assemble to 2 words (short branches padded - * with a NOP); this is less efficient, but assembly will always complete - * after exactly 3 passes: - * - * First pass: No code buffer; Program is "faux-generated" -- no code - * emitted but maximum size of output determined (and addrs[] filled - * in). Also, we note whether we use M[], whether we use skb data, etc. - * All generation choices assumed to be 'worst-case', e.g. branches all - * far (2 instructions), return path code reduction not available, etc. - * - * Second pass: Code buffer allocated with size determined previously. - * Prologue generated to support features we have seen used. Exit paths - * determined and addrs[] is filled in again, as code may be slightly - * smaller as a result. - * - * Third pass: Code generated 'for real', and branch destinations - * determined from now-accurate addrs[] map. - * - * Ideal: - * - * If we optimise this, near branches will be shorter. On the - * first assembly pass, we should err on the side of caution and - * generate the biggest code. On subsequent passes, branches will be - * generated short or long and code size will reduce. With smaller - * code, more branches may fall into the short category, and code will - * reduce more. - * - * Finally, if we see one pass generate code the same size as the - * previous pass we have converged and should now generate code for - * real. Allocating at the end will also save the memory that would - * otherwise be wasted by the (small) current code shrinkage. - * Preferably, we should do a small number of passes (e.g. 5) and if we - * haven't converged by then, get impatient and force code to generate - * as-is, even if the odd branch would be left long. The chances of a - * long jump are tiny with all but the most enormous of BPF filter - * inputs, so we should usually converge on the third pass. - */ + memset(&cgctx, 0, sizeof(struct codegen_context)); + memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p)); + + /* Make sure that the stack is quadword aligned. */ + cgctx.stack_size = round_up(fp->aux->stack_depth, 16); - cgctx.idx = 0; - cgctx.seen = 0; - cgctx.pc_ret0 = -1; /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { /* We hit something illegal or unsupported. */ - goto out; + fp = org_fp; + goto out_addrs; + } + + /* + * If we have seen a tail call, we need a second pass. + * This is because bpf_jit_emit_common_epilogue() is called + * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. + */ + if (cgctx.seen & SEEN_TAILCALL) { + cgctx.idx = 0; + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + fp = org_fp; + goto out_addrs; + } + } + bpf_jit_realloc_regs(&cgctx); /* * Pretend to build prologue, given the features we've seen. This will * update ctgtx.idx as it pretends to output instructions, then we can * calculate total size from idx. */ - bpf_jit_build_prologue(fp, 0, &cgctx); + bpf_jit_build_prologue(0, &cgctx); bpf_jit_build_epilogue(0, &cgctx); proglen = cgctx.idx * 4; alloclen = proglen + FUNCTION_DESCR_SIZE; - image = module_alloc(alloclen); - if (!image) - goto out; - code_base = image + (FUNCTION_DESCR_SIZE/4); + bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns); + if (!bpf_hdr) { + fp = org_fp; + goto out_addrs; + } + +skip_init_ctx: + code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); + + if (extra_pass) { + /* + * Do not touch the prologue and epilogue as they will remain + * unchanged. Only fix the branch target address for subprog + * calls in the body. + * + * This does not change the offsets and lengths of the subprog + * call instruction sequences and hence, the size of the JITed + * image as well. + */ + bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); + + /* There is no need to perform the usual passes. */ + goto skip_codegen_passes; + } /* Code generation passes 1-2 */ for (pass = 1; pass < 3; pass++) { /* Now build the prologue, body code & epilogue for real. */ cgctx.idx = 0; - bpf_jit_build_prologue(fp, code_base, &cgctx); - bpf_jit_build_body(fp, code_base, &cgctx, addrs); + bpf_jit_build_prologue(code_base, &cgctx); + bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass); bpf_jit_build_epilogue(code_base, &cgctx); if (bpf_jit_enable > 1) @@ -652,15 +218,15 @@ void bpf_jit_compile(struct bpf_prog *fp) proglen - (cgctx.idx * 4), cgctx.seen); } +skip_codegen_passes: if (bpf_jit_enable > 1) - /* Note that we output the base address of the code_base + /* + * Note that we output the base address of the code_base * rather than image, since opcodes are in code_base. */ bpf_jit_dump(flen, proglen, pass, code_base); - bpf_flush_icache(code_base, code_base + (proglen/4)); - -#ifdef CONFIG_PPC64 +#ifdef PPC64_ELF_ABI_v1 /* Function descriptor nastiness: Address + TOC */ ((u64 *)image)[0] = (u64)code_base; ((u64 *)image)[1] = local_paca->kernel_toc; @@ -668,16 +234,38 @@ void bpf_jit_compile(struct bpf_prog *fp) fp->bpf_func = (void *)image; fp->jited = 1; + fp->jited_len = alloclen; + + bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); + if (!fp->is_func || extra_pass) { + bpf_prog_fill_jited_linfo(fp, addrs); +out_addrs: + kfree(addrs); + kfree(jit_data); + fp->aux->jit_data = NULL; + } else { + jit_data->addrs = addrs; + jit_data->ctx = cgctx; + jit_data->proglen = proglen; + jit_data->image = image; + jit_data->header = bpf_hdr; + } out: - kfree(addrs); - return; + if (bpf_blinded) + bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); + + return fp; } +/* Overriding bpf_jit_free() as we don't set images read-only. */ void bpf_jit_free(struct bpf_prog *fp) { + unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; + struct bpf_binary_header *bpf_hdr = (void *)addr; + if (fp->jited) - module_memfree(fp->bpf_func); + bpf_jit_binary_free(bpf_hdr); bpf_prog_unlock_free(fp); } diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c new file mode 100644 index 000000000000..bbb16099e8c7 --- /dev/null +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -0,0 +1,1100 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * eBPF JIT compiler for PPC32 + * + * Copyright 2020 Christophe Leroy <christophe.leroy@csgroup.eu> + * CS GROUP France + * + * Based on PPC64 eBPF JIT compiler by Naveen N. Rao + */ +#include <linux/moduleloader.h> +#include <asm/cacheflush.h> +#include <asm/asm-compat.h> +#include <linux/netdevice.h> +#include <linux/filter.h> +#include <linux/if_vlan.h> +#include <asm/kprobes.h> +#include <linux/bpf.h> + +#include "bpf_jit.h" + +/* + * Stack layout: + * + * [ prev sp ] <------------- + * [ nv gpr save area ] 16 * 4 | + * fp (r31) --> [ ebpf stack space ] upto 512 | + * [ frame header ] 16 | + * sp (r1) ---> [ stack pointer ] -------------- + */ + +/* for gpr non volatile registers r17 to r31 (14) + tail call */ +#define BPF_PPC_STACK_SAVE (15 * 4 + 4) +/* stack frame, ensure this is quadword aligned */ +#define BPF_PPC_STACKFRAME(ctx) (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size) + +/* BPF register usage */ +#define TMP_REG (MAX_BPF_JIT_REG + 0) + +/* BPF to ppc register mappings */ +const int b2p[MAX_BPF_JIT_REG + 1] = { + /* function return value */ + [BPF_REG_0] = 12, + /* function arguments */ + [BPF_REG_1] = 4, + [BPF_REG_2] = 6, + [BPF_REG_3] = 8, + [BPF_REG_4] = 10, + [BPF_REG_5] = 22, + /* non volatile registers */ + [BPF_REG_6] = 24, + [BPF_REG_7] = 26, + [BPF_REG_8] = 28, + [BPF_REG_9] = 30, + /* frame pointer aka BPF_REG_10 */ + [BPF_REG_FP] = 18, + /* eBPF jit internal registers */ + [BPF_REG_AX] = 20, + [TMP_REG] = 31, /* 32 bits */ +}; + +static int bpf_to_ppc(struct codegen_context *ctx, int reg) +{ + return ctx->b2p[reg]; +} + +/* PPC NVR range -- update this if we ever use NVRs below r17 */ +#define BPF_PPC_NVR_MIN 17 +#define BPF_PPC_TC 16 + +static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) +{ + if ((reg >= BPF_PPC_NVR_MIN && reg < 32) || reg == BPF_PPC_TC) + return BPF_PPC_STACKFRAME(ctx) - 4 * (32 - reg); + + WARN(true, "BPF JIT is asking about unknown registers, will crash the stack"); + /* Use the hole we have left for alignment */ + return BPF_PPC_STACKFRAME(ctx) - 4; +} + +void bpf_jit_realloc_regs(struct codegen_context *ctx) +{ + if (ctx->seen & SEEN_FUNC) + return; + + while (ctx->seen & SEEN_NVREG_MASK && + (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) { + int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab)); + int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa)); + int i; + + for (i = BPF_REG_0; i <= TMP_REG; i++) { + if (ctx->b2p[i] != old) + continue; + ctx->b2p[i] = new; + bpf_set_seen_register(ctx, new); + bpf_clear_seen_register(ctx, old); + if (i != TMP_REG) { + bpf_set_seen_register(ctx, new - 1); + bpf_clear_seen_register(ctx, old - 1); + } + break; + } + } +} + +void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) +{ + int i; + + /* First arg comes in as a 32 bits pointer. */ + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), __REG_R3)); + EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0)); + EMIT(PPC_RAW_STWU(__REG_R1, __REG_R1, -BPF_PPC_STACKFRAME(ctx))); + + /* + * Initialize tail_call_cnt in stack frame if we do tail calls. + * Otherwise, put in NOPs so that it can be skipped when we are + * invoked through a tail call. + */ + if (ctx->seen & SEEN_TAILCALL) { + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + } else { + EMIT(PPC_RAW_NOP()); + } + +#define BPF_TAILCALL_PROLOGUE_SIZE 16 + + /* + * We need a stack frame, but we don't necessarily need to + * save/restore LR unless we call other functions + */ + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_MFLR(__REG_R0)); + + /* + * Back up non-volatile regs -- registers r18-r31 + */ + for (i = BPF_PPC_NVR_MIN; i <= 31; i++) + if (bpf_is_seen_register(ctx, i)) + EMIT(PPC_RAW_STW(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i))); + + /* If needed retrieve arguments 9 and 10, ie 5th 64 bits arg.*/ + if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { + EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 8); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 12); + } + + /* Setup frame pointer to point to the bpf stack area */ + if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) { + EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0)); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), __REG_R1, + STACK_FRAME_MIN_SIZE + ctx->stack_size)); + } + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); +} + +static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx) +{ + int i; + + /* Restore NVRs */ + for (i = BPF_PPC_NVR_MIN; i <= 31; i++) + if (bpf_is_seen_register(ctx, i)) + EMIT(PPC_RAW_LWZ(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i))); +} + +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +{ + EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_0))); + + bpf_jit_emit_common_epilogue(image, ctx); + + /* Tear down our stack frame */ + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + + EMIT(PPC_RAW_ADDI(__REG_R1, __REG_R1, BPF_PPC_STACKFRAME(ctx))); + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_MTLR(__REG_R0)); + + EMIT(PPC_RAW_BLR()); +} + +void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +{ + s32 rel = (s32)func - (s32)(image + ctx->idx); + + if (image && rel < 0x2000000 && rel >= -0x2000000) { + PPC_BL_ABS(func); + } else { + /* Load function address into r0 */ + EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func))); + EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func))); + EMIT(PPC_RAW_MTLR(__REG_R0)); + EMIT(PPC_RAW_BLRL()); + } +} + +static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) +{ + /* + * By now, the eBPF program has already setup parameters in r3-r6 + * r3-r4/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program + * r5-r6/BPF_REG_2 - pointer to bpf_array + * r7-r8/BPF_REG_3 - index in bpf_array + */ + int b2p_bpf_array = bpf_to_ppc(ctx, BPF_REG_2); + int b2p_index = bpf_to_ppc(ctx, BPF_REG_3); + + /* + * if (index >= array->map.max_entries) + * goto out; + */ + EMIT(PPC_RAW_LWZ(__REG_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); + EMIT(PPC_RAW_CMPLW(b2p_index, __REG_R0)); + EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + PPC_BCC(COND_GE, out); + + /* + * if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * goto out; + */ + EMIT(PPC_RAW_CMPLWI(__REG_R0, MAX_TAIL_CALL_CNT)); + /* tail_call_cnt++; */ + EMIT(PPC_RAW_ADDIC(__REG_R0, __REG_R0, 1)); + PPC_BCC(COND_GT, out); + + /* prog = array->ptrs[index]; */ + EMIT(PPC_RAW_RLWINM(__REG_R3, b2p_index, 2, 0, 29)); + EMIT(PPC_RAW_ADD(__REG_R3, __REG_R3, b2p_bpf_array)); + EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_array, ptrs))); + EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + + /* + * if (prog == NULL) + * goto out; + */ + EMIT(PPC_RAW_CMPLWI(__REG_R3, 0)); + PPC_BCC(COND_EQ, out); + + /* goto *(prog->bpf_func + prologue_size); */ + EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_prog, bpf_func))); + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + + EMIT(PPC_RAW_ADDIC(__REG_R3, __REG_R3, BPF_TAILCALL_PROLOGUE_SIZE)); + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_MTLR(__REG_R0)); + + EMIT(PPC_RAW_MTCTR(__REG_R3)); + + EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_1))); + + /* tear restore NVRs, ... */ + bpf_jit_emit_common_epilogue(image, ctx); + + EMIT(PPC_RAW_BCTR()); + /* out: */ +} + +/* Assemble the body code between the prologue & epilogue */ +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, + u32 *addrs, bool extra_pass) +{ + const struct bpf_insn *insn = fp->insnsi; + int flen = fp->len; + int i, ret; + + /* Start of epilogue code - will only be valid 2nd pass onwards */ + u32 exit_addr = addrs[flen]; + + for (i = 0; i < flen; i++) { + u32 code = insn[i].code; + u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg); + u32 dst_reg_h = dst_reg - 1; + u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg); + u32 src_reg_h = src_reg - 1; + u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG); + s16 off = insn[i].off; + s32 imm = insn[i].imm; + bool func_addr_fixed; + u64 func_addr; + u32 true_cond; + + /* + * addrs[] maps a BPF bytecode address into a real offset from + * the start of the body code. + */ + addrs[i] = ctx->idx * 4; + + /* + * As an optimization, we note down which registers + * are used so that we can only save/restore those in our + * prologue and epilogue. We do this here regardless of whether + * the actual BPF instruction uses src/dst registers or not + * (for instance, BPF_CALL does not use them). The expectation + * is that those instructions will have src_reg/dst_reg set to + * 0. Even otherwise, we just lose some prologue/epilogue + * optimization but everything else should work without + * any issues. + */ + if (dst_reg >= 3 && dst_reg < 32) { + bpf_set_seen_register(ctx, dst_reg); + bpf_set_seen_register(ctx, dst_reg_h); + } + + if (src_reg >= 3 && src_reg < 32) { + bpf_set_seen_register(ctx, src_reg); + bpf_set_seen_register(ctx, src_reg_h); + } + + switch (code) { + /* + * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG + */ + case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */ + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */ + EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_ADDE(dst_reg_h, dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */ + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */ + EMIT(PPC_RAW_SUBFC(dst_reg, src_reg, dst_reg)); + EMIT(PPC_RAW_SUBFE(dst_reg_h, src_reg_h, dst_reg_h)); + break; + case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ + imm = -imm; + fallthrough; + case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ + if (IMM_HA(imm) & 0xffff) + EMIT(PPC_RAW_ADDIS(dst_reg, dst_reg, IMM_HA(imm))); + if (IMM_L(imm)) + EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); + break; + case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */ + imm = -imm; + fallthrough; + case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */ + if (!imm) + break; + + if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_ADDIC(dst_reg, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, __REG_R0)); + } + if (imm >= 0) + EMIT(PPC_RAW_ADDZE(dst_reg_h, dst_reg_h)); + else + EMIT(PPC_RAW_ADDME(dst_reg_h, dst_reg_h)); + break; + case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */ + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_MULW(__REG_R0, dst_reg, src_reg_h)); + EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_MULHWU(tmp_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg)); + break; + case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */ + if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_MULI(dst_reg, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, __REG_R0)); + } + break; + case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */ + if (!imm) { + PPC_LI32(dst_reg, 0); + PPC_LI32(dst_reg_h, 0); + break; + } + if (imm == 1) + break; + if (imm == -1) { + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + break; + } + bpf_set_seen_register(ctx, tmp_reg); + PPC_LI32(tmp_reg, imm); + EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, tmp_reg)); + if (imm < 0) + EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, dst_reg)); + EMIT(PPC_RAW_MULHWU(__REG_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp_reg)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + break; + case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ + EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(__REG_R0, src_reg, __REG_R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + break; + case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ + return -EOPNOTSUPP; + case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ + return -EOPNOTSUPP; + case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ + if (!imm) + return -EINVAL; + if (imm == 1) + break; + + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, __REG_R0)); + break; + case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ + if (!imm) + return -EINVAL; + + if (!is_power_of_2((u32)imm)) { + bpf_set_seen_register(ctx, tmp_reg); + PPC_LI32(tmp_reg, imm); + EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULW(__REG_R0, tmp_reg, __REG_R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + break; + } + if (imm == 1) + EMIT(PPC_RAW_LI(dst_reg, 0)); + else + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2((u32)imm), 31)); + + break; + case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */ + if (!imm) + return -EINVAL; + if (imm < 0) + imm = -imm; + if (!is_power_of_2(imm)) + return -EOPNOTSUPP; + if (imm == 1) + EMIT(PPC_RAW_LI(dst_reg, 0)); + else + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2(imm), 31)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ + if (!imm) + return -EINVAL; + if (!is_power_of_2(abs(imm))) + return -EOPNOTSUPP; + + if (imm < 0) { + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + imm = -imm; + } + if (imm == 1) + break; + imm = ilog2(imm); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm)); + break; + case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */ + EMIT(PPC_RAW_NEG(dst_reg, dst_reg)); + break; + case BPF_ALU64 | BPF_NEG: /* dst = -dst */ + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + break; + + /* + * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH + */ + case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */ + EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_AND(dst_reg_h, dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */ + EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */ + if (imm >= 0) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + fallthrough; + case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */ + if (!IMM_H(imm)) { + EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm))); + } else if (!IMM_L(imm)) { + EMIT(PPC_RAW_ANDIS(dst_reg, dst_reg, IMM_H(imm))); + } else if (imm == (((1 << fls(imm)) - 1) ^ ((1 << (ffs(i) - 1)) - 1))) { + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, + 32 - fls(imm), 32 - ffs(imm))); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, __REG_R0)); + } + break; + case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ + EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ + EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ + /* Sign-extended */ + if (imm < 0) + EMIT(PPC_RAW_LI(dst_reg_h, -1)); + fallthrough; + case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */ + if (IMM_L(imm)) + EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm))); + if (IMM_H(imm)) + EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm))); + break; + case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */ + if (dst_reg == src_reg) { + EMIT(PPC_RAW_LI(dst_reg, 0)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } else { + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_XOR(dst_reg_h, dst_reg_h, src_reg_h)); + } + break; + case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */ + if (dst_reg == src_reg) + EMIT(PPC_RAW_LI(dst_reg, 0)); + else + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ + if (imm < 0) + EMIT(PPC_RAW_NOR(dst_reg_h, dst_reg_h, dst_reg_h)); + fallthrough; + case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */ + if (IMM_L(imm)) + EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm))); + if (IMM_H(imm)) + EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm))); + break; + case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */ + EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); + EMIT(PPC_RAW_SRW(__REG_R0, dst_reg, __REG_R0)); + EMIT(PPC_RAW_SLW(tmp_reg, dst_reg, tmp_reg)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg)); + break; + case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */ + if (!imm) + break; + EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm)); + break; + case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */ + if (imm < 0) + return -EINVAL; + if (!imm) + break; + if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, imm, 0, 31 - imm)); + EMIT(PPC_RAW_RLWIMI(dst_reg_h, dst_reg, imm, 32 - imm, 31)); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, imm, 0, 31 - imm)); + break; + } + if (imm < 64) + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg, imm, 0, 31 - imm)); + else + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + EMIT(PPC_RAW_LI(dst_reg, 0)); + break; + case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */ + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); + EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg)); + break; + case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ + if (!imm) + break; + EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm)); + break; + case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */ + if (imm < 0) + return -EINVAL; + if (!imm) + break; + if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, 32 - imm, imm, 31)); + break; + } + if (imm < 64) + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg_h, 64 - imm, imm - 32, 31)); + else + EMIT(PPC_RAW_LI(dst_reg, 0)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */ + EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_RLWINM(__REG_R0, tmp_reg, 0, 26, 26)); + EMIT(PPC_RAW_SRAW(tmp_reg, dst_reg_h, tmp_reg)); + EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, __REG_R0)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg)); + break; + case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */ + if (!imm) + break; + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm)); + break; + case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */ + if (imm < 0) + return -EINVAL; + if (!imm) + break; + if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm)); + break; + } + if (imm < 64) + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, imm - 32)); + else + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, 31)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, 31)); + break; + + /* + * MOV + */ + case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ + if (dst_reg == src_reg) + break; + EMIT(PPC_RAW_MR(dst_reg, src_reg)); + EMIT(PPC_RAW_MR(dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */ + /* special mov32 for zext */ + if (imm == 1) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + else if (dst_reg != src_reg) + EMIT(PPC_RAW_MR(dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */ + PPC_LI32(dst_reg, imm); + PPC_EX32(dst_reg_h, imm); + break; + case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ + PPC_LI32(dst_reg, imm); + break; + + /* + * BPF_FROM_BE/LE + */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm) { + case 16: + /* Copy 16 bits to upper part */ + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg, 16, 0, 15)); + /* Rotate 8 bits right & mask */ + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 24, 16, 31)); + break; + case 32: + /* + * Rotate word left by 8 bits: + * 2 bytes are already in their final position + * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) + */ + EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg, 8, 0, 31)); + /* Rotate 24 bits and insert byte 1 */ + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 0, 7)); + /* Rotate 24 bits and insert byte 3 */ + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + break; + case 64: + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_RLWINM(tmp_reg, dst_reg, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg_h, 8, 0, 31)); + /* Rotate 24 bits and insert byte 1 */ + EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 0, 7)); + /* Rotate 24 bits and insert byte 3 */ + EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg)); + break; + } + break; + case BPF_ALU | BPF_END | BPF_FROM_BE: + switch (imm) { + case 16: + /* zero-extend 16 bits into 32 bits */ + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 16, 31)); + break; + case 32: + case 64: + /* nop */ + break; + } + break; + + /* + * BPF_ST(X) + */ + case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ + EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); + break; + case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STB(__REG_R0, dst_reg, off)); + break; + case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ + EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); + break; + case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STH(__REG_R0, dst_reg, off)); + break; + case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ + EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); + break; + case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + break; + case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ + EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off)); + EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4)); + break; + case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off + 4)); + PPC_EX32(__REG_R0, imm); + EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + break; + + /* + * BPF_STX XADD (atomic_add) + */ + case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */ + bpf_set_seen_register(ctx, tmp_reg); + /* Get offset into TMP_REG */ + EMIT(PPC_RAW_LI(tmp_reg, off)); + /* load value from memory into r0 */ + EMIT(PPC_RAW_LWARX(__REG_R0, tmp_reg, dst_reg, 0)); + /* add value from src_reg into this */ + EMIT(PPC_RAW_ADD(__REG_R0, __REG_R0, src_reg)); + /* store result back */ + EMIT(PPC_RAW_STWCX(__REG_R0, tmp_reg, dst_reg)); + /* we're done if this succeeded */ + PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4); + break; + + case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */ + return -EOPNOTSUPP; + + /* + * BPF_LDX + */ + case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ + EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); + break; + + /* + * Doubleword load + * 16 byte instruction that uses two 'struct bpf_insn' + */ + case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ + PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm); + PPC_LI32(dst_reg, (u32)insn[i].imm); + /* Adjust for two bpf instructions */ + addrs[++i] = ctx->idx * 4; + break; + + /* + * Return/Exit + */ + case BPF_JMP | BPF_EXIT: + /* + * If this isn't the very last instruction, branch to + * the epilogue. If we _are_ the last instruction, + * we'll just fall through to the epilogue. + */ + if (i != flen - 1) + PPC_JMP(exit_addr); + /* else fall through to the epilogue */ + break; + + /* + * Call kernel helper or bpf function + */ + case BPF_JMP | BPF_CALL: + ctx->seen |= SEEN_FUNC; + + ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; + + if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, 8)); + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, 12)); + } + + bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, __REG_R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), __REG_R4)); + break; + + /* + * Jumps and branches + */ + case BPF_JMP | BPF_JA: + PPC_JMP(addrs[i + 1 + off]); + break; + + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_X: + true_cond = COND_GT; + goto cond_branch; + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_X: + true_cond = COND_LT; + goto cond_branch; + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_X: + true_cond = COND_GE; + goto cond_branch; + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_X: + true_cond = COND_LE; + goto cond_branch; + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_X: + true_cond = COND_EQ; + goto cond_branch; + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_X: + true_cond = COND_NE; + goto cond_branch; + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_X: + true_cond = COND_NE; + /* fallthrough; */ + +cond_branch: + switch (code) { + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + /* unsigned comparison */ + EMIT(PPC_RAW_CMPLW(dst_reg_h, src_reg_h)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); + break; + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + /* unsigned comparison */ + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); + break; + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + /* signed comparison */ + EMIT(PPC_RAW_CMPW(dst_reg_h, src_reg_h)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); + break; + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + /* signed comparison */ + EMIT(PPC_RAW_CMPW(dst_reg, src_reg)); + break; + case BPF_JMP | BPF_JSET | BPF_X: + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg_h, src_reg_h)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + break; + case BPF_JMP32 | BPF_JSET | BPF_X: { + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + break; + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + /* + * Need sign-extended load, so only positive + * values can be used as imm in cmplwi + */ + if (imm >= 0 && imm < 32768) { + EMIT(PPC_RAW_CMPLWI(dst_reg_h, 0)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); + } else { + /* sign-extending load ... but unsigned comparison */ + PPC_EX32(__REG_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg_h, __REG_R0)); + PPC_LI32(__REG_R0, imm); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + } + break; + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + if (imm >= 0 && imm < 65536) { + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + } + break; + } + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + if (imm >= 0 && imm < 65536) { + EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); + } else { + /* sign-extending load */ + EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0)); + PPC_LI32(__REG_R0, imm); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + } + break; + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + /* + * signed comparison, so any 16-bit value + * can be used in cmpwi + */ + if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_CMPWI(dst_reg, imm)); + } else { + /* sign-extending load */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_CMPW(dst_reg, __REG_R0)); + } + break; + case BPF_JMP | BPF_JSET | BPF_K: + /* andi does not sign-extend the immediate */ + if (imm >= 0 && imm < 32768) { + /* PPC_ANDI is _only/always_ dot-form */ + EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + if (imm < 0) { + EMIT(PPC_RAW_CMPWI(dst_reg_h, 0)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + } + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + } + break; + case BPF_JMP32 | BPF_JSET | BPF_K: + /* andi does not sign-extend the immediate */ + if (imm >= -32768 && imm < 32768) { + /* PPC_ANDI is _only/always_ dot-form */ + EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + } + break; + } + PPC_BCC(true_cond, addrs[i + 1 + off]); + break; + + /* + * Tail call + */ + case BPF_JMP | BPF_TAIL_CALL: + ctx->seen |= SEEN_TAILCALL; + bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + break; + + default: + /* + * The filter contains something cruel & unusual. + * We don't handle it, but also there shouldn't be + * anything missing from our list. + */ + pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n", code, i); + return -EOPNOTSUPP; + } + if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext && + !insn_is_zext(&insn[i + 1])) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } + + /* Set end-of-body-code address for exit. */ + addrs[i] = ctx->idx * 4; + + return 0; +} diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index aaf1a887f653..57a8c1153851 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -18,27 +18,6 @@ #include "bpf_jit64.h" -static void bpf_jit_fill_ill_insns(void *area, unsigned int size) -{ - memset32(area, BREAKPOINT_INSTRUCTION, size/4); -} - -static inline void bpf_flush_icache(void *start, void *end) -{ - smp_wmb(); - flush_icache_range((unsigned long)start, (unsigned long)end); -} - -static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i) -{ - return (ctx->seen & (1 << (31 - b2p[i]))); -} - -static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) -{ - ctx->seen |= (1 << (31 - b2p[i])); -} - static inline bool bpf_has_stack_frame(struct codegen_context *ctx) { /* @@ -47,7 +26,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * - the bpf program uses its stack area * The latter condition is deduced from the usage of BPF_REG_FP */ - return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP); + return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, b2p[BPF_REG_FP]); } /* @@ -85,7 +64,11 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) BUG(); } -static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) +void bpf_jit_realloc_regs(struct codegen_context *ctx) +{ +} + +void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; @@ -124,11 +107,11 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * in the protected zone below the previous stack frame */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, i)) + if (bpf_is_seen_register(ctx, b2p[i])) PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); /* Setup frame pointer to point to the bpf stack area */ - if (bpf_is_seen_register(ctx, BPF_REG_FP)) + if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP])) EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -139,7 +122,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, i)) + if (bpf_is_seen_register(ctx, b2p[i])) PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); /* Tear down our stack frame */ @@ -152,7 +135,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx } } -static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) { bpf_jit_emit_common_epilogue(image, ctx); @@ -187,8 +170,7 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, EMIT(PPC_RAW_BLRL()); } -static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, - u64 func) +void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) { unsigned int i, ctx_idx = ctx->idx; @@ -289,9 +271,8 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 } /* Assemble the body code between the prologue & epilogue */ -static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, - u32 *addrs, bool extra_pass) +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, + u32 *addrs, bool extra_pass) { const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; @@ -330,9 +311,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, * any issues. */ if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32) - bpf_set_seen_register(ctx, insn[i].dst_reg); + bpf_set_seen_register(ctx, dst_reg); if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32) - bpf_set_seen_register(ctx, insn[i].src_reg); + bpf_set_seen_register(ctx, src_reg); switch (code) { /* @@ -1026,249 +1007,3 @@ cond_branch: return 0; } - -/* Fix the branch target addresses for subprog calls */ -static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, u32 *addrs) -{ - const struct bpf_insn *insn = fp->insnsi; - bool func_addr_fixed; - u64 func_addr; - u32 tmp_idx; - int i, ret; - - for (i = 0; i < fp->len; i++) { - /* - * During the extra pass, only the branch target addresses for - * the subprog calls need to be fixed. All other instructions - * can left untouched. - * - * The JITed image length does not change because we already - * ensure that the JITed instruction sequence for these calls - * are of fixed length by padding them with NOPs. - */ - if (insn[i].code == (BPF_JMP | BPF_CALL) && - insn[i].src_reg == BPF_PSEUDO_CALL) { - ret = bpf_jit_get_func_addr(fp, &insn[i], true, - &func_addr, - &func_addr_fixed); - if (ret < 0) - return ret; - - /* - * Save ctx->idx as this would currently point to the - * end of the JITed image and set it to the offset of - * the instruction sequence corresponding to the - * subprog call temporarily. - */ - tmp_idx = ctx->idx; - ctx->idx = addrs[i] / 4; - bpf_jit_emit_func_call_rel(image, ctx, func_addr); - - /* - * Restore ctx->idx here. This is safe as the length - * of the JITed sequence remains unchanged. - */ - ctx->idx = tmp_idx; - } - } - - return 0; -} - -struct powerpc64_jit_data { - struct bpf_binary_header *header; - u32 *addrs; - u8 *image; - u32 proglen; - struct codegen_context ctx; -}; - -bool bpf_jit_needs_zext(void) -{ - return true; -} - -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) -{ - u32 proglen; - u32 alloclen; - u8 *image = NULL; - u32 *code_base; - u32 *addrs; - struct powerpc64_jit_data *jit_data; - struct codegen_context cgctx; - int pass; - int flen; - struct bpf_binary_header *bpf_hdr; - struct bpf_prog *org_fp = fp; - struct bpf_prog *tmp_fp; - bool bpf_blinded = false; - bool extra_pass = false; - - if (!fp->jit_requested) - return org_fp; - - tmp_fp = bpf_jit_blind_constants(org_fp); - if (IS_ERR(tmp_fp)) - return org_fp; - - if (tmp_fp != org_fp) { - bpf_blinded = true; - fp = tmp_fp; - } - - jit_data = fp->aux->jit_data; - if (!jit_data) { - jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); - if (!jit_data) { - fp = org_fp; - goto out; - } - fp->aux->jit_data = jit_data; - } - - flen = fp->len; - addrs = jit_data->addrs; - if (addrs) { - cgctx = jit_data->ctx; - image = jit_data->image; - bpf_hdr = jit_data->header; - proglen = jit_data->proglen; - alloclen = proglen + FUNCTION_DESCR_SIZE; - extra_pass = true; - goto skip_init_ctx; - } - - addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL); - if (addrs == NULL) { - fp = org_fp; - goto out_addrs; - } - - memset(&cgctx, 0, sizeof(struct codegen_context)); - - /* Make sure that the stack is quadword aligned. */ - cgctx.stack_size = round_up(fp->aux->stack_depth, 16); - - /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { - /* We hit something illegal or unsupported. */ - fp = org_fp; - goto out_addrs; - } - - /* - * If we have seen a tail call, we need a second pass. - * This is because bpf_jit_emit_common_epilogue() is called - * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. - */ - if (cgctx.seen & SEEN_TAILCALL) { - cgctx.idx = 0; - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { - fp = org_fp; - goto out_addrs; - } - } - - /* - * Pretend to build prologue, given the features we've seen. This will - * update ctgtx.idx as it pretends to output instructions, then we can - * calculate total size from idx. - */ - bpf_jit_build_prologue(0, &cgctx); - bpf_jit_build_epilogue(0, &cgctx); - - proglen = cgctx.idx * 4; - alloclen = proglen + FUNCTION_DESCR_SIZE; - - bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, - bpf_jit_fill_ill_insns); - if (!bpf_hdr) { - fp = org_fp; - goto out_addrs; - } - -skip_init_ctx: - code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); - - if (extra_pass) { - /* - * Do not touch the prologue and epilogue as they will remain - * unchanged. Only fix the branch target address for subprog - * calls in the body. - * - * This does not change the offsets and lengths of the subprog - * call instruction sequences and hence, the size of the JITed - * image as well. - */ - bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); - - /* There is no need to perform the usual passes. */ - goto skip_codegen_passes; - } - - /* Code generation passes 1-2 */ - for (pass = 1; pass < 3; pass++) { - /* Now build the prologue, body code & epilogue for real. */ - cgctx.idx = 0; - bpf_jit_build_prologue(code_base, &cgctx); - bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass); - bpf_jit_build_epilogue(code_base, &cgctx); - - if (bpf_jit_enable > 1) - pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass, - proglen - (cgctx.idx * 4), cgctx.seen); - } - -skip_codegen_passes: - if (bpf_jit_enable > 1) - /* - * Note that we output the base address of the code_base - * rather than image, since opcodes are in code_base. - */ - bpf_jit_dump(flen, proglen, pass, code_base); - -#ifdef PPC64_ELF_ABI_v1 - /* Function descriptor nastiness: Address + TOC */ - ((u64 *)image)[0] = (u64)code_base; - ((u64 *)image)[1] = local_paca->kernel_toc; -#endif - - fp->bpf_func = (void *)image; - fp->jited = 1; - fp->jited_len = alloclen; - - bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); - if (!fp->is_func || extra_pass) { - bpf_prog_fill_jited_linfo(fp, addrs); -out_addrs: - kfree(addrs); - kfree(jit_data); - fp->aux->jit_data = NULL; - } else { - jit_data->addrs = addrs; - jit_data->ctx = cgctx; - jit_data->proglen = proglen; - jit_data->image = image; - jit_data->header = bpf_hdr; - } - -out: - if (bpf_blinded) - bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); - - return fp; -} - -/* Overriding bpf_jit_free() as we don't set images read-only. */ -void bpf_jit_free(struct bpf_prog *fp) -{ - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *bpf_hdr = (void *)addr; - - if (fp->jited) - bpf_jit_binary_free(bpf_hdr); - - bpf_prog_unlock_free(fp); -} |