Diffstat (limited to 'arch/powerpc/net')
-rw-r--r--  arch/powerpc/net/Makefile          |    6
-rw-r--r--  arch/powerpc/net/bpf_jit.h         |   64
-rw-r--r--  arch/powerpc/net/bpf_jit32.h       |  139
-rw-r--r--  arch/powerpc/net/bpf_jit64.h       |   21
-rw-r--r--  arch/powerpc/net/bpf_jit_asm.S     |  226
-rw-r--r--  arch/powerpc/net/bpf_jit_comp.c    |  782
-rw-r--r--  arch/powerpc/net/bpf_jit_comp32.c  | 1100
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c  |  295
8 files changed, 1366 insertions(+), 1267 deletions(-)
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
index c2dec3a68d4c..8e60af32e51e 100644
--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -2,8 +2,4 @@
#
# Arch-specific network modules
#
-ifdef CONFIG_PPC64
-obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
-else
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
-endif
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp$(BITS).o
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index d0a67a1bbaf1..99fad093f43e 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -26,6 +26,9 @@
/* Long jump; (unconditional 'branch') */
#define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \
(((dest) - (ctx->idx * 4)) & 0x03fffffc))
+/* bl; (unconditional 'branch' with link) to absolute address */
+#define PPC_BL_ABS(dest) EMIT(PPC_INST_BL | \
+ (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc))
/* "cond" here covers BO:BI fields. */
#define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \
(((cond) & 0x3ff) << 16) | \
@@ -42,6 +45,10 @@
EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \
} } while(0)
+#ifdef CONFIG_PPC32
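+/* Load the high word of a 64-bit register pair with the sign extension of a 32-bit immediate */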
+#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0))
+#endif
+
#define PPC_LI64(d, i) do { \
if ((long)(i) >= -2147483648 && \
(long)(i) < 2147483648) \
@@ -108,6 +115,63 @@ static inline bool is_nearbranch(int offset)
#define COND_LT (CR0_LT | COND_CMP_TRUE)
#define COND_LE (CR0_GT | COND_CMP_FALSE)
+#define SEEN_FUNC 0x20000000 /* might call external helpers */
+#define SEEN_STACK 0x40000000 /* uses BPF stack */
+#define SEEN_TAILCALL 0x80000000 /* uses tail calls */
+
+#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */
+#define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */
+
+#ifdef CONFIG_PPC64
+extern const int b2p[MAX_BPF_JIT_REG + 2];
+#else
+extern const int b2p[MAX_BPF_JIT_REG + 1];
+#endif
+
+struct codegen_context {
+ /*
+ * This is used to track register usage as well
+ * as calls to external helpers.
+ * - register usage is tracked with corresponding
+ * bits (r3-r31)
+ * - rest of the bits can be used to track other
+ * things -- for now, we use bits 0 to 2
+ * encoded in SEEN_* macros above
+ */
+ unsigned int seen;
+ unsigned int idx;
+ unsigned int stack_size;
+ int b2p[ARRAY_SIZE(b2p)];
+};
+
+static inline void bpf_flush_icache(void *start, void *end)
+{
+ smp_wmb(); /* smp write barrier */
+ flush_icache_range((unsigned long)start, (unsigned long)end);
+}
+
+static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
+{
+ return ctx->seen & (1 << (31 - i));
+}
+
+static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
+{
+ ctx->seen |= 1 << (31 - i);
+}
+
+static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
+{
+ ctx->seen &= ~(1 << (31 - i));
+}
+
+void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+ u32 *addrs, bool extra_pass);
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_realloc_regs(struct codegen_context *ctx);
+
#endif
#endif
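The register-tracking helpers above keep one bit per GPR in ctx->seen, with register ri recorded at bit (31 - i) so that r31 lands in bit 0. A minimal sketch of how the two masks are derived (illustrative only, not part of the patch):

	/* r3..r12  (volatile):     bits 28 down to 19  ->  0x1ff80000 */
	/* r14..r31 (non-volatile): bits 17 down to 0   ->  0x0003ffff */
	unsigned int vreg_mask = 0, nvreg_mask = 0;
	int i;

	for (i = 3; i <= 12; i++)
		vreg_mask |= 1u << (31 - i);
	for (i = 14; i <= 31; i++)
		nvreg_mask |= 1u << (31 - i);
	/* vreg_mask == SEEN_VREG_MASK, nvreg_mask == SEEN_NVREG_MASK */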
diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
deleted file mode 100644
index 448dfd4d98e1..000000000000
--- a/arch/powerpc/net/bpf_jit32.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * bpf_jit32.h: BPF JIT compiler for PPC
- *
- * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
- *
- * Split from bpf_jit.h
- */
-#ifndef _BPF_JIT32_H
-#define _BPF_JIT32_H
-
-#include <asm/asm-compat.h>
-#include "bpf_jit.h"
-
-#ifdef CONFIG_PPC64
-#define BPF_PPC_STACK_R3_OFF 48
-#define BPF_PPC_STACK_LOCALS 32
-#define BPF_PPC_STACK_BASIC (48+64)
-#define BPF_PPC_STACK_SAVE (18*8)
-#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
- BPF_PPC_STACK_SAVE)
-#define BPF_PPC_SLOWPATH_FRAME (48+64)
-#else
-#define BPF_PPC_STACK_R3_OFF 24
-#define BPF_PPC_STACK_LOCALS 16
-#define BPF_PPC_STACK_BASIC (24+32)
-#define BPF_PPC_STACK_SAVE (18*4)
-#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
- BPF_PPC_STACK_SAVE)
-#define BPF_PPC_SLOWPATH_FRAME (24+32)
-#endif
-
-#define REG_SZ (BITS_PER_LONG/8)
-
-/*
- * Generated code register usage:
- *
- * As normal PPC C ABI (e.g. r1=sp, r2=TOC), with:
- *
- * skb r3 (Entry parameter)
- * A register r4
- * X register r5
- * addr param r6
- * r7-r10 scratch
- * skb->data r14
- * skb headlen r15 (skb->len - skb->data_len)
- * m[0] r16
- * m[...] ...
- * m[15] r31
- */
-#define r_skb 3
-#define r_ret 3
-#define r_A 4
-#define r_X 5
-#define r_addr 6
-#define r_scratch1 7
-#define r_scratch2 8
-#define r_D 14
-#define r_HL 15
-#define r_M 16
-
-#ifndef __ASSEMBLY__
-
-/*
- * Assembly helpers from arch/powerpc/net/bpf_jit.S:
- */
-#define DECLARE_LOAD_FUNC(func) \
- extern u8 func[], func##_negative_offset[], func##_positive_offset[]
-
-DECLARE_LOAD_FUNC(sk_load_word);
-DECLARE_LOAD_FUNC(sk_load_half);
-DECLARE_LOAD_FUNC(sk_load_byte);
-DECLARE_LOAD_FUNC(sk_load_byte_msh);
-
-#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LBZ(r, base, i)); \
- else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \
- EMIT(PPC_RAW_LBZ(r, r, IMM_L(i))); } } while(0)
-
-#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LD(r, base, i)); \
- else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \
- EMIT(PPC_RAW_LD(r, r, IMM_L(i))); } } while(0)
-
-#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LWZ(r, base, i)); \
- else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \
- EMIT(PPC_RAW_LWZ(r, r, IMM_L(i))); } } while(0)
-
-#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LHZ(r, base, i)); \
- else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \
- EMIT(PPC_RAW_LHZ(r, r, IMM_L(i))); } } while(0)
-
-#ifdef CONFIG_PPC64
-#define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0)
-#else
-#define PPC_LL_OFFS(r, base, i) do { PPC_LWZ_OFFS(r, base, i); } while(0)
-#endif
-
-#ifdef CONFIG_SMP
-#ifdef CONFIG_PPC64
-#define PPC_BPF_LOAD_CPU(r) \
- do { BUILD_BUG_ON(sizeof_field(struct paca_struct, paca_index) != 2); \
- PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index)); \
- } while (0)
-#else
-#define PPC_BPF_LOAD_CPU(r) \
- do { BUILD_BUG_ON(sizeof_field(struct task_struct, cpu) != 4); \
- PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu)); \
- } while(0)
-#endif
-#else
-#define PPC_BPF_LOAD_CPU(r) do { EMIT(PPC_RAW_LI(r, 0)); } while(0)
-#endif
-
-#define PPC_LHBRX_OFFS(r, base, i) \
- do { PPC_LI32(r, i); EMIT(PPC_RAW_LHBRX(r, r, base)); } while(0)
-#ifdef __LITTLE_ENDIAN__
-#define PPC_NTOHS_OFFS(r, base, i) PPC_LHBRX_OFFS(r, base, i)
-#else
-#define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i)
-#endif
-
-#define PPC_BPF_LL(r, base, i) do { EMIT(PPC_RAW_LWZ(r, base, i)); } while(0)
-#define PPC_BPF_STL(r, base, i) do { EMIT(PPC_RAW_STW(r, base, i)); } while(0)
-#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STWU(r, base, i)); } while(0)
-
-#define SEEN_DATAREF 0x10000 /* might call external helpers */
-#define SEEN_XREG 0x20000 /* X reg is used */
-#define SEEN_MEM 0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary
- * storage */
-#define SEEN_MEM_MSK 0x0ffff
-
-struct codegen_context {
- unsigned int seen;
- unsigned int idx;
- int pc_ret0; /* bpf index of first RET #0 instruction (if any) */
-};
-
-#endif
-
-#endif
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 2e33c6673ff9..7b713edfa7e2 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -39,7 +39,7 @@
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
/* BPF to ppc register mappings */
-static const int b2p[] = {
+const int b2p[MAX_BPF_JIT_REG + 2] = {
/* function return value */
[BPF_REG_0] = 8,
/* function arguments */
@@ -86,25 +86,6 @@ static const int b2p[] = {
} while(0)
#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0)
-#define SEEN_FUNC 0x1000 /* might call external helpers */
-#define SEEN_STACK 0x2000 /* uses BPF stack */
-#define SEEN_TAILCALL 0x4000 /* uses tail calls */
-
-struct codegen_context {
- /*
- * This is used to track register usage as well
- * as calls to external helpers.
- * - register usage is tracked with corresponding
- * bits (r3-r10 and r27-r31)
- * - rest of the bits can be used to track other
- * things -- for now, we use bits 16 to 23
- * encoded in SEEN_* macros above
- */
- unsigned int seen;
- unsigned int idx;
- unsigned int stack_size;
-};
-
#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/powerpc/net/bpf_jit_asm.S b/arch/powerpc/net/bpf_jit_asm.S
deleted file mode 100644
index 2f5030d8383f..000000000000
--- a/arch/powerpc/net/bpf_jit_asm.S
+++ /dev/null
@@ -1,226 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* bpf_jit.S: Packet/header access helper functions
- * for PPC64 BPF compiler.
- *
- * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
- */
-
-#include <asm/ppc_asm.h>
-#include <asm/asm-compat.h>
-#include "bpf_jit32.h"
-
-/*
- * All of these routines are called directly from generated code,
- * whose register usage is:
- *
- * r3 skb
- * r4,r5 A,X
- * r6 *** address parameter to helper ***
- * r7-r10 scratch
- * r14 skb->data
- * r15 skb headlen
- * r16-31 M[]
- */
-
-/*
- * To consider: These helpers are so small it could be better to just
- * generate them inline. Inline code can do the simple headlen check
- * then branch directly to slow_path_XXX if required. (In fact, could
- * load a spare GPR with the address of slow_path_generic and pass size
- * as an argument, making the call site a mtlr, li and bllr.)
- */
- .globl sk_load_word
-sk_load_word:
- PPC_LCMPI r_addr, 0
- blt bpf_slow_path_word_neg
- .globl sk_load_word_positive_offset
-sk_load_word_positive_offset:
- /* Are we accessing past headlen? */
- subi r_scratch1, r_HL, 4
- PPC_LCMP r_scratch1, r_addr
- blt bpf_slow_path_word
- /* Nope, just hitting the header. cr0 here is eq or gt! */
-#ifdef __LITTLE_ENDIAN__
- lwbrx r_A, r_D, r_addr
-#else
- lwzx r_A, r_D, r_addr
-#endif
- blr /* Return success, cr0 != LT */
-
- .globl sk_load_half
-sk_load_half:
- PPC_LCMPI r_addr, 0
- blt bpf_slow_path_half_neg
- .globl sk_load_half_positive_offset
-sk_load_half_positive_offset:
- subi r_scratch1, r_HL, 2
- PPC_LCMP r_scratch1, r_addr
- blt bpf_slow_path_half
-#ifdef __LITTLE_ENDIAN__
- lhbrx r_A, r_D, r_addr
-#else
- lhzx r_A, r_D, r_addr
-#endif
- blr
-
- .globl sk_load_byte
-sk_load_byte:
- PPC_LCMPI r_addr, 0
- blt bpf_slow_path_byte_neg
- .globl sk_load_byte_positive_offset
-sk_load_byte_positive_offset:
- PPC_LCMP r_HL, r_addr
- ble bpf_slow_path_byte
- lbzx r_A, r_D, r_addr
- blr
-
-/*
- * BPF_LDX | BPF_B | BPF_MSH: ldxb 4*([offset]&0xf)
- * r_addr is the offset value
- */
- .globl sk_load_byte_msh
-sk_load_byte_msh:
- PPC_LCMPI r_addr, 0
- blt bpf_slow_path_byte_msh_neg
- .globl sk_load_byte_msh_positive_offset
-sk_load_byte_msh_positive_offset:
- PPC_LCMP r_HL, r_addr
- ble bpf_slow_path_byte_msh
- lbzx r_X, r_D, r_addr
- rlwinm r_X, r_X, 2, 32-4-2, 31-2
- blr
-
-/* Call out to skb_copy_bits:
- * We'll need to back up our volatile regs first; we have
- * local variable space at r1+(BPF_PPC_STACK_BASIC).
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define bpf_slow_path_common(SIZE) \
- mflr r0; \
- PPC_STL r0, PPC_LR_STKOFF(r1); \
- /* R3 goes in parameter space of caller's frame */ \
- PPC_STL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \
- PPC_STL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \
- PPC_STL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \
- addi r5, r1, BPF_PPC_STACK_BASIC+(2*REG_SZ); \
- PPC_STLU r1, -BPF_PPC_SLOWPATH_FRAME(r1); \
- /* R3 = r_skb, as passed */ \
- mr r4, r_addr; \
- li r6, SIZE; \
- bl skb_copy_bits; \
- nop; \
- /* R3 = 0 on success */ \
- addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
- PPC_LL r0, PPC_LR_STKOFF(r1); \
- PPC_LL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \
- PPC_LL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \
- mtlr r0; \
- PPC_LCMPI r3, 0; \
- blt bpf_error; /* cr0 = LT */ \
- PPC_LL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \
- /* Great success! */
-
-bpf_slow_path_word:
- bpf_slow_path_common(4)
- /* Data value is on stack, and cr0 != LT */
- lwz r_A, BPF_PPC_STACK_BASIC+(2*REG_SZ)(r1)
- blr
-
-bpf_slow_path_half:
- bpf_slow_path_common(2)
- lhz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
- blr
-
-bpf_slow_path_byte:
- bpf_slow_path_common(1)
- lbz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
- blr
-
-bpf_slow_path_byte_msh:
- bpf_slow_path_common(1)
- lbz r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
- rlwinm r_X, r_X, 2, 32-4-2, 31-2
- blr
-
-/* Call out to bpf_internal_load_pointer_neg_helper:
- * We'll need to back up our volatile regs first; we have
- * local variable space at r1+(BPF_PPC_STACK_BASIC).
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define sk_negative_common(SIZE) \
- mflr r0; \
- PPC_STL r0, PPC_LR_STKOFF(r1); \
- /* R3 goes in parameter space of caller's frame */ \
- PPC_STL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \
- PPC_STL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \
- PPC_STL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \
- PPC_STLU r1, -BPF_PPC_SLOWPATH_FRAME(r1); \
- /* R3 = r_skb, as passed */ \
- mr r4, r_addr; \
- li r5, SIZE; \
- bl bpf_internal_load_pointer_neg_helper; \
- nop; \
- /* R3 != 0 on success */ \
- addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
- PPC_LL r0, PPC_LR_STKOFF(r1); \
- PPC_LL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \
- PPC_LL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \
- mtlr r0; \
- PPC_LCMPLI r3, 0; \
- beq bpf_error_slow; /* cr0 = EQ */ \
- mr r_addr, r3; \
- PPC_LL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \
- /* Great success! */
-
-bpf_slow_path_word_neg:
- lis r_scratch1,-32 /* SKF_LL_OFF */
- PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */
- blt bpf_error /* cr0 = LT */
- .globl sk_load_word_negative_offset
-sk_load_word_negative_offset:
- sk_negative_common(4)
- lwz r_A, 0(r_addr)
- blr
-
-bpf_slow_path_half_neg:
- lis r_scratch1,-32 /* SKF_LL_OFF */
- PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */
- blt bpf_error /* cr0 = LT */
- .globl sk_load_half_negative_offset
-sk_load_half_negative_offset:
- sk_negative_common(2)
- lhz r_A, 0(r_addr)
- blr
-
-bpf_slow_path_byte_neg:
- lis r_scratch1,-32 /* SKF_LL_OFF */
- PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */
- blt bpf_error /* cr0 = LT */
- .globl sk_load_byte_negative_offset
-sk_load_byte_negative_offset:
- sk_negative_common(1)
- lbz r_A, 0(r_addr)
- blr
-
-bpf_slow_path_byte_msh_neg:
- lis r_scratch1,-32 /* SKF_LL_OFF */
- PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */
- blt bpf_error /* cr0 = LT */
- .globl sk_load_byte_msh_negative_offset
-sk_load_byte_msh_negative_offset:
- sk_negative_common(1)
- lbz r_X, 0(r_addr)
- rlwinm r_X, r_X, 2, 32-4-2, 31-2
- blr
-
-bpf_error_slow:
- /* fabricate a cr0 = lt */
- li r_scratch1, -1
- PPC_LCMPI r_scratch1, 0
-bpf_error:
- /* Entered with cr0 = lt */
- li r3, 0
- /* Generated code will 'blt epilogue', returning 0. */
- blr
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index e809cb5a1631..798ac4350a82 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -1,10 +1,11 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* bpf_jit_comp.c: BPF JIT compiler
+/*
+ * eBPF JIT compiler
*
- * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
+ * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+ * IBM Corporation
*
- * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com)
- * Ported to ppc32 by Denis Kirjanov <kda@linux-powerpc.org>
+ * Based on the powerpc classic BPF JIT compiler by Matt Evans
*/
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
@@ -12,639 +13,204 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
+#include <asm/kprobes.h>
+#include <linux/bpf.h>
-#include "bpf_jit32.h"
+#include "bpf_jit.h"
-static inline void bpf_flush_icache(void *start, void *end)
+static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
{
- smp_wmb();
- flush_icache_range((unsigned long)start, (unsigned long)end);
+ memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
}
-static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image,
- struct codegen_context *ctx)
+/* Fix the branch target addresses for subprog calls */
+static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
+ struct codegen_context *ctx, u32 *addrs)
{
- int i;
- const struct sock_filter *filter = fp->insns;
-
- if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
- /* Make stackframe */
- if (ctx->seen & SEEN_DATAREF) {
- /* If we call any helpers (for loads), save LR */
- EMIT(PPC_INST_MFLR | __PPC_RT(R0));
- PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
-
- /* Back up non-volatile regs. */
- PPC_BPF_STL(r_D, 1, -(REG_SZ*(32-r_D)));
- PPC_BPF_STL(r_HL, 1, -(REG_SZ*(32-r_HL)));
- }
- if (ctx->seen & SEEN_MEM) {
- /*
- * Conditionally save regs r15-r31 as some will be used
- * for M[] data.
- */
- for (i = r_M; i < (r_M+16); i++) {
- if (ctx->seen & (1 << (i-r_M)))
- PPC_BPF_STL(i, 1, -(REG_SZ*(32-i)));
- }
- }
- PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME);
- }
-
- if (ctx->seen & SEEN_DATAREF) {
- /*
- * If this filter needs to access skb data,
- * prepare r_D and r_HL:
- * r_HL = skb->len - skb->data_len
- * r_D = skb->data
- */
- PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
- data_len));
- PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len));
- EMIT(PPC_RAW_SUB(r_HL, r_HL, r_scratch1));
- PPC_LL_OFFS(r_D, r_skb, offsetof(struct sk_buff, data));
- }
+ const struct bpf_insn *insn = fp->insnsi;
+ bool func_addr_fixed;
+ u64 func_addr;
+ u32 tmp_idx;
+ int i, ret;
- if (ctx->seen & SEEN_XREG) {
+ for (i = 0; i < fp->len; i++) {
/*
- * TODO: Could also detect whether first instr. sets X and
- * avoid this (as below, with A).
+ * During the extra pass, only the branch target addresses for
+ * the subprog calls need to be fixed. All other instructions
+ * can be left untouched.
+ *
+ * The JITed image length does not change because we already
+ * ensure that the JITed instruction sequences for these calls
+ * are of fixed length by padding them with NOPs.
*/
- EMIT(PPC_RAW_LI(r_X, 0));
- }
-
- /* make sure we dont leak kernel information to user */
- if (bpf_needs_clear_a(&filter[0]))
- EMIT(PPC_RAW_LI(r_A, 0));
-}
-
-static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
-{
- int i;
-
- if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
- EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME));
- if (ctx->seen & SEEN_DATAREF) {
- PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
- EMIT(PPC_RAW_MTLR(0));
- PPC_BPF_LL(r_D, 1, -(REG_SZ*(32-r_D)));
- PPC_BPF_LL(r_HL, 1, -(REG_SZ*(32-r_HL)));
- }
- if (ctx->seen & SEEN_MEM) {
- /* Restore any saved non-vol registers */
- for (i = r_M; i < (r_M+16); i++) {
- if (ctx->seen & (1 << (i-r_M)))
- PPC_BPF_LL(i, 1, -(REG_SZ*(32-i)));
- }
- }
- }
- /* The RETs have left a return value in R3. */
-
- EMIT(PPC_RAW_BLR());
-}
-
-#define CHOOSE_LOAD_FUNC(K, func) \
- ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
-
-/* Assemble the body code between the prologue & epilogue. */
-static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
- struct codegen_context *ctx,
- unsigned int *addrs)
-{
- const struct sock_filter *filter = fp->insns;
- int flen = fp->len;
- u8 *func;
- unsigned int true_cond;
- int i;
-
- /* Start of epilogue code */
- unsigned int exit_addr = addrs[flen];
-
- for (i = 0; i < flen; i++) {
- unsigned int K = filter[i].k;
- u16 code = bpf_anc_helper(&filter[i]);
+ if (insn[i].code == (BPF_JMP | BPF_CALL) &&
+ insn[i].src_reg == BPF_PSEUDO_CALL) {
+ ret = bpf_jit_get_func_addr(fp, &insn[i], true,
+ &func_addr,
+ &func_addr_fixed);
+ if (ret < 0)
+ return ret;
- /*
- * addrs[] maps a BPF bytecode address into a real offset from
- * the start of the body code.
- */
- addrs[i] = ctx->idx * 4;
-
- switch (code) {
- /*** ALU ops ***/
- case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */
- ctx->seen |= SEEN_XREG;
- EMIT(PPC_RAW_ADD(r_A, r_A, r_X));
- break;
- case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */
- if (!K)
- break;
- EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(K)));
- if (K >= 32768)
- EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(K)));
- break;
- case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */
- ctx->seen |= SEEN_XREG;
- EMIT(PPC_RAW_SUB(r_A, r_A, r_X));
- break;
- case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
- if (!K)
- break;
- EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(-K)));
- if (K >= 32768)
- EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(-K)));
- break;
- case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */
- ctx->seen |= SEEN_XREG;
- EMIT(PPC_RAW_MULW(r_A, r_A, r_X));
- break;
- case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
- if (K < 32768)
- EMIT(PPC_RAW_MULI(r_A, r_A, K));
- else {
- PPC_LI32(r_scratch1, K);
- EMIT(PPC_RAW_MULW(r_A, r_A, r_scratch1));
- }
- break;
- case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */
- case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */
- ctx->seen |= SEEN_XREG;
- EMIT(PPC_RAW_CMPWI(r_X, 0));
- if (ctx->pc_ret0 != -1) {
- PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
- } else {
- PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
- EMIT(PPC_RAW_LI(r_ret, 0));
- PPC_JMP(exit_addr);
- }
- if (code == (BPF_ALU | BPF_MOD | BPF_X)) {
- EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_X));
- EMIT(PPC_RAW_MULW(r_scratch1, r_X, r_scratch1));
- EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1));
- } else {
- EMIT(PPC_RAW_DIVWU(r_A, r_A, r_X));
- }
- break;
- case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */
- PPC_LI32(r_scratch2, K);
- EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_scratch2));
- EMIT(PPC_RAW_MULW(r_scratch1, r_scratch2, r_scratch1));
- EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1));
- break;
- case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
- if (K == 1)
- break;
- PPC_LI32(r_scratch1, K);
- EMIT(PPC_RAW_DIVWU(r_A, r_A, r_scratch1));
- break;
- case BPF_ALU | BPF_AND | BPF_X:
- ctx->seen |= SEEN_XREG;
- EMIT(PPC_RAW_AND(r_A, r_A, r_X));
- break;
- case BPF_ALU | BPF_AND | BPF_K:
- if (!IMM_H(K))
- EMIT(PPC_RAW_ANDI(r_A, r_A, K));
- else {
- PPC_LI32(r_scratch1, K);
- EMIT(PPC_RAW_AND(r_A, r_A, r_scratch1));
- }
- break;
- case BPF_ALU | BPF_OR | BPF_X:
- ctx->seen |= SEEN_XREG;
- EMIT(PPC_RAW_OR(r_A, r_A, r_X));
- break;
- case BPF_ALU | BPF_OR | BPF_K:
- if (IMM_L(K))
- EMIT(PPC_RAW_ORI(r_A, r_A, IMM_L(K)));
- if (K >= 65536)
- EMIT(PPC_RAW_ORIS(r_A, r_A, IMM_H(K)));
- break;
- case BPF_ANC | SKF_AD_ALU_XOR_X:
- case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */
- ctx->seen |= SEEN_XREG;
- EMIT(PPC_RAW_XOR(r_A, r_A, r_X));
- break;
- case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
- if (IMM_L(K))
- EMIT(PPC_RAW_XORI(r_A, r_A, IMM_L(K)));
- if (K >= 65536)
- EMIT(PPC_RAW_XORIS(r_A, r_A, IMM_H(K)));
- break;
- case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
- ctx->seen |= SEEN_XREG;
- EMIT(PPC_RAW_SLW(r_A, r_A, r_X));
- break;
- case BPF_ALU | BPF_LSH | BPF_K:
- if (K == 0)
- break;
- else
- EMIT(PPC_RAW_SLWI(r_A, r_A, K));
- break;
- case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
- ctx->seen |= SEEN_XREG;
- EMIT(PPC_RAW_SRW(r_A, r_A, r_X));
- break;
- case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
- if (K == 0)
- break;
- else
- EMIT(PPC_RAW_SRWI(r_A, r_A, K));
- break;
- case BPF_ALU | BPF_NEG:
- EMIT(PPC_RAW_NEG(r_A, r_A));
- break;
- case BPF_RET | BPF_K:
- PPC_LI32(r_ret, K);
- if (!K) {
- if (ctx->pc_ret0 == -1)
- ctx->pc_ret0 = i;
- }
- /*
- * If this isn't the very last instruction, branch to
- * the epilogue if we've stuff to clean up. Otherwise,
- * if there's nothing to tidy, just return. If we /are/
- * the last instruction, we're about to fall through to
- * the epilogue to return.
- */
- if (i != flen - 1) {
- /*
- * Note: 'seen' is properly valid only on pass
- * #2. Both parts of this conditional are the
- * same instruction size though, meaning the
- * first pass will still correctly determine the
- * code size/addresses.
- */
- if (ctx->seen)
- PPC_JMP(exit_addr);
- else
- EMIT(PPC_RAW_BLR());
- }
- break;
- case BPF_RET | BPF_A:
- EMIT(PPC_RAW_MR(r_ret, r_A));
- if (i != flen - 1) {
- if (ctx->seen)
- PPC_JMP(exit_addr);
- else
- EMIT(PPC_RAW_BLR());
- }
- break;
- case BPF_MISC | BPF_TAX: /* X = A */
- EMIT(PPC_RAW_MR(r_X, r_A));
- break;
- case BPF_MISC | BPF_TXA: /* A = X */
- ctx->seen |= SEEN_XREG;
- EMIT(PPC_RAW_MR(r_A, r_X));
- break;
-
- /*** Constant loads/M[] access ***/
- case BPF_LD | BPF_IMM: /* A = K */
- PPC_LI32(r_A, K);
- break;
- case BPF_LDX | BPF_IMM: /* X = K */
- PPC_LI32(r_X, K);
- break;
- case BPF_LD | BPF_MEM: /* A = mem[K] */
- EMIT(PPC_RAW_MR(r_A, r_M + (K & 0xf)));
- ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
- break;
- case BPF_LDX | BPF_MEM: /* X = mem[K] */
- EMIT(PPC_RAW_MR(r_X, r_M + (K & 0xf)));
- ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
- break;
- case BPF_ST: /* mem[K] = A */
- EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_A));
- ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
- break;
- case BPF_STX: /* mem[K] = X */
- EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_X));
- ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
- break;
- case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */
- BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4);
- PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
- break;
- case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */
- PPC_LWZ_OFFS(r_A, r_skb, K);
- break;
- case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
- PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
- break;
-
- /*** Ancillary info loads ***/
- case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
- BUILD_BUG_ON(sizeof_field(struct sk_buff,
- protocol) != 2);
- PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- protocol));
- break;
- case BPF_ANC | SKF_AD_IFINDEX:
- case BPF_ANC | SKF_AD_HATYPE:
- BUILD_BUG_ON(sizeof_field(struct net_device,
- ifindex) != 4);
- BUILD_BUG_ON(sizeof_field(struct net_device,
- type) != 2);
- PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
- dev));
- EMIT(PPC_RAW_CMPDI(r_scratch1, 0));
- if (ctx->pc_ret0 != -1) {
- PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
- } else {
- /* Exit, returning 0; first pass hits here. */
- PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12);
- EMIT(PPC_RAW_LI(r_ret, 0));
- PPC_JMP(exit_addr);
- }
- if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
- PPC_LWZ_OFFS(r_A, r_scratch1,
- offsetof(struct net_device, ifindex));
- } else {
- PPC_LHZ_OFFS(r_A, r_scratch1,
- offsetof(struct net_device, type));
- }
-
- break;
- case BPF_ANC | SKF_AD_MARK:
- BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);
- PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- mark));
- break;
- case BPF_ANC | SKF_AD_RXHASH:
- BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);
- PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- hash));
- break;
- case BPF_ANC | SKF_AD_VLAN_TAG:
- BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);
-
- PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- vlan_tci));
- break;
- case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
- PPC_LBZ_OFFS(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET());
- if (PKT_VLAN_PRESENT_BIT)
- EMIT(PPC_RAW_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT));
- if (PKT_VLAN_PRESENT_BIT < 7)
- EMIT(PPC_RAW_ANDI(r_A, r_A, 1));
- break;
- case BPF_ANC | SKF_AD_QUEUE:
- BUILD_BUG_ON(sizeof_field(struct sk_buff,
- queue_mapping) != 2);
- PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- queue_mapping));
- break;
- case BPF_ANC | SKF_AD_PKTTYPE:
- PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET());
- EMIT(PPC_RAW_ANDI(r_A, r_A, PKT_TYPE_MAX));
- EMIT(PPC_RAW_SRWI(r_A, r_A, 5));
- break;
- case BPF_ANC | SKF_AD_CPU:
- PPC_BPF_LOAD_CPU(r_A);
- break;
- /*** Absolute loads from packet header/data ***/
- case BPF_LD | BPF_W | BPF_ABS:
- func = CHOOSE_LOAD_FUNC(K, sk_load_word);
- goto common_load;
- case BPF_LD | BPF_H | BPF_ABS:
- func = CHOOSE_LOAD_FUNC(K, sk_load_half);
- goto common_load;
- case BPF_LD | BPF_B | BPF_ABS:
- func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
- common_load:
- /* Load from [K]. */
- ctx->seen |= SEEN_DATAREF;
- PPC_FUNC_ADDR(r_scratch1, func);
- EMIT(PPC_RAW_MTLR(r_scratch1));
- PPC_LI32(r_addr, K);
- EMIT(PPC_RAW_BLRL());
/*
- * Helper returns 'lt' condition on error, and an
- * appropriate return value in r3
+ * Save ctx->idx as this would currently point to the
+ * end of the JITed image and set it to the offset of
+ * the instruction sequence corresponding to the
+ * subprog call temporarily.
*/
- PPC_BCC(COND_LT, exit_addr);
- break;
-
- /*** Indirect loads from packet header/data ***/
- case BPF_LD | BPF_W | BPF_IND:
- func = sk_load_word;
- goto common_load_ind;
- case BPF_LD | BPF_H | BPF_IND:
- func = sk_load_half;
- goto common_load_ind;
- case BPF_LD | BPF_B | BPF_IND:
- func = sk_load_byte;
- common_load_ind:
+ tmp_idx = ctx->idx;
+ ctx->idx = addrs[i] / 4;
+ bpf_jit_emit_func_call_rel(image, ctx, func_addr);
+
/*
- * Load from [X + K]. Negative offsets are tested for
- * in the helper functions.
- */
- ctx->seen |= SEEN_DATAREF | SEEN_XREG;
- PPC_FUNC_ADDR(r_scratch1, func);
- EMIT(PPC_RAW_MTLR(r_scratch1));
- EMIT(PPC_RAW_ADDI(r_addr, r_X, IMM_L(K)));
- if (K >= 32768)
- EMIT(PPC_RAW_ADDIS(r_addr, r_addr, IMM_HA(K)));
- EMIT(PPC_RAW_BLRL());
- /* If error, cr0.LT set */
- PPC_BCC(COND_LT, exit_addr);
- break;
-
- case BPF_LDX | BPF_B | BPF_MSH:
- func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
- goto common_load;
- break;
-
- /*** Jump and branches ***/
- case BPF_JMP | BPF_JA:
- if (K != 0)
- PPC_JMP(addrs[i + 1 + K]);
- break;
-
- case BPF_JMP | BPF_JGT | BPF_K:
- case BPF_JMP | BPF_JGT | BPF_X:
- true_cond = COND_GT;
- goto cond_branch;
- case BPF_JMP | BPF_JGE | BPF_K:
- case BPF_JMP | BPF_JGE | BPF_X:
- true_cond = COND_GE;
- goto cond_branch;
- case BPF_JMP | BPF_JEQ | BPF_K:
- case BPF_JMP | BPF_JEQ | BPF_X:
- true_cond = COND_EQ;
- goto cond_branch;
- case BPF_JMP | BPF_JSET | BPF_K:
- case BPF_JMP | BPF_JSET | BPF_X:
- true_cond = COND_NE;
- cond_branch:
- /* same targets, can avoid doing the test :) */
- if (filter[i].jt == filter[i].jf) {
- if (filter[i].jt > 0)
- PPC_JMP(addrs[i + 1 + filter[i].jt]);
- break;
- }
-
- switch (code) {
- case BPF_JMP | BPF_JGT | BPF_X:
- case BPF_JMP | BPF_JGE | BPF_X:
- case BPF_JMP | BPF_JEQ | BPF_X:
- ctx->seen |= SEEN_XREG;
- EMIT(PPC_RAW_CMPLW(r_A, r_X));
- break;
- case BPF_JMP | BPF_JSET | BPF_X:
- ctx->seen |= SEEN_XREG;
- EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, r_X));
- break;
- case BPF_JMP | BPF_JEQ | BPF_K:
- case BPF_JMP | BPF_JGT | BPF_K:
- case BPF_JMP | BPF_JGE | BPF_K:
- if (K < 32768)
- EMIT(PPC_RAW_CMPLWI(r_A, K));
- else {
- PPC_LI32(r_scratch1, K);
- EMIT(PPC_RAW_CMPLW(r_A, r_scratch1));
- }
- break;
- case BPF_JMP | BPF_JSET | BPF_K:
- if (K < 32768)
- /* PPC_ANDI is /only/ dot-form */
- EMIT(PPC_RAW_ANDI(r_scratch1, r_A, K));
- else {
- PPC_LI32(r_scratch1, K);
- EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A,
- r_scratch1));
- }
- break;
- }
- /* Sometimes branches are constructed "backward", with
- * the false path being the branch and true path being
- * a fallthrough to the next instruction.
+ * Restore ctx->idx here. This is safe as the length
+ * of the JITed sequence remains unchanged.
*/
- if (filter[i].jt == 0)
- /* Swap the sense of the branch */
- PPC_BCC(true_cond ^ COND_CMP_TRUE,
- addrs[i + 1 + filter[i].jf]);
- else {
- PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]);
- if (filter[i].jf != 0)
- PPC_JMP(addrs[i + 1 + filter[i].jf]);
- }
- break;
- default:
- /* The filter contains something cruel & unusual.
- * We don't handle it, but also there shouldn't be
- * anything missing from our list.
- */
- if (printk_ratelimit())
- pr_err("BPF filter opcode %04x (@%d) unsupported\n",
- filter[i].code, i);
- return -ENOTSUPP;
+ ctx->idx = tmp_idx;
}
-
}
- /* Set end-of-body-code address for exit. */
- addrs[i] = ctx->idx * 4;
return 0;
}
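As a concrete (hypothetical) illustration of the fixup above: if the call implementing instruction i was originally JITed at byte offset addrs[i] = 288, the extra pass temporarily sets ctx->idx to 288 / 4 = 72, re-emits the call sequence in place with the now-resolved address, and then restores ctx->idx. Because the re-emitted sequence has the same length, no other offset in the image moves.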
-void bpf_jit_compile(struct bpf_prog *fp)
+struct powerpc64_jit_data {
+ struct bpf_binary_header *header;
+ u32 *addrs;
+ u8 *image;
+ u32 proglen;
+ struct codegen_context ctx;
+};
+
+bool bpf_jit_needs_zext(void)
+{
+ return true;
+}
+
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
- unsigned int proglen;
- unsigned int alloclen;
- u32 *image = NULL;
+ u32 proglen;
+ u32 alloclen;
+ u8 *image = NULL;
u32 *code_base;
- unsigned int *addrs;
+ u32 *addrs;
+ struct powerpc64_jit_data *jit_data;
struct codegen_context cgctx;
int pass;
- int flen = fp->len;
+ int flen;
+ struct bpf_binary_header *bpf_hdr;
+ struct bpf_prog *org_fp = fp;
+ struct bpf_prog *tmp_fp;
+ bool bpf_blinded = false;
+ bool extra_pass = false;
+
+ if (!fp->jit_requested)
+ return org_fp;
+
+ tmp_fp = bpf_jit_blind_constants(org_fp);
+ if (IS_ERR(tmp_fp))
+ return org_fp;
+
+ if (tmp_fp != org_fp) {
+ bpf_blinded = true;
+ fp = tmp_fp;
+ }
+
+ jit_data = fp->aux->jit_data;
+ if (!jit_data) {
+ jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+ if (!jit_data) {
+ fp = org_fp;
+ goto out;
+ }
+ fp->aux->jit_data = jit_data;
+ }
- if (!bpf_jit_enable)
- return;
+ flen = fp->len;
+ addrs = jit_data->addrs;
+ if (addrs) {
+ cgctx = jit_data->ctx;
+ image = jit_data->image;
+ bpf_hdr = jit_data->header;
+ proglen = jit_data->proglen;
+ alloclen = proglen + FUNCTION_DESCR_SIZE;
+ extra_pass = true;
+ goto skip_init_ctx;
+ }
addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
- if (addrs == NULL)
- return;
+ if (addrs == NULL) {
+ fp = org_fp;
+ goto out_addrs;
+ }
- /*
- * There are multiple assembly passes as the generated code will change
- * size as it settles down, figuring out the max branch offsets/exit
- * paths required.
- *
- * The range of standard conditional branches is +/- 32Kbytes. Since
- * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to
- * finish with 8 bytes/instruction. Not feasible, so long jumps are
- * used, distinct from short branches.
- *
- * Current:
- *
- * For now, both branch types assemble to 2 words (short branches padded
- * with a NOP); this is less efficient, but assembly will always complete
- * after exactly 3 passes:
- *
- * First pass: No code buffer; Program is "faux-generated" -- no code
- * emitted but maximum size of output determined (and addrs[] filled
- * in). Also, we note whether we use M[], whether we use skb data, etc.
- * All generation choices assumed to be 'worst-case', e.g. branches all
- * far (2 instructions), return path code reduction not available, etc.
- *
- * Second pass: Code buffer allocated with size determined previously.
- * Prologue generated to support features we have seen used. Exit paths
- * determined and addrs[] is filled in again, as code may be slightly
- * smaller as a result.
- *
- * Third pass: Code generated 'for real', and branch destinations
- * determined from now-accurate addrs[] map.
- *
- * Ideal:
- *
- * If we optimise this, near branches will be shorter. On the
- * first assembly pass, we should err on the side of caution and
- * generate the biggest code. On subsequent passes, branches will be
- * generated short or long and code size will reduce. With smaller
- * code, more branches may fall into the short category, and code will
- * reduce more.
- *
- * Finally, if we see one pass generate code the same size as the
- * previous pass we have converged and should now generate code for
- * real. Allocating at the end will also save the memory that would
- * otherwise be wasted by the (small) current code shrinkage.
- * Preferably, we should do a small number of passes (e.g. 5) and if we
- * haven't converged by then, get impatient and force code to generate
- * as-is, even if the odd branch would be left long. The chances of a
- * long jump are tiny with all but the most enormous of BPF filter
- * inputs, so we should usually converge on the third pass.
- */
+ memset(&cgctx, 0, sizeof(struct codegen_context));
+ memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p));
+
+ /* Make sure that the stack is quadword aligned. */
+ cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
- cgctx.idx = 0;
- cgctx.seen = 0;
- cgctx.pc_ret0 = -1;
/* Scouting faux-generate pass 0 */
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs))
+ if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
/* We hit something illegal or unsupported. */
- goto out;
+ fp = org_fp;
+ goto out_addrs;
+ }
+
+ /*
+ * If we have seen a tail call, we need a second pass.
+ * This is because bpf_jit_emit_common_epilogue() is called
+ * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
+ */
+ if (cgctx.seen & SEEN_TAILCALL) {
+ cgctx.idx = 0;
+ if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
+ fp = org_fp;
+ goto out_addrs;
+ }
+ }
+ bpf_jit_realloc_regs(&cgctx);
/*
* Pretend to build prologue, given the features we've seen. This will
 * update cgctx.idx as it pretends to output instructions, then we can
* calculate total size from idx.
*/
- bpf_jit_build_prologue(fp, 0, &cgctx);
+ bpf_jit_build_prologue(0, &cgctx);
bpf_jit_build_epilogue(0, &cgctx);
proglen = cgctx.idx * 4;
alloclen = proglen + FUNCTION_DESCR_SIZE;
- image = module_alloc(alloclen);
- if (!image)
- goto out;
- code_base = image + (FUNCTION_DESCR_SIZE/4);
+ bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
+ if (!bpf_hdr) {
+ fp = org_fp;
+ goto out_addrs;
+ }
+
+skip_init_ctx:
+ code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
+
+ if (extra_pass) {
+ /*
+ * Do not touch the prologue and epilogue as they will remain
+ * unchanged. Only fix the branch target address for subprog
+ * calls in the body.
+ *
+ * This does not change the offsets and lengths of the subprog
+ * call instruction sequences and hence, the size of the JITed
+ * image as well.
+ */
+ bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
+
+ /* There is no need to perform the usual passes. */
+ goto skip_codegen_passes;
+ }
/* Code generation passes 1-2 */
for (pass = 1; pass < 3; pass++) {
/* Now build the prologue, body code & epilogue for real. */
cgctx.idx = 0;
- bpf_jit_build_prologue(fp, code_base, &cgctx);
- bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+ bpf_jit_build_prologue(code_base, &cgctx);
+ bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
bpf_jit_build_epilogue(code_base, &cgctx);
if (bpf_jit_enable > 1)
@@ -652,15 +218,15 @@ void bpf_jit_compile(struct bpf_prog *fp)
proglen - (cgctx.idx * 4), cgctx.seen);
}
+skip_codegen_passes:
if (bpf_jit_enable > 1)
- /* Note that we output the base address of the code_base
+ /*
+ * Note that we output the base address of the code_base
* rather than image, since opcodes are in code_base.
*/
bpf_jit_dump(flen, proglen, pass, code_base);
- bpf_flush_icache(code_base, code_base + (proglen/4));
-
-#ifdef CONFIG_PPC64
+#ifdef PPC64_ELF_ABI_v1
/* Function descriptor nastiness: Address + TOC */
((u64 *)image)[0] = (u64)code_base;
((u64 *)image)[1] = local_paca->kernel_toc;
@@ -668,16 +234,38 @@ void bpf_jit_compile(struct bpf_prog *fp)
fp->bpf_func = (void *)image;
fp->jited = 1;
+ fp->jited_len = alloclen;
+
+ bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
+ if (!fp->is_func || extra_pass) {
+ bpf_prog_fill_jited_linfo(fp, addrs);
+out_addrs:
+ kfree(addrs);
+ kfree(jit_data);
+ fp->aux->jit_data = NULL;
+ } else {
+ jit_data->addrs = addrs;
+ jit_data->ctx = cgctx;
+ jit_data->proglen = proglen;
+ jit_data->image = image;
+ jit_data->header = bpf_hdr;
+ }
out:
- kfree(addrs);
- return;
+ if (bpf_blinded)
+ bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
+
+ return fp;
}
+/* Overriding bpf_jit_free() as we don't set images read-only. */
void bpf_jit_free(struct bpf_prog *fp)
{
+ unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
+ struct bpf_binary_header *bpf_hdr = (void *)addr;
+
if (fp->jited)
- module_memfree(fp->bpf_func);
+ bpf_jit_binary_free(bpf_hdr);
bpf_prog_unlock_free(fp);
}
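The rewritten bpf_jit_free() above relies on bpf_jit_binary_alloc() placing a page-aligned struct bpf_binary_header at the start of the allocation and the program entry point within its first page, so masking fp->bpf_func with PAGE_MASK recovers the header. A rough sketch of that assumed layout (illustrative only):

	/*
	 *  page-aligned                          image (inside the same pages)
	 *  |                                     |
	 *  v                                     v
	 *  [ struct bpf_binary_header ][ pad ][ prologue | body | epilogue ]
	 *
	 *  (bpf_func & PAGE_MASK) therefore lands back on the header, and
	 *  bpf_hdr->pages * PAGE_SIZE -- the range flushed above -- spans it all.
	 */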
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
new file mode 100644
index 000000000000..bbb16099e8c7
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -0,0 +1,1100 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * eBPF JIT compiler for PPC32
+ *
+ * Copyright 2020 Christophe Leroy <christophe.leroy@csgroup.eu>
+ * CS GROUP France
+ *
+ * Based on PPC64 eBPF JIT compiler by Naveen N. Rao
+ */
+#include <linux/moduleloader.h>
+#include <asm/cacheflush.h>
+#include <asm/asm-compat.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/if_vlan.h>
+#include <asm/kprobes.h>
+#include <linux/bpf.h>
+
+#include "bpf_jit.h"
+
+/*
+ * Stack layout:
+ *
+ * [ prev sp ] <-------------
+ * [ nv gpr save area ] 16 * 4 |
+ * fp (r31) --> [ ebpf stack space ] up to 512 |
+ * [ frame header ] 16 |
+ * sp (r1) ---> [ stack pointer ] --------------
+ */
+
+/* for gpr non volatile registers r17 to r31 (15) + tail call */
+#define BPF_PPC_STACK_SAVE (15 * 4 + 4)
+/* stack frame, ensure this is quadword aligned */
+#define BPF_PPC_STACKFRAME(ctx) (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size)
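For a sense of the resulting frame size, a small worked example (assuming the 16-byte frame header shown in the layout above):

	/* fp->aux->stack_depth == 40  ->  ctx->stack_size = round_up(40, 16) =  48 */
	/* BPF_PPC_STACK_SAVE                              = 15 * 4 + 4       =  64 */
	/* BPF_PPC_STACKFRAME(ctx)                         = 16 + 64 + 48     = 128 */
	/* 128 is a multiple of 16, so the stack stays quadword aligned             */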
+
+/* BPF register usage */
+#define TMP_REG (MAX_BPF_JIT_REG + 0)
+
+/* BPF to ppc register mappings */
+const int b2p[MAX_BPF_JIT_REG + 1] = {
+ /* function return value */
+ [BPF_REG_0] = 12,
+ /* function arguments */
+ [BPF_REG_1] = 4,
+ [BPF_REG_2] = 6,
+ [BPF_REG_3] = 8,
+ [BPF_REG_4] = 10,
+ [BPF_REG_5] = 22,
+ /* non volatile registers */
+ [BPF_REG_6] = 24,
+ [BPF_REG_7] = 26,
+ [BPF_REG_8] = 28,
+ [BPF_REG_9] = 30,
+ /* frame pointer aka BPF_REG_10 */
+ [BPF_REG_FP] = 18,
+ /* eBPF jit internal registers */
+ [BPF_REG_AX] = 20,
+ [TMP_REG] = 31, /* 32 bits */
+};
+
+static int bpf_to_ppc(struct codegen_context *ctx, int reg)
+{
+ return ctx->b2p[reg];
+}
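Each 64-bit BPF register lives in a pair of 32-bit GPRs: the register recorded in b2p[] holds the low word and the GPR just below it holds the high word. For illustration, using the default mapping above:

	/* BPF_REG_1:  bpf_to_ppc(ctx, BPF_REG_1) == 4   -> low word in r4,  high word in r3  */
	/* BPF_REG_FP: bpf_to_ppc(ctx, BPF_REG_FP) == 18 -> low word in r18, high word in r17 */
	u32 dst_reg   = bpf_to_ppc(ctx, insn->dst_reg);	/* low  32 bits */
	u32 dst_reg_h = dst_reg - 1;			/* high 32 bits */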
+
+/* PPC NVR range -- update this if we ever use NVRs below r17 */
+#define BPF_PPC_NVR_MIN 17
+#define BPF_PPC_TC 16
+
+static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
+{
+ if ((reg >= BPF_PPC_NVR_MIN && reg < 32) || reg == BPF_PPC_TC)
+ return BPF_PPC_STACKFRAME(ctx) - 4 * (32 - reg);
+
+ WARN(true, "BPF JIT is asking about unknown registers, will crash the stack");
+ /* Use the hole we have left for alignment */
+ return BPF_PPC_STACKFRAME(ctx) - 4;
+}
+
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+ if (ctx->seen & SEEN_FUNC)
+ return;
+
+ while (ctx->seen & SEEN_NVREG_MASK &&
+ (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) {
+ int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab));
+ int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa));
+ int i;
+
+ for (i = BPF_REG_0; i <= TMP_REG; i++) {
+ if (ctx->b2p[i] != old)
+ continue;
+ ctx->b2p[i] = new;
+ bpf_set_seen_register(ctx, new);
+ bpf_clear_seen_register(ctx, old);
+ if (i != TMP_REG) {
+ bpf_set_seen_register(ctx, new - 1);
+ bpf_clear_seen_register(ctx, old - 1);
+ }
+ break;
+ }
+ }
+}
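bpf_jit_realloc_regs() remaps BPF registers that landed on non-volatile GPRs onto unused volatile ones when no helper is called, so the prologue/epilogue save/restore can be skipped. A worked example under an assumed ctx state (kernel fls() semantics, values illustrative only):

	/* Suppose only BPF_REG_6 (r24/r23) was seen and SEEN_FUNC is clear:        */
	unsigned int seen = (1u << (31 - 24)) | (1u << (31 - 23));   /* 0x00000180  */
	int old = 32 - fls(seen & (SEEN_NVREG_MASK & 0xaaaaaaab));   /* -> 24       */
	int new = 32 - fls(~seen & (SEEN_VREG_MASK & 0xaaaaaaaa));   /* -> 4        */
	/* b2p[BPF_REG_6] becomes 4: the pair moves from r24/r23 to r4/r3, and      */
	/* no non-volatile register needs to be saved in the prologue any more.     */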
+
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
+{
+ int i;
+
+ /* First arg comes in as a 32-bit pointer. */
+ EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), __REG_R3));
+ EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0));
+ EMIT(PPC_RAW_STWU(__REG_R1, __REG_R1, -BPF_PPC_STACKFRAME(ctx)));
+
+ /*
+ * Initialize tail_call_cnt in stack frame if we do tail calls.
+ * Otherwise, put in NOPs so that it can be skipped when we are
+ * invoked through a tail call.
+ */
+ if (ctx->seen & SEEN_TAILCALL) {
+ EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+ } else {
+ EMIT(PPC_RAW_NOP());
+ }
+
+#define BPF_TAILCALL_PROLOGUE_SIZE 16
+
+ /*
+ * We need a stack frame, but we don't necessarily need to
+ * save/restore LR unless we call other functions
+ */
+ if (ctx->seen & SEEN_FUNC)
+ EMIT(PPC_RAW_MFLR(__REG_R0));
+
+ /*
+ * Back up non-volatile regs -- registers r17-r31
+ */
+ for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+ if (bpf_is_seen_register(ctx, i))
+ EMIT(PPC_RAW_STW(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i)));
+
+ /* If needed, retrieve arguments 9 and 10, i.e. the 5th 64-bit arg. */
+ if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) {
+ EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, BPF_PPC_STACKFRAME(ctx) + 8));
+ EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, BPF_PPC_STACKFRAME(ctx) + 12));
+ }
+
+ /* Setup frame pointer to point to the bpf stack area */
+ if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) {
+ EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0));
+ EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), __REG_R1,
+ STACK_FRAME_MIN_SIZE + ctx->stack_size));
+ }
+
+ if (ctx->seen & SEEN_FUNC)
+ EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+}
+
+static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
+{
+ int i;
+
+ /* Restore NVRs */
+ for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+ if (bpf_is_seen_register(ctx, i))
+ EMIT(PPC_RAW_LWZ(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i)));
+}
+
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+ EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_0)));
+
+ bpf_jit_emit_common_epilogue(image, ctx);
+
+ /* Tear down our stack frame */
+
+ if (ctx->seen & SEEN_FUNC)
+ EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+
+ EMIT(PPC_RAW_ADDI(__REG_R1, __REG_R1, BPF_PPC_STACKFRAME(ctx)));
+
+ if (ctx->seen & SEEN_FUNC)
+ EMIT(PPC_RAW_MTLR(__REG_R0));
+
+ EMIT(PPC_RAW_BLR());
+}
+
+void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func)
+{
+ s32 rel = (s32)func - (s32)(image + ctx->idx);
+
+ if (image && rel < 0x2000000 && rel >= -0x2000000) {
+ PPC_BL_ABS(func);
+ } else {
+ /* Load function address into r0 */
+ EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func)));
+ EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func)));
+ EMIT(PPC_RAW_MTLR(__REG_R0));
+ EMIT(PPC_RAW_BLRL());
+ }
+}
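The direct-call path above is only usable when the target is reachable by a single 'bl', whose 24-bit displacement field (scaled by 4) covers +/- 32 MB; otherwise the 32-bit address is built in r0 and the call goes through LR. A small self-contained check equivalent to the test in the function (sketch, not part of the patch):

	/* A relative 'bl' encodes a signed 26-bit byte offset: +/- 0x2000000 (32 MB). */
	static bool bl_in_range(unsigned long from, unsigned long to)
	{
		long rel = (long)to - (long)from;

		return rel >= -0x2000000 && rel < 0x2000000;
	}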
+
+static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
+{
+ /*
+ * By now, the eBPF program has already set up parameters in r3-r8
+ * r3-r4/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
+ * r5-r6/BPF_REG_2 - pointer to bpf_array
+ * r7-r8/BPF_REG_3 - index in bpf_array
+ */
+ int b2p_bpf_array = bpf_to_ppc(ctx, BPF_REG_2);
+ int b2p_index = bpf_to_ppc(ctx, BPF_REG_3);
+
+ /*
+ * if (index >= array->map.max_entries)
+ * goto out;
+ */
+ EMIT(PPC_RAW_LWZ(__REG_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
+ EMIT(PPC_RAW_CMPLW(b2p_index, __REG_R0));
+ EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+ PPC_BCC(COND_GE, out);
+
+ /*
+ * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
+ */
+ EMIT(PPC_RAW_CMPLWI(__REG_R0, MAX_TAIL_CALL_CNT));
+ /* tail_call_cnt++; */
+ EMIT(PPC_RAW_ADDIC(__REG_R0, __REG_R0, 1));
+ PPC_BCC(COND_GT, out);
+
+ /* prog = array->ptrs[index]; */
+ EMIT(PPC_RAW_RLWINM(__REG_R3, b2p_index, 2, 0, 29));
+ EMIT(PPC_RAW_ADD(__REG_R3, __REG_R3, b2p_bpf_array));
+ EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_array, ptrs)));
+ EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+
+ /*
+ * if (prog == NULL)
+ * goto out;
+ */
+ EMIT(PPC_RAW_CMPLWI(__REG_R3, 0));
+ PPC_BCC(COND_EQ, out);
+
+ /* goto *(prog->bpf_func + prologue_size); */
+ EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_prog, bpf_func)));
+
+ if (ctx->seen & SEEN_FUNC)
+ EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+
+ EMIT(PPC_RAW_ADDIC(__REG_R3, __REG_R3, BPF_TAILCALL_PROLOGUE_SIZE));
+
+ if (ctx->seen & SEEN_FUNC)
+ EMIT(PPC_RAW_MTLR(__REG_R0));
+
+ EMIT(PPC_RAW_MTCTR(__REG_R3));
+
+ EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_1)));
+
+ /* restore NVRs, ... */
+ bpf_jit_emit_common_epilogue(image, ctx);
+
+ EMIT(PPC_RAW_BCTR());
+ /* out: */
+}
+
+/* Assemble the body code between the prologue & epilogue */
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+ u32 *addrs, bool extra_pass)
+{
+ const struct bpf_insn *insn = fp->insnsi;
+ int flen = fp->len;
+ int i, ret;
+
+ /* Start of epilogue code - will only be valid 2nd pass onwards */
+ u32 exit_addr = addrs[flen];
+
+ for (i = 0; i < flen; i++) {
+ u32 code = insn[i].code;
+ u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg);
+ u32 dst_reg_h = dst_reg - 1;
+ u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg);
+ u32 src_reg_h = src_reg - 1;
+ u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG);
+ s16 off = insn[i].off;
+ s32 imm = insn[i].imm;
+ bool func_addr_fixed;
+ u64 func_addr;
+ u32 true_cond;
+
+ /*
+ * addrs[] maps a BPF bytecode address into a real offset from
+ * the start of the body code.
+ */
+ addrs[i] = ctx->idx * 4;
+
+ /*
+ * As an optimization, we note down which registers
+ * are used so that we can only save/restore those in our
+ * prologue and epilogue. We do this here regardless of whether
+ * the actual BPF instruction uses src/dst registers or not
+ * (for instance, BPF_CALL does not use them). The expectation
+ * is that those instructions will have src_reg/dst_reg set to
+ * 0. Even otherwise, we just lose some prologue/epilogue
+ * optimization but everything else should work without
+ * any issues.
+ */
+ if (dst_reg >= 3 && dst_reg < 32) {
+ bpf_set_seen_register(ctx, dst_reg);
+ bpf_set_seen_register(ctx, dst_reg_h);
+ }
+
+ if (src_reg >= 3 && src_reg < 32) {
+ bpf_set_seen_register(ctx, src_reg);
+ bpf_set_seen_register(ctx, src_reg_h);
+ }
+
+ switch (code) {
+ /*
+ * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
+ */
+ case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
+ EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
+ EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_ADDE(dst_reg_h, dst_reg_h, src_reg_h));
+ break;
+ case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
+ EMIT(PPC_RAW_SUBFC(dst_reg, src_reg, dst_reg));
+ EMIT(PPC_RAW_SUBFE(dst_reg_h, src_reg_h, dst_reg_h));
+ break;
+ case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
+ imm = -imm;
+ fallthrough;
+ case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
+ if (IMM_HA(imm) & 0xffff)
+ EMIT(PPC_RAW_ADDIS(dst_reg, dst_reg, IMM_HA(imm)));
+ if (IMM_L(imm))
+ EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm)));
+ break;
+ case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
+ imm = -imm;
+ fallthrough;
+ case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
+ if (!imm)
+ break;
+
+ if (imm >= -32768 && imm < 32768) {
+ EMIT(PPC_RAW_ADDIC(dst_reg, dst_reg, imm));
+ } else {
+ PPC_LI32(__REG_R0, imm);
+ EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, __REG_R0));
+ }
+ if (imm >= 0)
+ EMIT(PPC_RAW_ADDZE(dst_reg_h, dst_reg_h));
+ else
+ EMIT(PPC_RAW_ADDME(dst_reg_h, dst_reg_h));
+ break;
+ case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
+ bpf_set_seen_register(ctx, tmp_reg);
+ EMIT(PPC_RAW_MULW(__REG_R0, dst_reg, src_reg_h));
+ EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, src_reg));
+ EMIT(PPC_RAW_MULHWU(tmp_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0));
+ EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg));
+ break;
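The BPF_ALU64 multiply above builds the low 64 bits of the 64x64 product from 32-bit parts: only a_lo*b_lo contributes to both words, while the cross terms only affect the high word. A reference model of what the emitted mulw/mulhwu sequence computes (sketch, not part of the patch):

	static void mul64_lo(u32 *hi, u32 *lo, u32 a_hi, u32 a_lo, u32 b_hi, u32 b_lo)
	{
		u64 p = (u64)a_lo * b_lo;

		*lo = (u32)p;			/* mulw   dst,   dst,   src   */
		*hi = a_hi * b_lo		/* mulw   dst_h, dst_h, src   */
		    + a_lo * b_hi		/* mulw   r0,    dst,   src_h */
		    + (u32)(p >> 32);		/* mulhwu tmp,   dst,   src   */
	}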
+ case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
+ EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
+ break;
+ case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
+ if (imm >= -32768 && imm < 32768) {
+ EMIT(PPC_RAW_MULI(dst_reg, dst_reg, imm));
+ } else {
+ PPC_LI32(__REG_R0, imm);
+ EMIT(PPC_RAW_MULW(dst_reg, dst_reg, __REG_R0));
+ }
+ break;
+ case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
+ if (!imm) {
+ PPC_LI32(dst_reg, 0);
+ PPC_LI32(dst_reg_h, 0);
+ break;
+ }
+ if (imm == 1)
+ break;
+ if (imm == -1) {
+ EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0));
+ EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h));
+ break;
+ }
+ bpf_set_seen_register(ctx, tmp_reg);
+ PPC_LI32(tmp_reg, imm);
+ EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, tmp_reg));
+ if (imm < 0)
+ EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, dst_reg));
+ EMIT(PPC_RAW_MULHWU(__REG_R0, dst_reg, tmp_reg));
+ EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp_reg));
+ EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0));
+ break;
+ case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
+ EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg));
+ break;
+ case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
+ EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, src_reg));
+ EMIT(PPC_RAW_MULW(__REG_R0, src_reg, __REG_R0));
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0));
+ break;
+ case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
+ return -EOPNOTSUPP;
+ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
+ return -EOPNOTSUPP;
+ case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
+ if (!imm)
+ return -EINVAL;
+ if (imm == 1)
+ break;
+
+ PPC_LI32(__REG_R0, imm);
+ EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, __REG_R0));
+ break;
+ case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
+ if (!imm)
+ return -EINVAL;
+
+ if (!is_power_of_2((u32)imm)) {
+ bpf_set_seen_register(ctx, tmp_reg);
+ PPC_LI32(tmp_reg, imm);
+ EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, tmp_reg));
+ EMIT(PPC_RAW_MULW(__REG_R0, tmp_reg, __REG_R0));
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0));
+ break;
+ }
+ if (imm == 1)
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ else
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2((u32)imm), 31));
+
+ break;
+ case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
+ if (!imm)
+ return -EINVAL;
+ if (imm < 0)
+ imm = -imm;
+ if (!is_power_of_2(imm))
+ return -EOPNOTSUPP;
+ if (imm == 1)
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ else
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2(imm), 31));
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ break;
+ case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
+ if (!imm)
+ return -EINVAL;
+ if (!is_power_of_2(abs(imm)))
+ return -EOPNOTSUPP;
+
+ if (imm < 0) {
+ EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0));
+ EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h));
+ imm = -imm;
+ }
+ if (imm == 1)
+ break;
+ imm = ilog2(imm);
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31));
+ EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1));
+ EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm));
+ break;
+ case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
+ EMIT(PPC_RAW_NEG(dst_reg, dst_reg));
+ break;
+ case BPF_ALU64 | BPF_NEG: /* dst = -dst */
+ EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0));
+ EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h));
+ break;
+
+ /*
+ * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
+ */
+ case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
+ EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_AND(dst_reg_h, dst_reg_h, src_reg_h));
+ break;
+ case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
+ EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
+ if (imm >= 0)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ fallthrough;
+ case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
+ if (!IMM_H(imm)) {
+ EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm)));
+ } else if (!IMM_L(imm)) {
+ EMIT(PPC_RAW_ANDIS(dst_reg, dst_reg, IMM_H(imm)));
+ } else if (imm == (((1 << fls(imm)) - 1) ^ ((1 << (ffs(imm) - 1)) - 1))) {
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0,
+ 32 - fls(imm), 32 - ffs(imm)));
+ } else {
+ PPC_LI32(__REG_R0, imm);
+ EMIT(PPC_RAW_AND(dst_reg, dst_reg, __REG_R0));
+ }
+ break;
+ case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, src_reg_h));
+ break;
+ case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
+ /* Sign-extended */
+ if (imm < 0)
+ EMIT(PPC_RAW_LI(dst_reg_h, -1));
+ fallthrough;
+ case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
+ if (IMM_L(imm))
+ EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm)));
+ if (IMM_H(imm))
+ EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm)));
+ break;
+ case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
+ if (dst_reg == src_reg) {
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ } else {
+ EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_XOR(dst_reg_h, dst_reg_h, src_reg_h));
+ }
+ break;
+ case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
+ if (dst_reg == src_reg)
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ else
+ EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
+ if (imm < 0)
+ EMIT(PPC_RAW_NOR(dst_reg_h, dst_reg_h, dst_reg_h));
+ fallthrough;
+ case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
+ if (IMM_L(imm))
+ EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm)));
+ if (IMM_H(imm))
+ EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm)));
+ break;
+ case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
+ EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
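+ /*
+ * 64-bit shift left composed from 32-bit slw/srw, which yield 0
+ * for shift amounts of 32-63. This handles all shift amounts
+ * 0-63 without a conditional branch.
+ */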
+ bpf_set_seen_register(ctx, tmp_reg);
+ EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
+ EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg));
+ EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+ EMIT(PPC_RAW_SRW(__REG_R0, dst_reg, __REG_R0));
+ EMIT(PPC_RAW_SLW(tmp_reg, dst_reg, tmp_reg));
+ EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0));
+ EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg));
+ break;
+ case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
+ if (!imm)
+ break;
+ EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm));
+ break;
+ case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
+ if (imm < 0)
+ return -EINVAL;
+ if (!imm)
+ break;
+ if (imm < 32) {
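+ /* hi = (hi << imm) | (lo >> (32 - imm)); lo <<= imm */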
+ EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, imm, 0, 31 - imm));
+ EMIT(PPC_RAW_RLWIMI(dst_reg_h, dst_reg, imm, 32 - imm, 31));
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, imm, 0, 31 - imm));
+ break;
+ }
+ if (imm < 64)
+ EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg, imm, 0, 31 - imm));
+ else
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ break;
+ case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
+ EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
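+ /* mirror of the 64-bit shift left above, moving bits from the high word into the low word */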
+ bpf_set_seen_register(ctx, tmp_reg);
+ EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
+ EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+ EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0));
+ EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0));
+ EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
+ break;
+ case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
+ if (!imm)
+ break;
+ EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm));
+ break;
+ case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
+ if (imm < 0)
+ return -EINVAL;
+ if (!imm)
+ break;
+ if (imm < 32) {
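+ /* lo = (lo >> imm) | (hi << (32 - imm)); hi >>= imm */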
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31));
+ EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1));
+ EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, 32 - imm, imm, 31));
+ break;
+ }
+ if (imm < 64)
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg_h, 64 - imm, imm - 32, 31));
+ else
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ break;
+ case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
+ EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
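+ /*
+ * Branch-free 64-bit arithmetic shift right: the rlwinm extracts
+ * the 0x20 bit of (src + 32), which is set for shifts below 32,
+ * so that the extra (hi >> (src + 32)) term is shifted out to
+ * zero instead of being merged into the low word.
+ */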
+ bpf_set_seen_register(ctx, tmp_reg);
+ EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32));
+ EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0));
+ EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0));
+ EMIT(PPC_RAW_RLWINM(__REG_R0, tmp_reg, 0, 26, 26));
+ EMIT(PPC_RAW_SRAW(tmp_reg, dst_reg_h, tmp_reg));
+ EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg));
+ EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, __REG_R0));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
+ break;
+ case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
+ if (!imm)
+ break;
+ EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm));
+ break;
+ case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
+ if (imm < 0)
+ return -EINVAL;
+ if (!imm)
+ break;
+ if (imm < 32) {
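+ /* same as the logical shift right above, but srawi keeps the sign in the high word */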
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31));
+ EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1));
+ EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm));
+ break;
+ }
+ if (imm < 64)
+ EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, imm - 32));
+ else
+ EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, 31));
+ EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, 31));
+ break;
+
+ /*
+ * MOV
+ */
+ case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+ if (dst_reg == src_reg)
+ break;
+ EMIT(PPC_RAW_MR(dst_reg, src_reg));
+ EMIT(PPC_RAW_MR(dst_reg_h, src_reg_h));
+ break;
+ case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
+ /* special mov32 for zext */
+ if (imm == 1)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ else if (dst_reg != src_reg)
+ EMIT(PPC_RAW_MR(dst_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
+ PPC_LI32(dst_reg, imm);
+ PPC_EX32(dst_reg_h, imm);
+ break;
+ case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
+ PPC_LI32(dst_reg, imm);
+ break;
+
+ /*
+ * BPF_FROM_BE/LE
+ */
+ case BPF_ALU | BPF_END | BPF_FROM_LE:
+ switch (imm) {
+ case 16:
+ /* Copy 16 bits to upper part */
+ EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg, 16, 0, 15));
+ /* Rotate 8 bits right & mask */
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 24, 16, 31));
+ break;
+ case 32:
+ /*
+ * Rotate word left by 8 bits:
+ * 2 bytes are already in their final position
+ * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
+ */
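+ /* e.g. 0x01020304 becomes 0x04030201 */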
+ EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg, 8, 0, 31));
+ /* Rotate 24 bits and insert byte 1 */
+ EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 0, 7));
+ /* Rotate 24 bits and insert byte 3 */
+ EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 16, 23));
+ EMIT(PPC_RAW_MR(dst_reg, __REG_R0));
+ break;
+ case 64:
+ bpf_set_seen_register(ctx, tmp_reg);
+ EMIT(PPC_RAW_RLWINM(tmp_reg, dst_reg, 8, 0, 31));
+ EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg_h, 8, 0, 31));
+ /* Rotate 24 bits and insert byte 1 */
+ EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 0, 7));
+ EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 0, 7));
+ /* Rotate 24 bits and insert byte 3 */
+ EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 16, 23));
+ EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 16, 23));
+ EMIT(PPC_RAW_MR(dst_reg, __REG_R0));
+ EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg));
+ break;
+ }
+ break;
+ case BPF_ALU | BPF_END | BPF_FROM_BE:
+ switch (imm) {
+ case 16:
+ /* zero-extend 16 bits into 32 bits */
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 16, 31));
+ break;
+ case 32:
+ case 64:
+ /* nop */
+ break;
+ }
+ break;
+
+ /*
+ * BPF_ST(X)
+ */
+ case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
+ EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
+ break;
+ case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+ PPC_LI32(__REG_R0, imm);
+ EMIT(PPC_RAW_STB(__REG_R0, dst_reg, off));
+ break;
+ case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
+ EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
+ break;
+ case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
+ PPC_LI32(__REG_R0, imm);
+ EMIT(PPC_RAW_STH(__REG_R0, dst_reg, off));
+ break;
+ case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
+ EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
+ break;
+ case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+ PPC_LI32(__REG_R0, imm);
+ EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off));
+ break;
+ case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
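+ /* big endian: high word at off, low word at off + 4 */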
+ EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off));
+ EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4));
+ break;
+ case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+ PPC_LI32(__REG_R0, imm);
+ EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off + 4));
+ PPC_EX32(__REG_R0, imm);
+ EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off));
+ break;
+
+ /*
+ * BPF_STX XADD (atomic_add)
+ */
+ case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */
+ bpf_set_seen_register(ctx, tmp_reg);
+ /* Get offset into TMP_REG */
+ EMIT(PPC_RAW_LI(tmp_reg, off));
+ /* load value from memory into r0 */
+ EMIT(PPC_RAW_LWARX(__REG_R0, tmp_reg, dst_reg, 0));
+ /* add value from src_reg into this */
+ EMIT(PPC_RAW_ADD(__REG_R0, __REG_R0, src_reg));
+ /* store result back */
+ EMIT(PPC_RAW_STWCX(__REG_R0, tmp_reg, dst_reg));
+ /* we're done if this succeeded */
+ PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4);
+ break;
+
+ case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */
+ return -EOPNOTSUPP;
+
+ /*
+ * BPF_LDX
+ */
+ case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+ EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+ if (!fp->aux->verifier_zext)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ break;
+ case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+ EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+ if (!fp->aux->verifier_zext)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ break;
+ case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+ if (!fp->aux->verifier_zext)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ break;
+ case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+ EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+ break;
+
+ /*
+ * Doubleword load
+ * 16 byte instruction that uses two 'struct bpf_insn'
+ */
+ case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+ PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
+ PPC_LI32(dst_reg, (u32)insn[i].imm);
+ /* Adjust for two bpf instructions */
+ addrs[++i] = ctx->idx * 4;
+ break;
+
+ /*
+ * Return/Exit
+ */
+ case BPF_JMP | BPF_EXIT:
+ /*
+ * If this isn't the very last instruction, branch to
+ * the epilogue. If we _are_ the last instruction,
+ * we'll just fall through to the epilogue.
+ */
+ if (i != flen - 1)
+ PPC_JMP(exit_addr);
+ /* else fall through to the epilogue */
+ break;
+
+ /*
+ * Call kernel helper or bpf function
+ */
+ case BPF_JMP | BPF_CALL:
+ ctx->seen |= SEEN_FUNC;
+
+ ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+ &func_addr, &func_addr_fixed);
+ if (ret < 0)
+ return ret;
+
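+ /*
+ * On PPC32, r3-r10 carry only the first four 64-bit arguments, so
+ * the fifth BPF argument is passed via the stack parameter save
+ * area at 8(r1)/12(r1).
+ */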
+ if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) {
+ EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, 8));
+ EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, 12));
+ }
+
+ bpf_jit_emit_func_call_rel(image, ctx, func_addr);
+
+ EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, __REG_R3));
+ EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), __REG_R4));
+ break;
+
+ /*
+ * Jumps and branches
+ */
+ case BPF_JMP | BPF_JA:
+ PPC_JMP(addrs[i + 1 + off]);
+ break;
+
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP32 | BPF_JGT | BPF_K:
+ case BPF_JMP32 | BPF_JGT | BPF_X:
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
+ case BPF_JMP32 | BPF_JSGT | BPF_X:
+ true_cond = COND_GT;
+ goto cond_branch;
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP32 | BPF_JLT | BPF_K:
+ case BPF_JMP32 | BPF_JLT | BPF_X:
+ case BPF_JMP32 | BPF_JSLT | BPF_K:
+ case BPF_JMP32 | BPF_JSLT | BPF_X:
+ true_cond = COND_LT;
+ goto cond_branch;
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP32 | BPF_JGE | BPF_K:
+ case BPF_JMP32 | BPF_JGE | BPF_X:
+ case BPF_JMP32 | BPF_JSGE | BPF_K:
+ case BPF_JMP32 | BPF_JSGE | BPF_X:
+ true_cond = COND_GE;
+ goto cond_branch;
+ case BPF_JMP | BPF_JLE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ case BPF_JMP32 | BPF_JLE | BPF_K:
+ case BPF_JMP32 | BPF_JLE | BPF_X:
+ case BPF_JMP32 | BPF_JSLE | BPF_K:
+ case BPF_JMP32 | BPF_JSLE | BPF_X:
+ true_cond = COND_LE;
+ goto cond_branch;
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP32 | BPF_JEQ | BPF_K:
+ case BPF_JMP32 | BPF_JEQ | BPF_X:
+ true_cond = COND_EQ;
+ goto cond_branch;
+ case BPF_JMP | BPF_JNE | BPF_K:
+ case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP32 | BPF_JNE | BPF_K:
+ case BPF_JMP32 | BPF_JNE | BPF_X:
+ true_cond = COND_NE;
+ goto cond_branch;
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ case BPF_JMP32 | BPF_JSET | BPF_K:
+ case BPF_JMP32 | BPF_JSET | BPF_X:
+ true_cond = COND_NE;
+ /* fallthrough; */
+
+cond_branch:
+ switch (code) {
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JNE | BPF_X:
+ /* unsigned comparison */
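+ /*
+ * Compare the high words first; only if they are equal does the
+ * low word comparison set CR0 for the final conditional branch.
+ */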
+ EMIT(PPC_RAW_CMPLW(dst_reg_h, src_reg_h));
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+ break;
+ case BPF_JMP32 | BPF_JGT | BPF_X:
+ case BPF_JMP32 | BPF_JLT | BPF_X:
+ case BPF_JMP32 | BPF_JGE | BPF_X:
+ case BPF_JMP32 | BPF_JLE | BPF_X:
+ case BPF_JMP32 | BPF_JEQ | BPF_X:
+ case BPF_JMP32 | BPF_JNE | BPF_X:
+ /* unsigned comparison */
+ EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+ break;
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ /* signed comparison */
+ EMIT(PPC_RAW_CMPW(dst_reg_h, src_reg_h));
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+ break;
+ case BPF_JMP32 | BPF_JSGT | BPF_X:
+ case BPF_JMP32 | BPF_JSLT | BPF_X:
+ case BPF_JMP32 | BPF_JSGE | BPF_X:
+ case BPF_JMP32 | BPF_JSLE | BPF_X:
+ /* signed comparison */
+ EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
+ break;
+ case BPF_JMP | BPF_JSET | BPF_X:
+ EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg_h, src_reg_h));
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg));
+ break;
+ case BPF_JMP32 | BPF_JSET | BPF_X:
+ EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg));
+ break;
+ case BPF_JMP | BPF_JNE | BPF_K:
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
+ /*
+ * Need sign-extended load, so only positive
+ * values can be used as imm in cmplwi
+ */
+ if (imm >= 0 && imm < 32768) {
+ EMIT(PPC_RAW_CMPLWI(dst_reg_h, 0));
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+ } else {
+ /* sign-extending load ... but unsigned comparison */
+ PPC_EX32(__REG_R0, imm);
+ EMIT(PPC_RAW_CMPLW(dst_reg_h, __REG_R0));
+ PPC_LI32(__REG_R0, imm);
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0));
+ }
+ break;
+ case BPF_JMP32 | BPF_JNE | BPF_K:
+ case BPF_JMP32 | BPF_JEQ | BPF_K:
+ case BPF_JMP32 | BPF_JGT | BPF_K:
+ case BPF_JMP32 | BPF_JLT | BPF_K:
+ case BPF_JMP32 | BPF_JGE | BPF_K:
+ case BPF_JMP32 | BPF_JLE | BPF_K:
+ if (imm >= 0 && imm < 65536) {
+ EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+ } else {
+ PPC_LI32(__REG_R0, imm);
+ EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0));
+ }
+ break;
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
+ case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
+ if (imm >= 0 && imm < 65536) {
+ EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0));
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+ } else {
+ /* sign-extending load */
+ EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0));
+ PPC_LI32(__REG_R0, imm);
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0));
+ }
+ break;
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
+ case BPF_JMP32 | BPF_JSLT | BPF_K:
+ case BPF_JMP32 | BPF_JSGE | BPF_K:
+ case BPF_JMP32 | BPF_JSLE | BPF_K:
+ /*
+ * signed comparison, so any 16-bit value
+ * can be used in cmpwi
+ */
+ if (imm >= -32768 && imm < 32768) {
+ EMIT(PPC_RAW_CMPWI(dst_reg, imm));
+ } else {
+ /* sign-extending load */
+ PPC_LI32(__REG_R0, imm);
+ EMIT(PPC_RAW_CMPW(dst_reg, __REG_R0));
+ }
+ break;
+ case BPF_JMP | BPF_JSET | BPF_K:
+ /* andi does not sign-extend the immediate */
+ if (imm >= 0 && imm < 32768) {
+ /* PPC_ANDI is _only/always_ dot-form */
+ EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm));
+ } else {
+ PPC_LI32(__REG_R0, imm);
+ if (imm < 0) {
+ EMIT(PPC_RAW_CMPWI(dst_reg_h, 0));
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ }
+ EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0));
+ }
+ break;
+ case BPF_JMP32 | BPF_JSET | BPF_K:
+ /* andi does not sign-extend the immediate */
+ if (imm >= 0 && imm < 32768) {
+ /* PPC_ANDI is _only/always_ dot-form */
+ EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm));
+ } else {
+ PPC_LI32(__REG_R0, imm);
+ EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0));
+ }
+ break;
+ }
+ PPC_BCC(true_cond, addrs[i + 1 + off]);
+ break;
+
+ /*
+ * Tail call
+ */
+ case BPF_JMP | BPF_TAIL_CALL:
+ ctx->seen |= SEEN_TAILCALL;
+ bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
+ break;
+
+ default:
+ /*
+ * The filter contains something cruel & unusual.
+ * We don't handle it, but also there shouldn't be
+ * anything missing from our list.
+ */
+ pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n", code, i);
+ return -EOPNOTSUPP;
+ }
+ if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext &&
+ !insn_is_zext(&insn[i + 1]) && !(BPF_OP(code) == BPF_END && imm == 64))
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ }
+
+ /* Set end-of-body-code address for exit. */
+ addrs[i] = ctx->idx * 4;
+
+ return 0;
+}
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index aaf1a887f653..57a8c1153851 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -18,27 +18,6 @@
#include "bpf_jit64.h"
-static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
-{
- memset32(area, BREAKPOINT_INSTRUCTION, size/4);
-}
-
-static inline void bpf_flush_icache(void *start, void *end)
-{
- smp_wmb();
- flush_icache_range((unsigned long)start, (unsigned long)end);
-}
-
-static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
-{
- return (ctx->seen & (1 << (31 - b2p[i])));
-}
-
-static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
-{
- ctx->seen |= (1 << (31 - b2p[i]));
-}
-
static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
{
/*
@@ -47,7 +26,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
* - the bpf program uses its stack area
* The latter condition is deduced from the usage of BPF_REG_FP
*/
- return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
+ return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, b2p[BPF_REG_FP]);
}
/*
@@ -85,7 +64,11 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
BUG();
}
-static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+}
+
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
int i;
@@ -124,11 +107,11 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
* in the protected zone below the previous stack frame
*/
for (i = BPF_REG_6; i <= BPF_REG_10; i++)
- if (bpf_is_seen_register(ctx, i))
+ if (bpf_is_seen_register(ctx, b2p[i]))
PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
/* Setup frame pointer to point to the bpf stack area */
- if (bpf_is_seen_register(ctx, BPF_REG_FP))
+ if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP]))
EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1,
STACK_FRAME_MIN_SIZE + ctx->stack_size));
}
@@ -139,7 +122,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
/* Restore NVRs */
for (i = BPF_REG_6; i <= BPF_REG_10; i++)
- if (bpf_is_seen_register(ctx, i))
+ if (bpf_is_seen_register(ctx, b2p[i]))
PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
/* Tear down our stack frame */
@@ -152,7 +135,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
}
}
-static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
bpf_jit_emit_common_epilogue(image, ctx);
@@ -187,8 +170,7 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
EMIT(PPC_RAW_BLRL());
}
-static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
- u64 func)
+void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func)
{
unsigned int i, ctx_idx = ctx->idx;
@@ -289,9 +271,8 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
}
/* Assemble the body code between the prologue & epilogue */
-static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
- struct codegen_context *ctx,
- u32 *addrs, bool extra_pass)
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+ u32 *addrs, bool extra_pass)
{
const struct bpf_insn *insn = fp->insnsi;
int flen = fp->len;
@@ -330,9 +311,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
* any issues.
*/
if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
- bpf_set_seen_register(ctx, insn[i].dst_reg);
+ bpf_set_seen_register(ctx, dst_reg);
if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
- bpf_set_seen_register(ctx, insn[i].src_reg);
+ bpf_set_seen_register(ctx, src_reg);
switch (code) {
/*
@@ -1026,249 +1007,3 @@ cond_branch:
return 0;
}
-
-/* Fix the branch target addresses for subprog calls */
-static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
- struct codegen_context *ctx, u32 *addrs)
-{
- const struct bpf_insn *insn = fp->insnsi;
- bool func_addr_fixed;
- u64 func_addr;
- u32 tmp_idx;
- int i, ret;
-
- for (i = 0; i < fp->len; i++) {
- /*
- * During the extra pass, only the branch target addresses for
- * the subprog calls need to be fixed. All other instructions
- * can left untouched.
- *
- * The JITed image length does not change because we already
- * ensure that the JITed instruction sequence for these calls
- * are of fixed length by padding them with NOPs.
- */
- if (insn[i].code == (BPF_JMP | BPF_CALL) &&
- insn[i].src_reg == BPF_PSEUDO_CALL) {
- ret = bpf_jit_get_func_addr(fp, &insn[i], true,
- &func_addr,
- &func_addr_fixed);
- if (ret < 0)
- return ret;
-
- /*
- * Save ctx->idx as this would currently point to the
- * end of the JITed image and set it to the offset of
- * the instruction sequence corresponding to the
- * subprog call temporarily.
- */
- tmp_idx = ctx->idx;
- ctx->idx = addrs[i] / 4;
- bpf_jit_emit_func_call_rel(image, ctx, func_addr);
-
- /*
- * Restore ctx->idx here. This is safe as the length
- * of the JITed sequence remains unchanged.
- */
- ctx->idx = tmp_idx;
- }
- }
-
- return 0;
-}
-
-struct powerpc64_jit_data {
- struct bpf_binary_header *header;
- u32 *addrs;
- u8 *image;
- u32 proglen;
- struct codegen_context ctx;
-};
-
-bool bpf_jit_needs_zext(void)
-{
- return true;
-}
-
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
-{
- u32 proglen;
- u32 alloclen;
- u8 *image = NULL;
- u32 *code_base;
- u32 *addrs;
- struct powerpc64_jit_data *jit_data;
- struct codegen_context cgctx;
- int pass;
- int flen;
- struct bpf_binary_header *bpf_hdr;
- struct bpf_prog *org_fp = fp;
- struct bpf_prog *tmp_fp;
- bool bpf_blinded = false;
- bool extra_pass = false;
-
- if (!fp->jit_requested)
- return org_fp;
-
- tmp_fp = bpf_jit_blind_constants(org_fp);
- if (IS_ERR(tmp_fp))
- return org_fp;
-
- if (tmp_fp != org_fp) {
- bpf_blinded = true;
- fp = tmp_fp;
- }
-
- jit_data = fp->aux->jit_data;
- if (!jit_data) {
- jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
- if (!jit_data) {
- fp = org_fp;
- goto out;
- }
- fp->aux->jit_data = jit_data;
- }
-
- flen = fp->len;
- addrs = jit_data->addrs;
- if (addrs) {
- cgctx = jit_data->ctx;
- image = jit_data->image;
- bpf_hdr = jit_data->header;
- proglen = jit_data->proglen;
- alloclen = proglen + FUNCTION_DESCR_SIZE;
- extra_pass = true;
- goto skip_init_ctx;
- }
-
- addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
- if (addrs == NULL) {
- fp = org_fp;
- goto out_addrs;
- }
-
- memset(&cgctx, 0, sizeof(struct codegen_context));
-
- /* Make sure that the stack is quadword aligned. */
- cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
-
- /* Scouting faux-generate pass 0 */
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
- /* We hit something illegal or unsupported. */
- fp = org_fp;
- goto out_addrs;
- }
-
- /*
- * If we have seen a tail call, we need a second pass.
- * This is because bpf_jit_emit_common_epilogue() is called
- * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
- */
- if (cgctx.seen & SEEN_TAILCALL) {
- cgctx.idx = 0;
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
- fp = org_fp;
- goto out_addrs;
- }
- }
-
- /*
- * Pretend to build prologue, given the features we've seen. This will
- * update ctgtx.idx as it pretends to output instructions, then we can
- * calculate total size from idx.
- */
- bpf_jit_build_prologue(0, &cgctx);
- bpf_jit_build_epilogue(0, &cgctx);
-
- proglen = cgctx.idx * 4;
- alloclen = proglen + FUNCTION_DESCR_SIZE;
-
- bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
- bpf_jit_fill_ill_insns);
- if (!bpf_hdr) {
- fp = org_fp;
- goto out_addrs;
- }
-
-skip_init_ctx:
- code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
-
- if (extra_pass) {
- /*
- * Do not touch the prologue and epilogue as they will remain
- * unchanged. Only fix the branch target address for subprog
- * calls in the body.
- *
- * This does not change the offsets and lengths of the subprog
- * call instruction sequences and hence, the size of the JITed
- * image as well.
- */
- bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
-
- /* There is no need to perform the usual passes. */
- goto skip_codegen_passes;
- }
-
- /* Code generation passes 1-2 */
- for (pass = 1; pass < 3; pass++) {
- /* Now build the prologue, body code & epilogue for real. */
- cgctx.idx = 0;
- bpf_jit_build_prologue(code_base, &cgctx);
- bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
- bpf_jit_build_epilogue(code_base, &cgctx);
-
- if (bpf_jit_enable > 1)
- pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
- proglen - (cgctx.idx * 4), cgctx.seen);
- }
-
-skip_codegen_passes:
- if (bpf_jit_enable > 1)
- /*
- * Note that we output the base address of the code_base
- * rather than image, since opcodes are in code_base.
- */
- bpf_jit_dump(flen, proglen, pass, code_base);
-
-#ifdef PPC64_ELF_ABI_v1
- /* Function descriptor nastiness: Address + TOC */
- ((u64 *)image)[0] = (u64)code_base;
- ((u64 *)image)[1] = local_paca->kernel_toc;
-#endif
-
- fp->bpf_func = (void *)image;
- fp->jited = 1;
- fp->jited_len = alloclen;
-
- bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
- if (!fp->is_func || extra_pass) {
- bpf_prog_fill_jited_linfo(fp, addrs);
-out_addrs:
- kfree(addrs);
- kfree(jit_data);
- fp->aux->jit_data = NULL;
- } else {
- jit_data->addrs = addrs;
- jit_data->ctx = cgctx;
- jit_data->proglen = proglen;
- jit_data->image = image;
- jit_data->header = bpf_hdr;
- }
-
-out:
- if (bpf_blinded)
- bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
-
- return fp;
-}
-
-/* Overriding bpf_jit_free() as we don't set images read-only. */
-void bpf_jit_free(struct bpf_prog *fp)
-{
- unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
- struct bpf_binary_header *bpf_hdr = (void *)addr;
-
- if (fp->jited)
- bpf_jit_binary_free(bpf_hdr);
-
- bpf_prog_unlock_free(fp);
-}