Diffstat (limited to 'arch/powerpc/net')
-rw-r--r--   arch/powerpc/net/Makefile           |    6
-rw-r--r--   arch/powerpc/net/bpf_jit.h          |  284
-rw-r--r--   arch/powerpc/net/bpf_jit32.h        |  139
-rw-r--r--   arch/powerpc/net/bpf_jit64.h        |  108
-rw-r--r--   arch/powerpc/net/bpf_jit_asm.S      |  226
-rw-r--r--   arch/powerpc/net/bpf_jit_comp.c     |  893
-rw-r--r--   arch/powerpc/net/bpf_jit_comp32.c   | 1302
-rw-r--r--   arch/powerpc/net/bpf_jit_comp64.c   | 1077
8 files changed, 2209 insertions(+), 1826 deletions(-)
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
index c2dec3a68d4c..8e60af32e51e 100644
--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -2,8 +2,4 @@
#
# Arch-specific network modules
#
-ifdef CONFIG_PPC64
-obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
-else
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
-endif
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp$(BITS).o
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 55d4377ccfae..cdea5dccaefe 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -11,225 +11,78 @@
#ifndef __ASSEMBLY__
#include <asm/types.h>
+#include <asm/ppc-opcode.h>
-#ifdef PPC64_ELF_ABI_v1
+#ifdef CONFIG_PPC64_ELF_ABI_V1
#define FUNCTION_DESCR_SIZE 24
#else
#define FUNCTION_DESCR_SIZE 0
#endif
-/*
- * 16-bit immediate helper macros: HA() is for use with sign-extending instrs
- * (e.g. LD, ADDI). If the bottom 16 bits is "-ve", add another bit into the
- * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000).
- */
-#define IMM_H(i) ((uintptr_t)(i)>>16)
-#define IMM_HA(i) (((uintptr_t)(i)>>16) + \
- (((uintptr_t)(i) & 0x8000) >> 15))
-#define IMM_L(i) ((uintptr_t)(i) & 0xffff)
+#define CTX_NIA(ctx) ((unsigned long)ctx->idx * 4)
#define PLANT_INSTR(d, idx, instr) \
do { if (d) { (d)[idx] = instr; } idx++; } while (0)
#define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr)
-#define PPC_NOP() EMIT(PPC_INST_NOP)
-#define PPC_BLR() EMIT(PPC_INST_BLR)
-#define PPC_BLRL() EMIT(PPC_INST_BLRL)
-#define PPC_MTLR(r) EMIT(PPC_INST_MTLR | ___PPC_RT(r))
-#define PPC_BCTR() EMIT(PPC_INST_BCTR)
-#define PPC_MTCTR(r) EMIT(PPC_INST_MTCTR | ___PPC_RT(r))
-#define PPC_ADDI(d, a, i) EMIT(PPC_INST_ADDI | ___PPC_RT(d) | \
- ___PPC_RA(a) | IMM_L(i))
-#define PPC_MR(d, a) PPC_OR(d, a, a)
-#define PPC_LI(r, i) PPC_ADDI(r, 0, i)
-#define PPC_ADDIS(d, a, i) EMIT(PPC_INST_ADDIS | \
- ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
-#define PPC_LIS(r, i) PPC_ADDIS(r, 0, i)
-#define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) | \
- ___PPC_RA(base) | ((i) & 0xfffc))
-#define PPC_STDX(r, base, b) EMIT(PPC_INST_STDX | ___PPC_RS(r) | \
- ___PPC_RA(base) | ___PPC_RB(b))
-#define PPC_STDU(r, base, i) EMIT(PPC_INST_STDU | ___PPC_RS(r) | \
- ___PPC_RA(base) | ((i) & 0xfffc))
-#define PPC_STW(r, base, i) EMIT(PPC_INST_STW | ___PPC_RS(r) | \
- ___PPC_RA(base) | IMM_L(i))
-#define PPC_STWU(r, base, i) EMIT(PPC_INST_STWU | ___PPC_RS(r) | \
- ___PPC_RA(base) | IMM_L(i))
-#define PPC_STH(r, base, i) EMIT(PPC_INST_STH | ___PPC_RS(r) | \
- ___PPC_RA(base) | IMM_L(i))
-#define PPC_STB(r, base, i) EMIT(PPC_INST_STB | ___PPC_RS(r) | \
- ___PPC_RA(base) | IMM_L(i))
-
-#define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \
- ___PPC_RA(base) | IMM_L(i))
-#define PPC_LD(r, base, i) EMIT(PPC_INST_LD | ___PPC_RT(r) | \
- ___PPC_RA(base) | ((i) & 0xfffc))
-#define PPC_LDX(r, base, b) EMIT(PPC_INST_LDX | ___PPC_RT(r) | \
- ___PPC_RA(base) | ___PPC_RB(b))
-#define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | ___PPC_RT(r) | \
- ___PPC_RA(base) | IMM_L(i))
-#define PPC_LHZ(r, base, i) EMIT(PPC_INST_LHZ | ___PPC_RT(r) | \
- ___PPC_RA(base) | IMM_L(i))
-#define PPC_LHBRX(r, base, b) EMIT(PPC_INST_LHBRX | ___PPC_RT(r) | \
- ___PPC_RA(base) | ___PPC_RB(b))
-#define PPC_LDBRX(r, base, b) EMIT(PPC_INST_LDBRX | ___PPC_RT(r) | \
- ___PPC_RA(base) | ___PPC_RB(b))
-
-#define PPC_BPF_LDARX(t, a, b, eh) EMIT(PPC_INST_LDARX | ___PPC_RT(t) | \
- ___PPC_RA(a) | ___PPC_RB(b) | \
- __PPC_EH(eh))
-#define PPC_BPF_LWARX(t, a, b, eh) EMIT(PPC_INST_LWARX | ___PPC_RT(t) | \
- ___PPC_RA(a) | ___PPC_RB(b) | \
- __PPC_EH(eh))
-#define PPC_BPF_STWCX(s, a, b) EMIT(PPC_INST_STWCX | ___PPC_RS(s) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_BPF_STDCX(s, a, b) EMIT(PPC_INST_STDCX | ___PPC_RS(s) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i))
-#define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i))
-#define PPC_CMPW(a, b) EMIT(PPC_INST_CMPW | ___PPC_RA(a) | \
- ___PPC_RB(b))
-#define PPC_CMPD(a, b) EMIT(PPC_INST_CMPD | ___PPC_RA(a) | \
- ___PPC_RB(b))
-#define PPC_CMPLWI(a, i) EMIT(PPC_INST_CMPLWI | ___PPC_RA(a) | IMM_L(i))
-#define PPC_CMPLDI(a, i) EMIT(PPC_INST_CMPLDI | ___PPC_RA(a) | IMM_L(i))
-#define PPC_CMPLW(a, b) EMIT(PPC_INST_CMPLW | ___PPC_RA(a) | \
- ___PPC_RB(b))
-#define PPC_CMPLD(a, b) EMIT(PPC_INST_CMPLD | ___PPC_RA(a) | \
- ___PPC_RB(b))
-
-#define PPC_SUB(d, a, b) EMIT(PPC_INST_SUB | ___PPC_RT(d) | \
- ___PPC_RB(a) | ___PPC_RA(b))
-#define PPC_ADD(d, a, b) EMIT(PPC_INST_ADD | ___PPC_RT(d) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_MULD(d, a, b) EMIT(PPC_INST_MULLD | ___PPC_RT(d) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_MULW(d, a, b) EMIT(PPC_INST_MULLW | ___PPC_RT(d) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_MULHWU(d, a, b) EMIT(PPC_INST_MULHWU | ___PPC_RT(d) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_MULI(d, a, i) EMIT(PPC_INST_MULLI | ___PPC_RT(d) | \
- ___PPC_RA(a) | IMM_L(i))
-#define PPC_DIVWU(d, a, b) EMIT(PPC_INST_DIVWU | ___PPC_RT(d) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_DIVDU(d, a, b) EMIT(PPC_INST_DIVDU | ___PPC_RT(d) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_AND(d, a, b) EMIT(PPC_INST_AND | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(b))
-#define PPC_ANDI(d, a, i) EMIT(PPC_INST_ANDI | ___PPC_RA(d) | \
- ___PPC_RS(a) | IMM_L(i))
-#define PPC_AND_DOT(d, a, b) EMIT(PPC_INST_ANDDOT | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(b))
-#define PPC_OR(d, a, b) EMIT(PPC_INST_OR | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(b))
-#define PPC_MR(d, a) PPC_OR(d, a, a)
-#define PPC_ORI(d, a, i) EMIT(PPC_INST_ORI | ___PPC_RA(d) | \
- ___PPC_RS(a) | IMM_L(i))
-#define PPC_ORIS(d, a, i) EMIT(PPC_INST_ORIS | ___PPC_RA(d) | \
- ___PPC_RS(a) | IMM_L(i))
-#define PPC_XOR(d, a, b) EMIT(PPC_INST_XOR | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(b))
-#define PPC_XORI(d, a, i) EMIT(PPC_INST_XORI | ___PPC_RA(d) | \
- ___PPC_RS(a) | IMM_L(i))
-#define PPC_XORIS(d, a, i) EMIT(PPC_INST_XORIS | ___PPC_RA(d) | \
- ___PPC_RS(a) | IMM_L(i))
-#define PPC_EXTSW(d, a) EMIT(PPC_INST_EXTSW | ___PPC_RA(d) | \
- ___PPC_RS(a))
-#define PPC_SLW(d, a, s) EMIT(PPC_INST_SLW | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(s))
-#define PPC_SLD(d, a, s) EMIT(PPC_INST_SLD | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(s))
-#define PPC_SRW(d, a, s) EMIT(PPC_INST_SRW | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(s))
-#define PPC_SRAW(d, a, s) EMIT(PPC_INST_SRAW | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(s))
-#define PPC_SRAWI(d, a, i) EMIT(PPC_INST_SRAWI | ___PPC_RA(d) | \
- ___PPC_RS(a) | __PPC_SH(i))
-#define PPC_SRD(d, a, s) EMIT(PPC_INST_SRD | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(s))
-#define PPC_SRAD(d, a, s) EMIT(PPC_INST_SRAD | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(s))
-#define PPC_SRADI(d, a, i) EMIT(PPC_INST_SRADI | ___PPC_RA(d) | \
- ___PPC_RS(a) | __PPC_SH64(i))
-#define PPC_RLWINM(d, a, i, mb, me) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \
- ___PPC_RS(a) | __PPC_SH(i) | \
- __PPC_MB(mb) | __PPC_ME(me))
-#define PPC_RLWINM_DOT(d, a, i, mb, me) EMIT(PPC_INST_RLWINM_DOT | \
- ___PPC_RA(d) | ___PPC_RS(a) | \
- __PPC_SH(i) | __PPC_MB(mb) | \
- __PPC_ME(me))
-#define PPC_RLWIMI(d, a, i, mb, me) EMIT(PPC_INST_RLWIMI | ___PPC_RA(d) | \
- ___PPC_RS(a) | __PPC_SH(i) | \
- __PPC_MB(mb) | __PPC_ME(me))
-#define PPC_RLDICL(d, a, i, mb) EMIT(PPC_INST_RLDICL | ___PPC_RA(d) | \
- ___PPC_RS(a) | __PPC_SH64(i) | \
- __PPC_MB64(mb))
-#define PPC_RLDICR(d, a, i, me) EMIT(PPC_INST_RLDICR | ___PPC_RA(d) | \
- ___PPC_RS(a) | __PPC_SH64(i) | \
- __PPC_ME64(me))
-
-/* slwi = rlwinm Rx, Ry, n, 0, 31-n */
-#define PPC_SLWI(d, a, i) PPC_RLWINM(d, a, i, 0, 31-(i))
-/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */
-#define PPC_SRWI(d, a, i) PPC_RLWINM(d, a, 32-(i), i, 31)
-/* sldi = rldicr Rx, Ry, n, 63-n */
-#define PPC_SLDI(d, a, i) PPC_RLDICR(d, a, i, 63-(i))
-/* sldi = rldicl Rx, Ry, 64-n, n */
-#define PPC_SRDI(d, a, i) PPC_RLDICL(d, a, 64-(i), i)
-
-#define PPC_NEG(d, a) EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a))
-
/* Long jump; (unconditional 'branch') */
-#define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \
- (((dest) - (ctx->idx * 4)) & 0x03fffffc))
+#define PPC_JMP(dest) \
+ do { \
+ long offset = (long)(dest) - CTX_NIA(ctx); \
+ if ((dest) != 0 && !is_offset_in_branch_range(offset)) { \
+ pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \
+ return -ERANGE; \
+ } \
+ EMIT(PPC_RAW_BRANCH(offset)); \
+ } while (0)
+
/* "cond" here covers BO:BI fields. */
-#define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \
- (((cond) & 0x3ff) << 16) | \
- (((dest) - (ctx->idx * 4)) & \
- 0xfffc))
+#define PPC_BCC_SHORT(cond, dest) \
+ do { \
+ long offset = (long)(dest) - CTX_NIA(ctx); \
+ if ((dest) != 0 && !is_offset_in_cond_branch_range(offset)) { \
+ pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \
+ return -ERANGE; \
+ } \
+ EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \
+ } while (0)
+
/* Sign-extended 32-bit immediate load */
#define PPC_LI32(d, i) do { \
if ((int)(uintptr_t)(i) >= -32768 && \
(int)(uintptr_t)(i) < 32768) \
- PPC_LI(d, i); \
+ EMIT(PPC_RAW_LI(d, i)); \
else { \
- PPC_LIS(d, IMM_H(i)); \
+ EMIT(PPC_RAW_LIS(d, IMM_H(i))); \
if (IMM_L(i)) \
- PPC_ORI(d, d, IMM_L(i)); \
+ EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \
} } while(0)
+#ifdef CONFIG_PPC64
#define PPC_LI64(d, i) do { \
if ((long)(i) >= -2147483648 && \
(long)(i) < 2147483648) \
PPC_LI32(d, i); \
else { \
if (!((uintptr_t)(i) & 0xffff800000000000ULL)) \
- PPC_LI(d, ((uintptr_t)(i) >> 32) & 0xffff); \
+ EMIT(PPC_RAW_LI(d, ((uintptr_t)(i) >> 32) & \
+ 0xffff)); \
else { \
- PPC_LIS(d, ((uintptr_t)(i) >> 48)); \
+ EMIT(PPC_RAW_LIS(d, ((uintptr_t)(i) >> 48))); \
if ((uintptr_t)(i) & 0x0000ffff00000000ULL) \
- PPC_ORI(d, d, \
- ((uintptr_t)(i) >> 32) & 0xffff); \
+ EMIT(PPC_RAW_ORI(d, d, \
+ ((uintptr_t)(i) >> 32) & 0xffff)); \
} \
- PPC_SLDI(d, d, 32); \
+ EMIT(PPC_RAW_SLDI(d, d, 32)); \
if ((uintptr_t)(i) & 0x00000000ffff0000ULL) \
- PPC_ORIS(d, d, \
- ((uintptr_t)(i) >> 16) & 0xffff); \
+ EMIT(PPC_RAW_ORIS(d, d, \
+ ((uintptr_t)(i) >> 16) & 0xffff)); \
if ((uintptr_t)(i) & 0x000000000000ffffULL) \
- PPC_ORI(d, d, (uintptr_t)(i) & 0xffff); \
+ EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \
+ 0xffff)); \
} } while (0)
-
-#ifdef CONFIG_PPC64
-#define PPC_FUNC_ADDR(d,i) do { PPC_LI64(d, i); } while(0)
-#else
-#define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0)
#endif
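
Note: a quick standalone illustration of the immediate-load decomposition above. PPC_LI32 covers anything reachable with a sign-extended 16-bit li or an lis/ori pair, and PPC_LI64 builds the upper 32 bits first, shifts them into place with sldi, then ORs in the remaining halfwords. The sketch below only mirrors that selection logic; the li64_sketch() name and the printf-based "emitter" are inventions of this example, not part of the JIT.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: print the instruction sequence PPC_LI64 would emit. */
static void li64_sketch(int reg, uint64_t imm)
{
	if ((int64_t)imm >= -2147483648LL && (int64_t)imm < 2147483648LL) {
		/* Fits in a sign-extended 32-bit value: the PPC_LI32 path. */
		if ((int32_t)imm >= -32768 && (int32_t)imm < 32768) {
			printf("li    r%d, %d\n", reg, (int32_t)imm);
		} else {
			printf("lis   r%d, 0x%x\n", reg, ((uint32_t)imm) >> 16);
			if (imm & 0xffff)
				printf("ori   r%d, r%d, 0x%x\n", reg, reg, (unsigned int)(imm & 0xffff));
		}
		return;
	}
	/* Build the upper 32 bits first ... */
	if (!(imm & 0xffff800000000000ULL)) {
		printf("li    r%d, 0x%x\n", reg, (unsigned int)((imm >> 32) & 0xffff));
	} else {
		printf("lis   r%d, 0x%x\n", reg, (unsigned int)(imm >> 48));
		if (imm & 0x0000ffff00000000ULL)
			printf("ori   r%d, r%d, 0x%x\n", reg, reg, (unsigned int)((imm >> 32) & 0xffff));
	}
	/* ... shift them into place, then OR in the lower halfwords. */
	printf("sldi  r%d, r%d, 32\n", reg, reg);
	if (imm & 0x00000000ffff0000ULL)
		printf("oris  r%d, r%d, 0x%x\n", reg, reg, (unsigned int)((imm >> 16) & 0xffff));
	if (imm & 0x000000000000ffffULL)
		printf("ori   r%d, r%d, 0x%x\n", reg, reg, (unsigned int)(imm & 0xffff));
}

int main(void)
{
	li64_sketch(9, 0x00000000deadbeefULL);   /* li 0 / sldi / oris / ori */
	li64_sketch(9, 0x123456789abcdef0ULL);   /* lis / ori / sldi / oris / ori */
	return 0;
}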
-static inline bool is_nearbranch(int offset)
-{
- return (offset < 32768) && (offset >= -32768);
-}
-
/*
* The fly in the ointment of code size changing from pass to pass is
* avoided by padding the short branch case with a NOP. If code size differs
@@ -238,12 +91,12 @@ static inline bool is_nearbranch(int offset)
* state.
*/
#define PPC_BCC(cond, dest) do { \
- if (is_nearbranch((dest) - (ctx->idx * 4))) { \
+ if (is_offset_in_cond_branch_range((long)(dest) - CTX_NIA(ctx))) { \
PPC_BCC_SHORT(cond, dest); \
- PPC_NOP(); \
+ EMIT(PPC_RAW_NOP()); \
} else { \
/* Flip the 'T or F' bit to invert comparison */ \
- PPC_BCC_SHORT(cond ^ COND_CMP_TRUE, (ctx->idx+2)*4); \
+ PPC_BCC_SHORT(cond ^ COND_CMP_TRUE, CTX_NIA(ctx) + 2*4); \
PPC_JMP(dest); \
} } while(0)
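
Note: the macro above encodes the scheme described in the comment. A conditional branch (bc) only reaches +/- 32 KB, so when the target is farther away the condition is inverted and used to hop over an unconditional branch (b), which reaches +/- 32 MB. Below is a minimal host-side sketch of that decision; the printf stand-in for EMIT() and the symbolic condition strings are assumptions of this example.

#include <stdbool.h>
#include <stdio.h>

/* Reach of the two PowerPC branch forms, in bytes. */
#define COND_BRANCH_RANGE   0x8000L     /* bc: +/- 32 KB */
#define UNCOND_BRANCH_RANGE 0x2000000L  /* b:  +/- 32 MB */

static bool fits(long off, long range)
{
	return off >= -range && off < range;
}

/* Illustrative stand-in for PPC_BCC: "cond" is symbolic, printf replaces EMIT(). */
static void emit_cond_branch(long pc, long target, const char *cond)
{
	long off = target - pc;

	if (fits(off, COND_BRANCH_RANGE)) {
		printf("bc  %s, %+ld\n", cond, off);  /* short form */
		printf("nop\n");                      /* pad so code size stays stable */
	} else if (fits(target - (pc + 4), UNCOND_BRANCH_RANGE)) {
		/* Invert the test and hop over an unconditional branch. */
		printf("bc  !%s, +8\n", cond);
		printf("b   %+ld\n", target - (pc + 4));
	} else {
		printf("error: branch target out of range\n"); /* the real macro returns -ERANGE */
	}
}

int main(void)
{
	emit_cond_branch(0x100, 0x180, "eq");     /* in range: bc + nop */
	emit_cond_branch(0x100, 0x100000, "eq");  /* too far for bc: inverted bc + b */
	return 0;
}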
@@ -262,6 +115,63 @@ static inline bool is_nearbranch(int offset)
#define COND_LT (CR0_LT | COND_CMP_TRUE)
#define COND_LE (CR0_GT | COND_CMP_FALSE)
+#define SEEN_FUNC 0x20000000 /* might call external helpers */
+#define SEEN_TAILCALL 0x40000000 /* uses tail calls */
+
+struct codegen_context {
+ /*
+ * This is used to track register usage as well
+ * as calls to external helpers.
+ * - register usage is tracked with corresponding
+ * bits (r3-r31)
+ * - rest of the bits can be used to track other
+ * things -- for now, we use bits 0 to 2
+ * encoded in SEEN_* macros above
+ */
+ unsigned int seen;
+ unsigned int idx;
+ unsigned int stack_size;
+ int b2p[MAX_BPF_JIT_REG + 2];
+ unsigned int exentry_idx;
+ unsigned int alt_exit_addr;
+};
+
+#define bpf_to_ppc(r) (ctx->b2p[r])
+
+#ifdef CONFIG_PPC32
+#define BPF_FIXUP_LEN 3 /* Three instructions => 12 bytes */
+#else
+#define BPF_FIXUP_LEN 2 /* Two instructions => 8 bytes */
+#endif
+
+static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
+{
+ return ctx->seen & (1 << (31 - i));
+}
+
+static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
+{
+ ctx->seen |= 1 << (31 - i);
+}
+
+static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
+{
+ ctx->seen &= ~(1 << (31 - i));
+}
+
+void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
+int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func);
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
+ u32 *addrs, int pass, bool extra_pass);
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_realloc_regs(struct codegen_context *ctx);
+int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
+
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
+ struct codegen_context *ctx, int insn_idx,
+ int jmp_off, int dst_reg);
+
#endif
#endif
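
Note: a small self-contained sketch of the register-tracking convention declared above. GPR i is recorded in bit (31 - i) of ctx->seen, which keeps r3-r31 in the low bits and leaves the top bits free for flags such as SEEN_FUNC and SEEN_TAILCALL. The struct ctx wrapper and main() harness here are illustrative only.

#include <stdio.h>

#define SEEN_FUNC     0x20000000
#define SEEN_TAILCALL 0x40000000

struct ctx { unsigned int seen; };

/* GPR i maps to bit (31 - i): r31 -> bit 0, r3 -> bit 28. */
static void set_seen(struct ctx *c, int i)      { c->seen |= 1u << (31 - i); }
static int  is_seen(const struct ctx *c, int i) { return !!(c->seen & (1u << (31 - i))); }

int main(void)
{
	struct ctx c = { 0 };

	set_seen(&c, 31);      /* code used r31 */
	c.seen |= SEEN_FUNC;   /* code calls an external helper */
	printf("r31 seen: %d, r30 seen: %d, seen = 0x%08x\n",
	       is_seen(&c, 31), is_seen(&c, 30), c.seen);
	return 0;
}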
diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
deleted file mode 100644
index 6e5a2a4faeab..000000000000
--- a/arch/powerpc/net/bpf_jit32.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * bpf_jit32.h: BPF JIT compiler for PPC
- *
- * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
- *
- * Split from bpf_jit.h
- */
-#ifndef _BPF_JIT32_H
-#define _BPF_JIT32_H
-
-#include <asm/asm-compat.h>
-#include "bpf_jit.h"
-
-#ifdef CONFIG_PPC64
-#define BPF_PPC_STACK_R3_OFF 48
-#define BPF_PPC_STACK_LOCALS 32
-#define BPF_PPC_STACK_BASIC (48+64)
-#define BPF_PPC_STACK_SAVE (18*8)
-#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
- BPF_PPC_STACK_SAVE)
-#define BPF_PPC_SLOWPATH_FRAME (48+64)
-#else
-#define BPF_PPC_STACK_R3_OFF 24
-#define BPF_PPC_STACK_LOCALS 16
-#define BPF_PPC_STACK_BASIC (24+32)
-#define BPF_PPC_STACK_SAVE (18*4)
-#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
- BPF_PPC_STACK_SAVE)
-#define BPF_PPC_SLOWPATH_FRAME (24+32)
-#endif
-
-#define REG_SZ (BITS_PER_LONG/8)
-
-/*
- * Generated code register usage:
- *
- * As normal PPC C ABI (e.g. r1=sp, r2=TOC), with:
- *
- * skb r3 (Entry parameter)
- * A register r4
- * X register r5
- * addr param r6
- * r7-r10 scratch
- * skb->data r14
- * skb headlen r15 (skb->len - skb->data_len)
- * m[0] r16
- * m[...] ...
- * m[15] r31
- */
-#define r_skb 3
-#define r_ret 3
-#define r_A 4
-#define r_X 5
-#define r_addr 6
-#define r_scratch1 7
-#define r_scratch2 8
-#define r_D 14
-#define r_HL 15
-#define r_M 16
-
-#ifndef __ASSEMBLY__
-
-/*
- * Assembly helpers from arch/powerpc/net/bpf_jit.S:
- */
-#define DECLARE_LOAD_FUNC(func) \
- extern u8 func[], func##_negative_offset[], func##_positive_offset[]
-
-DECLARE_LOAD_FUNC(sk_load_word);
-DECLARE_LOAD_FUNC(sk_load_half);
-DECLARE_LOAD_FUNC(sk_load_byte);
-DECLARE_LOAD_FUNC(sk_load_byte_msh);
-
-#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LBZ(r, base, i); \
- else { PPC_ADDIS(r, base, IMM_HA(i)); \
- PPC_LBZ(r, r, IMM_L(i)); } } while(0)
-
-#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i); \
- else { PPC_ADDIS(r, base, IMM_HA(i)); \
- PPC_LD(r, r, IMM_L(i)); } } while(0)
-
-#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LWZ(r, base, i); \
- else { PPC_ADDIS(r, base, IMM_HA(i)); \
- PPC_LWZ(r, r, IMM_L(i)); } } while(0)
-
-#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LHZ(r, base, i); \
- else { PPC_ADDIS(r, base, IMM_HA(i)); \
- PPC_LHZ(r, r, IMM_L(i)); } } while(0)
-
-#ifdef CONFIG_PPC64
-#define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0)
-#else
-#define PPC_LL_OFFS(r, base, i) do { PPC_LWZ_OFFS(r, base, i); } while(0)
-#endif
-
-#ifdef CONFIG_SMP
-#ifdef CONFIG_PPC64
-#define PPC_BPF_LOAD_CPU(r) \
- do { BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct, paca_index) != 2); \
- PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index)); \
- } while (0)
-#else
-#define PPC_BPF_LOAD_CPU(r) \
- do { BUILD_BUG_ON(FIELD_SIZEOF(struct task_struct, cpu) != 4); \
- PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu)); \
- } while(0)
-#endif
-#else
-#define PPC_BPF_LOAD_CPU(r) do { PPC_LI(r, 0); } while(0)
-#endif
-
-#define PPC_LHBRX_OFFS(r, base, i) \
- do { PPC_LI32(r, i); PPC_LHBRX(r, r, base); } while(0)
-#ifdef __LITTLE_ENDIAN__
-#define PPC_NTOHS_OFFS(r, base, i) PPC_LHBRX_OFFS(r, base, i)
-#else
-#define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i)
-#endif
-
-#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0)
-#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0)
-#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0)
-
-#define SEEN_DATAREF 0x10000 /* might call external helpers */
-#define SEEN_XREG 0x20000 /* X reg is used */
-#define SEEN_MEM 0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary
- * storage */
-#define SEEN_MEM_MSK 0x0ffff
-
-struct codegen_context {
- unsigned int seen;
- unsigned int idx;
- int pc_ret0; /* bpf index of first RET #0 instruction (if any) */
-};
-
-#endif
-
-#endif
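
Note: the IMM_HA()/IMM_L() helpers and the *_OFFS macros removed above split a displacement that does not fit the 16-bit D field into an addis using a "high-adjusted" half plus a load using the sign-extended low half; the +1 adjustment compensates for that sign extension. Below is a hedged standalone check of the identity; the test values and the harness are assumptions of this sketch.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define IMM_H(i)  ((uint32_t)(i) >> 16)
#define IMM_HA(i) (((uint32_t)(i) >> 16) + (((uint32_t)(i) & 0x8000) >> 15))
#define IMM_L(i)  ((uint32_t)(i) & 0xffff)

int main(void)
{
	uint32_t offs[] = { 0x12345678, 0x0000ffff, 0x00018000, 0x7fffffff };

	for (unsigned int k = 0; k < sizeof(offs) / sizeof(offs[0]); k++) {
		uint32_t i = offs[k];
		/* addis contributes HA << 16; the load then adds the sign-extended low half. */
		uint32_t rebuilt = (IMM_HA(i) << 16) + (uint32_t)(int16_t)IMM_L(i);

		printf("0x%08x -> ha = 0x%04x, l = 0x%04x, rebuilt = 0x%08x\n",
		       i, IMM_HA(i), IMM_L(i), rebuilt);
		assert(rebuilt == i);
	}
	return 0;
}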
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
deleted file mode 100644
index cf3a7e337f02..000000000000
--- a/arch/powerpc/net/bpf_jit64.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * bpf_jit64.h: BPF JIT compiler for PPC64
- *
- * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
- * IBM Corporation
- */
-#ifndef _BPF_JIT64_H
-#define _BPF_JIT64_H
-
-#include "bpf_jit.h"
-
-/*
- * Stack layout:
- * Ensure the top half (upto local_tmp_var) stays consistent
- * with our redzone usage.
- *
- * [ prev sp ] <-------------
- * [ nv gpr save area ] 6*8 |
- * [ tail_call_cnt ] 8 |
- * [ local_tmp_var ] 8 |
- * fp (r31) --> [ ebpf stack space ] upto 512 |
- * [ frame header ] 32/112 |
- * sp (r1) ---> [ stack pointer ] --------------
- */
-
-/* for gpr non volatile registers BPG_REG_6 to 10 */
-#define BPF_PPC_STACK_SAVE (6*8)
-/* for bpf JIT code internal usage */
-#define BPF_PPC_STACK_LOCALS 16
-/* stack frame excluding BPF stack, ensure this is quadword aligned */
-#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \
- BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE)
-
-#ifndef __ASSEMBLY__
-
-/* BPF register usage */
-#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
-#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
-
-/* BPF to ppc register mappings */
-static const int b2p[] = {
- /* function return value */
- [BPF_REG_0] = 8,
- /* function arguments */
- [BPF_REG_1] = 3,
- [BPF_REG_2] = 4,
- [BPF_REG_3] = 5,
- [BPF_REG_4] = 6,
- [BPF_REG_5] = 7,
- /* non volatile registers */
- [BPF_REG_6] = 27,
- [BPF_REG_7] = 28,
- [BPF_REG_8] = 29,
- [BPF_REG_9] = 30,
- /* frame pointer aka BPF_REG_10 */
- [BPF_REG_FP] = 31,
- /* eBPF jit internal registers */
- [BPF_REG_AX] = 2,
- [TMP_REG_1] = 9,
- [TMP_REG_2] = 10
-};
-
-/* PPC NVR range -- update this if we ever use NVRs below r27 */
-#define BPF_PPC_NVR_MIN 27
-
-/*
- * WARNING: These can use TMP_REG_2 if the offset is not at word boundary,
- * so ensure that it isn't in use already.
- */
-#define PPC_BPF_LL(r, base, i) do { \
- if ((i) % 4) { \
- PPC_LI(b2p[TMP_REG_2], (i)); \
- PPC_LDX(r, base, b2p[TMP_REG_2]); \
- } else \
- PPC_LD(r, base, i); \
- } while(0)
-#define PPC_BPF_STL(r, base, i) do { \
- if ((i) % 4) { \
- PPC_LI(b2p[TMP_REG_2], (i)); \
- PPC_STDX(r, base, b2p[TMP_REG_2]); \
- } else \
- PPC_STD(r, base, i); \
- } while(0)
-#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0)
-
-#define SEEN_FUNC 0x1000 /* might call external helpers */
-#define SEEN_STACK 0x2000 /* uses BPF stack */
-#define SEEN_TAILCALL 0x4000 /* uses tail calls */
-
-struct codegen_context {
- /*
- * This is used to track register usage as well
- * as calls to external helpers.
- * - register usage is tracked with corresponding
- * bits (r3-r10 and r27-r31)
- * - rest of the bits can be used to track other
- * things -- for now, we use bits 16 to 23
- * encoded in SEEN_* macros above
- */
- unsigned int seen;
- unsigned int idx;
- unsigned int stack_size;
-};
-
-#endif /* !__ASSEMBLY__ */
-
-#endif
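
Note: the warning in the removed PPC_BPF_LL/PPC_BPF_STL macros about clobbering TMP_REG_2 comes from ld/std being DS-form instructions whose displacement must be a multiple of 4; an unaligned offset forces a detour through a scratch register and the indexed form. Below is a sketch of that selection, with a printf stand-in for the real EMIT() macros and made-up register numbers.

#include <stdio.h>

/* Illustrative only: mimic how the removed PPC_BPF_LL chose between ld and ldx. */
static void emit_load64(int dst, int base, int disp, int tmp)
{
	if (disp % 4) {
		/* DS-form displacement must have the low 2 bits clear. */
		printf("li   r%d, %d\n", tmp, disp);
		printf("ldx  r%d, r%d, r%d\n", dst, base, tmp);
	} else {
		printf("ld   r%d, %d(r%d)\n", dst, disp, base);
	}
}

int main(void)
{
	emit_load64(8, 31, 16, 10);  /* aligned: single ld */
	emit_load64(8, 31, 18, 10);  /* unaligned: li + ldx */
	return 0;
}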
diff --git a/arch/powerpc/net/bpf_jit_asm.S b/arch/powerpc/net/bpf_jit_asm.S
deleted file mode 100644
index 2f5030d8383f..000000000000
--- a/arch/powerpc/net/bpf_jit_asm.S
+++ /dev/null
@@ -1,226 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* bpf_jit.S: Packet/header access helper functions
- * for PPC64 BPF compiler.
- *
- * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
- */
-
-#include <asm/ppc_asm.h>
-#include <asm/asm-compat.h>
-#include "bpf_jit32.h"
-
-/*
- * All of these routines are called directly from generated code,
- * whose register usage is:
- *
- * r3 skb
- * r4,r5 A,X
- * r6 *** address parameter to helper ***
- * r7-r10 scratch
- * r14 skb->data
- * r15 skb headlen
- * r16-31 M[]
- */
-
-/*
- * To consider: These helpers are so small it could be better to just
- * generate them inline. Inline code can do the simple headlen check
- * then branch directly to slow_path_XXX if required. (In fact, could
- * load a spare GPR with the address of slow_path_generic and pass size
- * as an argument, making the call site a mtlr, li and bllr.)
- */
- .globl sk_load_word
-sk_load_word:
- PPC_LCMPI r_addr, 0
- blt bpf_slow_path_word_neg
- .globl sk_load_word_positive_offset
-sk_load_word_positive_offset:
- /* Are we accessing past headlen? */
- subi r_scratch1, r_HL, 4
- PPC_LCMP r_scratch1, r_addr
- blt bpf_slow_path_word
- /* Nope, just hitting the header. cr0 here is eq or gt! */
-#ifdef __LITTLE_ENDIAN__
- lwbrx r_A, r_D, r_addr
-#else
- lwzx r_A, r_D, r_addr
-#endif
- blr /* Return success, cr0 != LT */
-
- .globl sk_load_half
-sk_load_half:
- PPC_LCMPI r_addr, 0
- blt bpf_slow_path_half_neg
- .globl sk_load_half_positive_offset
-sk_load_half_positive_offset:
- subi r_scratch1, r_HL, 2
- PPC_LCMP r_scratch1, r_addr
- blt bpf_slow_path_half
-#ifdef __LITTLE_ENDIAN__
- lhbrx r_A, r_D, r_addr
-#else
- lhzx r_A, r_D, r_addr
-#endif
- blr
-
- .globl sk_load_byte
-sk_load_byte:
- PPC_LCMPI r_addr, 0
- blt bpf_slow_path_byte_neg
- .globl sk_load_byte_positive_offset
-sk_load_byte_positive_offset:
- PPC_LCMP r_HL, r_addr
- ble bpf_slow_path_byte
- lbzx r_A, r_D, r_addr
- blr
-
-/*
- * BPF_LDX | BPF_B | BPF_MSH: ldxb 4*([offset]&0xf)
- * r_addr is the offset value
- */
- .globl sk_load_byte_msh
-sk_load_byte_msh:
- PPC_LCMPI r_addr, 0
- blt bpf_slow_path_byte_msh_neg
- .globl sk_load_byte_msh_positive_offset
-sk_load_byte_msh_positive_offset:
- PPC_LCMP r_HL, r_addr
- ble bpf_slow_path_byte_msh
- lbzx r_X, r_D, r_addr
- rlwinm r_X, r_X, 2, 32-4-2, 31-2
- blr
-
-/* Call out to skb_copy_bits:
- * We'll need to back up our volatile regs first; we have
- * local variable space at r1+(BPF_PPC_STACK_BASIC).
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define bpf_slow_path_common(SIZE) \
- mflr r0; \
- PPC_STL r0, PPC_LR_STKOFF(r1); \
- /* R3 goes in parameter space of caller's frame */ \
- PPC_STL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \
- PPC_STL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \
- PPC_STL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \
- addi r5, r1, BPF_PPC_STACK_BASIC+(2*REG_SZ); \
- PPC_STLU r1, -BPF_PPC_SLOWPATH_FRAME(r1); \
- /* R3 = r_skb, as passed */ \
- mr r4, r_addr; \
- li r6, SIZE; \
- bl skb_copy_bits; \
- nop; \
- /* R3 = 0 on success */ \
- addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
- PPC_LL r0, PPC_LR_STKOFF(r1); \
- PPC_LL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \
- PPC_LL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \
- mtlr r0; \
- PPC_LCMPI r3, 0; \
- blt bpf_error; /* cr0 = LT */ \
- PPC_LL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \
- /* Great success! */
-
-bpf_slow_path_word:
- bpf_slow_path_common(4)
- /* Data value is on stack, and cr0 != LT */
- lwz r_A, BPF_PPC_STACK_BASIC+(2*REG_SZ)(r1)
- blr
-
-bpf_slow_path_half:
- bpf_slow_path_common(2)
- lhz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
- blr
-
-bpf_slow_path_byte:
- bpf_slow_path_common(1)
- lbz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
- blr
-
-bpf_slow_path_byte_msh:
- bpf_slow_path_common(1)
- lbz r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
- rlwinm r_X, r_X, 2, 32-4-2, 31-2
- blr
-
-/* Call out to bpf_internal_load_pointer_neg_helper:
- * We'll need to back up our volatile regs first; we have
- * local variable space at r1+(BPF_PPC_STACK_BASIC).
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define sk_negative_common(SIZE) \
- mflr r0; \
- PPC_STL r0, PPC_LR_STKOFF(r1); \
- /* R3 goes in parameter space of caller's frame */ \
- PPC_STL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \
- PPC_STL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \
- PPC_STL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \
- PPC_STLU r1, -BPF_PPC_SLOWPATH_FRAME(r1); \
- /* R3 = r_skb, as passed */ \
- mr r4, r_addr; \
- li r5, SIZE; \
- bl bpf_internal_load_pointer_neg_helper; \
- nop; \
- /* R3 != 0 on success */ \
- addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
- PPC_LL r0, PPC_LR_STKOFF(r1); \
- PPC_LL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \
- PPC_LL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \
- mtlr r0; \
- PPC_LCMPLI r3, 0; \
- beq bpf_error_slow; /* cr0 = EQ */ \
- mr r_addr, r3; \
- PPC_LL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \
- /* Great success! */
-
-bpf_slow_path_word_neg:
- lis r_scratch1,-32 /* SKF_LL_OFF */
- PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */
- blt bpf_error /* cr0 = LT */
- .globl sk_load_word_negative_offset
-sk_load_word_negative_offset:
- sk_negative_common(4)
- lwz r_A, 0(r_addr)
- blr
-
-bpf_slow_path_half_neg:
- lis r_scratch1,-32 /* SKF_LL_OFF */
- PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */
- blt bpf_error /* cr0 = LT */
- .globl sk_load_half_negative_offset
-sk_load_half_negative_offset:
- sk_negative_common(2)
- lhz r_A, 0(r_addr)
- blr
-
-bpf_slow_path_byte_neg:
- lis r_scratch1,-32 /* SKF_LL_OFF */
- PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */
- blt bpf_error /* cr0 = LT */
- .globl sk_load_byte_negative_offset
-sk_load_byte_negative_offset:
- sk_negative_common(1)
- lbz r_A, 0(r_addr)
- blr
-
-bpf_slow_path_byte_msh_neg:
- lis r_scratch1,-32 /* SKF_LL_OFF */
- PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */
- blt bpf_error /* cr0 = LT */
- .globl sk_load_byte_msh_negative_offset
-sk_load_byte_msh_negative_offset:
- sk_negative_common(1)
- lbz r_X, 0(r_addr)
- rlwinm r_X, r_X, 2, 32-4-2, 31-2
- blr
-
-bpf_error_slow:
- /* fabricate a cr0 = lt */
- li r_scratch1, -1
- PPC_LCMPI r_scratch1, 0
-bpf_error:
- /* Entered with cr0 = lt */
- li r3, 0
- /* Generated code will 'blt epilogue', returning 0. */
- blr
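
Note: for reference, the slow paths in the deleted assembly reduce to "call skb_copy_bits() into a small stack buffer and report failure through cr0". The C-level analogue below uses a fake_skb/copy_bits() stand-in; skb_copy_bits() itself is the real kernel helper, everything else here is invented for illustration.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-ins: a flat buffer "skb" and a copy_bits() mimicking skb_copy_bits()'s contract. */
struct fake_skb { const uint8_t *data; int len; };

static int copy_bits(const struct fake_skb *skb, int offset, void *to, int len)
{
	if (offset < 0 || len < 0 || offset + len > skb->len)
		return -1;
	memcpy(to, skb->data + offset, len);
	return 0;
}

/* What sk_load_word's slow path boiled down to: fetch 4 bytes via the helper. */
static int load_word_slowpath(const struct fake_skb *skb, int offset, uint32_t *out)
{
	uint8_t buf[4];

	if (copy_bits(skb, offset, buf, sizeof(buf)))
		return -1;                      /* generated code branched to the error path */
	*out = (uint32_t)buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3];
	return 0;
}

int main(void)
{
	uint8_t pkt[] = { 0x45, 0x00, 0x00, 0x54, 0xaa, 0xbb };
	struct fake_skb skb = { pkt, sizeof(pkt) };
	uint32_t w;

	if (!load_word_slowpath(&skb, 0, &w))
		printf("word at 0: 0x%08x\n", w);   /* 0x45000054: the packet bytes in network order */
	return 0;
}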
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index d57b46e0dd60..0f9a21783329 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -1,10 +1,11 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* bpf_jit_comp.c: BPF JIT compiler
+/*
+ * eBPF JIT compiler
*
- * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
+ * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+ * IBM Corporation
*
- * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com)
- * Ported to ppc32 by Denis Kirjanov <kda@linux-powerpc.org>
+ * Based on the powerpc classic BPF JIT compiler by Matt Evans
*/
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
@@ -12,640 +13,193 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/memory.h>
+#include <linux/bpf.h>
-#include "bpf_jit32.h"
+#include <asm/kprobes.h>
+#include <asm/code-patching.h>
-static inline void bpf_flush_icache(void *start, void *end)
+#include "bpf_jit.h"
+
+static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
{
- smp_wmb();
- flush_icache_range((unsigned long)start, (unsigned long)end);
+ memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
}
-static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image,
- struct codegen_context *ctx)
+int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr)
{
- int i;
- const struct sock_filter *filter = fp->insns;
-
- if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
- /* Make stackframe */
- if (ctx->seen & SEEN_DATAREF) {
- /* If we call any helpers (for loads), save LR */
- EMIT(PPC_INST_MFLR | __PPC_RT(R0));
- PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
-
- /* Back up non-volatile regs. */
- PPC_BPF_STL(r_D, 1, -(REG_SZ*(32-r_D)));
- PPC_BPF_STL(r_HL, 1, -(REG_SZ*(32-r_HL)));
- }
- if (ctx->seen & SEEN_MEM) {
- /*
- * Conditionally save regs r15-r31 as some will be used
- * for M[] data.
- */
- for (i = r_M; i < (r_M+16); i++) {
- if (ctx->seen & (1 << (i-r_M)))
- PPC_BPF_STL(i, 1, -(REG_SZ*(32-i)));
- }
- }
- PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME);
- }
-
- if (ctx->seen & SEEN_DATAREF) {
- /*
- * If this filter needs to access skb data,
- * prepare r_D and r_HL:
- * r_HL = skb->len - skb->data_len
- * r_D = skb->data
- */
- PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
- data_len));
- PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len));
- PPC_SUB(r_HL, r_HL, r_scratch1);
- PPC_LL_OFFS(r_D, r_skb, offsetof(struct sk_buff, data));
- }
-
- if (ctx->seen & SEEN_XREG) {
- /*
- * TODO: Could also detect whether first instr. sets X and
- * avoid this (as below, with A).
- */
- PPC_LI(r_X, 0);
+ if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) {
+ PPC_JMP(exit_addr);
+ } else if (ctx->alt_exit_addr) {
+ if (WARN_ON(!is_offset_in_branch_range((long)ctx->alt_exit_addr - (ctx->idx * 4))))
+ return -1;
+ PPC_JMP(ctx->alt_exit_addr);
+ } else {
+ ctx->alt_exit_addr = ctx->idx * 4;
+ bpf_jit_build_epilogue(image, ctx);
}
- /* make sure we dont leak kernel information to user */
- if (bpf_needs_clear_a(&filter[0]))
- PPC_LI(r_A, 0);
+ return 0;
}
-static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+struct powerpc_jit_data {
+ /* address of rw header */
+ struct bpf_binary_header *hdr;
+ /* address of ro final header */
+ struct bpf_binary_header *fhdr;
+ u32 *addrs;
+ u8 *fimage;
+ u32 proglen;
+ struct codegen_context ctx;
+};
+
+bool bpf_jit_needs_zext(void)
{
- int i;
-
- if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
- PPC_ADDI(1, 1, BPF_PPC_STACKFRAME);
- if (ctx->seen & SEEN_DATAREF) {
- PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
- PPC_MTLR(0);
- PPC_BPF_LL(r_D, 1, -(REG_SZ*(32-r_D)));
- PPC_BPF_LL(r_HL, 1, -(REG_SZ*(32-r_HL)));
- }
- if (ctx->seen & SEEN_MEM) {
- /* Restore any saved non-vol registers */
- for (i = r_M; i < (r_M+16); i++) {
- if (ctx->seen & (1 << (i-r_M)))
- PPC_BPF_LL(i, 1, -(REG_SZ*(32-i)));
- }
- }
- }
- /* The RETs have left a return value in R3. */
-
- PPC_BLR();
+ return true;
}
-#define CHOOSE_LOAD_FUNC(K, func) \
- ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
-
-/* Assemble the body code between the prologue & epilogue. */
-static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
- struct codegen_context *ctx,
- unsigned int *addrs)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
- const struct sock_filter *filter = fp->insns;
- int flen = fp->len;
- u8 *func;
- unsigned int true_cond;
- int i;
-
- /* Start of epilogue code */
- unsigned int exit_addr = addrs[flen];
+ u32 proglen;
+ u32 alloclen;
+ u8 *image = NULL;
+ u32 *code_base;
+ u32 *addrs;
+ struct powerpc_jit_data *jit_data;
+ struct codegen_context cgctx;
+ int pass;
+ int flen;
+ struct bpf_binary_header *fhdr = NULL;
+ struct bpf_binary_header *hdr = NULL;
+ struct bpf_prog *org_fp = fp;
+ struct bpf_prog *tmp_fp;
+ bool bpf_blinded = false;
+ bool extra_pass = false;
+ u8 *fimage = NULL;
+ u32 *fcode_base;
+ u32 extable_len;
+ u32 fixup_len;
+
+ if (!fp->jit_requested)
+ return org_fp;
+
+ tmp_fp = bpf_jit_blind_constants(org_fp);
+ if (IS_ERR(tmp_fp))
+ return org_fp;
+
+ if (tmp_fp != org_fp) {
+ bpf_blinded = true;
+ fp = tmp_fp;
+ }
- for (i = 0; i < flen; i++) {
- unsigned int K = filter[i].k;
- u16 code = bpf_anc_helper(&filter[i]);
+ jit_data = fp->aux->jit_data;
+ if (!jit_data) {
+ jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+ if (!jit_data) {
+ fp = org_fp;
+ goto out;
+ }
+ fp->aux->jit_data = jit_data;
+ }
+ flen = fp->len;
+ addrs = jit_data->addrs;
+ if (addrs) {
+ cgctx = jit_data->ctx;
/*
- * addrs[] maps a BPF bytecode address into a real offset from
- * the start of the body code.
+ * JIT compiled to a writable location (image/code_base) first.
+ * It is then moved to the readonly final location (fimage/fcode_base)
+ * using instruction patching.
*/
- addrs[i] = ctx->idx * 4;
-
- switch (code) {
- /*** ALU ops ***/
- case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */
- ctx->seen |= SEEN_XREG;
- PPC_ADD(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */
- if (!K)
- break;
- PPC_ADDI(r_A, r_A, IMM_L(K));
- if (K >= 32768)
- PPC_ADDIS(r_A, r_A, IMM_HA(K));
- break;
- case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */
- ctx->seen |= SEEN_XREG;
- PPC_SUB(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
- if (!K)
- break;
- PPC_ADDI(r_A, r_A, IMM_L(-K));
- if (K >= 32768)
- PPC_ADDIS(r_A, r_A, IMM_HA(-K));
- break;
- case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */
- ctx->seen |= SEEN_XREG;
- PPC_MULW(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
- if (K < 32768)
- PPC_MULI(r_A, r_A, K);
- else {
- PPC_LI32(r_scratch1, K);
- PPC_MULW(r_A, r_A, r_scratch1);
- }
- break;
- case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */
- case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */
- ctx->seen |= SEEN_XREG;
- PPC_CMPWI(r_X, 0);
- if (ctx->pc_ret0 != -1) {
- PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
- } else {
- PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
- PPC_LI(r_ret, 0);
- PPC_JMP(exit_addr);
- }
- if (code == (BPF_ALU | BPF_MOD | BPF_X)) {
- PPC_DIVWU(r_scratch1, r_A, r_X);
- PPC_MULW(r_scratch1, r_X, r_scratch1);
- PPC_SUB(r_A, r_A, r_scratch1);
- } else {
- PPC_DIVWU(r_A, r_A, r_X);
- }
- break;
- case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */
- PPC_LI32(r_scratch2, K);
- PPC_DIVWU(r_scratch1, r_A, r_scratch2);
- PPC_MULW(r_scratch1, r_scratch2, r_scratch1);
- PPC_SUB(r_A, r_A, r_scratch1);
- break;
- case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
- if (K == 1)
- break;
- PPC_LI32(r_scratch1, K);
- PPC_DIVWU(r_A, r_A, r_scratch1);
- break;
- case BPF_ALU | BPF_AND | BPF_X:
- ctx->seen |= SEEN_XREG;
- PPC_AND(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_AND | BPF_K:
- if (!IMM_H(K))
- PPC_ANDI(r_A, r_A, K);
- else {
- PPC_LI32(r_scratch1, K);
- PPC_AND(r_A, r_A, r_scratch1);
- }
- break;
- case BPF_ALU | BPF_OR | BPF_X:
- ctx->seen |= SEEN_XREG;
- PPC_OR(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_OR | BPF_K:
- if (IMM_L(K))
- PPC_ORI(r_A, r_A, IMM_L(K));
- if (K >= 65536)
- PPC_ORIS(r_A, r_A, IMM_H(K));
- break;
- case BPF_ANC | SKF_AD_ALU_XOR_X:
- case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */
- ctx->seen |= SEEN_XREG;
- PPC_XOR(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
- if (IMM_L(K))
- PPC_XORI(r_A, r_A, IMM_L(K));
- if (K >= 65536)
- PPC_XORIS(r_A, r_A, IMM_H(K));
- break;
- case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
- ctx->seen |= SEEN_XREG;
- PPC_SLW(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_LSH | BPF_K:
- if (K == 0)
- break;
- else
- PPC_SLWI(r_A, r_A, K);
- break;
- case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
- ctx->seen |= SEEN_XREG;
- PPC_SRW(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
- if (K == 0)
- break;
- else
- PPC_SRWI(r_A, r_A, K);
- break;
- case BPF_ALU | BPF_NEG:
- PPC_NEG(r_A, r_A);
- break;
- case BPF_RET | BPF_K:
- PPC_LI32(r_ret, K);
- if (!K) {
- if (ctx->pc_ret0 == -1)
- ctx->pc_ret0 = i;
- }
- /*
- * If this isn't the very last instruction, branch to
- * the epilogue if we've stuff to clean up. Otherwise,
- * if there's nothing to tidy, just return. If we /are/
- * the last instruction, we're about to fall through to
- * the epilogue to return.
- */
- if (i != flen - 1) {
- /*
- * Note: 'seen' is properly valid only on pass
- * #2. Both parts of this conditional are the
- * same instruction size though, meaning the
- * first pass will still correctly determine the
- * code size/addresses.
- */
- if (ctx->seen)
- PPC_JMP(exit_addr);
- else
- PPC_BLR();
- }
- break;
- case BPF_RET | BPF_A:
- PPC_MR(r_ret, r_A);
- if (i != flen - 1) {
- if (ctx->seen)
- PPC_JMP(exit_addr);
- else
- PPC_BLR();
- }
- break;
- case BPF_MISC | BPF_TAX: /* X = A */
- PPC_MR(r_X, r_A);
- break;
- case BPF_MISC | BPF_TXA: /* A = X */
- ctx->seen |= SEEN_XREG;
- PPC_MR(r_A, r_X);
- break;
-
- /*** Constant loads/M[] access ***/
- case BPF_LD | BPF_IMM: /* A = K */
- PPC_LI32(r_A, K);
- break;
- case BPF_LDX | BPF_IMM: /* X = K */
- PPC_LI32(r_X, K);
- break;
- case BPF_LD | BPF_MEM: /* A = mem[K] */
- PPC_MR(r_A, r_M + (K & 0xf));
- ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
- break;
- case BPF_LDX | BPF_MEM: /* X = mem[K] */
- PPC_MR(r_X, r_M + (K & 0xf));
- ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
- break;
- case BPF_ST: /* mem[K] = A */
- PPC_MR(r_M + (K & 0xf), r_A);
- ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
- break;
- case BPF_STX: /* mem[K] = X */
- PPC_MR(r_M + (K & 0xf), r_X);
- ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
- break;
- case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
- PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
- break;
- case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */
- PPC_LWZ_OFFS(r_A, r_skb, K);
- break;
- case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
- PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
- break;
-
- /*** Ancillary info loads ***/
- case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
- protocol) != 2);
- PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- protocol));
- break;
- case BPF_ANC | SKF_AD_IFINDEX:
- case BPF_ANC | SKF_AD_HATYPE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
- ifindex) != 4);
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
- type) != 2);
- PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
- dev));
- PPC_CMPDI(r_scratch1, 0);
- if (ctx->pc_ret0 != -1) {
- PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
- } else {
- /* Exit, returning 0; first pass hits here. */
- PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12);
- PPC_LI(r_ret, 0);
- PPC_JMP(exit_addr);
- }
- if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
- PPC_LWZ_OFFS(r_A, r_scratch1,
- offsetof(struct net_device, ifindex));
- } else {
- PPC_LHZ_OFFS(r_A, r_scratch1,
- offsetof(struct net_device, type));
- }
-
- break;
- case BPF_ANC | SKF_AD_MARK:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
- PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- mark));
- break;
- case BPF_ANC | SKF_AD_RXHASH:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
- PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- hash));
- break;
- case BPF_ANC | SKF_AD_VLAN_TAG:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
-
- PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- vlan_tci));
- break;
- case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
- PPC_LBZ_OFFS(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET());
- if (PKT_VLAN_PRESENT_BIT)
- PPC_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT);
- if (PKT_VLAN_PRESENT_BIT < 7)
- PPC_ANDI(r_A, r_A, 1);
- break;
- case BPF_ANC | SKF_AD_QUEUE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
- queue_mapping) != 2);
- PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- queue_mapping));
- break;
- case BPF_ANC | SKF_AD_PKTTYPE:
- PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET());
- PPC_ANDI(r_A, r_A, PKT_TYPE_MAX);
- PPC_SRWI(r_A, r_A, 5);
- break;
- case BPF_ANC | SKF_AD_CPU:
- PPC_BPF_LOAD_CPU(r_A);
- break;
- /*** Absolute loads from packet header/data ***/
- case BPF_LD | BPF_W | BPF_ABS:
- func = CHOOSE_LOAD_FUNC(K, sk_load_word);
- goto common_load;
- case BPF_LD | BPF_H | BPF_ABS:
- func = CHOOSE_LOAD_FUNC(K, sk_load_half);
- goto common_load;
- case BPF_LD | BPF_B | BPF_ABS:
- func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
- common_load:
- /* Load from [K]. */
- ctx->seen |= SEEN_DATAREF;
- PPC_FUNC_ADDR(r_scratch1, func);
- PPC_MTLR(r_scratch1);
- PPC_LI32(r_addr, K);
- PPC_BLRL();
- /*
- * Helper returns 'lt' condition on error, and an
- * appropriate return value in r3
- */
- PPC_BCC(COND_LT, exit_addr);
- break;
-
- /*** Indirect loads from packet header/data ***/
- case BPF_LD | BPF_W | BPF_IND:
- func = sk_load_word;
- goto common_load_ind;
- case BPF_LD | BPF_H | BPF_IND:
- func = sk_load_half;
- goto common_load_ind;
- case BPF_LD | BPF_B | BPF_IND:
- func = sk_load_byte;
- common_load_ind:
- /*
- * Load from [X + K]. Negative offsets are tested for
- * in the helper functions.
- */
- ctx->seen |= SEEN_DATAREF | SEEN_XREG;
- PPC_FUNC_ADDR(r_scratch1, func);
- PPC_MTLR(r_scratch1);
- PPC_ADDI(r_addr, r_X, IMM_L(K));
- if (K >= 32768)
- PPC_ADDIS(r_addr, r_addr, IMM_HA(K));
- PPC_BLRL();
- /* If error, cr0.LT set */
- PPC_BCC(COND_LT, exit_addr);
- break;
-
- case BPF_LDX | BPF_B | BPF_MSH:
- func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
- goto common_load;
- break;
-
- /*** Jump and branches ***/
- case BPF_JMP | BPF_JA:
- if (K != 0)
- PPC_JMP(addrs[i + 1 + K]);
- break;
-
- case BPF_JMP | BPF_JGT | BPF_K:
- case BPF_JMP | BPF_JGT | BPF_X:
- true_cond = COND_GT;
- goto cond_branch;
- case BPF_JMP | BPF_JGE | BPF_K:
- case BPF_JMP | BPF_JGE | BPF_X:
- true_cond = COND_GE;
- goto cond_branch;
- case BPF_JMP | BPF_JEQ | BPF_K:
- case BPF_JMP | BPF_JEQ | BPF_X:
- true_cond = COND_EQ;
- goto cond_branch;
- case BPF_JMP | BPF_JSET | BPF_K:
- case BPF_JMP | BPF_JSET | BPF_X:
- true_cond = COND_NE;
- /* Fall through */
- cond_branch:
- /* same targets, can avoid doing the test :) */
- if (filter[i].jt == filter[i].jf) {
- if (filter[i].jt > 0)
- PPC_JMP(addrs[i + 1 + filter[i].jt]);
- break;
- }
-
- switch (code) {
- case BPF_JMP | BPF_JGT | BPF_X:
- case BPF_JMP | BPF_JGE | BPF_X:
- case BPF_JMP | BPF_JEQ | BPF_X:
- ctx->seen |= SEEN_XREG;
- PPC_CMPLW(r_A, r_X);
- break;
- case BPF_JMP | BPF_JSET | BPF_X:
- ctx->seen |= SEEN_XREG;
- PPC_AND_DOT(r_scratch1, r_A, r_X);
- break;
- case BPF_JMP | BPF_JEQ | BPF_K:
- case BPF_JMP | BPF_JGT | BPF_K:
- case BPF_JMP | BPF_JGE | BPF_K:
- if (K < 32768)
- PPC_CMPLWI(r_A, K);
- else {
- PPC_LI32(r_scratch1, K);
- PPC_CMPLW(r_A, r_scratch1);
- }
- break;
- case BPF_JMP | BPF_JSET | BPF_K:
- if (K < 32768)
- /* PPC_ANDI is /only/ dot-form */
- PPC_ANDI(r_scratch1, r_A, K);
- else {
- PPC_LI32(r_scratch1, K);
- PPC_AND_DOT(r_scratch1, r_A,
- r_scratch1);
- }
- break;
- }
- /* Sometimes branches are constructed "backward", with
- * the false path being the branch and true path being
- * a fallthrough to the next instruction.
- */
- if (filter[i].jt == 0)
- /* Swap the sense of the branch */
- PPC_BCC(true_cond ^ COND_CMP_TRUE,
- addrs[i + 1 + filter[i].jf]);
- else {
- PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]);
- if (filter[i].jf != 0)
- PPC_JMP(addrs[i + 1 + filter[i].jf]);
- }
- break;
- default:
- /* The filter contains something cruel & unusual.
- * We don't handle it, but also there shouldn't be
- * anything missing from our list.
- */
- if (printk_ratelimit())
- pr_err("BPF filter opcode %04x (@%d) unsupported\n",
- filter[i].code, i);
- return -ENOTSUPP;
- }
-
+ fimage = jit_data->fimage;
+ fhdr = jit_data->fhdr;
+ proglen = jit_data->proglen;
+ hdr = jit_data->hdr;
+ image = (void *)hdr + ((void *)fimage - (void *)fhdr);
+ extra_pass = true;
+ /* During extra pass, ensure index is reset before repopulating extable entries */
+ cgctx.exentry_idx = 0;
+ goto skip_init_ctx;
}
- /* Set end-of-body-code address for exit. */
- addrs[i] = ctx->idx * 4;
- return 0;
-}
+ addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
+ if (addrs == NULL) {
+ fp = org_fp;
+ goto out_addrs;
+ }
-void bpf_jit_compile(struct bpf_prog *fp)
-{
- unsigned int proglen;
- unsigned int alloclen;
- u32 *image = NULL;
- u32 *code_base;
- unsigned int *addrs;
- struct codegen_context cgctx;
- int pass;
- int flen = fp->len;
+ memset(&cgctx, 0, sizeof(struct codegen_context));
+ bpf_jit_init_reg_mapping(&cgctx);
- if (!bpf_jit_enable)
- return;
+ /* Make sure that the stack is quadword aligned. */
+ cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
- addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
- if (addrs == NULL)
- return;
+ /* Scouting faux-generate pass 0 */
+ if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
+ /* We hit something illegal or unsupported. */
+ fp = org_fp;
+ goto out_addrs;
+ }
/*
- * There are multiple assembly passes as the generated code will change
- * size as it settles down, figuring out the max branch offsets/exit
- * paths required.
- *
- * The range of standard conditional branches is +/- 32Kbytes. Since
- * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to
- * finish with 8 bytes/instruction. Not feasible, so long jumps are
- * used, distinct from short branches.
- *
- * Current:
- *
- * For now, both branch types assemble to 2 words (short branches padded
- * with a NOP); this is less efficient, but assembly will always complete
- * after exactly 3 passes:
- *
- * First pass: No code buffer; Program is "faux-generated" -- no code
- * emitted but maximum size of output determined (and addrs[] filled
- * in). Also, we note whether we use M[], whether we use skb data, etc.
- * All generation choices assumed to be 'worst-case', e.g. branches all
- * far (2 instructions), return path code reduction not available, etc.
- *
- * Second pass: Code buffer allocated with size determined previously.
- * Prologue generated to support features we have seen used. Exit paths
- * determined and addrs[] is filled in again, as code may be slightly
- * smaller as a result.
- *
- * Third pass: Code generated 'for real', and branch destinations
- * determined from now-accurate addrs[] map.
- *
- * Ideal:
- *
- * If we optimise this, near branches will be shorter. On the
- * first assembly pass, we should err on the side of caution and
- * generate the biggest code. On subsequent passes, branches will be
- * generated short or long and code size will reduce. With smaller
- * code, more branches may fall into the short category, and code will
- * reduce more.
- *
- * Finally, if we see one pass generate code the same size as the
- * previous pass we have converged and should now generate code for
- * real. Allocating at the end will also save the memory that would
- * otherwise be wasted by the (small) current code shrinkage.
- * Preferably, we should do a small number of passes (e.g. 5) and if we
- * haven't converged by then, get impatient and force code to generate
- * as-is, even if the odd branch would be left long. The chances of a
- * long jump are tiny with all but the most enormous of BPF filter
- * inputs, so we should usually converge on the third pass.
+ * If we have seen a tail call, we need a second pass.
+ * This is because bpf_jit_emit_common_epilogue() is called
+ * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
+ * We also need a second pass if we ended up with too large
+ * a program so as to ensure BPF_EXIT branches are in range.
*/
+ if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
+ cgctx.idx = 0;
+ if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
+ fp = org_fp;
+ goto out_addrs;
+ }
+ }
- cgctx.idx = 0;
- cgctx.seen = 0;
- cgctx.pc_ret0 = -1;
- /* Scouting faux-generate pass 0 */
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs))
- /* We hit something illegal or unsupported. */
- goto out;
-
+ bpf_jit_realloc_regs(&cgctx);
/*
* Pretend to build prologue, given the features we've seen. This will
* update ctgtx.idx as it pretends to output instructions, then we can
* calculate total size from idx.
*/
- bpf_jit_build_prologue(fp, 0, &cgctx);
- bpf_jit_build_epilogue(0, &cgctx);
+ bpf_jit_build_prologue(NULL, &cgctx);
+ addrs[fp->len] = cgctx.idx * 4;
+ bpf_jit_build_epilogue(NULL, &cgctx);
+
+ fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4;
+ extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry);
proglen = cgctx.idx * 4;
- alloclen = proglen + FUNCTION_DESCR_SIZE;
- image = module_alloc(alloclen);
- if (!image)
- goto out;
+ alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
- code_base = image + (FUNCTION_DESCR_SIZE/4);
+ fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
+ bpf_jit_fill_ill_insns);
+ if (!fhdr) {
+ fp = org_fp;
+ goto out_addrs;
+ }
+
+ if (extable_len)
+ fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;
+
+skip_init_ctx:
+ code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
+ fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
/* Code generation passes 1-2 */
for (pass = 1; pass < 3; pass++) {
/* Now build the prologue, body code & epilogue for real. */
cgctx.idx = 0;
- bpf_jit_build_prologue(fp, code_base, &cgctx);
- bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+ cgctx.alt_exit_addr = 0;
+ bpf_jit_build_prologue(code_base, &cgctx);
+ if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass,
+ extra_pass)) {
+ bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
+ bpf_jit_binary_pack_free(fhdr, hdr);
+ fp = org_fp;
+ goto out_addrs;
+ }
bpf_jit_build_epilogue(code_base, &cgctx);
if (bpf_jit_enable > 1)
@@ -654,31 +208,154 @@ void bpf_jit_compile(struct bpf_prog *fp)
}
if (bpf_jit_enable > 1)
- /* Note that we output the base address of the code_base
+ /*
+ * Note that we output the base address of the code_base
* rather than image, since opcodes are in code_base.
*/
bpf_jit_dump(flen, proglen, pass, code_base);
- bpf_flush_icache(code_base, code_base + (proglen/4));
-
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC64_ELF_ABI_V1
/* Function descriptor nastiness: Address + TOC */
- ((u64 *)image)[0] = (u64)code_base;
+ ((u64 *)image)[0] = (u64)fcode_base;
((u64 *)image)[1] = local_paca->kernel_toc;
#endif
- fp->bpf_func = (void *)image;
+ fp->bpf_func = (void *)fimage;
fp->jited = 1;
+ fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
+
+ if (!fp->is_func || extra_pass) {
+ if (bpf_jit_binary_pack_finalize(fp, fhdr, hdr)) {
+ fp = org_fp;
+ goto out_addrs;
+ }
+ bpf_prog_fill_jited_linfo(fp, addrs);
+out_addrs:
+ kfree(addrs);
+ kfree(jit_data);
+ fp->aux->jit_data = NULL;
+ } else {
+ jit_data->addrs = addrs;
+ jit_data->ctx = cgctx;
+ jit_data->proglen = proglen;
+ jit_data->fimage = fimage;
+ jit_data->fhdr = fhdr;
+ jit_data->hdr = hdr;
+ }
out:
- kfree(addrs);
- return;
+ if (bpf_blinded)
+ bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
+
+ return fp;
+}
+
+/*
+ * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
+ * this function, as this only applies to BPF_PROBE_MEM, for now.
+ */
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
+ struct codegen_context *ctx, int insn_idx, int jmp_off,
+ int dst_reg)
+{
+ off_t offset;
+ unsigned long pc;
+ struct exception_table_entry *ex, *ex_entry;
+ u32 *fixup;
+
+ /* Populate extable entries only in the last pass */
+ if (pass != 2)
+ return 0;
+
+ if (!fp->aux->extable ||
+ WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
+ return -EINVAL;
+
+ /*
+ * Program is first written to image before copying to the
+ * final location (fimage). Accordingly, update in the image first.
+ * As all offsets used are relative, copying as is to the
+ * final location should be alright.
+ */
+ pc = (unsigned long)&image[insn_idx];
+ ex = (void *)fp->aux->extable - (void *)fimage + (void *)image;
+
+ fixup = (void *)ex -
+ (fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
+ (ctx->exentry_idx * BPF_FIXUP_LEN * 4);
+
+ fixup[0] = PPC_RAW_LI(dst_reg, 0);
+ if (IS_ENABLED(CONFIG_PPC32))
+ fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */
+
+ fixup[BPF_FIXUP_LEN - 1] =
+ PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
+
+ ex_entry = &ex[ctx->exentry_idx];
+
+ offset = pc - (long)&ex_entry->insn;
+ if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ return -ERANGE;
+ ex_entry->insn = offset;
+
+ offset = (long)fixup - (long)&ex_entry->fixup;
+ if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ return -ERANGE;
+ ex_entry->fixup = offset;
+
+ ctx->exentry_idx++;
+ return 0;
+}
+
+void *bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+ int err;
+
+ if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
+ return ERR_PTR(-EINVAL);
+
+ mutex_lock(&text_mutex);
+ err = patch_instructions(dst, src, len, false);
+ mutex_unlock(&text_mutex);
+
+ return err ? ERR_PTR(err) : dst;
+}
+
+int bpf_arch_text_invalidate(void *dst, size_t len)
+{
+ u32 insn = BREAKPOINT_INSTRUCTION;
+ int ret;
+
+ if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
+ return -EINVAL;
+
+ mutex_lock(&text_mutex);
+ ret = patch_instructions(dst, &insn, len, true);
+ mutex_unlock(&text_mutex);
+
+ return ret;
}
void bpf_jit_free(struct bpf_prog *fp)
{
- if (fp->jited)
- module_memfree(fp->bpf_func);
+ if (fp->jited) {
+ struct powerpc_jit_data *jit_data = fp->aux->jit_data;
+ struct bpf_binary_header *hdr;
+
+ /*
+ * If we fail the final pass of JIT (from jit_subprogs),
+ * the program may not be finalized yet. Call finalize here
+ * before freeing it.
+ */
+ if (jit_data) {
+ bpf_jit_binary_pack_finalize(fp, jit_data->fhdr, jit_data->hdr);
+ kvfree(jit_data->addrs);
+ kfree(jit_data);
+ }
+ hdr = bpf_jit_binary_pack_hdr(fp);
+ bpf_jit_binary_pack_free(hdr, NULL);
+ WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
+ }
bpf_prog_unlock_free(fp);
}
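
Note: bpf_add_extable_entry() above stores each exception-table field as a signed offset relative to the field's own address, so the table stays valid once the image is copied to its read-only location. A minimal sketch of that relative encoding follows; the local extable_entry type and the resolve helpers are assumptions of this example, not kernel API.

#include <stdint.h>
#include <stdio.h>

struct extable_entry { int insn; int fixup; };

/* Both fields hold an offset from their own address (kept static here so it fits in an int). */
static uint32_t image[64];          /* pretend JIT image */
static struct extable_entry e;

static void encode(struct extable_entry *ex, void *insn, void *fixup)
{
	ex->insn  = (int)((intptr_t)insn  - (intptr_t)&ex->insn);
	ex->fixup = (int)((intptr_t)fixup - (intptr_t)&ex->fixup);
}

static void *resolve_insn(struct extable_entry *ex)  { return (char *)&ex->insn  + ex->insn; }
static void *resolve_fixup(struct extable_entry *ex) { return (char *)&ex->fixup + ex->fixup; }

int main(void)
{
	encode(&e, &image[10], &image[40]);
	printf("insn resolves back:  %d\n", resolve_insn(&e)  == (void *)&image[10]);
	printf("fixup resolves back: %d\n", resolve_fixup(&e) == (void *)&image[40]);
	return 0;
}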
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
new file mode 100644
index 000000000000..2f39c50ca729
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -0,0 +1,1302 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * eBPF JIT compiler for PPC32
+ *
+ * Copyright 2020 Christophe Leroy <christophe.leroy@csgroup.eu>
+ * CS GROUP France
+ *
+ * Based on PPC64 eBPF JIT compiler by Naveen N. Rao
+ */
+#include <linux/moduleloader.h>
+#include <asm/cacheflush.h>
+#include <asm/asm-compat.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/if_vlan.h>
+#include <asm/kprobes.h>
+#include <linux/bpf.h>
+
+#include "bpf_jit.h"
+
+/*
+ * Stack layout:
+ *
+ * [ prev sp ] <-------------
+ * [ nv gpr save area ] 16 * 4 |
+ * fp (r31) --> [ ebpf stack space ] upto 512 |
+ * [ frame header ] 16 |
+ * sp (r1) ---> [ stack pointer ] --------------
+ */
+
+/* for gpr non volatile registers r17 to r31 (14) + tail call */
+#define BPF_PPC_STACK_SAVE (15 * 4 + 4)
+/* stack frame, ensure this is quadword aligned */
+#define BPF_PPC_STACKFRAME(ctx) (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size)
+
+#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0))
+
+/* PPC NVR range -- update this if we ever use NVRs below r17 */
+#define BPF_PPC_NVR_MIN _R17
+#define BPF_PPC_TC _R16
+
+/* BPF register usage */
+#define TMP_REG (MAX_BPF_JIT_REG + 0)
+
+/* BPF to ppc register mappings */
+void bpf_jit_init_reg_mapping(struct codegen_context *ctx)
+{
+ /* function return value */
+ ctx->b2p[BPF_REG_0] = _R12;
+ /* function arguments */
+ ctx->b2p[BPF_REG_1] = _R4;
+ ctx->b2p[BPF_REG_2] = _R6;
+ ctx->b2p[BPF_REG_3] = _R8;
+ ctx->b2p[BPF_REG_4] = _R10;
+ ctx->b2p[BPF_REG_5] = _R22;
+ /* non volatile registers */
+ ctx->b2p[BPF_REG_6] = _R24;
+ ctx->b2p[BPF_REG_7] = _R26;
+ ctx->b2p[BPF_REG_8] = _R28;
+ ctx->b2p[BPF_REG_9] = _R30;
+ /* frame pointer aka BPF_REG_10 */
+ ctx->b2p[BPF_REG_FP] = _R18;
+ /* eBPF jit internal registers */
+ ctx->b2p[BPF_REG_AX] = _R20;
+ ctx->b2p[TMP_REG] = _R31; /* 32 bits */
+}
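
Note: the mapping above hands out GPRs two apart because, on PPC32, every 64-bit BPF register lives in a pair: the table entry holds the low word and (reg - 1) holds the high word, which is why later code clears or loads reg - 1 explicitly. A tiny illustration of that pairing; the local b2p_lo[] copy exists only for this example.

#include <stdio.h>

/* Low-word GPR for BPF r0..r9 and FP, mirroring bpf_jit_init_reg_mapping() above. */
static const int b2p_lo[] = { 12, 4, 6, 8, 10, 22, 24, 26, 28, 30, 18 };

int main(void)
{
	for (int r = 0; r < (int)(sizeof(b2p_lo) / sizeof(b2p_lo[0])); r++)
		printf("BPF r%d -> hi:r%d lo:r%d\n", r, b2p_lo[r] - 1, b2p_lo[r]);
	return 0;
}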
+
+static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
+{
+ if ((reg >= BPF_PPC_NVR_MIN && reg < 32) || reg == BPF_PPC_TC)
+ return BPF_PPC_STACKFRAME(ctx) - 4 * (32 - reg);
+
+ WARN(true, "BPF JIT is asking about unknown registers, will crash the stack");
+ /* Use the hole we have left for alignment */
+ return BPF_PPC_STACKFRAME(ctx) - 4;
+}
+
+#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */
+#define SEEN_NVREG_FULL_MASK 0x0003ffff /* Non volatile registers r14-r31 */
+#define SEEN_NVREG_TEMP_MASK 0x00001e01 /* BPF_REG_5, BPF_REG_AX, TMP_REG */
+
+static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
+{
+ /*
+ * We only need a stack frame if:
+ * - we call other functions (kernel helpers), or
+ * - we use non volatile registers, or
+	 * - we use the tail call counter, or
+ * - the bpf program uses its stack area
+ * The latter condition is deduced from the usage of BPF_REG_FP
+ */
+ return ctx->seen & (SEEN_FUNC | SEEN_TAILCALL | SEEN_NVREG_FULL_MASK) ||
+ bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP));
+}
+
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+ unsigned int nvreg_mask;
+
+ if (ctx->seen & SEEN_FUNC)
+ nvreg_mask = SEEN_NVREG_TEMP_MASK;
+ else
+ nvreg_mask = SEEN_NVREG_FULL_MASK;
+
+ while (ctx->seen & nvreg_mask &&
+ (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) {
+ int old = 32 - fls(ctx->seen & (nvreg_mask & 0xaaaaaaab));
+ int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa));
+ int i;
+
+ for (i = BPF_REG_0; i <= TMP_REG; i++) {
+ if (ctx->b2p[i] != old)
+ continue;
+ ctx->b2p[i] = new;
+ bpf_set_seen_register(ctx, new);
+ bpf_clear_seen_register(ctx, old);
+ if (i != TMP_REG) {
+ bpf_set_seen_register(ctx, new - 1);
+ bpf_clear_seen_register(ctx, old - 1);
+ }
+ break;
+ }
+ }
+}
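A rough sketch of what one iteration of the loop above achieves, assuming (as in bpf_jit.h) that the seen bitmap uses bit 31 - n for GPR n, so the 0xaaaaaaab/0xaaaaaaaa masks select the low GPR of each register pair:

/*
 * Illustrative only: if the program never calls helpers, a BPF register held
 * in a non-volatile pair (say r24/r23) can be moved onto an unused volatile
 * pair (say r10/r9):
 *   old = 32 - fls(seen & nvreg_mask & 0xaaaaaaab);      // lowest-numbered seen NV pair, e.g. r24
 *   new = 32 - fls(~seen & SEEN_VREG_MASK & 0xaaaaaaaa); // lowest-numbered free volatile pair, e.g. r10
 *   ctx->b2p[i] = new;   // high word follows the pair convention at new - 1
 * which avoids saving/restoring the non-volatile pair in the prologue/epilogue.
 */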
+
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
+{
+ int i;
+
+ /* Initialize tail_call_cnt, to be skipped if we do tail calls. */
+ if (ctx->seen & SEEN_TAILCALL)
+ EMIT(PPC_RAW_LI(_R4, 0));
+ else
+ EMIT(PPC_RAW_NOP());
+
+#define BPF_TAILCALL_PROLOGUE_SIZE 4
+
+ if (bpf_has_stack_frame(ctx))
+ EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx)));
+
+ if (ctx->seen & SEEN_TAILCALL)
+ EMIT(PPC_RAW_STW(_R4, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+
+	/* First arg comes in as a 32-bit pointer. */
+ EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3));
+ EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0));
+
+ /*
+ * We need a stack frame, but we don't necessarily need to
+ * save/restore LR unless we call other functions
+ */
+ if (ctx->seen & SEEN_FUNC)
+ EMIT(PPC_RAW_MFLR(_R0));
+
+ /*
+	 * Back up non-volatile regs -- registers r17-r31
+ */
+ for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+ if (bpf_is_seen_register(ctx, i))
+ EMIT(PPC_RAW_STW(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
+
+ /* Setup frame pointer to point to the bpf stack area */
+ if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) {
+ EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_FP) - 1, 0));
+ EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
+ STACK_FRAME_MIN_SIZE + ctx->stack_size));
+ }
+
+ if (ctx->seen & SEEN_FUNC)
+ EMIT(PPC_RAW_STW(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+}
+
+static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
+{
+ int i;
+
+ /* Restore NVRs */
+ for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+ if (bpf_is_seen_register(ctx, i))
+ EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
+
+ if (ctx->seen & SEEN_FUNC)
+ EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+
+ /* Tear down our stack frame */
+ if (bpf_has_stack_frame(ctx))
+ EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx)));
+
+ if (ctx->seen & SEEN_FUNC)
+ EMIT(PPC_RAW_MTLR(_R0));
+
+}
+
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+ EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));
+
+ bpf_jit_emit_common_epilogue(image, ctx);
+
+ EMIT(PPC_RAW_BLR());
+}
+
+/* Relative offset needs to be calculated based on final image location */
+int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func)
+{
+ s32 rel = (s32)func - (s32)(fimage + ctx->idx);
+
+ if (image && rel < 0x2000000 && rel >= -0x2000000) {
+ EMIT(PPC_RAW_BL(rel));
+ } else {
+ /* Load function address into r0 */
+ EMIT(PPC_RAW_LIS(_R0, IMM_H(func)));
+ EMIT(PPC_RAW_ORI(_R0, _R0, IMM_L(func)));
+ EMIT(PPC_RAW_MTCTR(_R0));
+ EMIT(PPC_RAW_BCTRL());
+ }
+
+ return 0;
+}
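The branch-vs-indirect choice above is driven by the reach of the bl instruction; a sketch of the decision (illustrative only):

/*
 * bl encodes a 26-bit signed byte offset, i.e. roughly +/- 32 MB:
 *
 *   rel = (s32)func - (s32)(fimage + ctx->idx);     // distance from the call site
 *   if (image && rel >= -0x2000000 && rel < 0x2000000)
 *           emit "bl rel";                           // 1 instruction
 *   else
 *           emit "lis/ori r0,func; mtctr r0; bctrl"; // 4 instructions
 */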
+
+static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
+{
+ /*
+	 * By now, the eBPF program has already set up parameters in r3-r8
+ * r3-r4/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
+ * r5-r6/BPF_REG_2 - pointer to bpf_array
+ * r7-r8/BPF_REG_3 - index in bpf_array
+ */
+ int b2p_bpf_array = bpf_to_ppc(BPF_REG_2);
+ int b2p_index = bpf_to_ppc(BPF_REG_3);
+
+ /*
+ * if (index >= array->map.max_entries)
+ * goto out;
+ */
+ EMIT(PPC_RAW_LWZ(_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
+ EMIT(PPC_RAW_CMPLW(b2p_index, _R0));
+ EMIT(PPC_RAW_LWZ(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+ PPC_BCC_SHORT(COND_GE, out);
+
+ /*
+ * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
+ * goto out;
+ */
+ EMIT(PPC_RAW_CMPLWI(_R0, MAX_TAIL_CALL_CNT));
+ /* tail_call_cnt++; */
+ EMIT(PPC_RAW_ADDIC(_R0, _R0, 1));
+ PPC_BCC_SHORT(COND_GE, out);
+
+ /* prog = array->ptrs[index]; */
+ EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29));
+ EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array));
+ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs)));
+
+ /*
+ * if (prog == NULL)
+ * goto out;
+ */
+ EMIT(PPC_RAW_CMPLWI(_R3, 0));
+ PPC_BCC_SHORT(COND_EQ, out);
+
+ /* goto *(prog->bpf_func + prologue_size); */
+ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func)));
+ EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE));
+ EMIT(PPC_RAW_MTCTR(_R3));
+
+ EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1)));
+
+ /* Put tail_call_cnt in r4 */
+ EMIT(PPC_RAW_MR(_R4, _R0));
+
+	/* tear down stack, restore NVRs, ... */
+ bpf_jit_emit_common_epilogue(image, ctx);
+
+ EMIT(PPC_RAW_BCTR());
+
+ /* out: */
+ return 0;
+}
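Taken together, the sequence emitted above follows the usual tail-call contract; a C-level paraphrase (illustrative only):

/*
 *   if (index >= array->map.max_entries)
 *           goto out;
 *   if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
 *           goto out;
 *   tail_call_cnt++;
 *   prog = array->ptrs[index];
 *   if (prog == NULL)
 *           goto out;
 *   goto *(prog->bpf_func + BPF_TAILCALL_PROLOGUE_SIZE);  // ctx in r3, count in r4
 * out:
 *   ;  // fall through to the instruction after the tail call
 */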
+
+/* Assemble the body code between the prologue & epilogue */
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
+ u32 *addrs, int pass, bool extra_pass)
+{
+ const struct bpf_insn *insn = fp->insnsi;
+ int flen = fp->len;
+ int i, ret;
+
+ /* Start of epilogue code - will only be valid 2nd pass onwards */
+ u32 exit_addr = addrs[flen];
+
+ for (i = 0; i < flen; i++) {
+ u32 code = insn[i].code;
+ u32 prevcode = i ? insn[i - 1].code : 0;
+ u32 dst_reg = bpf_to_ppc(insn[i].dst_reg);
+ u32 dst_reg_h = dst_reg - 1;
+ u32 src_reg = bpf_to_ppc(insn[i].src_reg);
+ u32 src_reg_h = src_reg - 1;
+ u32 src2_reg = dst_reg;
+ u32 src2_reg_h = dst_reg_h;
+ u32 ax_reg = bpf_to_ppc(BPF_REG_AX);
+ u32 tmp_reg = bpf_to_ppc(TMP_REG);
+ u32 size = BPF_SIZE(code);
+ u32 save_reg, ret_reg;
+ s16 off = insn[i].off;
+ s32 imm = insn[i].imm;
+ bool func_addr_fixed;
+ u64 func_addr;
+ u32 true_cond;
+ u32 tmp_idx;
+ int j;
+
+ if (i && (BPF_CLASS(code) == BPF_ALU64 || BPF_CLASS(code) == BPF_ALU) &&
+ (BPF_CLASS(prevcode) == BPF_ALU64 || BPF_CLASS(prevcode) == BPF_ALU) &&
+ BPF_OP(prevcode) == BPF_MOV && BPF_SRC(prevcode) == BPF_X &&
+ insn[i - 1].dst_reg == insn[i].dst_reg && insn[i - 1].imm != 1) {
+ src2_reg = bpf_to_ppc(insn[i - 1].src_reg);
+ src2_reg_h = src2_reg - 1;
+ ctx->idx = addrs[i - 1] / 4;
+ }
+
+ /*
+ * addrs[] maps a BPF bytecode address into a real offset from
+ * the start of the body code.
+ */
+ addrs[i] = ctx->idx * 4;
+
+ /*
+ * As an optimization, we note down which registers
+ * are used so that we can only save/restore those in our
+ * prologue and epilogue. We do this here regardless of whether
+ * the actual BPF instruction uses src/dst registers or not
+ * (for instance, BPF_CALL does not use them). The expectation
+ * is that those instructions will have src_reg/dst_reg set to
+ * 0. Even otherwise, we just lose some prologue/epilogue
+ * optimization but everything else should work without
+ * any issues.
+ */
+ if (dst_reg >= 3 && dst_reg < 32) {
+ bpf_set_seen_register(ctx, dst_reg);
+ bpf_set_seen_register(ctx, dst_reg_h);
+ }
+
+ if (src_reg >= 3 && src_reg < 32) {
+ bpf_set_seen_register(ctx, src_reg);
+ bpf_set_seen_register(ctx, src_reg_h);
+ }
+
+ switch (code) {
+ /*
+ * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
+ */
+ case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
+ EMIT(PPC_RAW_ADD(dst_reg, src2_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
+ EMIT(PPC_RAW_ADDC(dst_reg, src2_reg, src_reg));
+ EMIT(PPC_RAW_ADDE(dst_reg_h, src2_reg_h, src_reg_h));
+ break;
+ case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
+ EMIT(PPC_RAW_SUB(dst_reg, src2_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
+ EMIT(PPC_RAW_SUBFC(dst_reg, src_reg, src2_reg));
+ EMIT(PPC_RAW_SUBFE(dst_reg_h, src_reg_h, src2_reg_h));
+ break;
+ case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
+ imm = -imm;
+ fallthrough;
+ case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
+ if (!imm) {
+ EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+ } else if (IMM_HA(imm) & 0xffff) {
+ EMIT(PPC_RAW_ADDIS(dst_reg, src2_reg, IMM_HA(imm)));
+ src2_reg = dst_reg;
+ }
+ if (IMM_L(imm))
+ EMIT(PPC_RAW_ADDI(dst_reg, src2_reg, IMM_L(imm)));
+ break;
+ case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
+ imm = -imm;
+ fallthrough;
+ case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
+ if (!imm) {
+ EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+ EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h));
+ break;
+ }
+ if (imm >= -32768 && imm < 32768) {
+ EMIT(PPC_RAW_ADDIC(dst_reg, src2_reg, imm));
+ } else {
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_ADDC(dst_reg, src2_reg, _R0));
+ }
+ if (imm >= 0 || (BPF_OP(code) == BPF_SUB && imm == 0x80000000))
+ EMIT(PPC_RAW_ADDZE(dst_reg_h, src2_reg_h));
+ else
+ EMIT(PPC_RAW_ADDME(dst_reg_h, src2_reg_h));
+ break;
+ case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
+ bpf_set_seen_register(ctx, tmp_reg);
+ EMIT(PPC_RAW_MULW(_R0, src2_reg, src_reg_h));
+ EMIT(PPC_RAW_MULW(dst_reg_h, src2_reg_h, src_reg));
+ EMIT(PPC_RAW_MULHWU(tmp_reg, src2_reg, src_reg));
+ EMIT(PPC_RAW_MULW(dst_reg, src2_reg, src_reg));
+ EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0));
+ EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg));
+ break;
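The six instructions above implement the standard 32x32 decomposition of a 64-bit product; the identity, written out for reference (illustrative only, not emitted code):

/*
 * With each 64-bit BPF register split into hi:lo 32-bit halves:
 *
 *   lo = (u32)(lo_a * lo_b);                      // mullw
 *   hi = hi_a * lo_b + lo_a * hi_b                // two mullw
 *      + (u32)(((u64)lo_a * lo_b) >> 32);         // mulhwu: carry out of the low product
 *
 * i.e. the full 64x64 product truncated to 64 bits.
 */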
+ case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
+ EMIT(PPC_RAW_MULW(dst_reg, src2_reg, src_reg));
+ break;
+ case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
+ if (imm == 1) {
+ EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+ } else if (imm == -1) {
+ EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0));
+ } else if (is_power_of_2((u32)imm)) {
+ EMIT(PPC_RAW_SLWI(dst_reg, src2_reg, ilog2(imm)));
+ } else if (imm >= -32768 && imm < 32768) {
+ EMIT(PPC_RAW_MULI(dst_reg, src2_reg, imm));
+ } else {
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_MULW(dst_reg, src2_reg, _R0));
+ }
+ break;
+ case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
+ if (!imm) {
+ PPC_LI32(dst_reg, 0);
+ PPC_LI32(dst_reg_h, 0);
+ } else if (imm == 1) {
+ EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+ EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h));
+ } else if (imm == -1) {
+ EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0));
+ EMIT(PPC_RAW_SUBFZE(dst_reg_h, src2_reg_h));
+ } else if (imm > 0 && is_power_of_2(imm)) {
+ imm = ilog2(imm);
+ EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg_h, imm, 0, 31 - imm));
+ EMIT(PPC_RAW_RLWIMI(dst_reg_h, dst_reg, imm, 32 - imm, 31));
+ EMIT(PPC_RAW_SLWI(dst_reg, src2_reg, imm));
+ } else {
+ bpf_set_seen_register(ctx, tmp_reg);
+ PPC_LI32(tmp_reg, imm);
+ EMIT(PPC_RAW_MULW(dst_reg_h, src2_reg_h, tmp_reg));
+ if (imm < 0)
+ EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, src2_reg));
+ EMIT(PPC_RAW_MULHWU(_R0, src2_reg, tmp_reg));
+ EMIT(PPC_RAW_MULW(dst_reg, src2_reg, tmp_reg));
+ EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0));
+ }
+ break;
+ case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
+ EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, src_reg));
+ break;
+ case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
+ EMIT(PPC_RAW_DIVWU(_R0, src2_reg, src_reg));
+ EMIT(PPC_RAW_MULW(_R0, src_reg, _R0));
+ EMIT(PPC_RAW_SUB(dst_reg, src2_reg, _R0));
+ break;
+ case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
+ return -EOPNOTSUPP;
+ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
+ return -EOPNOTSUPP;
+ case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
+ if (!imm)
+ return -EINVAL;
+ if (imm == 1) {
+ EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+ } else if (is_power_of_2((u32)imm)) {
+ EMIT(PPC_RAW_SRWI(dst_reg, src2_reg, ilog2(imm)));
+ } else {
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, _R0));
+ }
+ break;
+ case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
+ if (!imm)
+ return -EINVAL;
+
+ if (!is_power_of_2((u32)imm)) {
+ bpf_set_seen_register(ctx, tmp_reg);
+ PPC_LI32(tmp_reg, imm);
+ EMIT(PPC_RAW_DIVWU(_R0, src2_reg, tmp_reg));
+ EMIT(PPC_RAW_MULW(_R0, tmp_reg, _R0));
+ EMIT(PPC_RAW_SUB(dst_reg, src2_reg, _R0));
+ } else if (imm == 1) {
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ } else {
+ imm = ilog2((u32)imm);
+ EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 32 - imm, 31));
+ }
+ break;
+ case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
+ if (!imm)
+ return -EINVAL;
+ if (imm < 0)
+ imm = -imm;
+ if (!is_power_of_2(imm))
+ return -EOPNOTSUPP;
+ if (imm == 1)
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ else
+ EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 32 - ilog2(imm), 31));
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ break;
+ case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
+ if (!imm)
+ return -EINVAL;
+ if (!is_power_of_2(abs(imm)))
+ return -EOPNOTSUPP;
+
+ if (imm < 0) {
+ EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0));
+ EMIT(PPC_RAW_SUBFZE(dst_reg_h, src2_reg_h));
+ imm = -imm;
+ src2_reg = dst_reg;
+ }
+ if (imm == 1) {
+ EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+ EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h));
+ } else {
+ imm = ilog2(imm);
+ EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 32 - imm, imm, 31));
+ EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg_h, 32 - imm, 0, imm - 1));
+ EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, imm));
+ }
+ break;
+ case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
+ EMIT(PPC_RAW_NEG(dst_reg, src2_reg));
+ break;
+ case BPF_ALU64 | BPF_NEG: /* dst = -dst */
+ EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0));
+ EMIT(PPC_RAW_SUBFZE(dst_reg_h, src2_reg_h));
+ break;
+
+ /*
+ * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
+ */
+ case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
+ EMIT(PPC_RAW_AND(dst_reg, src2_reg, src_reg));
+ EMIT(PPC_RAW_AND(dst_reg_h, src2_reg_h, src_reg_h));
+ break;
+ case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
+ EMIT(PPC_RAW_AND(dst_reg, src2_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
+ if (imm >= 0)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ fallthrough;
+ case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
+ if (!IMM_H(imm)) {
+ EMIT(PPC_RAW_ANDI(dst_reg, src2_reg, IMM_L(imm)));
+ } else if (!IMM_L(imm)) {
+ EMIT(PPC_RAW_ANDIS(dst_reg, src2_reg, IMM_H(imm)));
+			} else if (imm == (((1 << fls(imm)) - 1) ^ ((1 << (ffs(imm) - 1)) - 1))) {
+ EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0,
+ 32 - fls(imm), 32 - ffs(imm)));
+ } else {
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_AND(dst_reg, src2_reg, _R0));
+ }
+ break;
+ case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
+ EMIT(PPC_RAW_OR(dst_reg, src2_reg, src_reg));
+ EMIT(PPC_RAW_OR(dst_reg_h, src2_reg_h, src_reg_h));
+ break;
+ case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
+ EMIT(PPC_RAW_OR(dst_reg, src2_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
+ /* Sign-extended */
+ if (imm < 0)
+ EMIT(PPC_RAW_LI(dst_reg_h, -1));
+ fallthrough;
+ case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
+ if (IMM_L(imm)) {
+ EMIT(PPC_RAW_ORI(dst_reg, src2_reg, IMM_L(imm)));
+ src2_reg = dst_reg;
+ }
+ if (IMM_H(imm))
+ EMIT(PPC_RAW_ORIS(dst_reg, src2_reg, IMM_H(imm)));
+ break;
+ case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
+ if (dst_reg == src_reg) {
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ } else {
+ EMIT(PPC_RAW_XOR(dst_reg, src2_reg, src_reg));
+ EMIT(PPC_RAW_XOR(dst_reg_h, src2_reg_h, src_reg_h));
+ }
+ break;
+ case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
+ if (dst_reg == src_reg)
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ else
+ EMIT(PPC_RAW_XOR(dst_reg, src2_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
+ if (imm < 0)
+ EMIT(PPC_RAW_NOR(dst_reg_h, src2_reg_h, src2_reg_h));
+ fallthrough;
+ case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
+ if (IMM_L(imm)) {
+ EMIT(PPC_RAW_XORI(dst_reg, src2_reg, IMM_L(imm)));
+ src2_reg = dst_reg;
+ }
+ if (IMM_H(imm))
+ EMIT(PPC_RAW_XORIS(dst_reg, src2_reg, IMM_H(imm)));
+ break;
+ case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
+ EMIT(PPC_RAW_SLW(dst_reg, src2_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
+ bpf_set_seen_register(ctx, tmp_reg);
+ EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32));
+ EMIT(PPC_RAW_SLW(dst_reg_h, src2_reg_h, src_reg));
+ EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+ EMIT(PPC_RAW_SRW(_R0, src2_reg, _R0));
+ EMIT(PPC_RAW_SLW(tmp_reg, src2_reg, tmp_reg));
+ EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, _R0));
+ EMIT(PPC_RAW_SLW(dst_reg, src2_reg, src_reg));
+ EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg));
+ break;
+ case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
+ if (imm)
+ EMIT(PPC_RAW_SLWI(dst_reg, src2_reg, imm));
+ else
+ EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+ break;
+ case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
+ if (imm < 0)
+ return -EINVAL;
+ if (!imm) {
+ EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+ } else if (imm < 32) {
+ EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg_h, imm, 0, 31 - imm));
+ EMIT(PPC_RAW_RLWIMI(dst_reg_h, src2_reg, imm, 32 - imm, 31));
+ EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, imm, 0, 31 - imm));
+ } else if (imm < 64) {
+ EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg, imm, 0, 31 - imm));
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ } else {
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ }
+ break;
+ case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
+ EMIT(PPC_RAW_SRW(dst_reg, src2_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
+ bpf_set_seen_register(ctx, tmp_reg);
+ EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32));
+ EMIT(PPC_RAW_SRW(dst_reg, src2_reg, src_reg));
+ EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+ EMIT(PPC_RAW_SLW(_R0, src2_reg_h, _R0));
+ EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0));
+ EMIT(PPC_RAW_SRW(dst_reg_h, src2_reg_h, src_reg));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
+ break;
+ case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
+ if (imm)
+ EMIT(PPC_RAW_SRWI(dst_reg, src2_reg, imm));
+ else
+ EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+ break;
+ case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
+ if (imm < 0)
+ return -EINVAL;
+ if (!imm) {
+ EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+ EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h));
+ } else if (imm < 32) {
+ EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 32 - imm, imm, 31));
+ EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg_h, 32 - imm, 0, imm - 1));
+ EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg_h, 32 - imm, imm, 31));
+ } else if (imm < 64) {
+ EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg_h, 64 - imm, imm - 32, 31));
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ } else {
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ }
+ break;
+ case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
+ EMIT(PPC_RAW_SRAW(dst_reg, src2_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
+ bpf_set_seen_register(ctx, tmp_reg);
+ EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32));
+ EMIT(PPC_RAW_SRW(dst_reg, src2_reg, src_reg));
+ EMIT(PPC_RAW_SLW(_R0, src2_reg_h, _R0));
+ EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0));
+ EMIT(PPC_RAW_RLWINM(_R0, tmp_reg, 0, 26, 26));
+ EMIT(PPC_RAW_SRAW(tmp_reg, src2_reg_h, tmp_reg));
+ EMIT(PPC_RAW_SRAW(dst_reg_h, src2_reg_h, src_reg));
+ EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, _R0));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
+ break;
+ case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
+ if (imm)
+ EMIT(PPC_RAW_SRAWI(dst_reg, src2_reg, imm));
+ else
+ EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+ break;
+ case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
+ if (imm < 0)
+ return -EINVAL;
+ if (!imm) {
+ EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+ EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h));
+ } else if (imm < 32) {
+ EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 32 - imm, imm, 31));
+ EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg_h, 32 - imm, 0, imm - 1));
+ EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, imm));
+ } else if (imm < 64) {
+ EMIT(PPC_RAW_SRAWI(dst_reg, src2_reg_h, imm - 32));
+ EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, 31));
+ } else {
+ EMIT(PPC_RAW_SRAWI(dst_reg, src2_reg_h, 31));
+ EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, 31));
+ }
+ break;
+
+ /*
+ * MOV
+ */
+ case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+ if (dst_reg == src_reg)
+ break;
+ EMIT(PPC_RAW_MR(dst_reg, src_reg));
+ EMIT(PPC_RAW_MR(dst_reg_h, src_reg_h));
+ break;
+ case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
+ /* special mov32 for zext */
+ if (imm == 1)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ else if (dst_reg != src_reg)
+ EMIT(PPC_RAW_MR(dst_reg, src_reg));
+ break;
+ case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
+ PPC_LI32(dst_reg, imm);
+ PPC_EX32(dst_reg_h, imm);
+ break;
+ case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
+ PPC_LI32(dst_reg, imm);
+ break;
+
+ /*
+ * BPF_FROM_BE/LE
+ */
+ case BPF_ALU | BPF_END | BPF_FROM_LE:
+ switch (imm) {
+ case 16:
+ /* Copy 16 bits to upper part */
+ EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg, 16, 0, 15));
+ /* Rotate 8 bits right & mask */
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 24, 16, 31));
+ break;
+ case 32:
+ /*
+ * Rotate word left by 8 bits:
+ * 2 bytes are already in their final position
+ * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
+ */
+ EMIT(PPC_RAW_RLWINM(_R0, src2_reg, 8, 0, 31));
+ /* Rotate 24 bits and insert byte 1 */
+ EMIT(PPC_RAW_RLWIMI(_R0, src2_reg, 24, 0, 7));
+ /* Rotate 24 bits and insert byte 3 */
+ EMIT(PPC_RAW_RLWIMI(_R0, src2_reg, 24, 16, 23));
+ EMIT(PPC_RAW_MR(dst_reg, _R0));
+ break;
+ case 64:
+ bpf_set_seen_register(ctx, tmp_reg);
+ EMIT(PPC_RAW_RLWINM(tmp_reg, src2_reg, 8, 0, 31));
+ EMIT(PPC_RAW_RLWINM(_R0, src2_reg_h, 8, 0, 31));
+ /* Rotate 24 bits and insert byte 1 */
+ EMIT(PPC_RAW_RLWIMI(tmp_reg, src2_reg, 24, 0, 7));
+ EMIT(PPC_RAW_RLWIMI(_R0, src2_reg_h, 24, 0, 7));
+ /* Rotate 24 bits and insert byte 3 */
+ EMIT(PPC_RAW_RLWIMI(tmp_reg, src2_reg, 24, 16, 23));
+ EMIT(PPC_RAW_RLWIMI(_R0, src2_reg_h, 24, 16, 23));
+ EMIT(PPC_RAW_MR(dst_reg, _R0));
+ EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg));
+ break;
+ }
+ break;
+ case BPF_ALU | BPF_END | BPF_FROM_BE:
+ switch (imm) {
+ case 16:
+ /* zero-extend 16 bits into 32 bits */
+ EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 16, 31));
+ break;
+ case 32:
+ case 64:
+ /* nop */
+ break;
+ }
+ break;
+
+ /*
+ * BPF_ST NOSPEC (speculation barrier)
+ */
+ case BPF_ST | BPF_NOSPEC:
+ break;
+
+ /*
+ * BPF_ST(X)
+ */
+ case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
+ EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
+ break;
+ case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_STB(_R0, dst_reg, off));
+ break;
+		case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
+			EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
+			break;
+		case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_STH(_R0, dst_reg, off));
+ break;
+ case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
+ EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
+ break;
+ case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_STW(_R0, dst_reg, off));
+ break;
+		case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
+ EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off));
+ EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4));
+ break;
+ case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_STW(_R0, dst_reg, off + 4));
+ PPC_EX32(_R0, imm);
+ EMIT(PPC_RAW_STW(_R0, dst_reg, off));
+ break;
+
+ /*
+ * BPF_STX ATOMIC (atomic ops)
+ */
+ case BPF_STX | BPF_ATOMIC | BPF_W:
+ save_reg = _R0;
+ ret_reg = src_reg;
+
+ bpf_set_seen_register(ctx, tmp_reg);
+ bpf_set_seen_register(ctx, ax_reg);
+
+ /* Get offset into TMP_REG */
+ EMIT(PPC_RAW_LI(tmp_reg, off));
+ tmp_idx = ctx->idx * 4;
+ /* load value from memory into r0 */
+ EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));
+
+ /* Save old value in BPF_REG_AX */
+ if (imm & BPF_FETCH)
+ EMIT(PPC_RAW_MR(ax_reg, _R0));
+
+ switch (imm) {
+ case BPF_ADD:
+ case BPF_ADD | BPF_FETCH:
+ EMIT(PPC_RAW_ADD(_R0, _R0, src_reg));
+ break;
+ case BPF_AND:
+ case BPF_AND | BPF_FETCH:
+ EMIT(PPC_RAW_AND(_R0, _R0, src_reg));
+ break;
+ case BPF_OR:
+ case BPF_OR | BPF_FETCH:
+ EMIT(PPC_RAW_OR(_R0, _R0, src_reg));
+ break;
+ case BPF_XOR:
+ case BPF_XOR | BPF_FETCH:
+ EMIT(PPC_RAW_XOR(_R0, _R0, src_reg));
+ break;
+ case BPF_CMPXCHG:
+ /*
+ * Return old value in BPF_REG_0 for BPF_CMPXCHG &
+ * in src_reg for other cases.
+ */
+ ret_reg = bpf_to_ppc(BPF_REG_0);
+
+ /* Compare with old value in BPF_REG_0 */
+ EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), _R0));
+ /* Don't set if different from old value */
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4);
+ fallthrough;
+ case BPF_XCHG:
+ save_reg = src_reg;
+ break;
+ default:
+ pr_err_ratelimited("eBPF filter atomic op code %02x (@%d) unsupported\n",
+ code, i);
+ return -EOPNOTSUPP;
+ }
+
+ /* store new value */
+ EMIT(PPC_RAW_STWCX(save_reg, tmp_reg, dst_reg));
+ /* we're done if this succeeded */
+ PPC_BCC_SHORT(COND_NE, tmp_idx);
+
+ /* For the BPF_FETCH variant, get old data into src_reg */
+ if (imm & BPF_FETCH) {
+ EMIT(PPC_RAW_MR(ret_reg, ax_reg));
+ if (!fp->aux->verifier_zext)
+ EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */
+ }
+ break;
+
+ case BPF_STX | BPF_ATOMIC | BPF_DW: /* *(u64 *)(dst + off) += src */
+ return -EOPNOTSUPP;
+
+ /*
+ * BPF_LDX
+ */
+ case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
+ case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
+ case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+ case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+			/*
+			 * A PTR_TO_BTF_ID pointer accessed in BPF_PROBE_MEM mode can be a valid
+			 * kernel pointer or NULL, but never a userspace address, so execute the
+			 * BPF_PROBE_MEM load only if addr is a kernel address (see is_kernel_addr()),
+			 * otherwise set dst_reg=0 and move on.
+			 */
+ if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ PPC_LI32(_R0, TASK_SIZE - off);
+ EMIT(PPC_RAW_CMPLW(src_reg, _R0));
+ PPC_BCC_SHORT(COND_GT, (ctx->idx + 4) * 4);
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ /*
+ * For BPF_DW case, "li reg_h,0" would be needed when
+ * !fp->aux->verifier_zext. Emit NOP otherwise.
+ *
+ * Note that "li reg_h,0" is emitted for BPF_B/H/W case,
+ * if necessary. So, jump there instead of emitting an
+ * additional "li reg_h,0" instruction.
+ */
+ if (size == BPF_DW && !fp->aux->verifier_zext)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ else
+ EMIT(PPC_RAW_NOP());
+ /*
+ * Need to jump two instructions instead of one for BPF_DW case
+ * as there are two load instructions for dst_reg_h & dst_reg
+ * respectively.
+ */
+ if (size == BPF_DW)
+ PPC_JMP((ctx->idx + 3) * 4);
+ else
+ PPC_JMP((ctx->idx + 2) * 4);
+ }
+
+ switch (size) {
+ case BPF_B:
+ EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+ break;
+ case BPF_H:
+ EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+ break;
+ case BPF_W:
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+ break;
+ case BPF_DW:
+ EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+ break;
+ }
+
+ if (size != BPF_DW && !fp->aux->verifier_zext)
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+
+ if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ int insn_idx = ctx->idx - 1;
+ int jmp_off = 4;
+
+ /*
+ * In case of BPF_DW, two lwz instructions are emitted, one
+ * for higher 32-bit and another for lower 32-bit. So, set
+ * ex->insn to the first of the two and jump over both
+ * instructions in fixup.
+ *
+ * Similarly, with !verifier_zext, two instructions are
+ * emitted for BPF_B/H/W case. So, set ex->insn to the
+ * instruction that could fault and skip over both
+ * instructions.
+ */
+ if (size == BPF_DW || !fp->aux->verifier_zext) {
+ insn_idx -= 1;
+ jmp_off += 4;
+ }
+
+ ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, insn_idx,
+ jmp_off, dst_reg);
+ if (ret)
+ return ret;
+ }
+ break;
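A C-level paraphrase of the BPF_PROBE_MEM handling above (a sketch only; T stands for the access size):

/*
 *   if (src <= TASK_SIZE - off)        // not a kernel address
 *           dst = 0;                   // and skip over the load(s)
 *   else
 *           dst = *(T *)(src + off);   // real load; a fault is handled by the
 *                                      // extable entry added above, whose fixup
 *                                      // zeroes dst and resumes after the load
 */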
+
+ /*
+ * Doubleword load
+		 * 16-byte instruction that uses two 'struct bpf_insn'
+ */
+ case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+ tmp_idx = ctx->idx;
+ PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
+ PPC_LI32(dst_reg, (u32)insn[i].imm);
+ /* padding to allow full 4 instructions for later patching */
+ if (!image)
+ for (j = ctx->idx - tmp_idx; j < 4; j++)
+ EMIT(PPC_RAW_NOP());
+ /* Adjust for two bpf instructions */
+ addrs[++i] = ctx->idx * 4;
+ break;
+
+ /*
+ * Return/Exit
+ */
+ case BPF_JMP | BPF_EXIT:
+ /*
+ * If this isn't the very last instruction, branch to
+ * the epilogue. If we _are_ the last instruction,
+ * we'll just fall through to the epilogue.
+ */
+ if (i != flen - 1) {
+ ret = bpf_jit_emit_exit_insn(image, ctx, _R0, exit_addr);
+ if (ret)
+ return ret;
+ }
+ /* else fall through to the epilogue */
+ break;
+
+ /*
+ * Call kernel helper or bpf function
+ */
+ case BPF_JMP | BPF_CALL:
+ ctx->seen |= SEEN_FUNC;
+
+ ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+ &func_addr, &func_addr_fixed);
+ if (ret < 0)
+ return ret;
+
+ if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_5))) {
+ EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5) - 1, _R1, 8));
+ EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5), _R1, 12));
+ }
+
+ ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr);
+ if (ret)
+ return ret;
+
+ EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0) - 1, _R3));
+ EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R4));
+ break;
+
+ /*
+ * Jumps and branches
+ */
+ case BPF_JMP | BPF_JA:
+ PPC_JMP(addrs[i + 1 + off]);
+ break;
+
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP32 | BPF_JGT | BPF_K:
+ case BPF_JMP32 | BPF_JGT | BPF_X:
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
+ case BPF_JMP32 | BPF_JSGT | BPF_X:
+ true_cond = COND_GT;
+ goto cond_branch;
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP32 | BPF_JLT | BPF_K:
+ case BPF_JMP32 | BPF_JLT | BPF_X:
+ case BPF_JMP32 | BPF_JSLT | BPF_K:
+ case BPF_JMP32 | BPF_JSLT | BPF_X:
+ true_cond = COND_LT;
+ goto cond_branch;
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP32 | BPF_JGE | BPF_K:
+ case BPF_JMP32 | BPF_JGE | BPF_X:
+ case BPF_JMP32 | BPF_JSGE | BPF_K:
+ case BPF_JMP32 | BPF_JSGE | BPF_X:
+ true_cond = COND_GE;
+ goto cond_branch;
+ case BPF_JMP | BPF_JLE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ case BPF_JMP32 | BPF_JLE | BPF_K:
+ case BPF_JMP32 | BPF_JLE | BPF_X:
+ case BPF_JMP32 | BPF_JSLE | BPF_K:
+ case BPF_JMP32 | BPF_JSLE | BPF_X:
+ true_cond = COND_LE;
+ goto cond_branch;
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP32 | BPF_JEQ | BPF_K:
+ case BPF_JMP32 | BPF_JEQ | BPF_X:
+ true_cond = COND_EQ;
+ goto cond_branch;
+ case BPF_JMP | BPF_JNE | BPF_K:
+ case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP32 | BPF_JNE | BPF_K:
+ case BPF_JMP32 | BPF_JNE | BPF_X:
+ true_cond = COND_NE;
+ goto cond_branch;
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ case BPF_JMP32 | BPF_JSET | BPF_K:
+ case BPF_JMP32 | BPF_JSET | BPF_X:
+ true_cond = COND_NE;
+ /* fallthrough; */
+
+cond_branch:
+ switch (code) {
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JNE | BPF_X:
+ /* unsigned comparison */
+ EMIT(PPC_RAW_CMPLW(dst_reg_h, src_reg_h));
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+ break;
+ case BPF_JMP32 | BPF_JGT | BPF_X:
+ case BPF_JMP32 | BPF_JLT | BPF_X:
+ case BPF_JMP32 | BPF_JGE | BPF_X:
+ case BPF_JMP32 | BPF_JLE | BPF_X:
+ case BPF_JMP32 | BPF_JEQ | BPF_X:
+ case BPF_JMP32 | BPF_JNE | BPF_X:
+ /* unsigned comparison */
+ EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+ break;
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ /* signed comparison */
+ EMIT(PPC_RAW_CMPW(dst_reg_h, src_reg_h));
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+ break;
+ case BPF_JMP32 | BPF_JSGT | BPF_X:
+ case BPF_JMP32 | BPF_JSLT | BPF_X:
+ case BPF_JMP32 | BPF_JSGE | BPF_X:
+ case BPF_JMP32 | BPF_JSLE | BPF_X:
+ /* signed comparison */
+ EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
+ break;
+ case BPF_JMP | BPF_JSET | BPF_X:
+ EMIT(PPC_RAW_AND_DOT(_R0, dst_reg_h, src_reg_h));
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg));
+ break;
+			case BPF_JMP32 | BPF_JSET | BPF_X:
+ EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg));
+ break;
+ case BPF_JMP | BPF_JNE | BPF_K:
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
+ /*
+ * Need sign-extended load, so only positive
+ * values can be used as imm in cmplwi
+ */
+ if (imm >= 0 && imm < 32768) {
+ EMIT(PPC_RAW_CMPLWI(dst_reg_h, 0));
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+ } else {
+ /* sign-extending load ... but unsigned comparison */
+ PPC_EX32(_R0, imm);
+ EMIT(PPC_RAW_CMPLW(dst_reg_h, _R0));
+ PPC_LI32(_R0, imm);
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ EMIT(PPC_RAW_CMPLW(dst_reg, _R0));
+ }
+ break;
+ case BPF_JMP32 | BPF_JNE | BPF_K:
+ case BPF_JMP32 | BPF_JEQ | BPF_K:
+ case BPF_JMP32 | BPF_JGT | BPF_K:
+ case BPF_JMP32 | BPF_JLT | BPF_K:
+ case BPF_JMP32 | BPF_JGE | BPF_K:
+ case BPF_JMP32 | BPF_JLE | BPF_K:
+ if (imm >= 0 && imm < 65536) {
+ EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+ } else {
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_CMPLW(dst_reg, _R0));
+ }
+ break;
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
+ case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
+ if (imm >= 0 && imm < 65536) {
+ EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0));
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+ } else {
+ /* sign-extending load */
+ EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0));
+ PPC_LI32(_R0, imm);
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ EMIT(PPC_RAW_CMPLW(dst_reg, _R0));
+ }
+ break;
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
+ case BPF_JMP32 | BPF_JSLT | BPF_K:
+ case BPF_JMP32 | BPF_JSGE | BPF_K:
+ case BPF_JMP32 | BPF_JSLE | BPF_K:
+ /*
+ * signed comparison, so any 16-bit value
+ * can be used in cmpwi
+ */
+ if (imm >= -32768 && imm < 32768) {
+ EMIT(PPC_RAW_CMPWI(dst_reg, imm));
+ } else {
+ /* sign-extending load */
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_CMPW(dst_reg, _R0));
+ }
+ break;
+ case BPF_JMP | BPF_JSET | BPF_K:
+ /* andi does not sign-extend the immediate */
+ if (imm >= 0 && imm < 32768) {
+ /* PPC_ANDI is _only/always_ dot-form */
+ EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm));
+ } else {
+ PPC_LI32(_R0, imm);
+ if (imm < 0) {
+ EMIT(PPC_RAW_CMPWI(dst_reg_h, 0));
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+ }
+ EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0));
+ }
+ break;
+ case BPF_JMP32 | BPF_JSET | BPF_K:
+ /* andi does not sign-extend the immediate */
+ if (imm >= 0 && imm < 32768) {
+ /* PPC_ANDI is _only/always_ dot-form */
+ EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm));
+ } else {
+ PPC_LI32(_R0, imm);
+ EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0));
+ }
+ break;
+ }
+ PPC_BCC(true_cond, addrs[i + 1 + off]);
+ break;
+
+ /*
+ * Tail call
+ */
+ case BPF_JMP | BPF_TAIL_CALL:
+ ctx->seen |= SEEN_TAILCALL;
+ ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
+ if (ret < 0)
+ return ret;
+ break;
+
+ default:
+ /*
+ * The filter contains something cruel & unusual.
+ * We don't handle it, but also there shouldn't be
+ * anything missing from our list.
+ */
+ pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n", code, i);
+ return -EOPNOTSUPP;
+ }
+ if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext &&
+ !insn_is_zext(&insn[i + 1]) && !(BPF_OP(code) == BPF_END && imm == 64))
+ EMIT(PPC_RAW_LI(dst_reg_h, 0));
+ }
+
+ /* Set end-of-body-code address for exit. */
+ addrs[i] = ctx->idx * 4;
+
+ return 0;
+}
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index be3517ef0574..79f23974a320 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -15,30 +15,63 @@
#include <linux/if_vlan.h>
#include <asm/kprobes.h>
#include <linux/bpf.h>
+#include <asm/security_features.h>
-#include "bpf_jit64.h"
+#include "bpf_jit.h"
-static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
-{
- memset32(area, BREAKPOINT_INSTRUCTION, size/4);
-}
+/*
+ * Stack layout:
+ * Ensure the top half (up to local_tmp_var) stays consistent
+ * with our redzone usage.
+ *
+ * [ prev sp ] <-------------
+ * [ nv gpr save area ] 5*8 |
+ * [ tail_call_cnt ] 8 |
+ * [ local_tmp_var ] 16 |
+ * fp (r31) -->	[   ebpf stack space   ] up to 512	|
+ * [ frame header ] 32/112 |
+ * sp (r1) ---> [ stack pointer ] --------------
+ */
-static inline void bpf_flush_icache(void *start, void *end)
-{
- smp_wmb();
- flush_icache_range((unsigned long)start, (unsigned long)end);
-}
+/* for gpr non volatile registers BPF_REG_6 to BPF_REG_10 */
+#define BPF_PPC_STACK_SAVE (5*8)
+/* for bpf JIT code internal usage */
+#define BPF_PPC_STACK_LOCALS 24
+/* stack frame excluding BPF stack, ensure this is quadword aligned */
+#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \
+ BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE)
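For scale, a worked example (a sketch only, using the 32-byte ELFv2 frame header from the layout comment above; ELFv1 would use 112, and ctx->stack_size is assumed already rounded up to 16 by the generic JIT code):

/*
 * Illustrative only, for an ELFv2 kernel:
 *   BPF stack depth requested          : 100 bytes
 *   ctx->stack_size = round_up(100, 16): 112 bytes
 *   BPF_PPC_STACKFRAME = 32 + 24 + 40  :  96 bytes
 *   total frame        = 96 + 112      : 208 bytes (quadword aligned)
 */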
-static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
-{
- return (ctx->seen & (1 << (31 - b2p[i])));
-}
+/* BPF register usage */
+#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
+#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
-static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
+/* BPF to ppc register mappings */
+void bpf_jit_init_reg_mapping(struct codegen_context *ctx)
{
- ctx->seen |= (1 << (31 - b2p[i]));
+ /* function return value */
+ ctx->b2p[BPF_REG_0] = _R8;
+ /* function arguments */
+ ctx->b2p[BPF_REG_1] = _R3;
+ ctx->b2p[BPF_REG_2] = _R4;
+ ctx->b2p[BPF_REG_3] = _R5;
+ ctx->b2p[BPF_REG_4] = _R6;
+ ctx->b2p[BPF_REG_5] = _R7;
+ /* non volatile registers */
+ ctx->b2p[BPF_REG_6] = _R27;
+ ctx->b2p[BPF_REG_7] = _R28;
+ ctx->b2p[BPF_REG_8] = _R29;
+ ctx->b2p[BPF_REG_9] = _R30;
+ /* frame pointer aka BPF_REG_10 */
+ ctx->b2p[BPF_REG_FP] = _R31;
+ /* eBPF jit internal registers */
+ ctx->b2p[BPF_REG_AX] = _R12;
+ ctx->b2p[TMP_REG_1] = _R9;
+ ctx->b2p[TMP_REG_2] = _R10;
}
+/* PPC NVR range -- update this if we ever use NVRs below r27 */
+#define BPF_PPC_NVR_MIN _R27
+
static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
{
/*
@@ -47,7 +80,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
* - the bpf program uses its stack area
* The latter condition is deduced from the usage of BPF_REG_FP
*/
- return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
+ return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP));
}
/*
@@ -56,9 +89,9 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
* [ prev sp ] <-------------
* [ ... ] |
* sp (r1) ---> [ stack pointer ] --------------
- * [ nv gpr save area ] 6*8
+ * [ nv gpr save area ] 5*8
* [ tail_call_cnt ] 8
- * [ local_tmp_var ] 8
+ * [ local_tmp_var ] 16
* [ unused red zone ] 208 bytes protected
*/
static int bpf_jit_stack_local(struct codegen_context *ctx)
@@ -66,12 +99,12 @@ static int bpf_jit_stack_local(struct codegen_context *ctx)
if (bpf_has_stack_frame(ctx))
return STACK_FRAME_MIN_SIZE + ctx->stack_size;
else
- return -(BPF_PPC_STACK_SAVE + 16);
+ return -(BPF_PPC_STACK_SAVE + 24);
}
static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
{
- return bpf_jit_stack_local(ctx) + 8;
+ return bpf_jit_stack_local(ctx) + 16;
}
static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
@@ -85,37 +118,44 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
BUG();
}
-static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+}
+
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
int i;
+#ifndef CONFIG_PPC_KERNEL_PCREL
+ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+ EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc)));
+#endif
+
/*
* Initialize tail_call_cnt if we do tail calls.
* Otherwise, put in NOPs so that it can be skipped when we are
* invoked through a tail call.
*/
if (ctx->seen & SEEN_TAILCALL) {
- PPC_LI(b2p[TMP_REG_1], 0);
+ EMIT(PPC_RAW_LI(bpf_to_ppc(TMP_REG_1), 0));
/* this goes in the redzone */
- PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
+ EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_STACK_SAVE + 8)));
} else {
- PPC_NOP();
- PPC_NOP();
+ EMIT(PPC_RAW_NOP());
+ EMIT(PPC_RAW_NOP());
}
-#define BPF_TAILCALL_PROLOGUE_SIZE 8
-
if (bpf_has_stack_frame(ctx)) {
/*
* We need a stack frame, but we don't necessarily need to
* save/restore LR unless we call other functions
*/
if (ctx->seen & SEEN_FUNC) {
- EMIT(PPC_INST_MFLR | __PPC_RT(R0));
- PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
+ EMIT(PPC_RAW_MFLR(_R0));
+ EMIT(PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF));
}
- PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
+ EMIT(PPC_RAW_STDU(_R1, _R1, -(BPF_PPC_STACKFRAME + ctx->stack_size)));
}
/*
@@ -124,13 +164,13 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
* in the protected zone below the previous stack frame
*/
for (i = BPF_REG_6; i <= BPF_REG_10; i++)
- if (bpf_is_seen_register(ctx, i))
- PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
+ if (bpf_is_seen_register(ctx, bpf_to_ppc(i)))
+ EMIT(PPC_RAW_STD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i))));
/* Setup frame pointer to point to the bpf stack area */
- if (bpf_is_seen_register(ctx, BPF_REG_FP))
- PPC_ADDI(b2p[BPF_REG_FP], 1,
- STACK_FRAME_MIN_SIZE + ctx->stack_size);
+ if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)))
+ EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
+ STACK_FRAME_MIN_SIZE + ctx->stack_size));
}
static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
@@ -139,92 +179,101 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
/* Restore NVRs */
for (i = BPF_REG_6; i <= BPF_REG_10; i++)
- if (bpf_is_seen_register(ctx, i))
- PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
+ if (bpf_is_seen_register(ctx, bpf_to_ppc(i)))
+ EMIT(PPC_RAW_LD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i))));
/* Tear down our stack frame */
if (bpf_has_stack_frame(ctx)) {
- PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
+ EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME + ctx->stack_size));
if (ctx->seen & SEEN_FUNC) {
- PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
- PPC_MTLR(0);
+ EMIT(PPC_RAW_LD(_R0, _R1, PPC_LR_STKOFF));
+ EMIT(PPC_RAW_MTLR(_R0));
}
}
}
-static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
bpf_jit_emit_common_epilogue(image, ctx);
/* Move result to r3 */
- PPC_MR(3, b2p[BPF_REG_0]);
+ EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));
- PPC_BLR();
+ EMIT(PPC_RAW_BLR());
}
-static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
- u64 func)
+static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u64 func)
{
-#ifdef PPC64_ELF_ABI_v1
- /* func points to the function descriptor */
- PPC_LI64(b2p[TMP_REG_2], func);
- /* Load actual entry point from function descriptor */
- PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
- /* ... and move it to LR */
- PPC_MTLR(b2p[TMP_REG_1]);
- /*
- * Load TOC from function descriptor at offset 8.
- * We can clobber r2 since we get called through a
- * function pointer (so caller will save/restore r2)
- * and since we don't use a TOC ourself.
- */
- PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
-#else
- /* We can clobber r12 */
- PPC_FUNC_ADDR(12, func);
- PPC_MTLR(12);
-#endif
- PPC_BLRL();
+ unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0;
+ long reladdr;
+
+ if (WARN_ON_ONCE(!core_kernel_text(func_addr)))
+ return -EINVAL;
+
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+ reladdr = func_addr - CTX_NIA(ctx);
+
+ if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) {
+ pr_err("eBPF: address of %ps out of range of pcrel address.\n",
+ (void *)func);
+ return -ERANGE;
+ }
+ /* pla r12,addr */
+ EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(1) | IMM_H18(reladdr));
+ EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | IMM_L(reladdr));
+ EMIT(PPC_RAW_MTCTR(_R12));
+ EMIT(PPC_RAW_BCTR());
+
+ } else {
+ reladdr = func_addr - kernel_toc_addr();
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ pr_err("eBPF: address of %ps out of range of kernel_toc.\n", (void *)func);
+ return -ERANGE;
+ }
+
+ EMIT(PPC_RAW_ADDIS(_R12, _R2, PPC_HA(reladdr)));
+ EMIT(PPC_RAW_ADDI(_R12, _R12, PPC_LO(reladdr)));
+ EMIT(PPC_RAW_MTCTR(_R12));
+ EMIT(PPC_RAW_BCTRL());
+ }
+
+ return 0;
}
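In both paths above the helper address ends up in r12; a sketch of the two addressing forms (illustrative only):

/*
 *   pcrel kernels:  reladdr = func_addr - CTX_NIA(ctx)        // |reladdr| < 8 GB
 *                   pla   r12, reladdr         ; r12 = NIA + reladdr
 *
 *   TOC kernels:    reladdr = func_addr - kernel_toc_addr()   // must fit in 32 bits
 *                   addis r12, r2, reladdr@ha  ; addi r12, r12, reladdr@l
 *
 * followed by an indirect branch through CTR, as emitted above.
 */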
-static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
- u64 func)
+int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func)
{
unsigned int i, ctx_idx = ctx->idx;
+ if (WARN_ON_ONCE(func && is_module_text_address(func)))
+ return -EINVAL;
+
+ /* skip past descriptor if elf v1 */
+ func += FUNCTION_DESCR_SIZE;
+
/* Load function address into r12 */
- PPC_LI64(12, func);
+ PPC_LI64(_R12, func);
/* For bpf-to-bpf function calls, the callee's address is unknown
* until the last extra pass. As seen above, we use PPC_LI64() to
* load the callee's address, but this may optimize the number of
* instructions required based on the nature of the address.
*
- * Since we don't want the number of instructions emitted to change,
+ * Since we don't want the number of instructions emitted to increase,
* we pad the optimized PPC_LI64() call with NOPs to guarantee that
* we always have a five-instruction sequence, which is the maximum
* that PPC_LI64() can emit.
*/
- for (i = ctx->idx - ctx_idx; i < 5; i++)
- PPC_NOP();
+ if (!image)
+ for (i = ctx->idx - ctx_idx; i < 5; i++)
+ EMIT(PPC_RAW_NOP());
-#ifdef PPC64_ELF_ABI_v1
- /*
- * Load TOC from function descriptor at offset 8.
- * We can clobber r2 since we get called through a
- * function pointer (so caller will save/restore r2)
- * and since we don't use a TOC ourself.
- */
- PPC_BPF_LL(2, 12, 8);
- /* Load actual entry point from function descriptor */
- PPC_BPF_LL(12, 12, 0);
-#endif
+ EMIT(PPC_RAW_MTCTR(_R12));
+ EMIT(PPC_RAW_BCTRL());
- PPC_MTLR(12);
- PPC_BLRL();
+ return 0;
}
-static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
+static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
{
/*
* By now, the eBPF program has already setup parameters in r3, r4 and r5
@@ -232,67 +281,90 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
* r4/BPF_REG_2 - pointer to bpf_array
* r5/BPF_REG_3 - index in bpf_array
*/
- int b2p_bpf_array = b2p[BPF_REG_2];
- int b2p_index = b2p[BPF_REG_3];
+ int b2p_bpf_array = bpf_to_ppc(BPF_REG_2);
+ int b2p_index = bpf_to_ppc(BPF_REG_3);
+ int bpf_tailcall_prologue_size = 8;
+
+ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+ bpf_tailcall_prologue_size += 4; /* skip past the toc load */
/*
* if (index >= array->map.max_entries)
* goto out;
*/
- PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries));
- PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31);
- PPC_CMPLW(b2p_index, b2p[TMP_REG_1]);
- PPC_BCC(COND_GE, out);
+ EMIT(PPC_RAW_LWZ(bpf_to_ppc(TMP_REG_1), b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
+ EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31));
+ EMIT(PPC_RAW_CMPLW(b2p_index, bpf_to_ppc(TMP_REG_1)));
+ PPC_BCC_SHORT(COND_GE, out);
/*
- * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
* goto out;
*/
- PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
- PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT);
- PPC_BCC(COND_GT, out);
+ EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx)));
+ EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT));
+ PPC_BCC_SHORT(COND_GE, out);
/*
* tail_call_cnt++;
*/
- PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1);
- PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
+ EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 1));
+ EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx)));
/* prog = array->ptrs[index]; */
- PPC_MULI(b2p[TMP_REG_1], b2p_index, 8);
- PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array);
- PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));
+ EMIT(PPC_RAW_MULI(bpf_to_ppc(TMP_REG_1), b2p_index, 8));
+ EMIT(PPC_RAW_ADD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), b2p_bpf_array));
+ EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_array, ptrs)));
/*
* if (prog == NULL)
* goto out;
*/
- PPC_CMPLDI(b2p[TMP_REG_1], 0);
- PPC_BCC(COND_EQ, out);
+ EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_1), 0));
+ PPC_BCC_SHORT(COND_EQ, out);
/* goto *(prog->bpf_func + prologue_size); */
- PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
-#ifdef PPC64_ELF_ABI_v1
- /* skip past the function descriptor */
- PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
- FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE);
-#else
- PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE);
-#endif
- PPC_MTCTR(b2p[TMP_REG_1]);
+ EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_prog, bpf_func)));
+ EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1),
+ FUNCTION_DESCR_SIZE + bpf_tailcall_prologue_size));
+ EMIT(PPC_RAW_MTCTR(bpf_to_ppc(TMP_REG_1)));
/* tear down stack, restore NVRs, ... */
bpf_jit_emit_common_epilogue(image, ctx);
- PPC_BCTR();
+ EMIT(PPC_RAW_BCTR());
+
/* out: */
+ return 0;
}
+/*
+ * We spill into the redzone always, even if the bpf program has its own stackframe.
+ * Offsets hardcoded based on BPF_PPC_STACK_SAVE -- see bpf_jit_stack_local()
+ */
+void bpf_stf_barrier(void);
+
+asm (
+" .global bpf_stf_barrier ;"
+" bpf_stf_barrier: ;"
+" std 21,-64(1) ;"
+" std 22,-56(1) ;"
+" sync ;"
+" ld 21,-64(1) ;"
+" ld 22,-56(1) ;"
+" ori 31,31,0 ;"
+" .rept 14 ;"
+" b 1f ;"
+" 1: ;"
+" .endr ;"
+" blr ;"
+);
+
/* Assemble the body code between the prologue & epilogue */
-static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
- struct codegen_context *ctx,
- u32 *addrs, bool extra_pass)
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
+ u32 *addrs, int pass, bool extra_pass)
{
+ enum stf_barrier_type stf_barrier = stf_barrier_type_get();
const struct bpf_insn *insn = fp->insnsi;
int flen = fp->len;
int i, ret;
@@ -302,8 +374,12 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
for (i = 0; i < flen; i++) {
u32 code = insn[i].code;
- u32 dst_reg = b2p[insn[i].dst_reg];
- u32 src_reg = b2p[insn[i].src_reg];
+ u32 dst_reg = bpf_to_ppc(insn[i].dst_reg);
+ u32 src_reg = bpf_to_ppc(insn[i].src_reg);
+ u32 size = BPF_SIZE(code);
+ u32 tmp1_reg = bpf_to_ppc(TMP_REG_1);
+ u32 tmp2_reg = bpf_to_ppc(TMP_REG_2);
+ u32 save_reg, ret_reg;
s16 off = insn[i].off;
s32 imm = insn[i].imm;
bool func_addr_fixed;
@@ -311,6 +387,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
u64 imm64;
u32 true_cond;
u32 tmp_idx;
+ int j;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@@ -330,9 +407,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
* any issues.
*/
if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
- bpf_set_seen_register(ctx, insn[i].dst_reg);
+ bpf_set_seen_register(ctx, dst_reg);
if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
- bpf_set_seen_register(ctx, insn[i].src_reg);
+ bpf_set_seen_register(ctx, src_reg);
switch (code) {
/*
@@ -340,67 +417,70 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
*/
case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
- PPC_ADD(dst_reg, dst_reg, src_reg);
+ EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg));
goto bpf_alu32_trunc;
case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
- PPC_SUB(dst_reg, dst_reg, src_reg);
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg));
goto bpf_alu32_trunc;
case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
- case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
+ if (!imm) {
+ goto bpf_alu32_trunc;
+ } else if (imm >= -32768 && imm < 32768) {
+ EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm)));
+ } else {
+ PPC_LI32(tmp1_reg, imm);
+ EMIT(PPC_RAW_ADD(dst_reg, dst_reg, tmp1_reg));
+ }
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
- if (BPF_OP(code) == BPF_SUB)
- imm = -imm;
- if (imm) {
- if (imm >= -32768 && imm < 32768)
- PPC_ADDI(dst_reg, dst_reg, IMM_L(imm));
- else {
- PPC_LI32(b2p[TMP_REG_1], imm);
- PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]);
- }
+ if (!imm) {
+ goto bpf_alu32_trunc;
+ } else if (imm > -32768 && imm <= 32768) {
+ EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(-imm)));
+ } else {
+ PPC_LI32(tmp1_reg, imm);
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
}
goto bpf_alu32_trunc;
case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
if (BPF_CLASS(code) == BPF_ALU)
- PPC_MULW(dst_reg, dst_reg, src_reg);
+ EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
else
- PPC_MULD(dst_reg, dst_reg, src_reg);
+ EMIT(PPC_RAW_MULD(dst_reg, dst_reg, src_reg));
goto bpf_alu32_trunc;
case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
if (imm >= -32768 && imm < 32768)
- PPC_MULI(dst_reg, dst_reg, IMM_L(imm));
+ EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm)));
else {
- PPC_LI32(b2p[TMP_REG_1], imm);
+ PPC_LI32(tmp1_reg, imm);
if (BPF_CLASS(code) == BPF_ALU)
- PPC_MULW(dst_reg, dst_reg,
- b2p[TMP_REG_1]);
+ EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp1_reg));
else
- PPC_MULD(dst_reg, dst_reg,
- b2p[TMP_REG_1]);
+ EMIT(PPC_RAW_MULD(dst_reg, dst_reg, tmp1_reg));
}
goto bpf_alu32_trunc;
case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
if (BPF_OP(code) == BPF_MOD) {
- PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg);
- PPC_MULW(b2p[TMP_REG_1], src_reg,
- b2p[TMP_REG_1]);
- PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
+ EMIT(PPC_RAW_DIVWU(tmp1_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_MULW(tmp1_reg, src_reg, tmp1_reg));
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
} else
- PPC_DIVWU(dst_reg, dst_reg, src_reg);
+ EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg));
goto bpf_alu32_trunc;
case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
if (BPF_OP(code) == BPF_MOD) {
- PPC_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg);
- PPC_MULD(b2p[TMP_REG_1], src_reg,
- b2p[TMP_REG_1]);
- PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
+ EMIT(PPC_RAW_DIVDU(tmp1_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_MULD(tmp1_reg, src_reg, tmp1_reg));
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
} else
- PPC_DIVDU(dst_reg, dst_reg, src_reg);
+ EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg));
break;
case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
@@ -408,42 +488,38 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
if (imm == 0)
return -EINVAL;
- else if (imm == 1)
- goto bpf_alu32_trunc;
+ if (imm == 1) {
+ if (BPF_OP(code) == BPF_DIV) {
+ goto bpf_alu32_trunc;
+ } else {
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ break;
+ }
+ }
- PPC_LI32(b2p[TMP_REG_1], imm);
+ PPC_LI32(tmp1_reg, imm);
switch (BPF_CLASS(code)) {
case BPF_ALU:
if (BPF_OP(code) == BPF_MOD) {
- PPC_DIVWU(b2p[TMP_REG_2], dst_reg,
- b2p[TMP_REG_1]);
- PPC_MULW(b2p[TMP_REG_1],
- b2p[TMP_REG_1],
- b2p[TMP_REG_2]);
- PPC_SUB(dst_reg, dst_reg,
- b2p[TMP_REG_1]);
+ EMIT(PPC_RAW_DIVWU(tmp2_reg, dst_reg, tmp1_reg));
+ EMIT(PPC_RAW_MULW(tmp1_reg, tmp1_reg, tmp2_reg));
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
} else
- PPC_DIVWU(dst_reg, dst_reg,
- b2p[TMP_REG_1]);
+ EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, tmp1_reg));
break;
case BPF_ALU64:
if (BPF_OP(code) == BPF_MOD) {
- PPC_DIVDU(b2p[TMP_REG_2], dst_reg,
- b2p[TMP_REG_1]);
- PPC_MULD(b2p[TMP_REG_1],
- b2p[TMP_REG_1],
- b2p[TMP_REG_2]);
- PPC_SUB(dst_reg, dst_reg,
- b2p[TMP_REG_1]);
+ EMIT(PPC_RAW_DIVDU(tmp2_reg, dst_reg, tmp1_reg));
+ EMIT(PPC_RAW_MULD(tmp1_reg, tmp1_reg, tmp2_reg));
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
} else
- PPC_DIVDU(dst_reg, dst_reg,
- b2p[TMP_REG_1]);
+ EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, tmp1_reg));
break;
}
goto bpf_alu32_trunc;
case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
case BPF_ALU64 | BPF_NEG: /* dst = -dst */
- PPC_NEG(dst_reg, dst_reg);
+ EMIT(PPC_RAW_NEG(dst_reg, dst_reg));
goto bpf_alu32_trunc;
/*
@@ -451,101 +527,101 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
*/
case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
- PPC_AND(dst_reg, dst_reg, src_reg);
+ EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg));
goto bpf_alu32_trunc;
case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
if (!IMM_H(imm))
- PPC_ANDI(dst_reg, dst_reg, IMM_L(imm));
+ EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm)));
else {
/* Sign-extended */
- PPC_LI32(b2p[TMP_REG_1], imm);
- PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]);
+ PPC_LI32(tmp1_reg, imm);
+ EMIT(PPC_RAW_AND(dst_reg, dst_reg, tmp1_reg));
}
goto bpf_alu32_trunc;
case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
- PPC_OR(dst_reg, dst_reg, src_reg);
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg));
goto bpf_alu32_trunc;
case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
/* Sign-extended */
- PPC_LI32(b2p[TMP_REG_1], imm);
- PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]);
+ PPC_LI32(tmp1_reg, imm);
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp1_reg));
} else {
if (IMM_L(imm))
- PPC_ORI(dst_reg, dst_reg, IMM_L(imm));
+ EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm)));
if (IMM_H(imm))
- PPC_ORIS(dst_reg, dst_reg, IMM_H(imm));
+ EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm)));
}
goto bpf_alu32_trunc;
case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
- PPC_XOR(dst_reg, dst_reg, src_reg);
+ EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg));
goto bpf_alu32_trunc;
case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
/* Sign-extended */
- PPC_LI32(b2p[TMP_REG_1], imm);
- PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]);
+ PPC_LI32(tmp1_reg, imm);
+ EMIT(PPC_RAW_XOR(dst_reg, dst_reg, tmp1_reg));
} else {
if (IMM_L(imm))
- PPC_XORI(dst_reg, dst_reg, IMM_L(imm));
+ EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm)));
if (IMM_H(imm))
- PPC_XORIS(dst_reg, dst_reg, IMM_H(imm));
+ EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm)));
}
goto bpf_alu32_trunc;
case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
/* slw clears top 32 bits */
- PPC_SLW(dst_reg, dst_reg, src_reg);
+ EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
/* skip zero extension move, but set address map. */
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
- PPC_SLD(dst_reg, dst_reg, src_reg);
+ EMIT(PPC_RAW_SLD(dst_reg, dst_reg, src_reg));
break;
		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
/* with imm 0, we still need to clear top 32 bits */
- PPC_SLWI(dst_reg, dst_reg, imm);
+ EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm));
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
if (imm != 0)
- PPC_SLDI(dst_reg, dst_reg, imm);
+ EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, imm));
break;
case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
- PPC_SRW(dst_reg, dst_reg, src_reg);
+ EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
- PPC_SRD(dst_reg, dst_reg, src_reg);
+ EMIT(PPC_RAW_SRD(dst_reg, dst_reg, src_reg));
break;
case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
- PPC_SRWI(dst_reg, dst_reg, imm);
+ EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm));
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
if (imm != 0)
- PPC_SRDI(dst_reg, dst_reg, imm);
+ EMIT(PPC_RAW_SRDI(dst_reg, dst_reg, imm));
break;
case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
- PPC_SRAW(dst_reg, dst_reg, src_reg);
+ EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg));
goto bpf_alu32_trunc;
case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
- PPC_SRAD(dst_reg, dst_reg, src_reg);
+ EMIT(PPC_RAW_SRAD(dst_reg, dst_reg, src_reg));
break;
case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
- PPC_SRAWI(dst_reg, dst_reg, imm);
+ EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm));
goto bpf_alu32_trunc;
case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
if (imm != 0)
- PPC_SRADI(dst_reg, dst_reg, imm);
+ EMIT(PPC_RAW_SRADI(dst_reg, dst_reg, imm));
break;
/*
@@ -555,10 +631,10 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
if (imm == 1) {
/* special mov32 for zext */
- PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
break;
}
- PPC_MR(dst_reg, src_reg);
+ EMIT(PPC_RAW_MR(dst_reg, src_reg));
goto bpf_alu32_trunc;
case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
@@ -572,7 +648,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
bpf_alu32_trunc:
/* Truncate to 32-bits */
if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
- PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
break;
/*
@@ -590,11 +666,11 @@ bpf_alu32_trunc:
switch (imm) {
case 16:
/* Rotate 8 bits left & mask with 0x0000ff00 */
- PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23);
+ EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 16, 23));
/* Rotate 8 bits right & insert LSB to reg */
- PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31);
+ EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 24, 31));
/* Move result back to dst_reg */
- PPC_MR(dst_reg, b2p[TMP_REG_1]);
+ EMIT(PPC_RAW_MR(dst_reg, tmp1_reg));
break;
case 32:
/*
@@ -602,25 +678,29 @@ bpf_alu32_trunc:
* 2 bytes are already in their final position
			 * -- bytes 2 and 4 (of bytes 1, 2, 3 and 4)
*/
- PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31);
+ EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 0, 31));
/* Rotate 24 bits and insert byte 1 */
- PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7);
+ EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 0, 7));
/* Rotate 24 bits and insert byte 3 */
- PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23);
- PPC_MR(dst_reg, b2p[TMP_REG_1]);
+ EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 16, 23));
+ EMIT(PPC_RAW_MR(dst_reg, tmp1_reg));
break;
case 64:
- /*
- * Way easier and faster(?) to store the value
- * into stack and then use ldbrx
- *
- * ctx->seen will be reliable in pass2, but
- * the instructions generated will remain the
- * same across all passes
- */
- PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
- PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
- PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
+ /* Store the value to stack and then use byte-reverse loads */
+ EMIT(PPC_RAW_STD(dst_reg, _R1, bpf_jit_stack_local(ctx)));
+ EMIT(PPC_RAW_ADDI(tmp1_reg, _R1, bpf_jit_stack_local(ctx)));
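+			/* ldbrx needs ISA 2.06; older CPUs combine two byte-reversed word loads */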
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ EMIT(PPC_RAW_LDBRX(dst_reg, 0, tmp1_reg));
+ } else {
+ EMIT(PPC_RAW_LWBRX(dst_reg, 0, tmp1_reg));
+ if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32));
+ EMIT(PPC_RAW_LI(tmp2_reg, 4));
+ EMIT(PPC_RAW_LWBRX(tmp2_reg, tmp2_reg, tmp1_reg));
+ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ EMIT(PPC_RAW_SLDI(tmp2_reg, tmp2_reg, 32));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp2_reg));
+ }
break;
}
break;
@@ -629,14 +709,14 @@ emit_clear:
switch (imm) {
case 16:
/* zero-extend 16 bits into 64 bits */
- PPC_RLDICL(dst_reg, dst_reg, 0, 48);
+ EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 48));
if (insn_is_zext(&insn[i + 1]))
addrs[++i] = ctx->idx * 4;
break;
case 32:
if (!fp->aux->verifier_zext)
/* zero-extend 32 bits into 64 bits */
- PPC_RLDICL(dst_reg, dst_reg, 0, 32);
+ EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 32));
break;
case 64:
/* nop */
@@ -645,66 +725,155 @@ emit_clear:
break;
/*
+ * BPF_ST NOSPEC (speculation barrier)
+ */
+ case BPF_ST | BPF_NOSPEC:
+ if (!security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) ||
+ !security_ftr_enabled(SEC_FTR_STF_BARRIER))
+ break;
+
+ switch (stf_barrier) {
+ case STF_BARRIER_EIEIO:
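+				/* eieio with an extra hint bit set, the form used for the stf barrier */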
+ EMIT(PPC_RAW_EIEIO() | 0x02000000);
+ break;
+ case STF_BARRIER_SYNC_ORI:
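+				/* sync, a (dummy) load from the paca, then ori 31,31,0 */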
+ EMIT(PPC_RAW_SYNC());
+ EMIT(PPC_RAW_LD(tmp1_reg, _R13, 0));
+ EMIT(PPC_RAW_ORI(_R31, _R31, 0));
+ break;
+ case STF_BARRIER_FALLBACK:
+ ctx->seen |= SEEN_FUNC;
+ PPC_LI64(_R12, dereference_kernel_function_descriptor(bpf_stf_barrier));
+ EMIT(PPC_RAW_MTCTR(_R12));
+ EMIT(PPC_RAW_BCTRL());
+ break;
+ case STF_BARRIER_NONE:
+ break;
+ }
+ break;
+
+ /*
* BPF_ST(X)
*/
case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
if (BPF_CLASS(code) == BPF_ST) {
- PPC_LI(b2p[TMP_REG_1], imm);
- src_reg = b2p[TMP_REG_1];
+ EMIT(PPC_RAW_LI(tmp1_reg, imm));
+ src_reg = tmp1_reg;
}
- PPC_STB(src_reg, dst_reg, off);
+ EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
break;
		case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
if (BPF_CLASS(code) == BPF_ST) {
- PPC_LI(b2p[TMP_REG_1], imm);
- src_reg = b2p[TMP_REG_1];
+ EMIT(PPC_RAW_LI(tmp1_reg, imm));
+ src_reg = tmp1_reg;
}
- PPC_STH(src_reg, dst_reg, off);
+ EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
break;
case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
if (BPF_CLASS(code) == BPF_ST) {
- PPC_LI32(b2p[TMP_REG_1], imm);
- src_reg = b2p[TMP_REG_1];
+ PPC_LI32(tmp1_reg, imm);
+ src_reg = tmp1_reg;
}
- PPC_STW(src_reg, dst_reg, off);
+ EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
break;
		case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
if (BPF_CLASS(code) == BPF_ST) {
- PPC_LI32(b2p[TMP_REG_1], imm);
- src_reg = b2p[TMP_REG_1];
+ PPC_LI32(tmp1_reg, imm);
+ src_reg = tmp1_reg;
+ }
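+			/* std is DS-form: the displacement must be a multiple of 4, else use stdx */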
+ if (off % 4) {
+ EMIT(PPC_RAW_LI(tmp2_reg, off));
+ EMIT(PPC_RAW_STDX(src_reg, dst_reg, tmp2_reg));
+ } else {
+ EMIT(PPC_RAW_STD(src_reg, dst_reg, off));
}
- PPC_BPF_STL(src_reg, dst_reg, off);
break;
/*
- * BPF_STX XADD (atomic_add)
+ * BPF_STX ATOMIC (atomic ops)
*/
- /* *(u32 *)(dst + off) += src */
- case BPF_STX | BPF_XADD | BPF_W:
- /* Get EA into TMP_REG_1 */
- PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
+ case BPF_STX | BPF_ATOMIC | BPF_W:
+ case BPF_STX | BPF_ATOMIC | BPF_DW:
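+			/* defaults: store back the updated tmp2_reg; BPF_FETCH returns the old value in src_reg */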
+ save_reg = tmp2_reg;
+ ret_reg = src_reg;
+
+ /* Get offset into TMP_REG_1 */
+ EMIT(PPC_RAW_LI(tmp1_reg, off));
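+			/* tmp_idx marks the larx below, the retry point if the store-conditional fails */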
tmp_idx = ctx->idx * 4;
/* load value from memory into TMP_REG_2 */
- PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
- /* add value from src_reg into this */
- PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
- /* store result back */
- PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
+ if (size == BPF_DW)
+ EMIT(PPC_RAW_LDARX(tmp2_reg, tmp1_reg, dst_reg, 0));
+ else
+ EMIT(PPC_RAW_LWARX(tmp2_reg, tmp1_reg, dst_reg, 0));
+
+ /* Save old value in _R0 */
+ if (imm & BPF_FETCH)
+ EMIT(PPC_RAW_MR(_R0, tmp2_reg));
+
+ switch (imm) {
+ case BPF_ADD:
+ case BPF_ADD | BPF_FETCH:
+ EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg));
+ break;
+ case BPF_AND:
+ case BPF_AND | BPF_FETCH:
+ EMIT(PPC_RAW_AND(tmp2_reg, tmp2_reg, src_reg));
+ break;
+ case BPF_OR:
+ case BPF_OR | BPF_FETCH:
+ EMIT(PPC_RAW_OR(tmp2_reg, tmp2_reg, src_reg));
+ break;
+ case BPF_XOR:
+ case BPF_XOR | BPF_FETCH:
+ EMIT(PPC_RAW_XOR(tmp2_reg, tmp2_reg, src_reg));
+ break;
+ case BPF_CMPXCHG:
+ /*
+ * Return old value in BPF_REG_0 for BPF_CMPXCHG &
+ * in src_reg for other cases.
+ */
+ ret_reg = bpf_to_ppc(BPF_REG_0);
+
+ /* Compare with old value in BPF_R0 */
+ if (size == BPF_DW)
+ EMIT(PPC_RAW_CMPD(bpf_to_ppc(BPF_REG_0), tmp2_reg));
+ else
+ EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), tmp2_reg));
+ /* Don't set if different from old value */
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4);
+ fallthrough;
+ case BPF_XCHG:
+ save_reg = src_reg;
+ break;
+ default:
+ pr_err_ratelimited(
+ "eBPF filter atomic op code %02x (@%d) unsupported\n",
+ code, i);
+ return -EOPNOTSUPP;
+ }
+
+ /* store new value */
+ if (size == BPF_DW)
+ EMIT(PPC_RAW_STDCX(save_reg, tmp1_reg, dst_reg));
+ else
+ EMIT(PPC_RAW_STWCX(save_reg, tmp1_reg, dst_reg));
/* we're done if this succeeded */
PPC_BCC_SHORT(COND_NE, tmp_idx);
- break;
- /* *(u64 *)(dst + off) += src */
- case BPF_STX | BPF_XADD | BPF_DW:
- PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
- tmp_idx = ctx->idx * 4;
- PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
- PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
- PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
- PPC_BCC_SHORT(COND_NE, tmp_idx);
+
+ if (imm & BPF_FETCH) {
+ EMIT(PPC_RAW_MR(ret_reg, _R0));
+ /*
+ * Skip unnecessary zero-extension for 32-bit cmpxchg.
+ * For context, see commit 39491867ace5.
+ */
+ if (size != BPF_DW && imm == BPF_CMPXCHG &&
+ insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
+ }
break;
/*
@@ -712,25 +881,70 @@ emit_clear:
*/
/* dst = *(u8 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_B:
- PPC_LBZ(dst_reg, src_reg, off);
- if (insn_is_zext(&insn[i + 1]))
- addrs[++i] = ctx->idx * 4;
- break;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
/* dst = *(u16 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_H:
- PPC_LHZ(dst_reg, src_reg, off);
- if (insn_is_zext(&insn[i + 1]))
- addrs[++i] = ctx->idx * 4;
- break;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
/* dst = *(u32 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_W:
- PPC_LWZ(dst_reg, src_reg, off);
- if (insn_is_zext(&insn[i + 1]))
- addrs[++i] = ctx->idx * 4;
- break;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
/* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_DW:
- PPC_BPF_LL(dst_reg, src_reg, off);
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ /*
+			 * A PTR_TO_BTF_ID accessed with BPF_PROBE_MEM can be a valid kernel
+			 * pointer or NULL, but never a userspace address. Execute the load
+			 * only if addr is a kernel address (see is_kernel_addr()), otherwise
+ * set dst_reg=0 and move on.
+ */
+ if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ EMIT(PPC_RAW_ADDI(tmp1_reg, src_reg, off));
+ if (IS_ENABLED(CONFIG_PPC_BOOK3E_64))
+ PPC_LI64(tmp2_reg, 0x8000000000000000ul);
+ else /* BOOK3S_64 */
+ PPC_LI64(tmp2_reg, PAGE_OFFSET);
+ EMIT(PPC_RAW_CMPLD(tmp1_reg, tmp2_reg));
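+				/* kernel addresses branch past the dst_reg clear and jump, straight to the load */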
+ PPC_BCC_SHORT(COND_GT, (ctx->idx + 3) * 4);
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ /*
+ * Check if 'off' is word aligned for BPF_DW, because
+ * we might generate two instructions.
+ */
+ if (BPF_SIZE(code) == BPF_DW && (off & 3))
+ PPC_JMP((ctx->idx + 3) * 4);
+ else
+ PPC_JMP((ctx->idx + 2) * 4);
+ }
+
+ switch (size) {
+ case BPF_B:
+ EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+ break;
+ case BPF_H:
+ EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+ break;
+ case BPF_W:
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+ break;
+ case BPF_DW:
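+				/* ld is DS-form; use ldx when the offset is not word aligned */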
+ if (off % 4) {
+ EMIT(PPC_RAW_LI(tmp1_reg, off));
+ EMIT(PPC_RAW_LDX(dst_reg, src_reg, tmp1_reg));
+ } else {
+ EMIT(PPC_RAW_LD(dst_reg, src_reg, off));
+ }
+ break;
+ }
+
+ if (size != BPF_DW && insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
+
+ if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx,
+ ctx->idx - 1, 4, dst_reg);
+ if (ret)
+ return ret;
+ }
break;
/*
@@ -740,9 +954,14 @@ emit_clear:
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
imm64 = ((u64)(u32) insn[i].imm) |
(((u64)(u32) insn[i+1].imm) << 32);
+ tmp_idx = ctx->idx;
+ PPC_LI64(dst_reg, imm64);
+ /* padding to allow full 5 instructions for later patching */
+ if (!image)
+ for (j = ctx->idx - tmp_idx; j < 5; j++)
+ EMIT(PPC_RAW_NOP());
/* Adjust for two bpf instructions */
addrs[++i] = ctx->idx * 4;
- PPC_LI64(dst_reg, imm64);
break;
/*
@@ -754,8 +973,11 @@ emit_clear:
* the epilogue. If we _are_ the last instruction,
* we'll just fall through to the epilogue.
*/
- if (i != flen - 1)
- PPC_JMP(exit_addr);
+ if (i != flen - 1) {
+ ret = bpf_jit_emit_exit_insn(image, ctx, tmp1_reg, exit_addr);
+ if (ret)
+ return ret;
+ }
/* else fall through to the epilogue */
break;
@@ -771,11 +993,15 @@ emit_clear:
return ret;
if (func_addr_fixed)
- bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
+ ret = bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
else
- bpf_jit_emit_func_call_rel(image, ctx, func_addr);
+ ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr);
+
+ if (ret)
+ return ret;
+
/* move return value from r3 to BPF_REG_0 */
- PPC_MR(b2p[BPF_REG_0], 3);
+ EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R3));
break;
/*
@@ -860,9 +1086,9 @@ cond_branch:
case BPF_JMP32 | BPF_JNE | BPF_X:
/* unsigned comparison */
if (BPF_CLASS(code) == BPF_JMP32)
- PPC_CMPLW(dst_reg, src_reg);
+ EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
else
- PPC_CMPLD(dst_reg, src_reg);
+ EMIT(PPC_RAW_CMPLD(dst_reg, src_reg));
break;
case BPF_JMP | BPF_JSGT | BPF_X:
case BPF_JMP | BPF_JSLT | BPF_X:
@@ -874,21 +1100,17 @@ cond_branch:
case BPF_JMP32 | BPF_JSLE | BPF_X:
/* signed comparison */
if (BPF_CLASS(code) == BPF_JMP32)
- PPC_CMPW(dst_reg, src_reg);
+ EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
else
- PPC_CMPD(dst_reg, src_reg);
+ EMIT(PPC_RAW_CMPD(dst_reg, src_reg));
break;
case BPF_JMP | BPF_JSET | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_X:
if (BPF_CLASS(code) == BPF_JMP) {
- PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
- src_reg);
+ EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, src_reg));
} else {
- int tmp_reg = b2p[TMP_REG_1];
-
- PPC_AND(tmp_reg, dst_reg, src_reg);
- PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
- 31);
+ EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, src_reg));
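+				/* rlwinm. keeps only the low 32 bits and sets CR0 for the JMP32 test */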
+ EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg, 0, 0, 31));
}
break;
case BPF_JMP | BPF_JNE | BPF_K:
@@ -912,19 +1134,17 @@ cond_branch:
*/
if (imm >= 0 && imm < 32768) {
if (is_jmp32)
- PPC_CMPLWI(dst_reg, imm);
+ EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
else
- PPC_CMPLDI(dst_reg, imm);
+ EMIT(PPC_RAW_CMPLDI(dst_reg, imm));
} else {
/* sign-extending load */
- PPC_LI32(b2p[TMP_REG_1], imm);
+ PPC_LI32(tmp1_reg, imm);
/* ... but unsigned comparison */
if (is_jmp32)
- PPC_CMPLW(dst_reg,
- b2p[TMP_REG_1]);
+ EMIT(PPC_RAW_CMPLW(dst_reg, tmp1_reg));
else
- PPC_CMPLD(dst_reg,
- b2p[TMP_REG_1]);
+ EMIT(PPC_RAW_CMPLD(dst_reg, tmp1_reg));
}
break;
}
@@ -945,17 +1165,15 @@ cond_branch:
*/
if (imm >= -32768 && imm < 32768) {
if (is_jmp32)
- PPC_CMPWI(dst_reg, imm);
+ EMIT(PPC_RAW_CMPWI(dst_reg, imm));
else
- PPC_CMPDI(dst_reg, imm);
+ EMIT(PPC_RAW_CMPDI(dst_reg, imm));
} else {
- PPC_LI32(b2p[TMP_REG_1], imm);
+ PPC_LI32(tmp1_reg, imm);
if (is_jmp32)
- PPC_CMPW(dst_reg,
- b2p[TMP_REG_1]);
+ EMIT(PPC_RAW_CMPW(dst_reg, tmp1_reg));
else
- PPC_CMPD(dst_reg,
- b2p[TMP_REG_1]);
+ EMIT(PPC_RAW_CMPD(dst_reg, tmp1_reg));
}
break;
}
@@ -964,19 +1182,16 @@ cond_branch:
/* andi does not sign-extend the immediate */
if (imm >= 0 && imm < 32768)
/* PPC_ANDI is _only/always_ dot-form */
- PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm);
+ EMIT(PPC_RAW_ANDI(tmp1_reg, dst_reg, imm));
else {
- int tmp_reg = b2p[TMP_REG_1];
-
- PPC_LI32(tmp_reg, imm);
+ PPC_LI32(tmp1_reg, imm);
if (BPF_CLASS(code) == BPF_JMP) {
- PPC_AND_DOT(tmp_reg, dst_reg,
- tmp_reg);
+ EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg,
+ tmp1_reg));
} else {
- PPC_AND(tmp_reg, dst_reg,
- tmp_reg);
- PPC_RLWINM_DOT(tmp_reg, tmp_reg,
- 0, 0, 31);
+ EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, tmp1_reg));
+ EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg,
+ 0, 0, 31));
}
}
break;
@@ -989,7 +1204,9 @@ cond_branch:
*/
case BPF_JMP | BPF_TAIL_CALL:
ctx->seen |= SEEN_TAILCALL;
- bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
+ ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
+ if (ret < 0)
+ return ret;
break;
default:
@@ -1009,249 +1226,3 @@ cond_branch:
return 0;
}
-
-/* Fix the branch target addresses for subprog calls */
-static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
- struct codegen_context *ctx, u32 *addrs)
-{
- const struct bpf_insn *insn = fp->insnsi;
- bool func_addr_fixed;
- u64 func_addr;
- u32 tmp_idx;
- int i, ret;
-
- for (i = 0; i < fp->len; i++) {
- /*
- * During the extra pass, only the branch target addresses for
- * the subprog calls need to be fixed. All other instructions
-	 * can be left untouched.
- *
- * The JITed image length does not change because we already
-	 * ensure that the JITed instruction sequences for these calls
- * are of fixed length by padding them with NOPs.
- */
- if (insn[i].code == (BPF_JMP | BPF_CALL) &&
- insn[i].src_reg == BPF_PSEUDO_CALL) {
- ret = bpf_jit_get_func_addr(fp, &insn[i], true,
- &func_addr,
- &func_addr_fixed);
- if (ret < 0)
- return ret;
-
- /*
- * Save ctx->idx as this would currently point to the
- * end of the JITed image and set it to the offset of
- * the instruction sequence corresponding to the
- * subprog call temporarily.
- */
- tmp_idx = ctx->idx;
- ctx->idx = addrs[i] / 4;
- bpf_jit_emit_func_call_rel(image, ctx, func_addr);
-
- /*
- * Restore ctx->idx here. This is safe as the length
- * of the JITed sequence remains unchanged.
- */
- ctx->idx = tmp_idx;
- }
- }
-
- return 0;
-}
-
-struct powerpc64_jit_data {
- struct bpf_binary_header *header;
- u32 *addrs;
- u8 *image;
- u32 proglen;
- struct codegen_context ctx;
-};
-
-bool bpf_jit_needs_zext(void)
-{
- return true;
-}
-
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
-{
- u32 proglen;
- u32 alloclen;
- u8 *image = NULL;
- u32 *code_base;
- u32 *addrs;
- struct powerpc64_jit_data *jit_data;
- struct codegen_context cgctx;
- int pass;
- int flen;
- struct bpf_binary_header *bpf_hdr;
- struct bpf_prog *org_fp = fp;
- struct bpf_prog *tmp_fp;
- bool bpf_blinded = false;
- bool extra_pass = false;
-
- if (!fp->jit_requested)
- return org_fp;
-
- tmp_fp = bpf_jit_blind_constants(org_fp);
- if (IS_ERR(tmp_fp))
- return org_fp;
-
- if (tmp_fp != org_fp) {
- bpf_blinded = true;
- fp = tmp_fp;
- }
-
- jit_data = fp->aux->jit_data;
- if (!jit_data) {
- jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
- if (!jit_data) {
- fp = org_fp;
- goto out;
- }
- fp->aux->jit_data = jit_data;
- }
-
- flen = fp->len;
- addrs = jit_data->addrs;
- if (addrs) {
- cgctx = jit_data->ctx;
- image = jit_data->image;
- bpf_hdr = jit_data->header;
- proglen = jit_data->proglen;
- alloclen = proglen + FUNCTION_DESCR_SIZE;
- extra_pass = true;
- goto skip_init_ctx;
- }
-
- addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
- if (addrs == NULL) {
- fp = org_fp;
- goto out_addrs;
- }
-
- memset(&cgctx, 0, sizeof(struct codegen_context));
-
- /* Make sure that the stack is quadword aligned. */
- cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
-
- /* Scouting faux-generate pass 0 */
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
- /* We hit something illegal or unsupported. */
- fp = org_fp;
- goto out_addrs;
- }
-
- /*
- * If we have seen a tail call, we need a second pass.
- * This is because bpf_jit_emit_common_epilogue() is called
- * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
- */
- if (cgctx.seen & SEEN_TAILCALL) {
- cgctx.idx = 0;
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
- fp = org_fp;
- goto out_addrs;
- }
- }
-
- /*
- * Pretend to build prologue, given the features we've seen. This will
-	 * update cgctx.idx as it pretends to output instructions, then we can
- * calculate total size from idx.
- */
- bpf_jit_build_prologue(0, &cgctx);
- bpf_jit_build_epilogue(0, &cgctx);
-
- proglen = cgctx.idx * 4;
- alloclen = proglen + FUNCTION_DESCR_SIZE;
-
- bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
- bpf_jit_fill_ill_insns);
- if (!bpf_hdr) {
- fp = org_fp;
- goto out_addrs;
- }
-
-skip_init_ctx:
- code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
-
- if (extra_pass) {
- /*
- * Do not touch the prologue and epilogue as they will remain
- * unchanged. Only fix the branch target address for subprog
- * calls in the body.
- *
- * This does not change the offsets and lengths of the subprog
- * call instruction sequences and hence, the size of the JITed
- * image as well.
- */
- bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
-
- /* There is no need to perform the usual passes. */
- goto skip_codegen_passes;
- }
-
- /* Code generation passes 1-2 */
- for (pass = 1; pass < 3; pass++) {
- /* Now build the prologue, body code & epilogue for real. */
- cgctx.idx = 0;
- bpf_jit_build_prologue(code_base, &cgctx);
- bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
- bpf_jit_build_epilogue(code_base, &cgctx);
-
- if (bpf_jit_enable > 1)
- pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
- proglen - (cgctx.idx * 4), cgctx.seen);
- }
-
-skip_codegen_passes:
- if (bpf_jit_enable > 1)
- /*
- * Note that we output the base address of the code_base
- * rather than image, since opcodes are in code_base.
- */
- bpf_jit_dump(flen, proglen, pass, code_base);
-
-#ifdef PPC64_ELF_ABI_v1
- /* Function descriptor nastiness: Address + TOC */
- ((u64 *)image)[0] = (u64)code_base;
- ((u64 *)image)[1] = local_paca->kernel_toc;
-#endif
-
- fp->bpf_func = (void *)image;
- fp->jited = 1;
- fp->jited_len = alloclen;
-
- bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
- if (!fp->is_func || extra_pass) {
- bpf_prog_fill_jited_linfo(fp, addrs);
-out_addrs:
- kfree(addrs);
- kfree(jit_data);
- fp->aux->jit_data = NULL;
- } else {
- jit_data->addrs = addrs;
- jit_data->ctx = cgctx;
- jit_data->proglen = proglen;
- jit_data->image = image;
- jit_data->header = bpf_hdr;
- }
-
-out:
- if (bpf_blinded)
- bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
-
- return fp;
-}
-
-/* Overriding bpf_jit_free() as we don't set images read-only. */
-void bpf_jit_free(struct bpf_prog *fp)
-{
- unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
- struct bpf_binary_header *bpf_hdr = (void *)addr;
-
- if (fp->jited)
- bpf_jit_binary_free(bpf_hdr);
-
- bpf_prog_unlock_free(fp);
-}