| field | value | timestamp |
|---|---|---|
| author | David S. Miller <davem@davemloft.net> | 2016-05-16 13:49:33 -0400 |
| committer | David S. Miller <davem@davemloft.net> | 2016-05-16 13:49:33 -0400 |
| commit | 485b777855ed74dfcde5c46cfc88e2bc1b7c0714 (patch) | |
| tree | ec3eef1f80ae2f1d5fdbb9a5c12df5bf06025ad7 | |
| parent | 553eb544444e28749e2d752dee11e2ae4a3ecfb6 (diff) | |
| parent | d93a47f735f3455a896e46b18d0ac26fa19639e6 (diff) | |
Merge branch 'bpf-blinding'
Daniel Borkmann says:
====================
BPF updates
This set implements constant blinding for BPF. The first couple of
patches are preparatory cleanups, followed by the blinding itself.
Please see the individual patches for details.
Thanks a lot!
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
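
For orientation before the diffstat and full diff: the heart of the series is bpf_jit_blind_constants() in kernel/bpf/core.c, which rewrites every eBPF instruction carrying a user-controlled 32-bit immediate so the immediate never appears verbatim in the JITed image — it is loaded as `rnd ^ imm` into the hidden BPF_REG_AX register and decoded with a second XOR. A minimal userspace C sketch of that identity (illustrative only, not kernel code; the kernel draws `rnd` from prandom_u32()):

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Stand-in for the hidden JIT-only register BPF_REG_AX added by this set. */
static uint32_t ax;

/* Blinded form of "dst += imm": only 'rnd ^ imm' and 'rnd' ever appear as
 * immediates, mirroring the three-insn rewrite in bpf_jit_blind_insn():
 *   BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, rnd ^ imm)
 *   BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, rnd)
 *   BPF_ALU32_REG(BPF_ADD, dst, BPF_REG_AX)
 */
static uint32_t blinded_add(uint32_t dst, uint32_t imm, uint32_t rnd)
{
	ax = rnd ^ imm;
	ax ^= rnd;
	return dst + ax;
}

int main(void)
{
	uint32_t imm = 0x90909090;   /* attacker-chosen constant, e.g. NOP-sled bytes */
	uint32_t rnd;

	srand((unsigned)time(NULL)); /* the kernel uses prandom_u32() instead */
	rnd = (uint32_t)rand();

	printf("plain:   %" PRIu32 "\n", 5u + imm);
	printf("blinded: %" PRIu32 "\n", blinded_add(5u, imm, rnd));
	return 0;
}
```

Since `(rnd ^ imm) ^ rnd == imm`, program semantics are unchanged, while a JIT-spraying attacker can no longer predict the constant bytes that end up in executable memory.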
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | Documentation/sysctl/net.txt | 11 |
| -rw-r--r-- | arch/arm/Kconfig | 2 |
| -rw-r--r-- | arch/arm64/Kconfig | 2 |
| -rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 56 |
| -rw-r--r-- | arch/mips/Kconfig | 2 |
| -rw-r--r-- | arch/powerpc/Kconfig | 2 |
| -rw-r--r-- | arch/s390/Kconfig | 2 |
| -rw-r--r-- | arch/s390/net/bpf_jit_comp.c | 77 |
| -rw-r--r-- | arch/sparc/Kconfig | 2 |
| -rw-r--r-- | arch/x86/Kconfig | 2 |
| -rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 70 |
| -rw-r--r-- | include/linux/filter.h | 52 |
| -rw-r--r-- | include/linux/netdevice.h | 1 |
| -rw-r--r-- | kernel/bpf/core.c | 294 |
| -rw-r--r-- | kernel/bpf/syscall.c | 2 |
| -rw-r--r-- | kernel/bpf/verifier.c | 53 |
| -rw-r--r-- | lib/test_bpf.c | 5 |
| -rw-r--r-- | net/Kconfig | 21 |
| -rw-r--r-- | net/core/filter.c | 40 |
| -rw-r--r-- | net/core/sysctl_net_core.c | 9 |

20 files changed, 569 insertions(+), 136 deletions(-)
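
Besides the blinding itself, the diff below moves the instruction-patching helper out of the verifier into kernel/bpf/core.c as bpf_patch_insn_single(): one instruction is replaced by 1..n new ones, and any jump whose target crosses the patch point has its relative offset fixed up by bpf_adj_branches(). A small standalone sketch of that fix-up rule, run on a toy instruction array that has already been grown by the patch (illustrative only, not the kernel data structures):

```c
#include <stdio.h>

struct toy_insn {
	int is_jmp;	/* jump with an in-image relative target? */
	int off;	/* relative offset, counted in instructions */
};

/* Same rule as bpf_adj_branches(): after 'delta' insns have been inserted
 * right after position 'pos', forward jumps over the patch point widen and
 * backward jumps over it narrow; jumps that do not cross it are untouched.
 */
static void adj_branches(struct toy_insn *insn, int cnt, int pos, int delta)
{
	for (int i = 0; i < cnt; i++) {
		if (!insn[i].is_jmp)
			continue;
		if (i < pos && i + insn[i].off + 1 > pos)
			insn[i].off += delta;
		else if (i > pos + delta && i + insn[i].off + 1 <= pos + delta)
			insn[i].off -= delta;
	}
}

int main(void)
{
	/* Original program: insn 0 jumps over insn 1 (off = +1).  Insn 1 was
	 * patched into three insns, so the image grew by delta = 2 and now
	 * has five slots; the jump must become off = +3 to keep its target.
	 */
	struct toy_insn prog[5] = { { 1, 1 } };

	adj_branches(prog, 5, 1, 2);
	printf("insn 0 offset after patching: %+d\n", prog[0].off); /* prints +3 */
	return 0;
}
```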
| diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 809ab6efcc74..f0480f7ea740 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -43,6 +43,17 @@ Values :  	1 - enable the JIT  	2 - enable the JIT and ask the compiler to emit traces on kernel log. +bpf_jit_harden +-------------- + +This enables hardening for the Berkeley Packet Filter Just in Time compiler. +Supported are eBPF JIT backends. Enabling hardening trades off performance, +but can mitigate JIT spraying. +Values : +	0 - disable JIT hardening (default value) +	1 - enable JIT hardening for unprivileged users only +	2 - enable JIT hardening for all users +  dev_weight  -------------- diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index cdfa6c2b7626..2315b0d1b4f4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -41,7 +41,7 @@ config ARM  	select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)  	select HAVE_ARCH_TRACEHOOK  	select HAVE_ARM_SMCCC if CPU_V7 -	select HAVE_BPF_JIT +	select HAVE_CBPF_JIT  	select HAVE_CC_STACKPROTECTOR  	select HAVE_CONTEXT_TRACKING  	select HAVE_C_RECORDMCOUNT diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4f436220384f..e6761ea2feec 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -58,7 +58,7 @@ config ARM64  	select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT  	select HAVE_ARCH_SECCOMP_FILTER  	select HAVE_ARCH_TRACEHOOK -	select HAVE_BPF_JIT +	select HAVE_EBPF_JIT  	select HAVE_C_RECORDMCOUNT  	select HAVE_CC_STACKPROTECTOR  	select HAVE_CMPXCHG_DOUBLE diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index b405bbb54431..d0d51903c7e0 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -31,8 +31,8 @@  int bpf_jit_enable __read_mostly; -#define TMP_REG_1 (MAX_BPF_REG + 0) -#define TMP_REG_2 (MAX_BPF_REG + 1) +#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)  /* Map BPF registers to A64 registers */  static const int bpf2a64[] = { @@ -54,6 +54,8 @@ static const int bpf2a64[] = {  	/* temporary register for internal BPF JIT */  	[TMP_REG_1] = A64_R(23),  	[TMP_REG_2] = A64_R(24), +	/* temporary register for blinding constants */ +	[BPF_REG_AX] = A64_R(9),  };  struct jit_ctx { @@ -762,31 +764,45 @@ void bpf_jit_compile(struct bpf_prog *prog)  	/* Nothing to do here. We support Internal BPF. */  } -void bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)  { +	struct bpf_prog *tmp, *orig_prog = prog;  	struct bpf_binary_header *header; +	bool tmp_blinded = false;  	struct jit_ctx ctx;  	int image_size;  	u8 *image_ptr;  	if (!bpf_jit_enable) -		return; +		return orig_prog; -	if (!prog || !prog->len) -		return; +	tmp = bpf_jit_blind_constants(prog); +	/* If blinding was requested and we failed during blinding, +	 * we must fall back to the interpreter. +	 */ +	if (IS_ERR(tmp)) +		return orig_prog; +	if (tmp != prog) { +		tmp_blinded = true; +		prog = tmp; +	}  	memset(&ctx, 0, sizeof(ctx));  	ctx.prog = prog;  	ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); -	if (ctx.offset == NULL) -		return; +	if (ctx.offset == NULL) { +		prog = orig_prog; +		goto out; +	}  	/* 1. Initial fake pass to compute ctx->idx. */  	/* Fake pass to fill in ctx->offset and ctx->tmp_used. 
*/ -	if (build_body(&ctx)) -		goto out; +	if (build_body(&ctx)) { +		prog = orig_prog; +		goto out_off; +	}  	build_prologue(&ctx); @@ -797,8 +813,10 @@ void bpf_int_jit_compile(struct bpf_prog *prog)  	image_size = sizeof(u32) * ctx.idx;  	header = bpf_jit_binary_alloc(image_size, &image_ptr,  				      sizeof(u32), jit_fill_hole); -	if (header == NULL) -		goto out; +	if (header == NULL) { +		prog = orig_prog; +		goto out_off; +	}  	/* 2. Now, the actual pass. */ @@ -809,7 +827,8 @@ void bpf_int_jit_compile(struct bpf_prog *prog)  	if (build_body(&ctx)) {  		bpf_jit_binary_free(header); -		goto out; +		prog = orig_prog; +		goto out_off;  	}  	build_epilogue(&ctx); @@ -817,7 +836,8 @@ void bpf_int_jit_compile(struct bpf_prog *prog)  	/* 3. Extra pass to validate JITed code. */  	if (validate_code(&ctx)) {  		bpf_jit_binary_free(header); -		goto out; +		prog = orig_prog; +		goto out_off;  	}  	/* And we're done. */ @@ -829,8 +849,14 @@ void bpf_int_jit_compile(struct bpf_prog *prog)  	set_memory_ro((unsigned long)header, header->pages);  	prog->bpf_func = (void *)ctx.image;  	prog->jited = 1; -out: + +out_off:  	kfree(ctx.offset); +out: +	if (tmp_blinded) +		bpf_jit_prog_release_other(prog, prog == orig_prog ? +					   tmp : orig_prog); +	return prog;  }  void bpf_jit_free(struct bpf_prog *prog) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 2018c2b0e078..3ee1ea61b2dc 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -15,7 +15,7 @@ config MIPS  	select HAVE_ARCH_KGDB  	select HAVE_ARCH_SECCOMP_FILTER  	select HAVE_ARCH_TRACEHOOK -	select HAVE_BPF_JIT if !CPU_MICROMIPS +	select HAVE_CBPF_JIT if !CPU_MICROMIPS  	select HAVE_FUNCTION_TRACER  	select HAVE_DYNAMIC_FTRACE  	select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 7cd32c038286..2fdb73d9198a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -126,7 +126,7 @@ config PPC  	select IRQ_FORCED_THREADING  	select HAVE_RCU_TABLE_FREE if SMP  	select HAVE_SYSCALL_TRACEPOINTS -	select HAVE_BPF_JIT +	select HAVE_CBPF_JIT  	select HAVE_ARCH_JUMP_LABEL  	select ARCH_HAVE_NMI_SAFE_CMPXCHG  	select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index bf24ab188921..a883981c0174 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -126,7 +126,7 @@ config S390  	select HAVE_ARCH_SOFT_DIRTY  	select HAVE_ARCH_TRACEHOOK  	select HAVE_ARCH_TRANSPARENT_HUGEPAGE -	select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES +	select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES  	select HAVE_CMPXCHG_DOUBLE  	select HAVE_CMPXCHG_LOCAL  	select HAVE_DEBUG_KMEMLEAK diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 3c0bfc1f2694..9133b0ec000b 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -54,16 +54,17 @@ struct bpf_jit {  #define SEEN_FUNC	16	/* calls C functions */  #define SEEN_TAIL_CALL	32	/* code uses tail calls */  #define SEEN_SKB_CHANGE	64	/* code changes skb data */ +#define SEEN_REG_AX	128	/* code uses constant blinding */  #define SEEN_STACK	(SEEN_FUNC | SEEN_MEM | SEEN_SKB)  /*   * s390 registers   */ -#define REG_W0		(__MAX_BPF_REG+0)	/* Work register 1 (even) */ -#define REG_W1		(__MAX_BPF_REG+1)	/* Work register 2 (odd) */ -#define REG_SKB_DATA	(__MAX_BPF_REG+2)	/* SKB data register */ -#define REG_L		(__MAX_BPF_REG+3)	/* Literal pool register */ -#define REG_15		(__MAX_BPF_REG+4)	/* Register 15 */ +#define REG_W0		(MAX_BPF_JIT_REG + 0)	/* Work register 1 (even) */ 
+#define REG_W1		(MAX_BPF_JIT_REG + 1)	/* Work register 2 (odd) */ +#define REG_SKB_DATA	(MAX_BPF_JIT_REG + 2)	/* SKB data register */ +#define REG_L		(MAX_BPF_JIT_REG + 3)	/* Literal pool register */ +#define REG_15		(MAX_BPF_JIT_REG + 4)	/* Register 15 */  #define REG_0		REG_W0			/* Register 0 */  #define REG_1		REG_W1			/* Register 1 */  #define REG_2		BPF_REG_1		/* Register 2 */ @@ -88,6 +89,8 @@ static const int reg2hex[] = {  	[BPF_REG_9]	= 10,  	/* BPF stack pointer */  	[BPF_REG_FP]	= 13, +	/* Register for blinding (shared with REG_SKB_DATA) */ +	[BPF_REG_AX]	= 12,  	/* SKB data pointer */  	[REG_SKB_DATA]	= 12,  	/* Work registers for s390x backend */ @@ -385,7 +388,7 @@ static void save_restore_regs(struct bpf_jit *jit, int op)  /*   * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S"   * we store the SKB header length on the stack and the SKB data - * pointer in REG_SKB_DATA. + * pointer in REG_SKB_DATA if BPF_REG_AX is not used.   */  static void emit_load_skb_data_hlen(struct bpf_jit *jit)  { @@ -397,9 +400,10 @@ static void emit_load_skb_data_hlen(struct bpf_jit *jit)  		   offsetof(struct sk_buff, data_len));  	/* stg %w1,ST_OFF_HLEN(%r0,%r15) */  	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_HLEN); -	/* lg %skb_data,data_off(%b1) */ -	EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, -		      BPF_REG_1, offsetof(struct sk_buff, data)); +	if (!(jit->seen & SEEN_REG_AX)) +		/* lg %skb_data,data_off(%b1) */ +		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, +			      BPF_REG_1, offsetof(struct sk_buff, data));  }  /* @@ -487,6 +491,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i  	s32 imm = insn->imm;  	s16 off = insn->off; +	if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX) +		jit->seen |= SEEN_REG_AX;  	switch (insn->code) {  	/*  	 * BPF_MOV @@ -1188,7 +1194,7 @@ call_fn:  		/*  		 * Implicit input:  		 *  BPF_REG_6	 (R7) : skb pointer -		 *  REG_SKB_DATA (R12): skb data pointer +		 *  REG_SKB_DATA (R12): skb data pointer (if no BPF_REG_AX)  		 *  		 * Calculated input:  		 *  BPF_REG_2	 (R3) : offset of byte(s) to fetch in skb @@ -1209,6 +1215,11 @@ call_fn:  			/* agfr %b2,%src (%src is s32 here) */  			EMIT4(0xb9180000, BPF_REG_2, src_reg); +		/* Reload REG_SKB_DATA if BPF_REG_AX is used */ +		if (jit->seen & SEEN_REG_AX) +			/* lg %skb_data,data_off(%b6) */ +			EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, +				      BPF_REG_6, offsetof(struct sk_buff, data));  		/* basr %b5,%w1 (%b5 is call saved) */  		EMIT2(0x0d00, BPF_REG_5, REG_W1); @@ -1262,37 +1273,62 @@ void bpf_jit_compile(struct bpf_prog *fp)  /*   * Compile eBPF program "fp"   */ -void bpf_int_jit_compile(struct bpf_prog *fp) +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)  { +	struct bpf_prog *tmp, *orig_fp = fp;  	struct bpf_binary_header *header; +	bool tmp_blinded = false;  	struct bpf_jit jit;  	int pass;  	if (!bpf_jit_enable) -		return; +		return orig_fp; + +	tmp = bpf_jit_blind_constants(fp); +	/* +	 * If blinding was requested and we failed during blinding, +	 * we must fall back to the interpreter. 
+	 */ +	if (IS_ERR(tmp)) +		return orig_fp; +	if (tmp != fp) { +		tmp_blinded = true; +		fp = tmp; +	} +  	memset(&jit, 0, sizeof(jit));  	jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL); -	if (jit.addrs == NULL) -		return; +	if (jit.addrs == NULL) { +		fp = orig_fp; +		goto out; +	}  	/*  	 * Three initial passes:  	 *   - 1/2: Determine clobbered registers  	 *   - 3:   Calculate program size and addrs arrray  	 */  	for (pass = 1; pass <= 3; pass++) { -		if (bpf_jit_prog(&jit, fp)) +		if (bpf_jit_prog(&jit, fp)) { +			fp = orig_fp;  			goto free_addrs; +		}  	}  	/*  	 * Final pass: Allocate and generate program  	 */ -	if (jit.size >= BPF_SIZE_MAX) +	if (jit.size >= BPF_SIZE_MAX) { +		fp = orig_fp;  		goto free_addrs; +	}  	header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole); -	if (!header) +	if (!header) { +		fp = orig_fp;  		goto free_addrs; -	if (bpf_jit_prog(&jit, fp)) +	} +	if (bpf_jit_prog(&jit, fp)) { +		fp = orig_fp;  		goto free_addrs; +	}  	if (bpf_jit_enable > 1) {  		bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);  		if (jit.prg_buf) @@ -1305,6 +1341,11 @@ void bpf_int_jit_compile(struct bpf_prog *fp)  	}  free_addrs:  	kfree(jit.addrs); +out: +	if (tmp_blinded) +		bpf_jit_prog_release_other(fp, fp == orig_fp ? +					   tmp : orig_fp); +	return fp;  }  /* diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 57ffaf285c2f..d5003812c748 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -32,7 +32,7 @@ config SPARC  	select ARCH_WANT_IPC_PARSE_VERSION  	select GENERIC_PCI_IOMAP  	select HAVE_NMI_WATCHDOG if SPARC64 -	select HAVE_BPF_JIT +	select HAVE_CBPF_JIT  	select HAVE_DEBUG_BUGVERBOSE  	select GENERIC_SMP_IDLE_THREAD  	select GENERIC_CLOCKEVENTS diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2dc18605831f..ae83046d51a8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -91,7 +91,7 @@ config X86  	select HAVE_ARCH_SOFT_DIRTY		if X86_64  	select HAVE_ARCH_TRACEHOOK  	select HAVE_ARCH_TRANSPARENT_HUGEPAGE -	select HAVE_BPF_JIT			if X86_64 +	select HAVE_EBPF_JIT			if X86_64  	select HAVE_CC_STACKPROTECTOR  	select HAVE_CMPXCHG_DOUBLE  	select HAVE_CMPXCHG_LOCAL diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4286f3618bd0..fe04a04dab8e 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -110,11 +110,16 @@ static void bpf_flush_icache(void *start, void *end)  	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)  /* pick a register outside of BPF range for JIT internal work */ -#define AUX_REG (MAX_BPF_REG + 1) +#define AUX_REG (MAX_BPF_JIT_REG + 1) -/* the following table maps BPF registers to x64 registers. - * x64 register r12 is unused, since if used as base address register - * in load/store instructions, it always needs an extra byte of encoding +/* The following table maps BPF registers to x64 registers. + * + * x64 register r12 is unused, since if used as base address + * register in load/store instructions, it always needs an + * extra byte of encoding and is callee saved. 
+ * + *  r9 caches skb->len - skb->data_len + * r10 caches skb->data, and used for blinding (if enabled)   */  static const int reg2hex[] = {  	[BPF_REG_0] = 0,  /* rax */ @@ -128,6 +133,7 @@ static const int reg2hex[] = {  	[BPF_REG_8] = 6,  /* r14 callee saved */  	[BPF_REG_9] = 7,  /* r15 callee saved */  	[BPF_REG_FP] = 5, /* rbp readonly */ +	[BPF_REG_AX] = 2, /* r10 temp register */  	[AUX_REG] = 3,    /* r11 temp register */  }; @@ -141,7 +147,8 @@ static bool is_ereg(u32 reg)  			     BIT(AUX_REG) |  			     BIT(BPF_REG_7) |  			     BIT(BPF_REG_8) | -			     BIT(BPF_REG_9)); +			     BIT(BPF_REG_9) | +			     BIT(BPF_REG_AX));  }  /* add modifiers if 'reg' maps to x64 registers r8..r15 */ @@ -182,6 +189,7 @@ static void jit_fill_hole(void *area, unsigned int size)  struct jit_context {  	int cleanup_addr; /* epilogue code offset */  	bool seen_ld_abs; +	bool seen_ax_reg;  };  /* maximum number of bytes emitted while JITing one eBPF insn */ @@ -345,6 +353,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,  	struct bpf_insn *insn = bpf_prog->insnsi;  	int insn_cnt = bpf_prog->len;  	bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0); +	bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0);  	bool seen_exit = false;  	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];  	int i, cnt = 0; @@ -367,6 +376,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,  		int ilen;  		u8 *func; +		if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX) +			ctx->seen_ax_reg = seen_ax_reg = true; +  		switch (insn->code) {  			/* ALU */  		case BPF_ALU | BPF_ADD | BPF_X: @@ -1002,6 +1014,10 @@ common_load:  			 * sk_load_* helpers also use %r10 and %r9d.  			 * See bpf_jit.S  			 */ +			if (seen_ax_reg) +				/* r10 = skb->data, mov %r10, off32(%rbx) */ +				EMIT3_off32(0x4c, 0x8b, 0x93, +					    offsetof(struct sk_buff, data));  			EMIT1_off32(0xE8, jmp_offset); /* call */  			break; @@ -1073,25 +1089,37 @@ void bpf_jit_compile(struct bpf_prog *prog)  {  } -void bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)  {  	struct bpf_binary_header *header = NULL; +	struct bpf_prog *tmp, *orig_prog = prog;  	int proglen, oldproglen = 0;  	struct jit_context ctx = {}; +	bool tmp_blinded = false;  	u8 *image = NULL;  	int *addrs;  	int pass;  	int i;  	if (!bpf_jit_enable) -		return; +		return orig_prog; -	if (!prog || !prog->len) -		return; +	tmp = bpf_jit_blind_constants(prog); +	/* If blinding was requested and we failed during blinding, +	 * we must fall back to the interpreter. 
+	 */ +	if (IS_ERR(tmp)) +		return orig_prog; +	if (tmp != prog) { +		tmp_blinded = true; +		prog = tmp; +	}  	addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL); -	if (!addrs) -		return; +	if (!addrs) { +		prog = orig_prog; +		goto out; +	}  	/* Before first pass, make a rough estimation of addrs[]  	 * each bpf instruction is translated to less than 64 bytes @@ -1113,21 +1141,25 @@ void bpf_int_jit_compile(struct bpf_prog *prog)  			image = NULL;  			if (header)  				bpf_jit_binary_free(header); -			goto out; +			prog = orig_prog; +			goto out_addrs;  		}  		if (image) {  			if (proglen != oldproglen) {  				pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",  				       proglen, oldproglen); -				goto out; +				prog = orig_prog; +				goto out_addrs;  			}  			break;  		}  		if (proglen == oldproglen) {  			header = bpf_jit_binary_alloc(proglen, &image,  						      1, jit_fill_hole); -			if (!header) -				goto out; +			if (!header) { +				prog = orig_prog; +				goto out_addrs; +			}  		}  		oldproglen = proglen;  	} @@ -1141,8 +1173,14 @@ void bpf_int_jit_compile(struct bpf_prog *prog)  		prog->bpf_func = (void *)image;  		prog->jited = 1;  	} -out: + +out_addrs:  	kfree(addrs); +out: +	if (tmp_blinded) +		bpf_jit_prog_release_other(prog, prog == orig_prog ? +					   tmp : orig_prog); +	return prog;  }  void bpf_jit_free(struct bpf_prog *fp) diff --git a/include/linux/filter.h b/include/linux/filter.h index ec1411c89105..6fc31ef1da2d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -13,6 +13,8 @@  #include <linux/printk.h>  #include <linux/workqueue.h>  #include <linux/sched.h> +#include <linux/capability.h> +  #include <net/sch_generic.h>  #include <asm/cacheflush.h> @@ -42,6 +44,15 @@ struct bpf_prog_aux;  #define BPF_REG_X	BPF_REG_7  #define BPF_REG_TMP	BPF_REG_8 +/* Kernel hidden auxiliary/helper register for hardening step. + * Only used by eBPF JITs. It's nothing more than a temporary + * register that JITs use internally, only that here it's part + * of eBPF instructions that have been rewritten for blinding + * constants. See JIT pre-step in bpf_jit_blind_constants(). + */ +#define BPF_REG_AX		MAX_BPF_REG +#define MAX_BPF_JIT_REG		(MAX_BPF_REG + 1) +  /* BPF program can access up to 512 bytes of stack space. 
*/  #define MAX_BPF_STACK	512 @@ -458,7 +469,7 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)  int sk_filter(struct sock *sk, struct sk_buff *skb); -int bpf_prog_select_runtime(struct bpf_prog *fp); +struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err);  void bpf_prog_free(struct bpf_prog *fp);  struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); @@ -492,10 +503,17 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp);  void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);  u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -void bpf_int_jit_compile(struct bpf_prog *fp); + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);  bool bpf_helper_changes_skb_data(void *func); +struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, +				       const struct bpf_insn *patch, u32 len); +  #ifdef CONFIG_BPF_JIT +extern int bpf_jit_enable; +extern int bpf_jit_harden; +  typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);  struct bpf_binary_header * @@ -507,6 +525,9 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr);  void bpf_jit_compile(struct bpf_prog *fp);  void bpf_jit_free(struct bpf_prog *fp); +struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); +void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other); +  static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,  				u32 pass, void *image)  { @@ -517,6 +538,33 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,  		print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET,  			       16, 1, image, proglen, false);  } + +static inline bool bpf_jit_is_ebpf(void) +{ +# ifdef CONFIG_HAVE_EBPF_JIT +	return true; +# else +	return false; +# endif +} + +static inline bool bpf_jit_blinding_enabled(void) +{ +	/* These are the prerequisites, should someone ever have the +	 * idea to call blinding outside of them, we make sure to +	 * bail out. +	 */ +	if (!bpf_jit_is_ebpf()) +		return false; +	if (!bpf_jit_enable) +		return false; +	if (!bpf_jit_harden) +		return false; +	if (bpf_jit_harden == 1 && capable(CAP_SYS_ADMIN)) +		return false; + +	return true; +}  #else  static inline void bpf_jit_compile(struct bpf_prog *fp)  { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c2f5112f08f7..c148edfe4965 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3759,7 +3759,6 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,  extern int		netdev_max_backlog;  extern int		netdev_tstamp_prequeue;  extern int		weight_p; -extern int		bpf_jit_enable;  bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);  struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d781b077431f..f1e8a0def99b 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -129,14 +129,83 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,  	return fp;  } -EXPORT_SYMBOL_GPL(bpf_prog_realloc);  void __bpf_prog_free(struct bpf_prog *fp)  {  	kfree(fp->aux);  	vfree(fp);  } -EXPORT_SYMBOL_GPL(__bpf_prog_free); + +static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn) +{ +	return BPF_CLASS(insn->code) == BPF_JMP  && +	       /* Call and Exit are both special jumps with no +		* target inside the BPF instruction image. 
+		*/ +	       BPF_OP(insn->code) != BPF_CALL && +	       BPF_OP(insn->code) != BPF_EXIT; +} + +static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta) +{ +	struct bpf_insn *insn = prog->insnsi; +	u32 i, insn_cnt = prog->len; + +	for (i = 0; i < insn_cnt; i++, insn++) { +		if (!bpf_is_jmp_and_has_target(insn)) +			continue; + +		/* Adjust offset of jmps if we cross boundaries. */ +		if (i < pos && i + insn->off + 1 > pos) +			insn->off += delta; +		else if (i > pos + delta && i + insn->off + 1 <= pos + delta) +			insn->off -= delta; +	} +} + +struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, +				       const struct bpf_insn *patch, u32 len) +{ +	u32 insn_adj_cnt, insn_rest, insn_delta = len - 1; +	struct bpf_prog *prog_adj; + +	/* Since our patchlet doesn't expand the image, we're done. */ +	if (insn_delta == 0) { +		memcpy(prog->insnsi + off, patch, sizeof(*patch)); +		return prog; +	} + +	insn_adj_cnt = prog->len + insn_delta; + +	/* Several new instructions need to be inserted. Make room +	 * for them. Likely, there's no need for a new allocation as +	 * last page could have large enough tailroom. +	 */ +	prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt), +				    GFP_USER); +	if (!prog_adj) +		return NULL; + +	prog_adj->len = insn_adj_cnt; + +	/* Patching happens in 3 steps: +	 * +	 * 1) Move over tail of insnsi from next instruction onwards, +	 *    so we can patch the single target insn with one or more +	 *    new ones (patching is always from 1 to n insns, n > 0). +	 * 2) Inject new instructions at the target location. +	 * 3) Adjust branch offsets if necessary. +	 */ +	insn_rest = insn_adj_cnt - off - len; + +	memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1, +		sizeof(*patch) * insn_rest); +	memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len); + +	bpf_adj_branches(prog_adj, off, insn_delta); + +	return prog_adj; +}  #ifdef CONFIG_BPF_JIT  struct bpf_binary_header * @@ -174,6 +243,209 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr)  {  	module_memfree(hdr);  } + +int bpf_jit_harden __read_mostly; + +static int bpf_jit_blind_insn(const struct bpf_insn *from, +			      const struct bpf_insn *aux, +			      struct bpf_insn *to_buff) +{ +	struct bpf_insn *to = to_buff; +	u32 imm_rnd = prandom_u32(); +	s16 off; + +	BUILD_BUG_ON(BPF_REG_AX  + 1 != MAX_BPF_JIT_REG); +	BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG); + +	if (from->imm == 0 && +	    (from->code == (BPF_ALU   | BPF_MOV | BPF_K) || +	     from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) { +		*to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg); +		goto out; +	} + +	switch (from->code) { +	case BPF_ALU | BPF_ADD | BPF_K: +	case BPF_ALU | BPF_SUB | BPF_K: +	case BPF_ALU | BPF_AND | BPF_K: +	case BPF_ALU | BPF_OR  | BPF_K: +	case BPF_ALU | BPF_XOR | BPF_K: +	case BPF_ALU | BPF_MUL | BPF_K: +	case BPF_ALU | BPF_MOV | BPF_K: +	case BPF_ALU | BPF_DIV | BPF_K: +	case BPF_ALU | BPF_MOD | BPF_K: +		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); +		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); +		*to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX); +		break; + +	case BPF_ALU64 | BPF_ADD | BPF_K: +	case BPF_ALU64 | BPF_SUB | BPF_K: +	case BPF_ALU64 | BPF_AND | BPF_K: +	case BPF_ALU64 | BPF_OR  | BPF_K: +	case BPF_ALU64 | BPF_XOR | BPF_K: +	case BPF_ALU64 | BPF_MUL | BPF_K: +	case BPF_ALU64 | BPF_MOV | BPF_K: +	case BPF_ALU64 | BPF_DIV | BPF_K: +	case BPF_ALU64 | BPF_MOD | BPF_K: +		*to++ = BPF_ALU64_IMM(BPF_MOV, 
BPF_REG_AX, imm_rnd ^ from->imm); +		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); +		*to++ = BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX); +		break; + +	case BPF_JMP | BPF_JEQ  | BPF_K: +	case BPF_JMP | BPF_JNE  | BPF_K: +	case BPF_JMP | BPF_JGT  | BPF_K: +	case BPF_JMP | BPF_JGE  | BPF_K: +	case BPF_JMP | BPF_JSGT | BPF_K: +	case BPF_JMP | BPF_JSGE | BPF_K: +	case BPF_JMP | BPF_JSET | BPF_K: +		/* Accommodate for extra offset in case of a backjump. */ +		off = from->off; +		if (off < 0) +			off -= 2; +		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); +		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); +		*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off); +		break; + +	case BPF_LD | BPF_ABS | BPF_W: +	case BPF_LD | BPF_ABS | BPF_H: +	case BPF_LD | BPF_ABS | BPF_B: +		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); +		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); +		*to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0); +		break; + +	case BPF_LD | BPF_IND | BPF_W: +	case BPF_LD | BPF_IND | BPF_H: +	case BPF_LD | BPF_IND | BPF_B: +		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); +		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); +		*to++ = BPF_ALU32_REG(BPF_ADD, BPF_REG_AX, from->src_reg); +		*to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0); +		break; + +	case BPF_LD | BPF_IMM | BPF_DW: +		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm); +		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); +		*to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32); +		*to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX); +		break; +	case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */ +		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm); +		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); +		*to++ = BPF_ALU64_REG(BPF_OR,  aux[0].dst_reg, BPF_REG_AX); +		break; + +	case BPF_ST | BPF_MEM | BPF_DW: +	case BPF_ST | BPF_MEM | BPF_W: +	case BPF_ST | BPF_MEM | BPF_H: +	case BPF_ST | BPF_MEM | BPF_B: +		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); +		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); +		*to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off); +		break; +	} +out: +	return to - to_buff; +} + +static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other, +					      gfp_t gfp_extra_flags) +{ +	gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | +			  gfp_extra_flags; +	struct bpf_prog *fp; + +	fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL); +	if (fp != NULL) { +		kmemcheck_annotate_bitfield(fp, meta); + +		/* aux->prog still points to the fp_other one, so +		 * when promoting the clone to the real program, +		 * this still needs to be adapted. +		 */ +		memcpy(fp, fp_other, fp_other->pages * PAGE_SIZE); +	} + +	return fp; +} + +static void bpf_prog_clone_free(struct bpf_prog *fp) +{ +	/* aux was stolen by the other clone, so we cannot free +	 * it from this path! It will be freed eventually by the +	 * other program on release. +	 * +	 * At this point, we don't need a deferred release since +	 * clone is guaranteed to not be locked. +	 */ +	fp->aux = NULL; +	__bpf_prog_free(fp); +} + +void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other) +{ +	/* We have to repoint aux->prog to self, as we don't +	 * know whether fp here is the clone or the original. 
+	 */ +	fp->aux->prog = fp; +	bpf_prog_clone_free(fp_other); +} + +struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) +{ +	struct bpf_insn insn_buff[16], aux[2]; +	struct bpf_prog *clone, *tmp; +	int insn_delta, insn_cnt; +	struct bpf_insn *insn; +	int i, rewritten; + +	if (!bpf_jit_blinding_enabled()) +		return prog; + +	clone = bpf_prog_clone_create(prog, GFP_USER); +	if (!clone) +		return ERR_PTR(-ENOMEM); + +	insn_cnt = clone->len; +	insn = clone->insnsi; + +	for (i = 0; i < insn_cnt; i++, insn++) { +		/* We temporarily need to hold the original ld64 insn +		 * so that we can still access the first part in the +		 * second blinding run. +		 */ +		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) && +		    insn[1].code == 0) +			memcpy(aux, insn, sizeof(aux)); + +		rewritten = bpf_jit_blind_insn(insn, aux, insn_buff); +		if (!rewritten) +			continue; + +		tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten); +		if (!tmp) { +			/* Patching may have repointed aux->prog during +			 * realloc from the original one, so we need to +			 * fix it up here on error. +			 */ +			bpf_jit_prog_release_other(prog, clone); +			return ERR_PTR(-ENOMEM); +		} + +		clone = tmp; +		insn_delta = rewritten - 1; + +		/* Walk new program and skip insns we just inserted. */ +		insn = clone->insnsi + i + insn_delta; +		insn_cnt += insn_delta; +		i        += insn_delta; +	} + +	return clone; +}  #endif /* CONFIG_BPF_JIT */  /* Base function for offset calculation. Needs to go into .text section, @@ -692,15 +964,22 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)  /**   *	bpf_prog_select_runtime - select exec runtime for BPF program   *	@fp: bpf_prog populated with internal BPF program + *	@err: pointer to error variable   *   * Try to JIT eBPF program, if JIT is not available, use interpreter.   * The BPF program will be executed via BPF_PROG_RUN() macro.   */ -int bpf_prog_select_runtime(struct bpf_prog *fp) +struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)  {  	fp->bpf_func = (void *) __bpf_prog_run; -	bpf_int_jit_compile(fp); +	/* eBPF JITs can rewrite the program in case constant +	 * blinding is active. However, in case of error during +	 * blinding, bpf_int_jit_compile() must always return a +	 * valid program, which in this case would simply not +	 * be JITed, but falls back to the interpreter. +	 */ +	fp = bpf_int_jit_compile(fp);  	bpf_prog_lock_ro(fp);  	/* The tail call compatibility check can only be done at @@ -708,7 +987,9 @@ int bpf_prog_select_runtime(struct bpf_prog *fp)  	 * with JITed or non JITed program concatenations and not  	 * all eBPF JITs might immediately support all features.  	 */ -	return bpf_check_tail_call(fp); +	*err = bpf_check_tail_call(fp); + +	return fp;  }  EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); @@ -790,8 +1071,9 @@ const struct bpf_func_proto bpf_tail_call_proto = {  };  /* For classic BPF JITs that don't implement bpf_int_jit_compile(). 
*/ -void __weak bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)  { +	return prog;  }  bool __weak bpf_helper_changes_skb_data(void *func) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cf5e9f7ad13a..46ecce4b79ed 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -762,7 +762,7 @@ static int bpf_prog_load(union bpf_attr *attr)  	fixup_bpf_calls(prog);  	/* eBPF program is ready to be JITed */ -	err = bpf_prog_select_runtime(prog); +	prog = bpf_prog_select_runtime(prog, &err);  	if (err < 0)  		goto free_used_maps; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 84bff68cf80e..a08d66215245 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2587,26 +2587,6 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)  			insn->src_reg = 0;  } -static void adjust_branches(struct bpf_prog *prog, int pos, int delta) -{ -	struct bpf_insn *insn = prog->insnsi; -	int insn_cnt = prog->len; -	int i; - -	for (i = 0; i < insn_cnt; i++, insn++) { -		if (BPF_CLASS(insn->code) != BPF_JMP || -		    BPF_OP(insn->code) == BPF_CALL || -		    BPF_OP(insn->code) == BPF_EXIT) -			continue; - -		/* adjust offset of jmps if necessary */ -		if (i < pos && i + insn->off + 1 > pos) -			insn->off += delta; -		else if (i > pos + delta && i + insn->off + 1 <= pos + delta) -			insn->off -= delta; -	} -} -  /* convert load instructions that access fields of 'struct __sk_buff'   * into sequence of instructions that access fields of 'struct sk_buff'   */ @@ -2616,14 +2596,15 @@ static int convert_ctx_accesses(struct verifier_env *env)  	int insn_cnt = env->prog->len;  	struct bpf_insn insn_buf[16];  	struct bpf_prog *new_prog; -	u32 cnt; -	int i;  	enum bpf_access_type type; +	int i;  	if (!env->prog->aux->ops->convert_ctx_access)  		return 0;  	for (i = 0; i < insn_cnt; i++, insn++) { +		u32 insn_delta, cnt; +  		if (insn->code == (BPF_LDX | BPF_MEM | BPF_W))  			type = BPF_READ;  		else if (insn->code == (BPF_STX | BPF_MEM | BPF_W)) @@ -2645,34 +2626,18 @@ static int convert_ctx_accesses(struct verifier_env *env)  			return -EINVAL;  		} -		if (cnt == 1) { -			memcpy(insn, insn_buf, sizeof(*insn)); -			continue; -		} - -		/* several new insns need to be inserted. 
Make room for them */ -		insn_cnt += cnt - 1; -		new_prog = bpf_prog_realloc(env->prog, -					    bpf_prog_size(insn_cnt), -					    GFP_USER); +		new_prog = bpf_patch_insn_single(env->prog, i, insn_buf, cnt);  		if (!new_prog)  			return -ENOMEM; -		new_prog->len = insn_cnt; - -		memmove(new_prog->insnsi + i + cnt, new_prog->insns + i + 1, -			sizeof(*insn) * (insn_cnt - i - cnt)); - -		/* copy substitute insns in place of load instruction */ -		memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt); - -		/* adjust branches in the whole program */ -		adjust_branches(new_prog, i, cnt - 1); +		insn_delta = cnt - 1;  		/* keep walking new program and skip insns we just inserted */  		env->prog = new_prog; -		insn = new_prog->insnsi + i + cnt - 1; -		i += cnt - 1; +		insn      = new_prog->insnsi + i + insn_delta; + +		insn_cnt += insn_delta; +		i        += insn_delta;  	}  	return 0; diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 8f22fbedc3a6..93f45011a59d 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5621,7 +5621,10 @@ static struct bpf_prog *generate_filter(int which, int *err)  		fp->type = BPF_PROG_TYPE_SOCKET_FILTER;  		memcpy(fp->insnsi, fptr, fp->len * sizeof(struct bpf_insn)); -		bpf_prog_select_runtime(fp); +		/* We cannot error here as we don't need type compatibility +		 * checks. +		 */ +		fp = bpf_prog_select_runtime(fp, err);  		break;  	} diff --git a/net/Kconfig b/net/Kconfig index b841c42e5c9b..ff40562a782c 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -289,14 +289,17 @@ config BQL  config BPF_JIT  	bool "enable BPF Just In Time compiler" -	depends on HAVE_BPF_JIT +	depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT  	depends on MODULES  	---help---  	  Berkeley Packet Filter filtering capabilities are normally handled  	  by an interpreter. This option allows kernel to generate a native  	  code when filter is loaded in memory. This should speedup -	  packet sniffing (libpcap/tcpdump). Note : Admin should enable -	  this feature changing /proc/sys/net/core/bpf_jit_enable +	  packet sniffing (libpcap/tcpdump). + +	  Note, admin should enable this feature changing: +	  /proc/sys/net/core/bpf_jit_enable +	  /proc/sys/net/core/bpf_jit_harden (optional)  config NET_FLOW_LIMIT  	bool @@ -419,6 +422,14 @@ config MAY_USE_DEVLINK  endif   # if NET -# Used by archs to tell that they support BPF_JIT -config HAVE_BPF_JIT +# Used by archs to tell that they support BPF JIT compiler plus which flavour. +# Only one of the two can be selected for a specific arch since eBPF JIT supersedes +# the cBPF JIT. + +# Classic BPF JIT (cBPF) +config HAVE_CBPF_JIT +	bool + +# Extended BPF JIT (eBPF) +config HAVE_EBPF_JIT  	bool diff --git a/net/core/filter.c b/net/core/filter.c index 71c2a1f473ad..68adb5f52110 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -994,7 +994,11 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)  		 */  		goto out_err_free; -	bpf_prog_select_runtime(fp); +	/* We are guaranteed to never error here with cBPF to eBPF +	 * transitions, since there's no issue with type compatibility +	 * checks on program arrays. +	 */ +	fp = bpf_prog_select_runtime(fp, &err);  	kfree(old_prog);  	return fp; @@ -2069,16 +2073,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)  static bool __is_valid_access(int off, int size, enum bpf_access_type type)  { -	/* check bounds */  	if (off < 0 || off >= sizeof(struct __sk_buff))  		return false; - -	/* disallow misaligned access */ +	/* The verifier guarantees that size > 0. 
*/  	if (off % size != 0)  		return false; - -	/* all __sk_buff fields are __u32 */ -	if (size != 4) +	if (size != sizeof(__u32))  		return false;  	return true; @@ -2097,7 +2097,7 @@ static bool sk_filter_is_valid_access(int off, int size,  	if (type == BPF_WRITE) {  		switch (off) {  		case offsetof(struct __sk_buff, cb[0]) ... -			offsetof(struct __sk_buff, cb[4]): +		     offsetof(struct __sk_buff, cb[4]):  			break;  		default:  			return false; @@ -2278,30 +2278,30 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,  }  static const struct bpf_verifier_ops sk_filter_ops = { -	.get_func_proto = sk_filter_func_proto, -	.is_valid_access = sk_filter_is_valid_access, -	.convert_ctx_access = bpf_net_convert_ctx_access, +	.get_func_proto		= sk_filter_func_proto, +	.is_valid_access	= sk_filter_is_valid_access, +	.convert_ctx_access	= bpf_net_convert_ctx_access,  };  static const struct bpf_verifier_ops tc_cls_act_ops = { -	.get_func_proto = tc_cls_act_func_proto, -	.is_valid_access = tc_cls_act_is_valid_access, -	.convert_ctx_access = bpf_net_convert_ctx_access, +	.get_func_proto		= tc_cls_act_func_proto, +	.is_valid_access	= tc_cls_act_is_valid_access, +	.convert_ctx_access	= bpf_net_convert_ctx_access,  };  static struct bpf_prog_type_list sk_filter_type __read_mostly = { -	.ops = &sk_filter_ops, -	.type = BPF_PROG_TYPE_SOCKET_FILTER, +	.ops	= &sk_filter_ops, +	.type	= BPF_PROG_TYPE_SOCKET_FILTER,  };  static struct bpf_prog_type_list sched_cls_type __read_mostly = { -	.ops = &tc_cls_act_ops, -	.type = BPF_PROG_TYPE_SCHED_CLS, +	.ops	= &tc_cls_act_ops, +	.type	= BPF_PROG_TYPE_SCHED_CLS,  };  static struct bpf_prog_type_list sched_act_type __read_mostly = { -	.ops = &tc_cls_act_ops, -	.type = BPF_PROG_TYPE_SCHED_ACT, +	.ops	= &tc_cls_act_ops, +	.type	= BPF_PROG_TYPE_SCHED_ACT,  };  static int __init register_sk_filter_ops(void) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a6beb7b6ae55..0df2aa652530 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -294,6 +294,15 @@ static struct ctl_table net_core_table[] = {  		.mode		= 0644,  		.proc_handler	= proc_dointvec  	}, +# ifdef CONFIG_HAVE_EBPF_JIT +	{ +		.procname	= "bpf_jit_harden", +		.data		= &bpf_jit_harden, +		.maxlen		= sizeof(int), +		.mode		= 0600, +		.proc_handler	= proc_dointvec, +	}, +# endif  #endif  	{  		.procname	= "netdev_tstamp_prequeue", | 
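
Finally, the Documentation/sysctl/net.txt hunk above exposes the new knob as net.core.bpf_jit_harden next to bpf_jit_enable (registered in net/core/sysctl_net_core.c under CONFIG_HAVE_EBPF_JIT, mode 0600). A minimal sketch of flipping it from a root-run C program — the procfs path and the 0/1/2 values come from the patch, the rest is illustrative:

```c
#include <stdio.h>

/* Write 0 (off), 1 (unprivileged users only) or 2 (all users) to the
 * bpf_jit_harden sysctl introduced by this series.  Needs root and a
 * kernel built with an eBPF JIT backend (CONFIG_HAVE_EBPF_JIT).
 */
static int set_bpf_jit_harden(int val)
{
	FILE *f = fopen("/proc/sys/net/core/bpf_jit_harden", "w");

	if (!f) {
		perror("open bpf_jit_harden");
		return -1;
	}
	fprintf(f, "%d\n", val);
	return fclose(f);
}

int main(void)
{
	return set_bpf_jit_harden(2) ? 1 : 0;
}
```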
