summary | refs | log | tree | commit | diff
path: root/arch/riscv
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2021-11-02 06:20:58 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2021-11-02 06:20:58 -0700
commit: fc02cb2b37fe2cbf1d3334b9f0f0eab9431766c4 (patch)
tree: 93b16bc48fdc3be4a1adccbf4c7de92a5e8440e1 /arch/riscv
parent: bfc484fe6abba4b89ec9330e0e68778e2a9856b2 (diff)
parent: 84882cf72cd774cf16fd338bdbf00f69ac9f9194 (diff)
Merge tag 'net-next-for-5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski:
"Core:
- Remove socket skb caches
- Add a SO_RESERVE_MEM socket op to forward allocate buffer space and avoid memory accounting overhead on each message sent
- Introduce managed neighbor entries - added by control plane and resolved by the kernel for use in acceleration paths (BPF / XDP right now, HW offload users will benefit as well)
- Make neighbor eviction on link down controllable by userspace to work around WiFi networks with bad roaming implementations
- vrf: Rework interaction with netfilter/conntrack
- fq_codel: implement L4S style ce_threshold_ect1 marking
- sch: Eliminate unnecessary RCU waits in mini_qdisc_pair_swap()

BPF:
- Add support for new btf kind BTF_KIND_TAG, arbitrary type tagging as implemented in LLVM14
- Introduce bpf_get_branch_snapshot() to capture Last Branch Records
- Implement variadic trace_printk helper
- Add a new Bloomfilter map type
- Track <8-byte scalar spill and refill
- Access hw timestamp through BPF's __sk_buff
- Disallow unprivileged BPF by default
- Document BPF licensing

Netfilter:
- Introduce egress hook for looking at raw outgoing packets
- Allow matching on and modifying inner headers / payload data
- Add NFT_META_IFTYPE to match on the interface type either from ingress or egress

Protocols:
- Multi-Path TCP:
  - increase default max additional subflows to 2
  - rework forward memory allocation
  - add getsockopts: MPTCP_INFO, MPTCP_TCPINFO, MPTCP_SUBFLOW_ADDRS
- MCTP flow support allowing lower layer drivers to configure msg muxing as needed
- Automatic Multicast Tunneling (AMT) driver based on RFC7450
- HSR support the redbox supervision frames (IEC-62439-3:2018)
- Support for the ip6ip6 encapsulation of IOAM
- Netlink interface for CAN-FD's Transmitter Delay Compensation
- Support SMC-Rv2 eliminating the current same-subnet restriction, by exploiting the UDP encapsulation feature of RoCE adapters
- TLS: add SM4 GCM/CCM crypto support
- Bluetooth: initial support for link quality and audio/codec offload

Driver APIs:
- Add a batched interface for RX buffer allocation in AF_XDP buffer pool
- ethtool: Add ability to control transceiver modules' power mode
- phy: Introduce supported interfaces bitmap to express MAC capabilities and simplify PHY code
- Drop rtnl_lock from DSA .port_fdb_{add,del} callbacks

New drivers:
- WiFi driver for Realtek 8852AE 802.11ax devices (rtw89)
- Ethernet driver for ASIX AX88796C SPI device (x88796c)

Drivers:
- Broadcom PHYs
  - support 72165, 7712 16nm PHYs
  - support IDDQ-SR for additional power savings
- PHY support for QCA8081, QCA9561 PHYs
- NXP DPAA2: support for IRQ coalescing
- NXP Ethernet (enetc): support for software TCP segmentation
- Renesas Ethernet (ravb)
  - support DMAC and EMAC blocks of Gigabit-capable IP found on RZ/G2L SoC
- Intel 100G Ethernet
  - support for eswitch offload of TC/OvS flow API, including offload of GRE, VxLAN, Geneve tunneling
  - support application device queues - ability to assign Rx and Tx queues to application threads
  - PTP and PPS (pulse-per-second) extensions
- Broadcom Ethernet (bnxt)
  - devlink health reporting and device reload extensions
- Mellanox Ethernet (mlx5)
  - offload macvlan interfaces
  - support HW offload of TC rules involving OVS internal ports
  - support HW-GRO and header/data split
  - support application device queues
- Marvell OcteonTx2:
  - add XDP support for PF
  - add PTP support for VF
- Qualcomm Ethernet switch (qca8k): support for QCA8328
- Realtek Ethernet DSA switch (rtl8366rb)
  - support bridge offload
  - support STP, fast aging, disabling address learning
  - support for Realtek RTL8365MB-VC, a 4+1 port 10M/100M/1GE switch
- Mellanox Ethernet/IB switch (mlxsw)
  - multi-level qdisc hierarchy offload (e.g. RED, prio and shaping)
  - offload root TBF qdisc as port shaper
  - support multiple routing interface MAC address prefixes
  - support for IP-in-IP with IPv6 underlay
- MediaTek WiFi (mt76)
  - mt7921 - ASPM, 6GHz, SDIO and testmode support
  - mt7915 - LED and TWT support
- Qualcomm WiFi (ath11k)
  - include channel rx and tx time in survey dump statistics
  - support for 80P80 and 160 MHz bandwidths
  - support channel 2 in 6 GHz band
  - spectral scan support for QCN9074
  - support for rx decapsulation offload (data frames in 802.3 format)
- Qualcomm phone SoC WiFi (wcn36xx)
  - enable Idle Mode Power Save (IMPS) to reduce power consumption during idle
- Bluetooth driver support for MediaTek MT7922 and MT7921
- Enable support for AOSP Bluetooth extension in Qualcomm WCN399x and Realtek 8822C/8852A
- Microsoft vNIC driver (mana)
  - support hibernation and kexec
- Google vNIC driver (gve)
  - support for jumbo frames
  - implement Rx page reuse

Refactor:
- Make all writes to netdev->dev_addr go thru helpers, so that we can add this address to the address rbtree and handle the updates
- Various TCP cleanups and optimizations including improvements to CPU cache use
- Simplify the gnet_stats, Qdisc stats' handling and remove qdisc->running sequence counter
- Driver changes and API updates to address devlink locking deficiencies"

* tag 'net-next-for-5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2122 commits)
Revert "net: avoid double accounting for pure zerocopy skbs"
selftests: net: add arp_ndisc_evict_nocarrier
net: ndisc: introduce ndisc_evict_nocarrier sysctl parameter
net: arp: introduce arp_evict_nocarrier sysctl parameter
libbpf: Deprecate AF_XDP support
kbuild: Unify options for BTF generation for vmlinux and modules
selftests/bpf: Add a testcase for 64-bit bounds propagation issue.
bpf: Fix propagation of signed bounds from 64-bit min/max into 32-bit.
bpf: Fix propagation of bounds from 64-bit min/max into 32-bit and var_off.
net: vmxnet3: remove multiple false checks in vmxnet3_ethtool.c
net: avoid double accounting for pure zerocopy skbs
tcp: rename sk_wmem_free_skb
netdevsim: fix uninit value in nsim_drv_configure_vfs()
selftests/bpf: Fix also no-alu32 strobemeta selftest
bpf: Add missing map_delete_elem method to bloom filter map
selftests/bpf: Add bloom map success test for userspace calls
bpf: Add alignment padding for "map_extra" + consolidate holes
bpf: Bloom filter map naming fixups
selftests/bpf: Add test cases for struct_ops prog
bpf: Add dummy BPF STRUCT_OPS for test purpose
...
Diffstat (limited to 'arch/riscv')
-rw-r--r-- arch/riscv/mm/extable.c 19
-rw-r--r-- arch/riscv/net/bpf_jit.h 1
-rw-r--r-- arch/riscv/net/bpf_jit_comp64.c 185
-rw-r--r-- arch/riscv/net/bpf_jit_core.c 21
4 files changed, 178 insertions, 48 deletions
diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
index 2fc729422151..18bf338303b6 100644
--- a/arch/riscv/mm/extable.c
+++ b/arch/riscv/mm/extable.c
@@ -11,14 +11,23 @@
#include <linux/module.h>
#include <linux/uaccess.h>
+#ifdef CONFIG_BPF_JIT
+int rv_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs);
+#endif
+
int fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *fixup;
fixup = search_exception_tables(regs->epc);
- if (fixup) {
- regs->epc = fixup->fixup;
- return 1;
- }
- return 0;
+ if (!fixup)
+ return 0;
+
+#ifdef CONFIG_BPF_JIT
+ if (regs->epc >= BPF_JIT_REGION_START && regs->epc < BPF_JIT_REGION_END)
+ return rv_bpf_fixup_exception(fixup, regs);
+#endif
+
+ regs->epc = fixup->fixup;
+ return 1;
}
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index 75c1e9996867..f42d9cd3b64d 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -71,6 +71,7 @@ struct rv_jit_context {
int ninsns;
int epilogue_offset;
int *offset; /* BPF to RV */
+ int nexentries;
unsigned long flags;
int stack_size;
};
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 3af4131c22c7..2ca345c7b0bf 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -5,6 +5,7 @@
*
*/
+#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include "bpf_jit.h"
@@ -27,6 +28,21 @@ static const int regmap[] = {
[BPF_REG_AX] = RV_REG_T0,
};
+static const int pt_regmap[] = {
+ [RV_REG_A0] = offsetof(struct pt_regs, a0),
+ [RV_REG_A1] = offsetof(struct pt_regs, a1),
+ [RV_REG_A2] = offsetof(struct pt_regs, a2),
+ [RV_REG_A3] = offsetof(struct pt_regs, a3),
+ [RV_REG_A4] = offsetof(struct pt_regs, a4),
+ [RV_REG_A5] = offsetof(struct pt_regs, a5),
+ [RV_REG_S1] = offsetof(struct pt_regs, s1),
+ [RV_REG_S2] = offsetof(struct pt_regs, s2),
+ [RV_REG_S3] = offsetof(struct pt_regs, s3),
+ [RV_REG_S4] = offsetof(struct pt_regs, s4),
+ [RV_REG_S5] = offsetof(struct pt_regs, s5),
+ [RV_REG_T0] = offsetof(struct pt_regs, t0),
+};
+
enum {
RV_CTX_F_SEEN_TAIL_CALL = 0,
RV_CTX_F_SEEN_CALL = RV_REG_RA,
@@ -440,6 +456,69 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
return 0;
}
+#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
+#define BPF_FIXUP_REG_MASK GENMASK(31, 27)
+
+int rv_bpf_fixup_exception(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
+ int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
+
+ *(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
+ regs->epc = (unsigned long)&ex->fixup - offset;
+
+ return 1;
+}
+
+/* For accesses to BTF pointers, add an entry to the exception table */
+static int add_exception_handler(const struct bpf_insn *insn,
+ struct rv_jit_context *ctx,
+ int dst_reg, int insn_len)
+{
+ struct exception_table_entry *ex;
+ unsigned long pc;
+ off_t offset;
+
+ if (!ctx->insns || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM)
+ return 0;
+
+ if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(insn_len > ctx->ninsns))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(!rvc_enabled() && insn_len == 1))
+ return -EINVAL;
+
+ ex = &ctx->prog->aux->extable[ctx->nexentries];
+ pc = (unsigned long)&ctx->insns[ctx->ninsns - insn_len];
+
+ offset = pc - (long)&ex->insn;
+ if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ return -ERANGE;
+ ex->insn = pc;
+
+ /*
+ * Since the extable follows the program, the fixup offset is always
+ * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
+ * to keep things simple, and put the destination register in the upper
+ * bits. We don't need to worry about buildtime or runtime sort
+ * modifying the upper bits because the table is already sorted, and
+ * isn't part of the main exception table.
+ */
+ offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16));
+ if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
+ return -ERANGE;
+
+ ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
+ FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
+
+ ctx->nexentries++;
+ return 0;
+}
+
int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
bool extra_pass)
{
@@ -893,52 +972,86 @@ out_be:
/* LDX: dst = *(size *)(src + off) */
case BPF_LDX | BPF_MEM | BPF_B:
- if (is_12b_int(off)) {
- emit(rv_lbu(rd, off, rs), ctx);
+ case BPF_LDX | BPF_MEM | BPF_H:
+ case BPF_LDX | BPF_MEM | BPF_W:
+ case BPF_LDX | BPF_MEM | BPF_DW:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ {
+ int insn_len, insns_start;
+
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ emit(rv_lbu(rd, off, rs), ctx);
+ insn_len = ctx->ninsns - insns_start;
+ break;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
+ insn_len = ctx->ninsns - insns_start;
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
- }
+ case BPF_H:
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ emit(rv_lhu(rd, off, rs), ctx);
+ insn_len = ctx->ninsns - insns_start;
+ break;
+ }
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
- if (insn_is_zext(&insn[1]))
- return 1;
- break;
- case BPF_LDX | BPF_MEM | BPF_H:
- if (is_12b_int(off)) {
- emit(rv_lhu(rd, off, rs), ctx);
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
+ insn_len = ctx->ninsns - insns_start;
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
- }
+ case BPF_W:
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ emit(rv_lwu(rd, off, rs), ctx);
+ insn_len = ctx->ninsns - insns_start;
+ break;
+ }
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
- if (insn_is_zext(&insn[1]))
- return 1;
- break;
- case BPF_LDX | BPF_MEM | BPF_W:
- if (is_12b_int(off)) {
- emit(rv_lwu(rd, off, rs), ctx);
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
+ insn_len = ctx->ninsns - insns_start;
+ if (insn_is_zext(&insn[1]))
+ return 1;
break;
- }
+ case BPF_DW:
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ emit_ld(rd, off, rs, ctx);
+ insn_len = ctx->ninsns - insns_start;
+ break;
+ }
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
- if (insn_is_zext(&insn[1]))
- return 1;
- break;
- case BPF_LDX | BPF_MEM | BPF_DW:
- if (is_12b_int(off)) {
- emit_ld(rd, off, rs, ctx);
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ emit_ld(rd, 0, RV_REG_T1, ctx);
+ insn_len = ctx->ninsns - insns_start;
break;
}
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- emit_ld(rd, 0, RV_REG_T1, ctx);
+ ret = add_exception_handler(insn, ctx, rd, insn_len);
+ if (ret)
+ return ret;
break;
-
+ }
/* speculation barrier */
case BPF_ST | BPF_NOSPEC:
break;
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 753d85bdfad0..be743d700aa7 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -11,7 +11,7 @@
#include "bpf_jit.h"
/* Number of iterations to try until offsets converge. */
-#define NR_JIT_ITERATIONS 16
+#define NR_JIT_ITERATIONS 32
static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset)
{
@@ -41,12 +41,12 @@ bool bpf_jit_needs_zext(void)
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
+ unsigned int prog_size = 0, extable_size = 0;
bool tmp_blinded = false, extra_pass = false;
struct bpf_prog *tmp, *orig_prog = prog;
int pass = 0, prev_ninsns = 0, i;
struct rv_jit_data *jit_data;
struct rv_jit_context *ctx;
- unsigned int image_size = 0;
if (!prog->jit_requested)
return orig_prog;
@@ -73,7 +73,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
if (ctx->offset) {
extra_pass = true;
- image_size = sizeof(*ctx->insns) * ctx->ninsns;
+ prog_size = sizeof(*ctx->insns) * ctx->ninsns;
goto skip_init_ctx;
}
@@ -102,10 +102,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
if (ctx->ninsns == prev_ninsns) {
if (jit_data->header)
break;
+ /* obtain the actual image size */
+ extable_size = prog->aux->num_exentries *
+ sizeof(struct exception_table_entry);
+ prog_size = sizeof(*ctx->insns) * ctx->ninsns;
- image_size = sizeof(*ctx->insns) * ctx->ninsns;
jit_data->header =
- bpf_jit_binary_alloc(image_size,
+ bpf_jit_binary_alloc(prog_size + extable_size,
&jit_data->image,
sizeof(u32),
bpf_fill_ill_insns);
@@ -131,9 +134,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_offset;
}
+ if (extable_size)
+ prog->aux->extable = (void *)ctx->insns + prog_size;
+
skip_init_ctx:
pass++;
ctx->ninsns = 0;
+ ctx->nexentries = 0;
bpf_jit_build_prologue(ctx);
if (build_body(ctx, extra_pass, NULL)) {
@@ -144,11 +151,11 @@ skip_init_ctx:
bpf_jit_build_epilogue(ctx);
if (bpf_jit_enable > 1)
- bpf_jit_dump(prog->len, image_size, pass, ctx->insns);
+ bpf_jit_dump(prog->len, prog_size, pass, ctx->insns);
prog->bpf_func = (void *)ctx->insns;
prog->jited = 1;
- prog->jited_len = image_size;
+ prog->jited_len = prog_size;
bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns);