author    Linus Torvalds <torvalds@linux-foundation.org>  2024-03-12 17:44:08 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2024-03-12 17:44:08 -0700
commit    9187210eee7d87eea37b45ea93454a88681894a4 (patch)
tree      31b4610e62cdd5e1dfb700014aa619e41145d7d3 /arch
parent    1f440397665f4241346e4cc6d93f8b73880815d1 (diff)
parent    ed1f164038b50c5864aa85389f3ffd456f050cca (diff)
Merge tag 'net-next-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski:
 "Core & protocols:

   - Large effort by Eric to lower rtnl_lock pressure and remove locks:

      - Make commonly used parts of rtnetlink (address, route dumps
        etc) lockless, protected by RCU instead of rtnl_lock.

      - Add a netns exit callback which already holds rtnl_lock,
        allowing netns exit to take rtnl_lock once in the core instead
        of once for each driver / callback.

      - Remove locks / serialization in the socket diag interface.

      - Remove 6 calls to synchronize_rcu() while holding rtnl_lock.

      - Remove the dev_base_lock, depend on RCU where necessary.

   - Support busy polling on a per-epoll context basis. Poll length and
     budget parameters can be set independently of system defaults.

   - Introduce struct net_hotdata, to make sure read-mostly global
     config variables fit in as few cache lines as possible.

   - Add optional per-nexthop statistics to ease monitoring / debug of
     ECMP imbalance problems.

   - Support TCP_NOTSENT_LOWAT in MPTCP.

   - Ensure that IPv6 temporary addresses' preferred lifetimes are long
     enough, compared to other configured lifetimes, and at least 2 sec.

   - Support forwarding of ICMP Error messages in IPSec, per RFC 4301.

   - Add support for the independent control state machine for bonding
     per IEEE 802.1AX-2008 5.4.15 in addition to the existing coupled
     control state machine.

   - Add "network ID" to MCTP socket APIs to support hosts with
     multiple disjoint MCTP networks.

   - Re-use the mono_delivery_time skbuff bit for packets which user
     space wants to be sent at a specified time. Maintain the timing
     information while traversing veth links, bridge etc.

   - Take advantage of MSG_SPLICE_PAGES for RxRPC DATA and ACK packets.

   - Simplify many places iterating over netdevs by using an xarray
     instead of a hash table walk (hash table remains in place, for use
     on fastpaths).

   - Speed up scanning for expired routes by keeping a dedicated list.

   - Speed up "generic" XDP by trying harder to avoid large allocations.

   - Support attaching arbitrary metadata to netconsole messages.

  Things we sprinkled into general kernel code:

   - Enforce VM_IOREMAP flag and range in ioremap_page_range and
     introduce VM_SPARSE kind and vm_area_[un]map_pages (used by
     bpf_arena).

   - Rework selftest harness to enable the use of the full range of
     ksft exit code (pass, fail, skip, xfail, xpass).

  Netfilter:

   - Allow userspace to define a table that is exclusively owned by a
     daemon (via netlink socket aliveness) without auto-removing this
     table when the userspace program exits. Such table gets marked as
     orphaned and a restarting management daemon can re-attach/regain
     ownership.

   - Speed up element insertions to nftables' concatenated-ranges set
     type. Compact a few related data structures.

  BPF:

   - Add BPF token support for delegating a subset of BPF subsystem
     functionality from privileged system-wide daemons such as systemd
     through special mount options for userns-bound BPF fs to a trusted
     & unprivileged application.

   - Introduce bpf_arena which is sparse shared memory region between
     BPF program and user space where structures inside the arena can
     have pointers to other areas of the arena, and pointers work
     seamlessly for both user-space programs and BPF programs.

   - Introduce may_goto instruction that is a contract between the
     verifier and the program. The verifier allows the program to loop
     assuming it's behaving well, but reserves the right to terminate
     it.

   - Extend the BPF verifier to enable static subprog calls in spin
     lock critical sections.
   - Support registration of struct_ops types from modules which helps
     projects like fuse-bpf that seeks to implement a new struct_ops
     type.

   - Add support for retrieval of cookies for perf/kprobe multi links.

   - Support arbitrary TCP SYN cookie generation / validation in the TC
     layer with BPF to allow creating SYN flood handling in BPF
     firewalls.

   - Add code generation to inline the bpf_kptr_xchg() helper which
     improves performance when stashing/popping the allocated BPF
     objects.

  Wireless:

   - Add SPP (signaling and payload protected) AMSDU support.

   - Support wider bandwidth OFDMA, as required for EHT operation.

  Driver API:

   - Major overhaul of the Energy Efficient Ethernet internals to
     support new link modes (2.5GE, 5GE), share more code between
     drivers (especially those using phylib), and encourage more
     uniform behavior. Convert and clean up drivers.

   - Define an API for querying per netdev queue statistics from
     drivers.

   - IPSec: account in global stats for fully offloaded sessions.

   - Create a concept of Ethernet PHY Packages at the Device Tree
     level, to allow parameterizing the existing PHY package code.

   - Enable Rx hashing (RSS) on GTP protocol fields.

  Misc:

   - Improvements and refactoring all over networking selftests.

   - Create uniform module aliases for TC classifiers, actions, and
     packet schedulers to simplify creating modprobe policies.

   - Address all missing MODULE_DESCRIPTION() warnings in networking.

   - Extend the Netlink descriptions in YAML to cover message
     encapsulation or "Netlink polymorphism", where interpretation of
     nested attributes depends on link type, classifier type or some
     other "class type".

  Drivers:

   - Ethernet high-speed NICs:
      - Add a new driver for Marvell's Octeon PCI Endpoint NIC VF.
      - Intel (100G, ice, idpf):
         - support E825-C devices
      - nVidia/Mellanox:
         - support devices with one port and multiple PCIe links
      - Broadcom (bnxt):
         - support n-tuple filters
         - support configuring the RSS key
      - Wangxun (ngbe/txgbe):
         - implement irq_domain for TXGBE's sub-interrupts
      - Pensando/AMD:
         - support XDP
         - optimize queue submission and wakeup handling (+17% bps)
         - optimize struct layout, saving 28% of memory on queues

   - Ethernet NICs embedded and virtual:
      - Google cloud vNIC:
         - refactor driver to perform memory allocations for new queue
           config before stopping and freeing the old queue memory
      - Synopsys (stmmac):
         - obey queueMaxSDU and implement counters required by 802.1Qbv
      - Renesas (ravb):
         - support packet checksum offload
         - suspend to RAM and runtime PM support

   - Ethernet switches:
      - nVidia/Mellanox:
         - support for nexthop group statistics
      - Microchip:
         - ksz8: implement PHY loopback
         - add support for KSZ8567, a 7-port 10/100Mbps switch

   - PTP:
      - New driver for RENESAS FemtoClock3 Wireless clock generator.
      - Support OCP PTP cards designed and built by Adva.

   - CAN:
      - Support recvmsg() flags for own, local and remote traffic on
        CAN BCM sockets.
      - Support for esd GmbH PCIe/402 CAN device family.
      - m_can:
         - Rx/Tx submission coalescing
         - wake on frame Rx

   - WiFi:
      - Intel (iwlwifi):
         - enable signaling and payload protected A-MSDUs
         - support wider-bandwidth OFDMA
         - support for new devices
         - bump FW API to 89 for AX devices; 90 for BZ/SC devices
      - MediaTek (mt76):
         - mt7915: newer ADIE version support
         - mt7925: radio temperature sensor support
      - Qualcomm (ath11k):
         - support 6 GHz station power modes: Low Power Indoor (LPI),
           Standard Power (SP) and Very Low Power (VLP)
         - QCA6390 & WCN6855: support 2 concurrent station interfaces
         - QCA2066 support
      - Qualcomm (ath12k):
         - refactoring in preparation for Multi-Link Operation (MLO)
           support
         - 1024 Block Ack window size support
         - firmware-2.bin support
         - support having multiple identical PCI devices (firmware needs
           to have ATH12K_FW_FEATURE_MULTI_QRTR_ID)
         - QCN9274: support split-PHY devices
         - WCN7850: enable Power Save Mode in station mode
         - WCN7850: P2P support
      - RealTek:
         - rtw88: support for more rtw8811cu and rtw8821cu devices
         - rtw89: support SCAN_RANDOM_SN and SET_SCAN_DWELL
         - rtlwifi: speed up USB firmware initialization
         - rtl8xxxu:
            - RTL8188F: concurrent interface support
            - Channel Switch Announcement (CSA) support in AP mode
      - Broadcom (brcmfmac):
         - per-vendor feature support
         - per-vendor SAE password setup
         - DMI nvram filename quirk for ACEPC W5 Pro"

* tag 'net-next-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2255 commits)
  nexthop: Fix splat with CONFIG_DEBUG_PREEMPT=y
  nexthop: Fix out-of-bounds access during attribute validation
  nexthop: Only parse NHA_OP_FLAGS for dump messages that require it
  nexthop: Only parse NHA_OP_FLAGS for get messages that require it
  bpf: move sleepable flag from bpf_prog_aux to bpf_prog
  bpf: hardcode BPF_PROG_PACK_SIZE to 2MB * num_possible_nodes()
  selftests/bpf: Add kprobe multi triggering benchmarks
  ptp: Move from simple ida to xarray
  vxlan: Remove generic .ndo_get_stats64
  vxlan: Do not alloc tstats manually
  devlink: Add comments to use netlink gen tool
  nfp: flower: handle acti_netdevs allocation failure
  net/packet: Add getsockopt support for PACKET_COPY_THRESH
  net/netlink: Add getsockopt support for NETLINK_LISTEN_ALL_NSID
  selftests/bpf: Add bpf_arena_htab test.
  selftests/bpf: Add bpf_arena_list test.
  selftests/bpf: Add unit tests for bpf_arena_alloc/free_pages
  bpf: Add helper macro bpf_addr_space_cast()
  libbpf: Recognize __arena global variables.
  bpftool: Recognize arena map type
  ...
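One item in the message above that is easy to misread is the per-epoll busy polling: it is configured per epoll instance from user space, not through sysctls. A minimal userspace sketch follows; the struct and ioctl names (struct epoll_params, EPIOCSPARAMS) and field layout are recalled from the series rather than taken from this diff, so treat them as assumptions to check against the 6.9 uapi headers.

#include <sys/ioctl.h>
#include <sys/epoll.h>
#include <linux/eventpoll.h>    /* struct epoll_params, EPIOCSPARAMS -- assumed 6.9+ uapi */

/* Sketch: opt a single epoll instance into busy polling with its own
 * poll length and budget, independent of the system-wide defaults. */
static int enable_epoll_busy_poll(int epfd)
{
        struct epoll_params params = {
                .busy_poll_usecs  = 64,  /* busy-poll for up to 64us per attempt */
                .busy_poll_budget = 16,  /* process at most 16 packets per attempt */
                .prefer_busy_poll = 1,
        };

        return ioctl(epfd, EPIOCSPARAMS, &params);  /* 0 on success, -1/errno on failure */
}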
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/mm/ioremap.c8
-rw-r--r--arch/arm64/include/asm/patching.h2
-rw-r--r--arch/arm64/kernel/patching.c75
-rw-r--r--arch/arm64/kernel/stacktrace.c26
-rw-r--r--arch/arm64/net/bpf_jit_comp.c286
-rw-r--r--arch/loongarch/kernel/setup.c2
-rw-r--r--arch/mips/loongson64/init.c2
-rw-r--r--arch/powerpc/kernel/isa-bridge.c4
-rw-r--r--arch/riscv/include/asm/cfi.h17
-rw-r--r--arch/riscv/kernel/cfi.c53
-rw-r--r--arch/riscv/net/bpf_jit.h136
-rw-r--r--arch/riscv/net/bpf_jit_comp32.c2
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c229
-rw-r--r--arch/riscv/net/bpf_jit_core.c9
-rw-r--r--arch/x86/net/bpf_jit_comp.c236
15 files changed, 880 insertions, 207 deletions
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 2129070065c3..794cfea9f9d4 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -110,8 +110,8 @@ void __init add_static_vm_early(struct static_vm *svm)
int ioremap_page(unsigned long virt, unsigned long phys,
const struct mem_type *mtype)
{
- return ioremap_page_range(virt, virt + PAGE_SIZE, phys,
- __pgprot(mtype->prot_pte));
+ return vmap_page_range(virt, virt + PAGE_SIZE, phys,
+ __pgprot(mtype->prot_pte));
}
EXPORT_SYMBOL(ioremap_page);
@@ -466,8 +466,8 @@ int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr)
if (res->end > IO_SPACE_LIMIT)
return -EINVAL;
- return ioremap_page_range(vaddr, vaddr + resource_size(res), phys_addr,
- __pgprot(get_mem_type(pci_ioremap_mem_type)->prot_pte));
+ return vmap_page_range(vaddr, vaddr + resource_size(res), phys_addr,
+ __pgprot(get_mem_type(pci_ioremap_mem_type)->prot_pte));
}
EXPORT_SYMBOL(pci_remap_iospace);
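The ioremap.c hunks above (and the matching loongarch, mips and powerpc hunks later in this diff) are mechanical: ioremap_page_range() now enforces VM_IOREMAP-backed ranges, so callers that map fixed virtual windows switch to vmap_page_range() with the same argument list. A hedged sketch of the resulting calling pattern, using only what is visible in these hunks:

/*
 * Sketch of the converted callers' pattern: map a physically contiguous
 * window at a fixed kernel virtual address. The return convention is
 * assumed to match the old ioremap_page_range() one (0 or -errno),
 * which is what the powerpc hunk's error check implies.
 */
static int map_fixed_io_window(unsigned long virt, size_t size,
                               phys_addr_t phys, pgprot_t prot)
{
        return vmap_page_range(virt, virt + size, phys, prot);
}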
diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h
index 68908b82b168..587bdb91ab7a 100644
--- a/arch/arm64/include/asm/patching.h
+++ b/arch/arm64/include/asm/patching.h
@@ -8,6 +8,8 @@ int aarch64_insn_read(void *addr, u32 *insnp);
int aarch64_insn_write(void *addr, u32 insn);
int aarch64_insn_write_literal_u64(void *addr, u64 val);
+void *aarch64_insn_set(void *dst, u32 insn, size_t len);
+void *aarch64_insn_copy(void *dst, void *src, size_t len);
int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
index b4835f6d594b..255534930368 100644
--- a/arch/arm64/kernel/patching.c
+++ b/arch/arm64/kernel/patching.c
@@ -105,6 +105,81 @@ noinstr int aarch64_insn_write_literal_u64(void *addr, u64 val)
return ret;
}
+typedef void text_poke_f(void *dst, void *src, size_t patched, size_t len);
+
+static void *__text_poke(text_poke_f func, void *addr, void *src, size_t len)
+{
+ unsigned long flags;
+ size_t patched = 0;
+ size_t size;
+ void *waddr;
+ void *ptr;
+
+ raw_spin_lock_irqsave(&patch_lock, flags);
+
+ while (patched < len) {
+ ptr = addr + patched;
+ size = min_t(size_t, PAGE_SIZE - offset_in_page(ptr),
+ len - patched);
+
+ waddr = patch_map(ptr, FIX_TEXT_POKE0);
+ func(waddr, src, patched, size);
+ patch_unmap(FIX_TEXT_POKE0);
+
+ patched += size;
+ }
+ raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+ flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len);
+
+ return addr;
+}
+
+static void text_poke_memcpy(void *dst, void *src, size_t patched, size_t len)
+{
+ copy_to_kernel_nofault(dst, src + patched, len);
+}
+
+static void text_poke_memset(void *dst, void *src, size_t patched, size_t len)
+{
+ u32 c = *(u32 *)src;
+
+ memset32(dst, c, len / 4);
+}
+
+/**
+ * aarch64_insn_copy - Copy instructions into (an unused part of) RX memory
+ * @dst: address to modify
+ * @src: source of the copy
+ * @len: length to copy
+ *
+ * Useful for JITs to dump new code blocks into unused regions of RX memory.
+ */
+noinstr void *aarch64_insn_copy(void *dst, void *src, size_t len)
+{
+ /* A64 instructions must be word aligned */
+ if ((uintptr_t)dst & 0x3)
+ return NULL;
+
+ return __text_poke(text_poke_memcpy, dst, src, len);
+}
+
+/**
+ * aarch64_insn_set - memset for RX memory regions.
+ * @dst: address to modify
+ * @insn: value to set
+ * @len: length of memory region.
+ *
+ * Useful for JITs to fill regions of RX memory with illegal instructions.
+ */
+noinstr void *aarch64_insn_set(void *dst, u32 insn, size_t len)
+{
+ if ((uintptr_t)dst & 0x3)
+ return NULL;
+
+ return __text_poke(text_poke_memset, dst, &insn, len);
+}
+
int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
{
u32 *tp = addr;
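Both new helpers go through __text_poke(), which temporarily maps each page of the read-only-executable target through the FIX_TEXT_POKE0 fixmap slot, writes via that RW alias under patch_lock, and then flushes the instruction cache. A sketch of the intended JIT-side usage; it mirrors bpf_arch_text_invalidate() and bpf_arch_text_copy() further down in this diff, and the -EINVAL convention is taken from those callers:

/*
 * Sketch: recycle a slot in an RX region. Both helpers return NULL if
 * the destination is not 4-byte aligned (A64 instructions must be).
 */
static int reuse_rx_slot(void *rx_dst, void *rw_insns, size_t len)
{
        /* first poison the slot so stale instructions cannot run */
        if (!aarch64_insn_set(rx_dst, AARCH64_BREAK_FAULT, len))
                return -EINVAL;

        /* then install the freshly generated instructions */
        if (!aarch64_insn_copy(rx_dst, rw_insns, len))
                return -EINVAL;

        return 0;
}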
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index b2a60e0bcfd2..684c26511696 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -7,6 +7,7 @@
#include <linux/kernel.h>
#include <linux/efi.h>
#include <linux/export.h>
+#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/kprobes.h>
#include <linux/sched.h>
@@ -266,6 +267,31 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
}
+struct bpf_unwind_consume_entry_data {
+ bool (*consume_entry)(void *cookie, u64 ip, u64 sp, u64 fp);
+ void *cookie;
+};
+
+static bool
+arch_bpf_unwind_consume_entry(const struct kunwind_state *state, void *cookie)
+{
+ struct bpf_unwind_consume_entry_data *data = cookie;
+
+ return data->consume_entry(data->cookie, state->common.pc, 0,
+ state->common.fp);
+}
+
+noinline noinstr void arch_bpf_stack_walk(bool (*consume_entry)(void *cookie, u64 ip, u64 sp,
+ u64 fp), void *cookie)
+{
+ struct bpf_unwind_consume_entry_data data = {
+ .consume_entry = consume_entry,
+ .cookie = cookie,
+ };
+
+ kunwind_stack_walk(arch_bpf_unwind_consume_entry, &data, current, NULL);
+}
+
static bool dump_backtrace_entry(void *arg, unsigned long where)
{
char *loglvl = arg;
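arch_bpf_stack_walk() gives the BPF core (bpf_throw() unwinding, stack dumps) an arch-neutral way to walk frames: it adapts the arm64 kunwind machinery to a simple (cookie, ip, sp, fp) callback, reporting sp as 0 as the code above shows. A hedged sketch of a consumer built only on that callback signature:

/* Sketch: collect up to 16 return addresses from the current stack. */
struct ip_buf {
        u64 ips[16];
        int n;
};

static bool collect_ip(void *cookie, u64 ip, u64 sp, u64 fp)
{
        struct ip_buf *buf = cookie;

        if (buf->n >= ARRAY_SIZE(buf->ips))
                return false;   /* returning false stops the walk */

        buf->ips[buf->n++] = ip;
        return true;
}

/* caller:  struct ip_buf buf = {};  arch_bpf_stack_walk(collect_ip, &buf); */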
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 8955da5c47cf..c5b461dda438 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -76,6 +76,7 @@ struct jit_ctx {
int *offset;
int exentry_idx;
__le32 *image;
+ __le32 *ro_image;
u32 stack_size;
int fpb_offset;
};
@@ -205,6 +206,14 @@ static void jit_fill_hole(void *area, unsigned int size)
*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
}
+int bpf_arch_text_invalidate(void *dst, size_t len)
+{
+ if (!aarch64_insn_set(dst, AARCH64_BREAK_FAULT, len))
+ return -EINVAL;
+
+ return 0;
+}
+
static inline int epilogue_offset(const struct jit_ctx *ctx)
{
int to = ctx->epilogue_offset;
@@ -285,7 +294,8 @@ static bool is_lsi_offset(int offset, int scale)
/* Tail call offset to jump into */
#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8)
-static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
+static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
+ bool is_exception_cb)
{
const struct bpf_prog *prog = ctx->prog;
const bool is_main_prog = !bpf_is_subprog(prog);
@@ -333,19 +343,34 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
emit(A64_NOP, ctx);
- /* Sign lr */
- if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
- emit(A64_PACIASP, ctx);
-
- /* Save FP and LR registers to stay align with ARM64 AAPCS */
- emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
- emit(A64_MOV(1, A64_FP, A64_SP), ctx);
-
- /* Save callee-saved registers */
- emit(A64_PUSH(r6, r7, A64_SP), ctx);
- emit(A64_PUSH(r8, r9, A64_SP), ctx);
- emit(A64_PUSH(fp, tcc, A64_SP), ctx);
- emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);
+ if (!is_exception_cb) {
+ /* Sign lr */
+ if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
+ emit(A64_PACIASP, ctx);
+ /* Save FP and LR registers to stay align with ARM64 AAPCS */
+ emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
+ emit(A64_MOV(1, A64_FP, A64_SP), ctx);
+
+ /* Save callee-saved registers */
+ emit(A64_PUSH(r6, r7, A64_SP), ctx);
+ emit(A64_PUSH(r8, r9, A64_SP), ctx);
+ emit(A64_PUSH(fp, tcc, A64_SP), ctx);
+ emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);
+ } else {
+ /*
+ * Exception callback receives FP of Main Program as third
+ * parameter
+ */
+ emit(A64_MOV(1, A64_FP, A64_R(2)), ctx);
+ /*
+ * Main Program already pushed the frame record and the
+ * callee-saved registers. The exception callback will not push
+ * anything and re-use the main program's stack.
+ *
+ * 10 registers are on the stack
+ */
+ emit(A64_SUB_I(1, A64_SP, A64_FP, 80), ctx);
+ }
/* Set up BPF prog stack base register */
emit(A64_MOV(1, fp, A64_SP), ctx);
@@ -365,6 +390,20 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
emit_bti(A64_BTI_J, ctx);
}
+ /*
+ * Program acting as exception boundary should save all ARM64
+ * Callee-saved registers as the exception callback needs to recover
+ * all ARM64 Callee-saved registers in its epilogue.
+ */
+ if (prog->aux->exception_boundary) {
+ /*
+ * As we are pushing two more registers, BPF_FP should be moved
+ * 16 bytes
+ */
+ emit(A64_SUB_I(1, fp, fp, 16), ctx);
+ emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
+ }
+
emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx);
/* Stack must be multiples of 16B */
@@ -653,7 +692,7 @@ static void build_plt(struct jit_ctx *ctx)
plt->target = (u64)&dummy_tramp;
}
-static void build_epilogue(struct jit_ctx *ctx)
+static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb)
{
const u8 r0 = bpf2a64[BPF_REG_0];
const u8 r6 = bpf2a64[BPF_REG_6];
@@ -666,6 +705,15 @@ static void build_epilogue(struct jit_ctx *ctx)
/* We're done with BPF stack */
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
+ /*
+ * Program acting as exception boundary pushes R23 and R24 in addition
+ * to BPF callee-saved registers. Exception callback uses the boundary
+ * program's stack frame, so recover these extra registers in the above
+ * two cases.
+ */
+ if (ctx->prog->aux->exception_boundary || is_exception_cb)
+ emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx);
+
/* Restore x27 and x28 */
emit(A64_POP(fpb, A64_R(28), A64_SP), ctx);
/* Restore fs (x25) and x26 */
@@ -707,7 +755,8 @@ static int add_exception_handler(const struct bpf_insn *insn,
struct jit_ctx *ctx,
int dst_reg)
{
- off_t offset;
+ off_t ins_offset;
+ off_t fixup_offset;
unsigned long pc;
struct exception_table_entry *ex;
@@ -724,12 +773,17 @@ static int add_exception_handler(const struct bpf_insn *insn,
return -EINVAL;
ex = &ctx->prog->aux->extable[ctx->exentry_idx];
- pc = (unsigned long)&ctx->image[ctx->idx - 1];
+ pc = (unsigned long)&ctx->ro_image[ctx->idx - 1];
- offset = pc - (long)&ex->insn;
- if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ /*
+ * This is the relative offset of the instruction that may fault from
+ * the exception table itself. This will be written to the exception
+ * table and if this instruction faults, the destination register will
+ * be set to '0' and the execution will jump to the next instruction.
+ */
+ ins_offset = pc - (long)&ex->insn;
+ if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
return -ERANGE;
- ex->insn = offset;
/*
* Since the extable follows the program, the fixup offset is always
@@ -738,12 +792,25 @@ static int add_exception_handler(const struct bpf_insn *insn,
* bits. We don't need to worry about buildtime or runtime sort
* modifying the upper bits because the table is already sorted, and
* isn't part of the main exception table.
+ *
+ * The fixup_offset is set to the next instruction from the instruction
+ * that may fault. The execution will jump to this after handling the
+ * fault.
*/
- offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
- if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
+ fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
+ if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
return -ERANGE;
- ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
+ /*
+ * The offsets above have been calculated using the RO buffer but we
+ * need to use the R/W buffer for writes.
+ * switch ex to rw buffer for writing.
+ */
+ ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image);
+
+ ex->insn = ins_offset;
+
+ ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
ex->type = EX_TYPE_BPF;
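Both offsets are stored relative to the extable entry itself, so the fault handler can reconstruct absolute addresses without knowing where the pack allocator placed the program. Roughly, as a comment-only illustration of the arithmetic used above (the consuming side lives in the arm64 BPF extable handler, not in this diff):

/*
 * Illustration of how the recorded offsets are meant to be consumed at
 * fault time:
 *
 *   faulting insn = (unsigned long)&ex->insn + ex->insn;
 *   resume pc     = (unsigned long)&ex->fixup
 *                   - FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
 *   dst_reg       = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);  (zeroed)
 *
 * The offsets are computed against the RX addresses (ctx->ro_image),
 * because that is where the program executes, but the entry itself is
 * written through the RW alias (ctx->image), hence the pointer
 * re-basing just above.
 */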
@@ -1511,7 +1578,8 @@ static inline void bpf_flush_icache(void *start, void *end)
struct arm64_jit_data {
struct bpf_binary_header *header;
- u8 *image;
+ u8 *ro_image;
+ struct bpf_binary_header *ro_header;
struct jit_ctx ctx;
};
@@ -1520,12 +1588,14 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
int image_size, prog_size, extable_size, extable_align, extable_offset;
struct bpf_prog *tmp, *orig_prog = prog;
struct bpf_binary_header *header;
+ struct bpf_binary_header *ro_header;
struct arm64_jit_data *jit_data;
bool was_classic = bpf_prog_was_classic(prog);
bool tmp_blinded = false;
bool extra_pass = false;
struct jit_ctx ctx;
u8 *image_ptr;
+ u8 *ro_image_ptr;
if (!prog->jit_requested)
return orig_prog;
@@ -1552,8 +1622,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
}
if (jit_data->ctx.offset) {
ctx = jit_data->ctx;
- image_ptr = jit_data->image;
+ ro_image_ptr = jit_data->ro_image;
+ ro_header = jit_data->ro_header;
header = jit_data->header;
+ image_ptr = (void *)header + ((void *)ro_image_ptr
+ - (void *)ro_header);
extra_pass = true;
prog_size = sizeof(u32) * ctx.idx;
goto skip_init_ctx;
@@ -1575,7 +1648,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
* BPF line info needs ctx->offset[i] to be the offset of
* instruction[i] in jited image, so build prologue first.
*/
- if (build_prologue(&ctx, was_classic)) {
+ if (build_prologue(&ctx, was_classic, prog->aux->exception_cb)) {
prog = orig_prog;
goto out_off;
}
@@ -1586,7 +1659,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
}
ctx.epilogue_offset = ctx.idx;
- build_epilogue(&ctx);
+ build_epilogue(&ctx, prog->aux->exception_cb);
build_plt(&ctx);
extable_align = __alignof__(struct exception_table_entry);
@@ -1598,63 +1671,81 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
/* also allocate space for plt target */
extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
image_size = extable_offset + extable_size;
- header = bpf_jit_binary_alloc(image_size, &image_ptr,
- sizeof(u32), jit_fill_hole);
- if (header == NULL) {
+ ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr,
+ sizeof(u32), &header, &image_ptr,
+ jit_fill_hole);
+ if (!ro_header) {
prog = orig_prog;
goto out_off;
}
/* 2. Now, the actual pass. */
+ /*
+ * Use the image(RW) for writing the JITed instructions. But also save
+ * the ro_image(RX) for calculating the offsets in the image. The RW
+ * image will be later copied to the RX image from where the program
+ * will run. The bpf_jit_binary_pack_finalize() will do this copy in the
+ * final step.
+ */
ctx.image = (__le32 *)image_ptr;
+ ctx.ro_image = (__le32 *)ro_image_ptr;
if (extable_size)
- prog->aux->extable = (void *)image_ptr + extable_offset;
+ prog->aux->extable = (void *)ro_image_ptr + extable_offset;
skip_init_ctx:
ctx.idx = 0;
ctx.exentry_idx = 0;
- build_prologue(&ctx, was_classic);
+ build_prologue(&ctx, was_classic, prog->aux->exception_cb);
if (build_body(&ctx, extra_pass)) {
- bpf_jit_binary_free(header);
prog = orig_prog;
- goto out_off;
+ goto out_free_hdr;
}
- build_epilogue(&ctx);
+ build_epilogue(&ctx, prog->aux->exception_cb);
build_plt(&ctx);
/* 3. Extra pass to validate JITed code. */
if (validate_ctx(&ctx)) {
- bpf_jit_binary_free(header);
prog = orig_prog;
- goto out_off;
+ goto out_free_hdr;
}
/* And we're done. */
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
- bpf_flush_icache(header, ctx.image + ctx.idx);
-
if (!prog->is_func || extra_pass) {
if (extra_pass && ctx.idx != jit_data->ctx.idx) {
pr_err_once("multi-func JIT bug %d != %d\n",
ctx.idx, jit_data->ctx.idx);
- bpf_jit_binary_free(header);
prog->bpf_func = NULL;
prog->jited = 0;
prog->jited_len = 0;
+ goto out_free_hdr;
+ }
+ if (WARN_ON(bpf_jit_binary_pack_finalize(prog, ro_header,
+ header))) {
+ /* ro_header has been freed */
+ ro_header = NULL;
+ prog = orig_prog;
goto out_off;
}
- bpf_jit_binary_lock_ro(header);
+ /*
+ * The instructions have now been copied to the ROX region from
+ * where they will execute. Now the data cache has to be cleaned to
+ * the PoU and the I-cache has to be invalidated for the VAs.
+ */
+ bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx);
} else {
jit_data->ctx = ctx;
- jit_data->image = image_ptr;
+ jit_data->ro_image = ro_image_ptr;
jit_data->header = header;
+ jit_data->ro_header = ro_header;
}
- prog->bpf_func = (void *)ctx.image;
+
+ prog->bpf_func = (void *)ctx.ro_image;
prog->jited = 1;
prog->jited_len = prog_size;
@@ -1675,6 +1766,14 @@ out:
bpf_jit_prog_release_other(prog, prog == orig_prog ?
tmp : orig_prog);
return prog;
+
+out_free_hdr:
+ if (header) {
+ bpf_arch_text_copy(&ro_header->size, &header->size,
+ sizeof(header->size));
+ bpf_jit_binary_pack_free(ro_header, header);
+ }
+ goto out_off;
}
bool bpf_jit_supports_kfunc_call(void)
@@ -1682,6 +1781,13 @@ bool bpf_jit_supports_kfunc_call(void)
return true;
}
+void *bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+ if (!aarch64_insn_copy(dst, src, len))
+ return ERR_PTR(-EINVAL);
+ return dst;
+}
+
u64 bpf_jit_alloc_exec_limit(void)
{
return VMALLOC_END - VMALLOC_START;
@@ -1970,7 +2076,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
/* store return value */
emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
/* reserve a nop for bpf_tramp_image_put */
- im->ip_after_call = ctx->image + ctx->idx;
+ im->ip_after_call = ctx->ro_image + ctx->idx;
emit(A64_NOP, ctx);
}
@@ -1985,7 +2091,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
run_ctx_off, false);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- im->ip_epilogue = ctx->image + ctx->idx;
+ im->ip_epilogue = ctx->ro_image + ctx->idx;
emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
emit_call((const u64)__bpf_tramp_exit, ctx);
}
@@ -2018,9 +2124,6 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
emit(A64_RET(A64_R(10)), ctx);
}
- if (ctx->image)
- bpf_flush_icache(ctx->image, ctx->image + ctx->idx);
-
kfree(branches);
return ctx->idx;
@@ -2063,14 +2166,43 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
return ret < 0 ? ret : ret * AARCH64_INSN_SIZE;
}
-int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
- void *image_end, const struct btf_func_model *m,
+void *arch_alloc_bpf_trampoline(unsigned int size)
+{
+ return bpf_prog_pack_alloc(size, jit_fill_hole);
+}
+
+void arch_free_bpf_trampoline(void *image, unsigned int size)
+{
+ bpf_prog_pack_free(image, size);
+}
+
+void arch_protect_bpf_trampoline(void *image, unsigned int size)
+{
+}
+
+void arch_unprotect_bpf_trampoline(void *image, unsigned int size)
+{
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
+ void *ro_image_end, const struct btf_func_model *m,
u32 flags, struct bpf_tramp_links *tlinks,
void *func_addr)
{
int ret, nregs;
+ void *image, *tmp;
+ u32 size = ro_image_end - ro_image;
+
+ /* image doesn't need to be in module memory range, so we can
+ * use kvmalloc.
+ */
+ image = kvmalloc(size, GFP_KERNEL);
+ if (!image)
+ return -ENOMEM;
+
struct jit_ctx ctx = {
.image = image,
+ .ro_image = ro_image,
.idx = 0,
};
@@ -2079,15 +2211,26 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
if (nregs > 8)
return -ENOTSUPP;
- jit_fill_hole(image, (unsigned int)(image_end - image));
+ jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
- if (ret > 0 && validate_code(&ctx) < 0)
+ if (ret > 0 && validate_code(&ctx) < 0) {
ret = -EINVAL;
+ goto out;
+ }
if (ret > 0)
ret *= AARCH64_INSN_SIZE;
+ tmp = bpf_arch_text_copy(ro_image, image, size);
+ if (IS_ERR(tmp)) {
+ ret = PTR_ERR(tmp);
+ goto out;
+ }
+
+ bpf_flush_icache(ro_image, ro_image + size);
+out:
+ kvfree(image);
return ret;
}
@@ -2305,3 +2448,42 @@ out:
return ret;
}
+
+bool bpf_jit_supports_ptr_xchg(void)
+{
+ return true;
+}
+
+bool bpf_jit_supports_exceptions(void)
+{
+ /* We unwind through both kernel frames starting from within bpf_throw
+ * call and BPF frames. Therefore we require FP unwinder to be enabled
+ * to walk kernel frames and reach BPF frames in the stack trace.
+ * ARM64 kernel is aways compiled with CONFIG_FRAME_POINTER=y
+ */
+ return true;
+}
+
+void bpf_jit_free(struct bpf_prog *prog)
+{
+ if (prog->jited) {
+ struct arm64_jit_data *jit_data = prog->aux->jit_data;
+ struct bpf_binary_header *hdr;
+
+ /*
+ * If we fail the final pass of JIT (from jit_subprogs),
+ * the program may not be finalized yet. Call finalize here
+ * before freeing it.
+ */
+ if (jit_data) {
+ bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size,
+ sizeof(jit_data->header->size));
+ kfree(jit_data);
+ }
+ hdr = bpf_jit_binary_pack_hdr(prog);
+ bpf_jit_binary_pack_free(hdr, NULL);
+ WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
+ }
+
+ bpf_prog_unlock_free(prog);
+}
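Taken together, the arm64 bpf_jit_comp.c changes move the JIT onto the shared prog-pack allocator: instructions are generated into a private RW buffer while all address calculations use the final RX location, and only the finalize step copies the code across. A simplified sketch of that flow, stitched together from the calls above (error paths and extra-pass bookkeeping omitted):

/* 1. allocate a slot in the shared ROX pack plus a private RW shadow */
ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr,
                                      sizeof(u32), &header, &image_ptr,
                                      jit_fill_hole);

ctx.image    = (__le32 *)image_ptr;     /* RW buffer the JIT writes into  */
ctx.ro_image = (__le32 *)ro_image_ptr;  /* RX addresses used for offsets  */

/* 2. emit prologue / body / epilogue / plt into ctx.image as before */

/* 3. copy RW -> RX; internally this ends up in bpf_arch_text_copy(),
 *    i.e. aarch64_insn_copy() via the fixmap poking helper above */
bpf_jit_binary_pack_finalize(prog, ro_header, header);

/* 4. clean the D-cache to PoU and invalidate the I-cache for the RX VAs */
bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx);

prog->bpf_func = (void *)ctx.ro_image;  /* programs run from the RX copy */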
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 634ef17fd38b..fd915ad69c09 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -490,7 +490,7 @@ static int __init add_legacy_isa_io(struct fwnode_handle *fwnode,
}
vaddr = (unsigned long)(PCI_IOBASE + range->io_start);
- ioremap_page_range(vaddr, vaddr + size, hw_start, pgprot_device(PAGE_KERNEL));
+ vmap_page_range(vaddr, vaddr + size, hw_start, pgprot_device(PAGE_KERNEL));
return 0;
}
diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c
index 553142c1f14f..a35dd7311795 100644
--- a/arch/mips/loongson64/init.c
+++ b/arch/mips/loongson64/init.c
@@ -180,7 +180,7 @@ static int __init add_legacy_isa_io(struct fwnode_handle *fwnode, resource_size_
vaddr = PCI_IOBASE + range->io_start;
- ioremap_page_range(vaddr, vaddr + size, hw_start, pgprot_device(PAGE_KERNEL));
+ vmap_page_range(vaddr, vaddr + size, hw_start, pgprot_device(PAGE_KERNEL));
return 0;
}
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index 48e0eaf1ad61..5c064485197a 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -46,8 +46,8 @@ static void remap_isa_base(phys_addr_t pa, unsigned long size)
WARN_ON_ONCE(size & ~PAGE_MASK);
if (slab_is_available()) {
- if (ioremap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa,
- pgprot_noncached(PAGE_KERNEL)))
+ if (vmap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa,
+ pgprot_noncached(PAGE_KERNEL)))
vunmap_range(ISA_IO_BASE, ISA_IO_BASE + size);
} else {
early_ioremap_range(ISA_IO_BASE, pa, size,
diff --git a/arch/riscv/include/asm/cfi.h b/arch/riscv/include/asm/cfi.h
index 8f7a62257044..fb9696d7a3f2 100644
--- a/arch/riscv/include/asm/cfi.h
+++ b/arch/riscv/include/asm/cfi.h
@@ -13,11 +13,28 @@ struct pt_regs;
#ifdef CONFIG_CFI_CLANG
enum bug_trap_type handle_cfi_failure(struct pt_regs *regs);
+#define __bpfcall
+static inline int cfi_get_offset(void)
+{
+ return 4;
+}
+
+#define cfi_get_offset cfi_get_offset
+extern u32 cfi_bpf_hash;
+extern u32 cfi_bpf_subprog_hash;
+extern u32 cfi_get_func_hash(void *func);
#else
static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
{
return BUG_TRAP_TYPE_NONE;
}
+
+#define cfi_bpf_hash 0U
+#define cfi_bpf_subprog_hash 0U
+static inline u32 cfi_get_func_hash(void *func)
+{
+ return 0;
+}
#endif /* CONFIG_CFI_CLANG */
#endif /* _ASM_RISCV_CFI_H */
diff --git a/arch/riscv/kernel/cfi.c b/arch/riscv/kernel/cfi.c
index 6ec9dbd7292e..64bdd3e1ab8c 100644
--- a/arch/riscv/kernel/cfi.c
+++ b/arch/riscv/kernel/cfi.c
@@ -75,3 +75,56 @@ enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
return report_cfi_failure(regs, regs->epc, &target, type);
}
+
+#ifdef CONFIG_CFI_CLANG
+struct bpf_insn;
+
+/* Must match bpf_func_t / DEFINE_BPF_PROG_RUN() */
+extern unsigned int __bpf_prog_runX(const void *ctx,
+ const struct bpf_insn *insn);
+
+/*
+ * Force a reference to the external symbol so the compiler generates
+ * __kcfi_typid.
+ */
+__ADDRESSABLE(__bpf_prog_runX);
+
+/* u32 __ro_after_init cfi_bpf_hash = __kcfi_typeid___bpf_prog_runX; */
+asm (
+" .pushsection .data..ro_after_init,\"aw\",@progbits \n"
+" .type cfi_bpf_hash,@object \n"
+" .globl cfi_bpf_hash \n"
+" .p2align 2, 0x0 \n"
+"cfi_bpf_hash: \n"
+" .word __kcfi_typeid___bpf_prog_runX \n"
+" .size cfi_bpf_hash, 4 \n"
+" .popsection \n"
+);
+
+/* Must match bpf_callback_t */
+extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64);
+
+__ADDRESSABLE(__bpf_callback_fn);
+
+/* u32 __ro_after_init cfi_bpf_subprog_hash = __kcfi_typeid___bpf_callback_fn; */
+asm (
+" .pushsection .data..ro_after_init,\"aw\",@progbits \n"
+" .type cfi_bpf_subprog_hash,@object \n"
+" .globl cfi_bpf_subprog_hash \n"
+" .p2align 2, 0x0 \n"
+"cfi_bpf_subprog_hash: \n"
+" .word __kcfi_typeid___bpf_callback_fn \n"
+" .size cfi_bpf_subprog_hash, 4 \n"
+" .popsection \n"
+);
+
+u32 cfi_get_func_hash(void *func)
+{
+ u32 hash;
+
+ if (get_kernel_nofault(hash, func - cfi_get_offset()))
+ return 0;
+
+ return hash;
+}
+#endif
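The effect of the RISC-V kCFI plumbing is that JITed BPF code carries the same 4-byte type-hash preamble as Clang/kCFI-compiled kernel functions, so an indirect call into a BPF program can be checked like any other. A sketch of how the pieces introduced in this series fit together (all of the calls below appear later in this diff; the layout drawing is only an illustration):

/*
 *   [ 4-byte kCFI hash ][ first real instruction ] ...
 *                        ^ prog->bpf_func points here
 */

/* JIT prologue: place the hash word immediately before the entry point */
emit_kcfi(is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash, ctx);

/* trampoline: reuse the target function's own hash, read from memory */
emit_kcfi(cfi_get_func_hash(func_addr), ctx);

/* JIT core: publish an entry point that skips the hash word */
prog->bpf_func  = (void *)ctx->ro_insns + cfi_get_offset();  /* offset == 4 */
prog->jited_len = prog_size - cfi_get_offset();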
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index a5ce1ab76ece..f4b6b3b9edda 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -18,6 +18,11 @@ static inline bool rvc_enabled(void)
return IS_ENABLED(CONFIG_RISCV_ISA_C);
}
+static inline bool rvzbb_enabled(void)
+{
+ return IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && riscv_has_extension_likely(RISCV_ISA_EXT_ZBB);
+}
+
enum {
RV_REG_ZERO = 0, /* The constant value 0 */
RV_REG_RA = 1, /* Return address */
@@ -730,6 +735,33 @@ static inline u16 rvc_swsp(u32 imm8, u8 rs2)
return rv_css_insn(0x6, imm, rs2, 0x2);
}
+/* RVZBB instrutions. */
+static inline u32 rvzbb_sextb(u8 rd, u8 rs1)
+{
+ return rv_i_insn(0x604, rs1, 1, rd, 0x13);
+}
+
+static inline u32 rvzbb_sexth(u8 rd, u8 rs1)
+{
+ return rv_i_insn(0x605, rs1, 1, rd, 0x13);
+}
+
+static inline u32 rvzbb_zexth(u8 rd, u8 rs)
+{
+ if (IS_ENABLED(CONFIG_64BIT))
+ return rv_i_insn(0x80, rs, 4, rd, 0x3b);
+
+ return rv_i_insn(0x80, rs, 4, rd, 0x33);
+}
+
+static inline u32 rvzbb_rev8(u8 rd, u8 rs)
+{
+ if (IS_ENABLED(CONFIG_64BIT))
+ return rv_i_insn(0x6b8, rs, 5, rd, 0x13);
+
+ return rv_i_insn(0x698, rs, 5, rd, 0x13);
+}
+
/*
* RV64-only instructions.
*
@@ -1087,9 +1119,111 @@ static inline void emit_subw(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
emit(rv_subw(rd, rs1, rs2), ctx);
}
+static inline void emit_sextb(u8 rd, u8 rs, struct rv_jit_context *ctx)
+{
+ if (rvzbb_enabled()) {
+ emit(rvzbb_sextb(rd, rs), ctx);
+ return;
+ }
+
+ emit_slli(rd, rs, 56, ctx);
+ emit_srai(rd, rd, 56, ctx);
+}
+
+static inline void emit_sexth(u8 rd, u8 rs, struct rv_jit_context *ctx)
+{
+ if (rvzbb_enabled()) {
+ emit(rvzbb_sexth(rd, rs), ctx);
+ return;
+ }
+
+ emit_slli(rd, rs, 48, ctx);
+ emit_srai(rd, rd, 48, ctx);
+}
+
+static inline void emit_sextw(u8 rd, u8 rs, struct rv_jit_context *ctx)
+{
+ emit_addiw(rd, rs, 0, ctx);
+}
+
+static inline void emit_zexth(u8 rd, u8 rs, struct rv_jit_context *ctx)
+{
+ if (rvzbb_enabled()) {
+ emit(rvzbb_zexth(rd, rs), ctx);
+ return;
+ }
+
+ emit_slli(rd, rs, 48, ctx);
+ emit_srli(rd, rd, 48, ctx);
+}
+
+static inline void emit_zextw(u8 rd, u8 rs, struct rv_jit_context *ctx)
+{
+ emit_slli(rd, rs, 32, ctx);
+ emit_srli(rd, rd, 32, ctx);
+}
+
+static inline void emit_bswap(u8 rd, s32 imm, struct rv_jit_context *ctx)
+{
+ if (rvzbb_enabled()) {
+ int bits = 64 - imm;
+
+ emit(rvzbb_rev8(rd, rd), ctx);
+ if (bits)
+ emit_srli(rd, rd, bits, ctx);
+ return;
+ }
+
+ emit_li(RV_REG_T2, 0, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+ if (imm == 16)
+ goto out_be;
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+ if (imm == 32)
+ goto out_be;
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+ emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
+ emit_srli(rd, rd, 8, ctx);
+out_be:
+ emit_andi(RV_REG_T1, rd, 0xff, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
+
+ emit_mv(rd, RV_REG_T2, ctx);
+}
+
#endif /* __riscv_xlen == 64 */
-void bpf_jit_build_prologue(struct rv_jit_context *ctx);
+void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog);
void bpf_jit_build_epilogue(struct rv_jit_context *ctx);
int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
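The new emit_sext*/emit_zext*/emit_bswap() helpers above pick a single Zbb instruction when rvzbb_enabled() reports the extension and keep the classic shift or byte-shuffle sequences as the fallback. The pay-off is clearest for emit_bswap(); the summary below is an illustration derived from the code above, not additional code:

/*
 * What emit_bswap(rd, imm, ctx) reduces to with Zbb (bits = 64 - imm):
 *
 *   imm == 64:  rev8 rd, rd
 *   imm == 32:  rev8 rd, rd ; srli rd, rd, 32
 *   imm == 16:  rev8 rd, rd ; srli rd, rd, 48
 *
 * Without Zbb the fallback path above is kept: roughly four instructions
 * (andi/add/slli/srli) per byte, shuffled through RV_REG_T1/RV_REG_T2.
 */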
diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c
index 529a83b85c1c..f5ba73bb153d 100644
--- a/arch/riscv/net/bpf_jit_comp32.c
+++ b/arch/riscv/net/bpf_jit_comp32.c
@@ -1301,7 +1301,7 @@ notsupported:
return 0;
}
-void bpf_jit_build_prologue(struct rv_jit_context *ctx)
+void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog)
{
const s8 *fp = bpf2rv32[BPF_REG_FP];
const s8 *r1 = bpf2rv32[BPF_REG_1];
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 719a97e7edb2..aac190085472 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -11,6 +11,7 @@
#include <linux/memory.h>
#include <linux/stop_machine.h>
#include <asm/patch.h>
+#include <asm/cfi.h>
#include "bpf_jit.h"
#define RV_FENTRY_NINSNS 2
@@ -141,6 +142,19 @@ static bool in_auipc_jalr_range(s64 val)
val < ((1L << 31) - (1L << 11));
}
+/* Modify rd pointer to alternate reg to avoid corrupting original reg */
+static void emit_sextw_alt(u8 *rd, u8 ra, struct rv_jit_context *ctx)
+{
+ emit_sextw(ra, *rd, ctx);
+ *rd = ra;
+}
+
+static void emit_zextw_alt(u8 *rd, u8 ra, struct rv_jit_context *ctx)
+{
+ emit_zextw(ra, *rd, ctx);
+ *rd = ra;
+}
+
/* Emit fixed-length instructions for address */
static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx)
{
@@ -326,12 +340,6 @@ static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx);
}
-static void emit_zext_32(u8 reg, struct rv_jit_context *ctx)
-{
- emit_slli(reg, reg, 32, ctx);
- emit_srli(reg, reg, 32, ctx);
-}
-
static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
{
int tc_ninsn, off, start_insn = ctx->ninsns;
@@ -346,7 +354,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
*/
tc_ninsn = insn ? ctx->offset[insn] - ctx->offset[insn - 1] :
ctx->offset[0];
- emit_zext_32(RV_REG_A2, ctx);
+ emit_zextw(RV_REG_A2, RV_REG_A2, ctx);
off = offsetof(struct bpf_array, map.max_entries);
if (is_12b_check(off, insn))
@@ -405,38 +413,6 @@ static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
*rs = bpf_to_rv_reg(insn->src_reg, ctx);
}
-static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
-{
- emit_mv(RV_REG_T2, *rd, ctx);
- emit_zext_32(RV_REG_T2, ctx);
- emit_mv(RV_REG_T1, *rs, ctx);
- emit_zext_32(RV_REG_T1, ctx);
- *rd = RV_REG_T2;
- *rs = RV_REG_T1;
-}
-
-static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
-{
- emit_addiw(RV_REG_T2, *rd, 0, ctx);
- emit_addiw(RV_REG_T1, *rs, 0, ctx);
- *rd = RV_REG_T2;
- *rs = RV_REG_T1;
-}
-
-static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx)
-{
- emit_mv(RV_REG_T2, *rd, ctx);
- emit_zext_32(RV_REG_T2, ctx);
- emit_zext_32(RV_REG_T1, ctx);
- *rd = RV_REG_T2;
-}
-
-static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
-{
- emit_addiw(RV_REG_T2, *rd, 0, ctx);
- *rd = RV_REG_T2;
-}
-
static int emit_jump_and_link(u8 rd, s64 rvoff, bool fixed_addr,
struct rv_jit_context *ctx)
{
@@ -480,6 +456,12 @@ static int emit_call(u64 addr, bool fixed_addr, struct rv_jit_context *ctx)
return emit_jump_and_link(RV_REG_RA, off, fixed_addr, ctx);
}
+static inline void emit_kcfi(u32 hash, struct rv_jit_context *ctx)
+{
+ if (IS_ENABLED(CONFIG_CFI_CLANG))
+ emit(hash, ctx);
+}
+
static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
struct rv_jit_context *ctx)
{
@@ -519,32 +501,32 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
emit(is64 ? rv_amoadd_d(rs, rs, rd, 0, 0) :
rv_amoadd_w(rs, rs, rd, 0, 0), ctx);
if (!is64)
- emit_zext_32(rs, ctx);
+ emit_zextw(rs, rs, ctx);
break;
case BPF_AND | BPF_FETCH:
emit(is64 ? rv_amoand_d(rs, rs, rd, 0, 0) :
rv_amoand_w(rs, rs, rd, 0, 0), ctx);
if (!is64)
- emit_zext_32(rs, ctx);
+ emit_zextw(rs, rs, ctx);
break;
case BPF_OR | BPF_FETCH:
emit(is64 ? rv_amoor_d(rs, rs, rd, 0, 0) :
rv_amoor_w(rs, rs, rd, 0, 0), ctx);
if (!is64)
- emit_zext_32(rs, ctx);
+ emit_zextw(rs, rs, ctx);
break;
case BPF_XOR | BPF_FETCH:
emit(is64 ? rv_amoxor_d(rs, rs, rd, 0, 0) :
rv_amoxor_w(rs, rs, rd, 0, 0), ctx);
if (!is64)
- emit_zext_32(rs, ctx);
+ emit_zextw(rs, rs, ctx);
break;
/* src_reg = atomic_xchg(dst_reg + off16, src_reg); */
case BPF_XCHG:
emit(is64 ? rv_amoswap_d(rs, rs, rd, 0, 0) :
rv_amoswap_w(rs, rs, rd, 0, 0), ctx);
if (!is64)
- emit_zext_32(rs, ctx);
+ emit_zextw(rs, rs, ctx);
break;
/* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */
case BPF_CMPXCHG:
@@ -894,6 +876,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx);
emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx);
} else {
+ /* emit kcfi hash */
+ emit_kcfi(cfi_get_func_hash(func_addr), ctx);
/* For the trampoline called directly, just handle
* the frame of trampoline.
*/
@@ -1091,7 +1075,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU64 | BPF_MOV | BPF_X:
if (imm == 1) {
/* Special mov32 for zext */
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
}
switch (insn->off) {
@@ -1099,16 +1083,17 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
emit_mv(rd, rs, ctx);
break;
case 8:
+ emit_sextb(rd, rs, ctx);
+ break;
case 16:
- emit_slli(RV_REG_T1, rs, 64 - insn->off, ctx);
- emit_srai(rd, RV_REG_T1, 64 - insn->off, ctx);
+ emit_sexth(rd, rs, ctx);
break;
case 32:
- emit_addiw(rd, rs, 0, ctx);
+ emit_sextw(rd, rs, ctx);
break;
}
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
/* dst = dst OP src */
@@ -1116,7 +1101,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU64 | BPF_ADD | BPF_X:
emit_add(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_SUB | BPF_X:
case BPF_ALU64 | BPF_SUB | BPF_X:
@@ -1126,31 +1111,31 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
emit_subw(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_AND | BPF_X:
case BPF_ALU64 | BPF_AND | BPF_X:
emit_and(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_OR | BPF_X:
case BPF_ALU64 | BPF_OR | BPF_X:
emit_or(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_XOR | BPF_X:
case BPF_ALU64 | BPF_XOR | BPF_X:
emit_xor(rd, rd, rs, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_MUL | BPF_X:
case BPF_ALU64 | BPF_MUL | BPF_X:
emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_DIV | BPF_X:
case BPF_ALU64 | BPF_DIV | BPF_X:
@@ -1159,7 +1144,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
else
emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_MOD | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_X:
@@ -1168,25 +1153,25 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
else
emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_LSH | BPF_X:
case BPF_ALU64 | BPF_LSH | BPF_X:
emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_RSH | BPF_X:
case BPF_ALU64 | BPF_RSH | BPF_X:
emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_ARSH | BPF_X:
case BPF_ALU64 | BPF_ARSH | BPF_X:
emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
/* dst = -dst */
@@ -1194,73 +1179,27 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_ALU64 | BPF_NEG:
emit_sub(rd, RV_REG_ZERO, rd, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
/* dst = BSWAP##imm(dst) */
case BPF_ALU | BPF_END | BPF_FROM_LE:
switch (imm) {
case 16:
- emit_slli(rd, rd, 48, ctx);
- emit_srli(rd, rd, 48, ctx);
+ emit_zexth(rd, rd, ctx);
break;
case 32:
if (!aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case 64:
/* Do nothing */
break;
}
break;
-
case BPF_ALU | BPF_END | BPF_FROM_BE:
case BPF_ALU64 | BPF_END | BPF_FROM_LE:
- emit_li(RV_REG_T2, 0, ctx);
-
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
- emit_srli(rd, rd, 8, ctx);
- if (imm == 16)
- goto out_be;
-
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
- emit_srli(rd, rd, 8, ctx);
-
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
- emit_srli(rd, rd, 8, ctx);
- if (imm == 32)
- goto out_be;
-
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
- emit_srli(rd, rd, 8, ctx);
-
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
- emit_srli(rd, rd, 8, ctx);
-
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
- emit_srli(rd, rd, 8, ctx);
-
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
- emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
- emit_srli(rd, rd, 8, ctx);
-out_be:
- emit_andi(RV_REG_T1, rd, 0xff, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
-
- emit_mv(rd, RV_REG_T2, ctx);
+ emit_bswap(rd, imm, ctx);
break;
/* dst = imm */
@@ -1268,7 +1207,7 @@ out_be:
case BPF_ALU64 | BPF_MOV | BPF_K:
emit_imm(rd, imm, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
/* dst = dst OP imm */
@@ -1281,7 +1220,7 @@ out_be:
emit_add(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_SUB | BPF_K:
case BPF_ALU64 | BPF_SUB | BPF_K:
@@ -1292,7 +1231,7 @@ out_be:
emit_sub(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_AND | BPF_K:
case BPF_ALU64 | BPF_AND | BPF_K:
@@ -1303,7 +1242,7 @@ out_be:
emit_and(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_OR | BPF_K:
case BPF_ALU64 | BPF_OR | BPF_K:
@@ -1314,7 +1253,7 @@ out_be:
emit_or(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_XOR | BPF_K:
case BPF_ALU64 | BPF_XOR | BPF_K:
@@ -1325,7 +1264,7 @@ out_be:
emit_xor(rd, rd, RV_REG_T1, ctx);
}
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_MUL | BPF_K:
case BPF_ALU64 | BPF_MUL | BPF_K:
@@ -1333,7 +1272,7 @@ out_be:
emit(is64 ? rv_mul(rd, rd, RV_REG_T1) :
rv_mulw(rd, rd, RV_REG_T1), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_DIV | BPF_K:
case BPF_ALU64 | BPF_DIV | BPF_K:
@@ -1345,7 +1284,7 @@ out_be:
emit(is64 ? rv_divu(rd, rd, RV_REG_T1) :
rv_divuw(rd, rd, RV_REG_T1), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_MOD | BPF_K:
case BPF_ALU64 | BPF_MOD | BPF_K:
@@ -1357,14 +1296,14 @@ out_be:
emit(is64 ? rv_remu(rd, rd, RV_REG_T1) :
rv_remuw(rd, rd, RV_REG_T1), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU64 | BPF_LSH | BPF_K:
emit_slli(rd, rd, imm, ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_RSH | BPF_K:
case BPF_ALU64 | BPF_RSH | BPF_K:
@@ -1374,7 +1313,7 @@ out_be:
emit(rv_srliw(rd, rd, imm), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
case BPF_ALU | BPF_ARSH | BPF_K:
case BPF_ALU64 | BPF_ARSH | BPF_K:
@@ -1384,7 +1323,7 @@ out_be:
emit(rv_sraiw(rd, rd, imm), ctx);
if (!is64 && !aux->verifier_zext)
- emit_zext_32(rd, ctx);
+ emit_zextw(rd, rd, ctx);
break;
/* JUMP off */
@@ -1425,10 +1364,13 @@ out_be:
rvoff = rv_offset(i, off, ctx);
if (!is64) {
s = ctx->ninsns;
- if (is_signed_bpf_cond(BPF_OP(code)))
- emit_sext_32_rd_rs(&rd, &rs, ctx);
- else
- emit_zext_32_rd_rs(&rd, &rs, ctx);
+ if (is_signed_bpf_cond(BPF_OP(code))) {
+ emit_sextw_alt(&rs, RV_REG_T1, ctx);
+ emit_sextw_alt(&rd, RV_REG_T2, ctx);
+ } else {
+ emit_zextw_alt(&rs, RV_REG_T1, ctx);
+ emit_zextw_alt(&rd, RV_REG_T2, ctx);
+ }
e = ctx->ninsns;
/* Adjust for extra insns */
@@ -1439,8 +1381,7 @@ out_be:
/* Adjust for and */
rvoff -= 4;
emit_and(RV_REG_T1, rd, rs, ctx);
- emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
- ctx);
+ emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
} else {
emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
}
@@ -1469,18 +1410,18 @@ out_be:
case BPF_JMP32 | BPF_JSLE | BPF_K:
rvoff = rv_offset(i, off, ctx);
s = ctx->ninsns;
- if (imm) {
+ if (imm)
emit_imm(RV_REG_T1, imm, ctx);
- rs = RV_REG_T1;
- } else {
- /* If imm is 0, simply use zero register. */
- rs = RV_REG_ZERO;
- }
+ rs = imm ? RV_REG_T1 : RV_REG_ZERO;
if (!is64) {
- if (is_signed_bpf_cond(BPF_OP(code)))
- emit_sext_32_rd(&rd, ctx);
- else
- emit_zext_32_rd_t1(&rd, ctx);
+ if (is_signed_bpf_cond(BPF_OP(code))) {
+ emit_sextw_alt(&rd, RV_REG_T2, ctx);
+ /* rs has been sign extended */
+ } else {
+ emit_zextw_alt(&rd, RV_REG_T2, ctx);
+ if (imm)
+ emit_zextw(rs, rs, ctx);
+ }
}
e = ctx->ninsns;
@@ -1504,7 +1445,7 @@ out_be:
* as t1 is used only in comparison against zero.
*/
if (!is64 && imm < 0)
- emit_addiw(RV_REG_T1, RV_REG_T1, 0, ctx);
+ emit_sextw(RV_REG_T1, RV_REG_T1, ctx);
e = ctx->ninsns;
rvoff -= ninsns_rvoff(e - s);
emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
@@ -1779,7 +1720,7 @@ out_be:
return 0;
}
-void bpf_jit_build_prologue(struct rv_jit_context *ctx)
+void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog)
{
int i, stack_adjust = 0, store_offset, bpf_stack_adjust;
@@ -1808,6 +1749,9 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx)
store_offset = stack_adjust - 8;
+ /* emit kcfi type preamble immediately before the first insn */
+ emit_kcfi(is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash, ctx);
+
/* nops reserved for auipc+jalr pair */
for (i = 0; i < RV_FENTRY_NINSNS; i++)
emit(rv_nop(), ctx);
@@ -1874,3 +1818,8 @@ bool bpf_jit_supports_kfunc_call(void)
{
return true;
}
+
+bool bpf_jit_supports_ptr_xchg(void)
+{
+ return true;
+}
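The emit_sextw_alt()/emit_zextw_alt() helpers introduced near the top of this file consolidate the removed emit_{s,z}ext_32_rd_rs()/..._rd_t1()/..._rd() family: they extend the value directly into a caller-chosen temporary and redirect the register pointer there, so the comparison operates on T1/T2 while the original BPF registers stay untouched. A sketch of the 32-bit conditional-jump pattern as used above:

/* Sketch of the BPF_JMP32 | BPF_X handling shown earlier in this file */
if (!is64) {
        if (is_signed_bpf_cond(BPF_OP(code))) {
                emit_sextw_alt(&rs, RV_REG_T1, ctx);  /* T1 = sext32(rs), rs := T1 */
                emit_sextw_alt(&rd, RV_REG_T2, ctx);  /* T2 = sext32(rd), rd := T2 */
        } else {
                emit_zextw_alt(&rs, RV_REG_T1, ctx);  /* T1 = zext32(rs), rs := T1 */
                emit_zextw_alt(&rd, RV_REG_T2, ctx);  /* T2 = zext32(rd), rd := T2 */
        }
}
emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);  /* compares the redirected regs */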
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 7b70ccb7fec3..6b3acac30c06 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -10,6 +10,7 @@
#include <linux/filter.h>
#include <linux/memory.h>
#include <asm/patch.h>
+#include <asm/cfi.h>
#include "bpf_jit.h"
/* Number of iterations to try until offsets converge. */
@@ -100,7 +101,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
pass++;
ctx->ninsns = 0;
- bpf_jit_build_prologue(ctx);
+ bpf_jit_build_prologue(ctx, bpf_is_subprog(prog));
ctx->prologue_len = ctx->ninsns;
if (build_body(ctx, extra_pass, ctx->offset)) {
@@ -160,7 +161,7 @@ skip_init_ctx:
ctx->ninsns = 0;
ctx->nexentries = 0;
- bpf_jit_build_prologue(ctx);
+ bpf_jit_build_prologue(ctx, bpf_is_subprog(prog));
if (build_body(ctx, extra_pass, NULL)) {
prog = orig_prog;
goto out_free_hdr;
@@ -170,9 +171,9 @@ skip_init_ctx:
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, prog_size, pass, ctx->insns);
- prog->bpf_func = (void *)ctx->ro_insns;
+ prog->bpf_func = (void *)ctx->ro_insns + cfi_get_offset();
prog->jited = 1;
- prog->jited_len = prog_size;
+ prog->jited_len = prog_size - cfi_get_offset();
if (!prog->is_func || extra_pass) {
if (WARN_ON(bpf_jit_binary_pack_finalize(prog, jit_data->ro_header,
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index f3b4716317c1..a7ba8e178645 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -113,6 +113,7 @@ static int bpf_size_to_x86_bytes(int bpf_size)
/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)
#define X86_REG_R9 (MAX_BPF_JIT_REG + 2)
+#define X86_REG_R12 (MAX_BPF_JIT_REG + 3)
/*
* The following table maps BPF registers to x86-64 registers.
@@ -139,6 +140,7 @@ static const int reg2hex[] = {
[BPF_REG_AX] = 2, /* R10 temp register */
[AUX_REG] = 3, /* R11 temp register */
[X86_REG_R9] = 1, /* R9 register, 6th function argument */
+ [X86_REG_R12] = 4, /* R12 callee saved */
};
static const int reg2pt_regs[] = {
@@ -167,6 +169,7 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_8) |
BIT(BPF_REG_9) |
BIT(X86_REG_R9) |
+ BIT(X86_REG_R12) |
BIT(BPF_REG_AX));
}
@@ -205,6 +208,17 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2)
return byte;
}
+static u8 add_3mod(u8 byte, u32 r1, u32 r2, u32 index)
+{
+ if (is_ereg(r1))
+ byte |= 1;
+ if (is_ereg(index))
+ byte |= 2;
+ if (is_ereg(r2))
+ byte |= 4;
+ return byte;
+}
+
/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
static u8 add_1reg(u8 byte, u32 dst_reg)
{
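add_3mod() extends the existing add_2mod() REX helper with the SIB index register needed for the r12-based arena addressing added below: bit 0 (REX.B) covers the base, bit 1 (REX.X) the index, bit 2 (REX.R) the register operand. A worked byte-level illustration (my own decoding of one case, not text from the patch):

/*
 * Example: a BPF_DW load "mov rax, qword ptr [rbx + r12 + 8]" as the new
 * emit_ldx_index() below would encode it:
 *
 *   REX    = add_3mod(0x48, base=rbx, reg=rax, index=r12)
 *          = 0x48 | REX.X (only r12 is an extended register)    -> 0x4a
 *   opcode                                                       -> 0x8b
 *   ModRM  : mod=01 (disp8), reg=rax(000), rm=100 (SIB follows)  -> 0x44
 *   SIB    : scale=0, index=r12(100), base=rbx(011)              -> 0x23
 *   disp8                                                        -> 0x08
 *
 * i.e. the byte sequence 4a 8b 44 23 08.
 */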
@@ -645,6 +659,8 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
pop_r12(&prog);
} else {
pop_callee_regs(&prog, callee_regs_used);
+ if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
+ pop_r12(&prog);
}
EMIT1(0x58); /* pop rax */
@@ -704,6 +720,8 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
pop_r12(&prog);
} else {
pop_callee_regs(&prog, callee_regs_used);
+ if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
+ pop_r12(&prog);
}
EMIT1(0x58); /* pop rax */
@@ -887,6 +905,18 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
*pprog = prog;
}
+static void emit_insn_suffix_SIB(u8 **pprog, u32 ptr_reg, u32 val_reg, u32 index_reg, int off)
+{
+ u8 *prog = *pprog;
+
+ if (is_imm8(off)) {
+ EMIT3(add_2reg(0x44, BPF_REG_0, val_reg), add_2reg(0, ptr_reg, index_reg) /* SIB */, off);
+ } else {
+ EMIT2_off32(add_2reg(0x84, BPF_REG_0, val_reg), add_2reg(0, ptr_reg, index_reg) /* SIB */, off);
+ }
+ *pprog = prog;
+}
+
/*
* Emit a REX byte if it will be necessary to address these registers
*/
@@ -968,6 +998,37 @@ static void emit_ldsx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
*pprog = prog;
}
+static void emit_ldx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+{
+ u8 *prog = *pprog;
+
+ switch (size) {
+ case BPF_B:
+ /* movzx rax, byte ptr [rax + r12 + off] */
+ EMIT3(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x0F, 0xB6);
+ break;
+ case BPF_H:
+ /* movzx rax, word ptr [rax + r12 + off] */
+ EMIT3(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x0F, 0xB7);
+ break;
+ case BPF_W:
+ /* mov eax, dword ptr [rax + r12 + off] */
+ EMIT2(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x8B);
+ break;
+ case BPF_DW:
+ /* mov rax, qword ptr [rax + r12 + off] */
+ EMIT2(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x8B);
+ break;
+ }
+ emit_insn_suffix_SIB(&prog, src_reg, dst_reg, index_reg, off);
+ *pprog = prog;
+}
+
+static void emit_ldx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ emit_ldx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
+}
+
/* STX: *(u8*)(dst_reg + off) = src_reg */
static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
@@ -1002,6 +1063,71 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
*pprog = prog;
}
+/* STX: *(u8*)(dst_reg + index_reg + off) = src_reg */
+static void emit_stx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+{
+ u8 *prog = *pprog;
+
+ switch (size) {
+ case BPF_B:
+ /* mov byte ptr [rax + r12 + off], al */
+ EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x88);
+ break;
+ case BPF_H:
+ /* mov word ptr [rax + r12 + off], ax */
+ EMIT3(0x66, add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89);
+ break;
+ case BPF_W:
+ /* mov dword ptr [rax + r12 + off], eax */
+ EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89);
+ break;
+ case BPF_DW:
+ /* mov qword ptr [rax + r12 + off], rax */
+ EMIT2(add_3mod(0x48, dst_reg, src_reg, index_reg), 0x89);
+ break;
+ }
+ emit_insn_suffix_SIB(&prog, dst_reg, src_reg, index_reg, off);
+ *pprog = prog;
+}
+
+static void emit_stx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ emit_stx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
+}
+
+/* ST: *(u8*)(dst_reg + index_reg + off) = imm32 */
+static void emit_st_index(u8 **pprog, u32 size, u32 dst_reg, u32 index_reg, int off, int imm)
+{
+ u8 *prog = *pprog;
+
+ switch (size) {
+ case BPF_B:
+ /* mov byte ptr [rax + r12 + off], imm8 */
+ EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC6);
+ break;
+ case BPF_H:
+ /* mov word ptr [rax + r12 + off], imm16 */
+ EMIT3(0x66, add_3mod(0x40, dst_reg, 0, index_reg), 0xC7);
+ break;
+ case BPF_W:
+ /* mov dword ptr [rax + r12 + off], imm32 */
+ EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC7);
+ break;
+ case BPF_DW:
+ /* mov qword ptr [rax + r12 + off], imm32 */
+ EMIT2(add_3mod(0x48, dst_reg, 0, index_reg), 0xC7);
+ break;
+ }
+ emit_insn_suffix_SIB(&prog, dst_reg, 0, index_reg, off);
+ EMIT(imm, bpf_size_to_x86_bytes(size));
+ *pprog = prog;
+}
+
+static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm)
+{
+ emit_st_index(pprog, size, dst_reg, X86_REG_R12, off, imm);
+}
+
static int emit_atomic(u8 **pprog, u8 atomic_op,
u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
{
@@ -1043,12 +1169,15 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
return 0;
}
+#define DONT_CLEAR 1
+
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
u32 reg = x->fixup >> 8;
/* jump over faulting load and clear dest register */
- *(unsigned long *)((void *)regs + reg) = 0;
+ if (reg != DONT_CLEAR)
+ *(unsigned long *)((void *)regs + reg) = 0;
regs->ip += x->fixup & 0xff;
return true;
}
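
The DONT_CLEAR sentinel above pairs with the exception-table entries generated for the new PROBE_MEM32 cases further down: a faulting arena load still gets its destination register zeroed, while a faulting store has nothing to clear, so the fixup word carries the sentinel instead of a pt_regs offset. A small stand-alone sketch (names local to the sketch; only the bit layout follows the patch) of how that fixup word is packed and consumed:

#include <stdio.h>

#define DONT_CLEAR 1	/* sentinel instead of a pt_regs offset */

/* low byte: bytes to skip past the faulting access; upper bits: what to clear */
static unsigned int pack_fixup(unsigned int skip_len, unsigned int reg_off)
{
	return skip_len | (reg_off << 8);
}

int main(void)
{
	unsigned int load  = pack_fixup(5, 0x50);	/* e.g. a pt_regs offset   */
	unsigned int store = pack_fixup(4, DONT_CLEAR);	/* store: nothing to clear */

	printf("load:  skip=%u clear=%s\n", load & 0xff,
	       (load >> 8) == DONT_CLEAR ? "none" : "yes");
	printf("store: skip=%u clear=%s\n", store & 0xff,
	       (store >> 8) == DONT_CLEAR ? "none" : "yes");
	return 0;
}
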
@@ -1147,11 +1276,15 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
bool tail_call_seen = false;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
+ u64 arena_vm_start, user_vm_start;
int i, excnt = 0;
int ilen, proglen = 0;
u8 *prog = temp;
int err;
+ arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
+ user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
+
detect_reg_usage(insn, insn_cnt, callee_regs_used,
&tail_call_seen);
@@ -1172,8 +1305,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
push_r12(&prog);
push_callee_regs(&prog, all_callee_regs_used);
} else {
+ if (arena_vm_start)
+ push_r12(&prog);
push_callee_regs(&prog, callee_regs_used);
}
+ if (arena_vm_start)
+ emit_mov_imm64(&prog, X86_REG_R12,
+ arena_vm_start >> 32, (u32) arena_vm_start);
ilen = prog - temp;
if (rw_image)
@@ -1213,6 +1351,40 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
break;
case BPF_ALU64 | BPF_MOV | BPF_X:
+ if (insn->off == BPF_ADDR_SPACE_CAST &&
+ insn->imm == 1U << 16) {
+ if (dst_reg != src_reg)
+ /* 32-bit mov */
+ emit_mov_reg(&prog, false, dst_reg, src_reg);
+ /* shl dst_reg, 32 */
+ maybe_emit_1mod(&prog, dst_reg, true);
+ EMIT3(0xC1, add_1reg(0xE0, dst_reg), 32);
+
+ /* or dst_reg, user_vm_start */
+ maybe_emit_1mod(&prog, dst_reg, true);
+ if (is_axreg(dst_reg))
+ EMIT1_off32(0x0D, user_vm_start >> 32);
+ else
+ EMIT2_off32(0x81, add_1reg(0xC8, dst_reg), user_vm_start >> 32);
+
+ /* rol dst_reg, 32 */
+ maybe_emit_1mod(&prog, dst_reg, true);
+ EMIT3(0xC1, add_1reg(0xC0, dst_reg), 32);
+
+ /* xor r11, r11 */
+ EMIT3(0x4D, 0x31, 0xDB);
+
+ /* test dst_reg32, dst_reg32; check if lower 32 bits are zero */
+ maybe_emit_mod(&prog, dst_reg, dst_reg, false);
+ EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
+
+ /* cmove r11, dst_reg; if so, set dst_reg to zero */
+ /* WARNING: Intel swapped src/dst register encoding in CMOVcc !!! */
+ maybe_emit_mod(&prog, AUX_REG, dst_reg, true);
+ EMIT3(0x0F, 0x44, add_2reg(0xC0, AUX_REG, dst_reg));
+ break;
+ }
+ fallthrough;
case BPF_ALU | BPF_MOV | BPF_X:
if (insn->off == 0)
emit_mov_reg(&prog,
@@ -1564,6 +1736,56 @@ st: if (is_imm8(insn->off))
emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
break;
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
+ start_of_ldx = prog;
+ emit_st_r12(&prog, BPF_SIZE(insn->code), dst_reg, insn->off, insn->imm);
+ goto populate_extable;
+
+ /* LDX: dst_reg = *(u8*)(src_reg + r12 + off) */
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+ start_of_ldx = prog;
+ if (BPF_CLASS(insn->code) == BPF_LDX)
+ emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ else
+ emit_stx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+populate_extable:
+ {
+ struct exception_table_entry *ex;
+ u8 *_insn = image + proglen + (start_of_ldx - temp);
+ s64 delta;
+
+ if (!bpf_prog->aux->extable)
+ break;
+
+ if (excnt >= bpf_prog->aux->num_exentries) {
+ pr_err("mem32 extable bug\n");
+ return -EFAULT;
+ }
+ ex = &bpf_prog->aux->extable[excnt++];
+
+ delta = _insn - (u8 *)&ex->insn;
+ /* switch ex to rw buffer for writes */
+ ex = (void *)rw_image + ((void *)ex - (void *)image);
+
+ ex->insn = delta;
+
+ ex->data = EX_TYPE_BPF;
+
+ ex->fixup = (prog - start_of_ldx) |
+ ((BPF_CLASS(insn->code) == BPF_LDX ? reg2pt_regs[dst_reg] : DONT_CLEAR) << 8);
+ }
+ break;
+
/* LDX: dst_reg = *(u8*)(src_reg + off) */
case BPF_LDX | BPF_MEM | BPF_B:
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
@@ -2036,6 +2258,8 @@ emit_jmp:
pop_r12(&prog);
} else {
pop_callee_regs(&prog, callee_regs_used);
+ if (arena_vm_start)
+ pop_r12(&prog);
}
EMIT1(0xC9); /* leave */
emit_return(&prog, image + addrs[i - 1] + (prog - temp));
@@ -3242,3 +3466,13 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
BUG_ON(ret < 0);
}
}
+
+bool bpf_jit_supports_arena(void)
+{
+ return true;
+}
+
+bool bpf_jit_supports_ptr_xchg(void)
+{
+ return true;
+}
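
In the x86 hunks above, the prologue loads the arena's kernel VM start into R12, so PROBE_MEM32 loads and stores address arena memory as [reg + r12 + off] via SIB encoding, and the addr_space_cast lowering turns a 32-bit arena offset into a full user-space pointer while keeping NULL as NULL. The stand-alone sketch below (not kernel code; the helper name and base address are made up for illustration) mirrors what the emitted shl/or/rol/test/cmove sequence computes:

/*
 * Sketch of the addr_space_cast result: graft the upper 32 bits of the
 * arena's user VM start onto a 32-bit arena offset, except that a zero
 * offset must remain a NULL pointer.
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t cast_user(uint32_t lo, uint64_t user_vm_start)
{
	uint64_t p = (uint64_t)lo << 32;	/* shl dst, 32             */
	p |= user_vm_start >> 32;		/* or  dst, upper32(base)  */
	p = (p << 32) | (p >> 32);		/* rol dst, 32             */
	return lo ? p : 0;			/* test + cmove: keep NULL */
}

int main(void)
{
	uint64_t base = 0x00007f2a40000000ULL;	/* hypothetical user VM start */

	printf("%#llx\n", (unsigned long long)cast_user(0x1234, base));
	printf("%#llx\n", (unsigned long long)cast_user(0, base));
	return 0;
}
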