-rw-r--r--  Documentation/RCU/checklist.rst | 55
-rw-r--r--  Documentation/bpf/index.rst | 13
-rw-r--r--  Documentation/bpf/libbpf/libbpf.rst | 14
-rw-r--r--  Documentation/bpf/libbpf/libbpf_api.rst | 27
-rw-r--r--  Documentation/bpf/libbpf/libbpf_build.rst | 37
-rw-r--r--  Documentation/bpf/libbpf/libbpf_naming_convention.rst (renamed from tools/lib/bpf/README.rst) | 30
-rw-r--r--  Documentation/networking/af_xdp.rst | 32
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 46
-rw-r--r--  drivers/media/rc/bpf-lirc.c | 3
-rw-r--r--  drivers/net/ethernet/amazon/ena/ena_netdev.c | 3
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 2
-rw-r--r--  drivers/net/ethernet/cavium/thunder/nicvf_main.c | 2
-rw-r--r--  drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 8
-rw-r--r--  drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 3
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.c | 2
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_xsk.c | 3
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_txrx.c | 6
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_xsk.c | 3
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_main.c | 2
-rw-r--r--  drivers/net/ethernet/intel/igc/igc_main.c | 7
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 3
-rw-r--r--  drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2
-rw-r--r--  drivers/net/ethernet/marvell/mvneta.c | 2
-rw-r--r--  drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_rx.c | 8
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2
-rw-r--r--  drivers/net/ethernet/qlogic/qede/qede_fp.c | 6
-rw-r--r--  drivers/net/ethernet/sfc/rx.c | 9
-rw-r--r--  drivers/net/ethernet/socionext/netsec.c | 3
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10
-rw-r--r--  drivers/net/ethernet/ti/cpsw_priv.c | 10
-rw-r--r--  include/linux/filter.h | 8
-rw-r--r--  include/linux/rcupdate.h | 14
-rw-r--r--  include/net/xdp_sock.h | 2
-rw-r--r--  kernel/bpf/cpumap.c | 13
-rw-r--r--  kernel/bpf/devmap.c | 49
-rw-r--r--  kernel/bpf/hashtab.c | 21
-rw-r--r--  kernel/bpf/helpers.c | 6
-rw-r--r--  kernel/bpf/lpm_trie.c | 6
-rw-r--r--  kernel/bpf/ringbuf.c | 2
-rw-r--r--  kernel/trace/bpf_trace.c | 2
-rw-r--r--  net/bpfilter/main.c | 2
-rw-r--r--  net/core/filter.c | 72
-rw-r--r--  net/core/xdp.c | 11
-rw-r--r--  net/sched/act_bpf.c | 2
-rw-r--r--  net/sched/cls_bpf.c | 3
-rw-r--r--  net/xdp/xsk.c | 4
-rw-r--r--  net/xdp/xsk.h | 4
-rw-r--r--  net/xdp/xskmap.c | 29
-rw-r--r--  samples/bpf/xdp_redirect_user.c | 4
-rw-r--r--  tools/lib/bpf/libbpf.c | 4
-rw-r--r--  tools/lib/bpf/netlink.c | 115
-rw-r--r--  tools/lib/bpf/nlattr.c | 2
-rw-r--r--  tools/lib/bpf/nlattr.h | 38
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/ringbuf.c | 2
56 files changed, 394 insertions, 380 deletions
diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
index 1030119294d0..01cc21f17f7b 100644
--- a/Documentation/RCU/checklist.rst
+++ b/Documentation/RCU/checklist.rst
@@ -211,27 +211,40 @@ over a rather long period of time, but improvements are always welcome!
of the system, especially to real-time workloads running on
the rest of the system.
-7. As of v4.20, a given kernel implements only one RCU flavor,
- which is RCU-sched for PREEMPTION=n and RCU-preempt for PREEMPTION=y.
- If the updater uses call_rcu() or synchronize_rcu(),
- then the corresponding readers may use rcu_read_lock() and
- rcu_read_unlock(), rcu_read_lock_bh() and rcu_read_unlock_bh(),
- or any pair of primitives that disables and re-enables preemption,
- for example, rcu_read_lock_sched() and rcu_read_unlock_sched().
- If the updater uses synchronize_srcu() or call_srcu(),
- then the corresponding readers must use srcu_read_lock() and
- srcu_read_unlock(), and with the same srcu_struct. The rules for
- the expedited primitives are the same as for their non-expedited
- counterparts. Mixing things up will result in confusion and
- broken kernels, and has even resulted in an exploitable security
- issue.
-
- One exception to this rule: rcu_read_lock() and rcu_read_unlock()
- may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
- in cases where local bottom halves are already known to be
- disabled, for example, in irq or softirq context. Commenting
- such cases is a must, of course! And the jury is still out on
- whether the increased speed is worth it.
+7. As of v4.20, a given kernel implements only one RCU flavor, which
+ is RCU-sched for PREEMPTION=n and RCU-preempt for PREEMPTION=y.
+ If the updater uses call_rcu() or synchronize_rcu(), then
+ the corresponding readers may use: (1) rcu_read_lock() and
+ rcu_read_unlock(), (2) any pair of primitives that disables
+ and re-enables softirq, for example, rcu_read_lock_bh() and
+ rcu_read_unlock_bh(), or (3) any pair of primitives that disables
+ and re-enables preemption, for example, rcu_read_lock_sched() and
+ rcu_read_unlock_sched(). If the updater uses synchronize_srcu()
+ or call_srcu(), then the corresponding readers must use
+ srcu_read_lock() and srcu_read_unlock(), and with the same
+ srcu_struct. The rules for the expedited RCU grace-period-wait
+ primitives are the same as for their non-expedited counterparts.
+
+ If the updater uses call_rcu_tasks() or synchronize_rcu_tasks(),
+ then the readers must refrain from executing voluntary
+ context switches, that is, from blocking. If the updater uses
+ call_rcu_tasks_trace() or synchronize_rcu_tasks_trace(), then
+ the corresponding readers must use rcu_read_lock_trace() and
+ rcu_read_unlock_trace(). If an updater uses call_rcu_tasks_rude()
+ or synchronize_rcu_tasks_rude(), then the corresponding readers
+ must use anything that disables interrupts.
+
+ Mixing things up will result in confusion and broken kernels, and
+ has even resulted in an exploitable security issue. Therefore,
+ when using non-obvious pairs of primitives, commenting is
+ of course a must. One example of non-obvious pairing is
+ the XDP feature in networking, which calls BPF programs from
+ network-driver NAPI (softirq) context. BPF relies heavily on RCU
+ protection for its data structures, but because the BPF program
+ invocation happens entirely within a single local_bh_disable()
+ section in a NAPI poll cycle, this usage is safe. The reason
+ that this usage is safe is that readers can use anything that
+ disables BH when updaters use call_rcu() or synchronize_rcu().
8. Although synchronize_rcu() is slower than is call_rcu(), it
usually results in simpler code. So, unless update performance is
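To make the reworded rule 7 concrete for the XDP case it now mentions, here is a minimal sketch (not part of this patch; the names my_data, my_ptr, update_lock, reader() and publish() are illustrative) of an updater relying on call_rcu() (via kfree_rcu()) paired with a reader whose only protection is a BH-disabled region:

#include <linux/bottom_half.h>
#include <linux/printk.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_data {
	int value;
	struct rcu_head rcu;
};

static struct my_data __rcu *my_ptr;
static DEFINE_SPINLOCK(update_lock);

/* Reader: the BH-disabled region is the RCU read-side critical section. */
static void reader(void)
{
	struct my_data *p;

	local_bh_disable();
	p = rcu_dereference_check(my_ptr, rcu_read_lock_bh_held());
	if (p)
		pr_info("value=%d\n", p->value);
	local_bh_enable();
}

/* Updater: publish a new version, free the old one after a grace period. */
static void publish(struct my_data *new)
{
	struct my_data *old;

	spin_lock(&update_lock);
	old = rcu_dereference_protected(my_ptr, lockdep_is_held(&update_lock));
	rcu_assign_pointer(my_ptr, new);
	spin_unlock(&update_lock);
	if (old)
		kfree_rcu(old, rcu);	/* frees via call_rcu() internally */
}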
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index 93e8cf12a6d4..baea6c2abba5 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -12,6 +12,19 @@ BPF instruction-set.
The Cilium project also maintains a `BPF and XDP Reference Guide`_
that goes into great technical depth about the BPF Architecture.
+libbpf
+======
+
+Libbpf is a userspace library for loading and interacting with bpf programs.
+
+.. toctree::
+ :maxdepth: 1
+
+ libbpf/libbpf
+ libbpf/libbpf_api
+ libbpf/libbpf_build
+ libbpf/libbpf_naming_convention
+
BPF Type Format (BTF)
=====================
diff --git a/Documentation/bpf/libbpf/libbpf.rst b/Documentation/bpf/libbpf/libbpf.rst
new file mode 100644
index 000000000000..1b1e61d5ead1
--- /dev/null
+++ b/Documentation/bpf/libbpf/libbpf.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+libbpf
+======
+
+This is documentation for libbpf, a userspace library for loading and
+interacting with bpf programs.
+
+All general BPF questions, including kernel functionality, libbpf APIs and
+their application, should be sent to bpf@vger.kernel.org mailing list.
+You can `subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the
+mailing list and search its `archive <https://lore.kernel.org/bpf/>`_.
+Please search the archive before asking new questions. It very well might
+be that this was already addressed or answered before.
diff --git a/Documentation/bpf/libbpf/libbpf_api.rst b/Documentation/bpf/libbpf/libbpf_api.rst
new file mode 100644
index 000000000000..f07eecd054da
--- /dev/null
+++ b/Documentation/bpf/libbpf/libbpf_api.rst
@@ -0,0 +1,27 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+API
+===
+
+This documentation is autogenerated from header files in libbpf, tools/lib/bpf
+
+.. kernel-doc:: tools/lib/bpf/libbpf.h
+ :internal:
+
+.. kernel-doc:: tools/lib/bpf/bpf.h
+ :internal:
+
+.. kernel-doc:: tools/lib/bpf/btf.h
+ :internal:
+
+.. kernel-doc:: tools/lib/bpf/xsk.h
+ :internal:
+
+.. kernel-doc:: tools/lib/bpf/bpf_tracing.h
+ :internal:
+
+.. kernel-doc:: tools/lib/bpf/bpf_core_read.h
+ :internal:
+
+.. kernel-doc:: tools/lib/bpf/bpf_endian.h
+ :internal:
\ No newline at end of file
diff --git a/Documentation/bpf/libbpf/libbpf_build.rst b/Documentation/bpf/libbpf/libbpf_build.rst
new file mode 100644
index 000000000000..8e8c23e8093d
--- /dev/null
+++ b/Documentation/bpf/libbpf/libbpf_build.rst
@@ -0,0 +1,37 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+Building libbpf
+===============
+
+libelf and zlib are internal dependencies of libbpf and thus are required to link
+against and must be installed on the system for applications to work.
+pkg-config is used by default to find libelf, and the program called
+can be overridden with PKG_CONFIG.
+
+If using pkg-config at build time is not desired, it can be disabled by
+setting NO_PKG_CONFIG=1 when calling make.
+
+To build both static libbpf.a and shared libbpf.so:
+
+.. code-block:: bash
+
+ $ cd src
+ $ make
+
+To build only static libbpf.a library in directory build/ and install them
+together with libbpf headers in a staging directory root/:
+
+.. code-block:: bash
+
+ $ cd src
+ $ mkdir build root
+ $ BUILD_STATIC_ONLY=y OBJDIR=build DESTDIR=root make install
+
+To build both static libbpf.a and shared libbpf.so against a custom libelf
+dependency installed in /build/root/ and install them together with libbpf
+headers in a build directory /build/root/:
+
+.. code-block:: bash
+
+ $ cd src
+ $ PKG_CONFIG_PATH=/build/root/lib64/pkgconfig DESTDIR=/build/root make
\ No newline at end of file
diff --git a/tools/lib/bpf/README.rst b/Documentation/bpf/libbpf/libbpf_naming_convention.rst
index 8928f7787f2d..3de1d51e41da 100644
--- a/tools/lib/bpf/README.rst
+++ b/Documentation/bpf/libbpf/libbpf_naming_convention.rst
@@ -1,7 +1,7 @@
.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-libbpf API naming convention
-============================
+API naming convention
+=====================
libbpf API provides access to a few logically separated groups of
functions and types. Every group has its own naming convention
@@ -10,14 +10,14 @@ new function or type is added to keep libbpf API clean and consistent.
All types and functions provided by libbpf API should have one of the
following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``,
-``perf_buffer_``.
+``btf_dump_``, ``ring_buffer_``, ``perf_buffer_``.
System call wrappers
--------------------
System call wrappers are simple wrappers for commands supported by
sys_bpf system call. These wrappers should go to ``bpf.h`` header file
-and map one-on-one to corresponding commands.
+and map one to one to corresponding commands.
For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM``
command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc.
@@ -49,10 +49,6 @@ object, ``bpf_object``, double underscore and ``open`` that defines the
purpose of the function to open ELF file and create ``bpf_object`` from
it.
-Another example: ``bpf_program__load`` is named for corresponding
-object, ``bpf_program``, that is separated from other part of the name
-by double underscore.
-
All objects and corresponding functions other than BTF related should go
to ``libbpf.h``. BTF types and functions should go to ``btf.h``.
@@ -72,11 +68,7 @@ of both low-level ring access functions and high-level configuration
functions. These can be mixed and matched. Note that these functions
are not reentrant for performance reasons.
-Please take a look at Documentation/networking/af_xdp.rst in the Linux
-kernel source tree on how to use XDP sockets and for some common
-mistakes in case you do not get any traffic up to user space.
-
-libbpf ABI
+ABI
==========
libbpf can be both linked statically or used as DSO. To avoid possible
@@ -116,7 +108,8 @@ This bump in ABI version is at most once per kernel development cycle.
For example, if current state of ``libbpf.map`` is:
-.. code-block::
+.. code-block:: c
+
LIBBPF_0.0.1 {
global:
bpf_func_a;
@@ -128,7 +121,8 @@ For example, if current state of ``libbpf.map`` is:
, and a new symbol ``bpf_func_c`` is being introduced, then
``libbpf.map`` should be changed like this:
-.. code-block::
+.. code-block:: c
+
LIBBPF_0.0.1 {
global:
bpf_func_a;
@@ -148,7 +142,7 @@ Format of version script and ways to handle ABI changes, including
incompatible ones, described in details in [1].
Stand-alone build
-=================
+-------------------
Under https://github.com/libbpf/libbpf there is a (semi-)automated
mirror of the mainline's version of libbpf for a stand-alone build.
@@ -157,12 +151,12 @@ However, all changes to libbpf's code base must be upstreamed through
the mainline kernel tree.
License
-=======
+-------------------
libbpf is dual-licensed under LGPL 2.1 and BSD 2-Clause.
Links
-=====
+-------------------
[1] https://www.akkadia.org/drepper/dsohowto.pdf
(Chapter 3. Maintaining APIs and ABIs).
diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
index 2ccc5644cc98..42576880aa4a 100644
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -290,19 +290,19 @@ round-robin example of distributing packets is shown below:
#define MAX_SOCKS 16
struct {
- __uint(type, BPF_MAP_TYPE_XSKMAP);
- __uint(max_entries, MAX_SOCKS);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, MAX_SOCKS);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
} xsks_map SEC(".maps");
static unsigned int rr;
SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
{
- rr = (rr + 1) & (MAX_SOCKS - 1);
+ rr = (rr + 1) & (MAX_SOCKS - 1);
- return bpf_redirect_map(&xsks_map, rr, XDP_DROP);
+ return bpf_redirect_map(&xsks_map, rr, XDP_DROP);
}
Note, that since there is only a single set of FILL and COMPLETION
@@ -379,7 +379,7 @@ would look like this for the TX path:
.. code-block:: c
if (xsk_ring_prod__needs_wakeup(&my_tx_ring))
- sendto(xsk_socket__fd(xsk_handle), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ sendto(xsk_socket__fd(xsk_handle), NULL, 0, MSG_DONTWAIT, NULL, 0);
I.e., only use the syscall if the flag is set.
@@ -442,9 +442,9 @@ purposes. The supported statistics are shown below:
.. code-block:: c
struct xdp_statistics {
- __u64 rx_dropped; /* Dropped for reasons other than invalid desc */
- __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
- __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
+ __u64 rx_dropped; /* Dropped for reasons other than invalid desc */
+ __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
+ __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
};
XDP_OPTIONS getsockopt
@@ -483,15 +483,15 @@ like this:
.. code-block:: c
// struct xdp_rxtx_ring {
- // __u32 *producer;
- // __u32 *consumer;
- // struct xdp_desc *desc;
+ // __u32 *producer;
+ // __u32 *consumer;
+ // struct xdp_desc *desc;
// };
// struct xdp_umem_ring {
- // __u32 *producer;
- // __u32 *consumer;
- // __u64 *desc;
+ // __u32 *producer;
+ // __u32 *consumer;
+ // __u64 *desc;
// };
// typedef struct xdp_rxtx_ring RING;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 2a2e290fa5d8..e835164189f1 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -31,7 +31,7 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
}
#define EMIT(bytes, len) \
- do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)
+ do { prog = emit_code(prog, bytes, len); } while (0)
#define EMIT1(b1) EMIT(b1, 1)
#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
@@ -239,7 +239,6 @@ struct jit_context {
static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
{
u8 *prog = *pprog;
- int cnt = 0;
if (callee_regs_used[0])
EMIT1(0x53); /* push rbx */
@@ -255,7 +254,6 @@ static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
{
u8 *prog = *pprog;
- int cnt = 0;
if (callee_regs_used[3])
EMIT2(0x41, 0x5F); /* pop r15 */
@@ -277,13 +275,12 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
bool tail_call_reachable, bool is_subprog)
{
u8 *prog = *pprog;
- int cnt = X86_PATCH_SIZE;
/* BPF trampoline can be made to work without these nops,
* but let's waste 5 bytes for now and optimize later
*/
- memcpy(prog, x86_nops[5], cnt);
- prog += cnt;
+ memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
+ prog += X86_PATCH_SIZE;
if (!ebpf_from_cbpf) {
if (tail_call_reachable && !is_subprog)
EMIT2(0x31, 0xC0); /* xor eax, eax */
@@ -303,7 +300,6 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode)
{
u8 *prog = *pprog;
- int cnt = 0;
s64 offset;
offset = func - (ip + X86_PATCH_SIZE);
@@ -423,7 +419,6 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
int off1 = 42;
int off2 = 31;
int off3 = 9;
- int cnt = 0;
/* count the additional bytes used for popping callee regs from stack
* that need to be taken into account for each of the offsets that
@@ -513,7 +508,6 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
int pop_bytes = 0;
int off1 = 20;
int poke_off;
- int cnt = 0;
/* count the additional bytes used for popping callee regs to stack
* that need to be taken into account for jump offset that is used for
@@ -615,7 +609,6 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
{
u8 *prog = *pprog;
u8 b1, b2, b3;
- int cnt = 0;
/*
* Optimization: if imm32 is positive, use 'mov %eax, imm32'
@@ -655,7 +648,6 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
const u32 imm32_hi, const u32 imm32_lo)
{
u8 *prog = *pprog;
- int cnt = 0;
if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
/*
@@ -678,7 +670,6 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
{
u8 *prog = *pprog;
- int cnt = 0;
if (is64) {
/* mov dst, src */
@@ -697,7 +688,6 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
{
u8 *prog = *pprog;
- int cnt = 0;
if (is_imm8(off)) {
/* 1-byte signed displacement.
@@ -720,7 +710,6 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64)
{
u8 *prog = *pprog;
- int cnt = 0;
if (is64)
EMIT1(add_2mod(0x48, dst_reg, src_reg));
@@ -733,7 +722,6 @@ static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64)
static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
u8 *prog = *pprog;
- int cnt = 0;
switch (size) {
case BPF_B:
@@ -764,7 +752,6 @@ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
u8 *prog = *pprog;
- int cnt = 0;
switch (size) {
case BPF_B:
@@ -799,7 +786,6 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
{
u8 *prog = *pprog;
- int cnt = 0;
EMIT1(0xF0); /* lock prefix */
@@ -869,10 +855,10 @@ static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt,
}
}
-static int emit_nops(u8 **pprog, int len)
+static void emit_nops(u8 **pprog, int len)
{
u8 *prog = *pprog;
- int i, noplen, cnt = 0;
+ int i, noplen;
while (len > 0) {
noplen = len;
@@ -886,8 +872,6 @@ static int emit_nops(u8 **pprog, int len)
}
*pprog = prog;
-
- return cnt;
}
#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
@@ -902,7 +886,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
bool tail_call_seen = false;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
- int i, cnt = 0, excnt = 0;
+ int i, excnt = 0;
int ilen, proglen = 0;
u8 *prog = temp;
int err;
@@ -1297,7 +1281,7 @@ st: if (is_imm8(insn->off))
emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
struct exception_table_entry *ex;
- u8 *_insn = image + proglen;
+ u8 *_insn = image + proglen + (start_of_ldx - temp);
s64 delta;
/* populate jmp_offset for JMP above */
@@ -1576,7 +1560,7 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */
nops);
return -EFAULT;
}
- cnt += emit_nops(&prog, nops);
+ emit_nops(&prog, nops);
}
EMIT2(jmp_cond, jmp_offset);
} else if (is_simm32(jmp_offset)) {
@@ -1622,7 +1606,7 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */
nops);
return -EFAULT;
}
- cnt += emit_nops(&prog, nops);
+ emit_nops(&prog, nops);
}
break;
}
@@ -1647,7 +1631,7 @@ emit_jmp:
nops);
return -EFAULT;
}
- cnt += emit_nops(&prog, INSN_SZ_DIFF - 2);
+ emit_nops(&prog, INSN_SZ_DIFF - 2);
}
EMIT2(0xEB, jmp_offset);
} else if (is_simm32(jmp_offset)) {
@@ -1754,7 +1738,6 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
{
u8 *prog = *pprog;
u8 *jmp_insn;
- int cnt = 0;
/* arg1: mov rdi, progs[i] */
emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
@@ -1822,7 +1805,6 @@ static void emit_align(u8 **pprog, u32 align)
static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
{
u8 *prog = *pprog;
- int cnt = 0;
s64 offset;
offset = func - (ip + 2 + 4);
@@ -1854,7 +1836,7 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
u8 **branches)
{
u8 *prog = *pprog;
- int i, cnt = 0;
+ int i;
/* The first fmod_ret program will receive a garbage return value.
* Set this to 0 to avoid confusing the program.
@@ -1950,7 +1932,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
struct bpf_tramp_progs *tprogs,
void *orig_call)
{
- int ret, i, cnt = 0, nr_args = m->nr_args;
+ int ret, i, nr_args = m->nr_args;
int stack_size = nr_args * 8;
struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY];
struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT];
@@ -2095,8 +2077,6 @@ static int emit_fallback_jump(u8 **pprog)
*/
err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
#else
- int cnt = 0;
-
EMIT2(0xFF, 0xE2); /* jmp rdx */
#endif
*pprog = prog;
@@ -2106,7 +2086,7 @@ static int emit_fallback_jump(u8 **pprog)
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
{
u8 *jg_reloc, *prog = *pprog;
- int pivot, err, jg_bytes = 1, cnt = 0;
+ int pivot, err, jg_bytes = 1;
s64 jg_offset;
if (a == b) {
diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index 3fe3edd80876..afae0afe3f81 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -326,7 +326,8 @@ int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
}
if (attr->query.prog_cnt != 0 && prog_ids && cnt)
- ret = bpf_prog_array_copy_to_user(progs, prog_ids, cnt);
+ ret = bpf_prog_array_copy_to_user(progs, prog_ids,
+ attr->query.prog_cnt);
unlock:
mutex_unlock(&ir_raw_handler_lock);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index edaf37823c50..0e43000614ab 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -384,7 +384,6 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
struct xdp_frame *xdpf;
u64 *xdp_stat;
- rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
if (!xdp_prog)
@@ -441,8 +440,6 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
ena_increase_stat(xdp_stat, 1, &rx_ring->syncp);
out:
- rcu_read_unlock();
-
return verdict;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index ec9564e584e0..bee6e091a997 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -138,9 +138,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
xdp_prepare_buff(&xdp, *data_ptr - offset, offset, *len, false);
orig_data = xdp.data;
- rcu_read_lock();
act = bpf_prog_run_xdp(xdp_prog, &xdp);
- rcu_read_unlock();
tx_avail = bnxt_tx_avail(bp, txr);
/* If the tx ring is not full, we must not update the rx producer yet
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index c33b4e837515..e2b290135fd9 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -555,9 +555,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
xdp_prepare_buff(&xdp, hard_start, data - hard_start, len, false);
orig_data = xdp.data;
- rcu_read_lock();
action = bpf_prog_run_xdp(prog, &xdp);
- rcu_read_unlock();
len = xdp.data_end - xdp.data;
/* Check if XDP program has changed headers */
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 177c020bf34a..e6826561cf11 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2558,13 +2558,9 @@ static u32 dpaa_run_xdp(struct dpaa_priv *priv, struct qm_fd *fd, void *vaddr,
u32 xdp_act;
int err;
- rcu_read_lock();
-
xdp_prog = READ_ONCE(priv->xdp_prog);
- if (!xdp_prog) {
- rcu_read_unlock();
+ if (!xdp_prog)
return XDP_PASS;
- }
xdp_init_buff(&xdp, DPAA_BP_RAW_SIZE - DPAA_TX_PRIV_DATA_SIZE,
&dpaa_fq->xdp_rxq);
@@ -2638,8 +2634,6 @@ static u32 dpaa_run_xdp(struct dpaa_priv *priv, struct qm_fd *fd, void *vaddr,
break;
}
- rcu_read_unlock();
-
return xdp_act;
}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 8433aa730c42..973352393bd4 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -352,8 +352,6 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv,
u32 xdp_act = XDP_PASS;
int err, offset;
- rcu_read_lock();
-
xdp_prog = READ_ONCE(ch->xdp.prog);
if (!xdp_prog)
goto out;
@@ -414,7 +412,6 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv,
ch->xdp.res |= xdp_act;
out:
- rcu_read_unlock();
return xdp_act;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index b883ab809df3..38eb8151ee9a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2298,7 +2298,6 @@ static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
struct bpf_prog *xdp_prog;
u32 act;
- rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
if (!xdp_prog)
@@ -2334,7 +2333,6 @@ out_failure:
break;
}
xdp_out:
- rcu_read_unlock();
return result;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 68f177a86403..e7e778ca074c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -153,7 +153,6 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
struct bpf_prog *xdp_prog;
u32 act;
- rcu_read_lock();
/* NB! xdp_prog will always be !NULL, due to the fact that
* this path is enabled by setting an XDP program.
*/
@@ -164,7 +163,6 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
if (err)
goto out_failure;
- rcu_read_unlock();
return I40E_XDP_REDIR;
}
@@ -188,7 +186,6 @@ out_failure:
result = I40E_XDP_CONSUMED;
break;
}
- rcu_read_unlock();
return result;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index a63d5916ebb0..6ee8e0032d52 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1140,15 +1140,11 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size);
#endif
- rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
- if (!xdp_prog) {
- rcu_read_unlock();
+ if (!xdp_prog)
goto construct_skb;
- }
xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog);
- rcu_read_unlock();
if (!xdp_res)
goto construct_skb;
if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 52acbe325db3..5a9f61deeb38 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -466,7 +466,6 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
struct ice_ring *xdp_ring;
u32 act;
- rcu_read_lock();
/* ZC patch is enabled only when XDP program is set,
* so here it can not be NULL
*/
@@ -478,7 +477,6 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
if (err)
goto out_failure;
- rcu_read_unlock();
return ICE_XDP_REDIR;
}
@@ -503,7 +501,6 @@ out_failure:
break;
}
- rcu_read_unlock();
return result;
}
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 5db303d64d14..7e6435dc7e80 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -8381,7 +8381,6 @@ static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
struct bpf_prog *xdp_prog;
u32 act;
- rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
if (!xdp_prog)
@@ -8416,7 +8415,6 @@ out_failure:
break;
}
xdp_out:
- rcu_read_unlock();
return ERR_PTR(-result);
}
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 3f6b6d4543a8..95323095094d 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2240,18 +2240,15 @@ static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
struct bpf_prog *prog;
int res;
- rcu_read_lock();
-
prog = READ_ONCE(adapter->xdp_prog);
if (!prog) {
res = IGC_XDP_PASS;
- goto unlock;
+ goto out;
}
res = __igc_xdp_run_prog(adapter, prog, xdp);
-unlock:
- rcu_read_unlock();
+out:
return ERR_PTR(-res);
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 2ac5b82676f3..ffff69efd78a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2199,7 +2199,6 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
struct xdp_frame *xdpf;
u32 act;
- rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
if (!xdp_prog)
@@ -2237,7 +2236,6 @@ out_failure:
break;
}
xdp_out:
- rcu_read_unlock();
return ERR_PTR(-result);
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index f72d2978263b..96dd1a4f956a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -100,7 +100,6 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
struct xdp_frame *xdpf;
u32 act;
- rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
act = bpf_prog_run_xdp(xdp_prog, xdp);
@@ -108,7 +107,6 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
if (err)
goto out_failure;
- rcu_read_unlock();
return IXGBE_XDP_REDIR;
}
@@ -134,7 +132,6 @@ out_failure:
result = IXGBE_XDP_CONSUMED;
break;
}
- rcu_read_unlock();
return result;
}
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index dc56931fc1dc..c714e1ecd308 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1054,7 +1054,6 @@ static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
struct bpf_prog *xdp_prog;
u32 act;
- rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
if (!xdp_prog)
@@ -1082,7 +1081,6 @@ out_failure:
break;
}
xdp_out:
- rcu_read_unlock();
return ERR_PTR(-result);
}
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 88a755034c39..361bc4fbe20b 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2369,7 +2369,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
/* Get number of received packets */
rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
- rcu_read_lock();
xdp_prog = READ_ONCE(pp->xdp_prog);
/* Fairness NAPI loop */
@@ -2447,7 +2446,6 @@ next:
xdp_buf.data_hard_start = NULL;
sinfo.nr_frags = 0;
}
- rcu_read_unlock();
if (xdp_buf.data_hard_start)
mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 3135220a8942..3229bafa2a2c 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -3877,8 +3877,6 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
int rx_done = 0;
u32 xdp_ret = 0;
- rcu_read_lock();
-
xdp_prog = READ_ONCE(port->xdp_prog);
/* Get number of received packets and clamp the to-do */
@@ -4024,8 +4022,6 @@ err_drop_frame:
mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
}
- rcu_read_unlock();
-
if (xdp_ret & MVPP2_XDP_REDIR)
xdp_do_flush_map();
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index cea62b8f554c..442991d91c15 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -679,9 +679,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
ring = priv->rx_ring[cq_ring];
- /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
- rcu_read_lock();
- xdp_prog = rcu_dereference(ring->xdp_prog);
+ xdp_prog = rcu_dereference_bh(ring->xdp_prog);
xdp_init_buff(&xdp, priv->frag_info[0].frag_stride, &ring->xdp_rxq);
doorbell_pending = false;
@@ -744,7 +742,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
/* Drop the packet, since HW loopback-ed it */
mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX];
bucket = &priv->mac_hash[mac_hash];
- hlist_for_each_entry_rcu(entry, bucket, hlist) {
+ hlist_for_each_entry_rcu_bh(entry, bucket, hlist) {
if (ether_addr_equal_64bits(entry->mac,
ethh->h_source))
goto next;
@@ -899,8 +897,6 @@ next:
break;
}
- rcu_read_unlock();
-
if (likely(polled)) {
if (doorbell_pending) {
priv->tx_cq[TX_XDP][cq_ring]->xdp_busy = true;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index eeb30680b4dc..5dfa4799c34f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1819,7 +1819,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
struct xdp_buff xdp;
int idx;
- rcu_read_lock();
xdp_prog = READ_ONCE(dp->xdp_prog);
true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM,
@@ -2036,7 +2035,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
if (!nfp_net_xdp_complete(tx_ring))
pkts_polled = budget;
}
- rcu_read_unlock();
return pkts_polled;
}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 8e150dd4f899..065e9004598e 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1089,13 +1089,7 @@ static bool qede_rx_xdp(struct qede_dev *edev,
xdp_prepare_buff(&xdp, page_address(bd->data), *data_offset,
*len, false);
- /* Queues always have a full reset currently, so for the time
- * being until there's atomic program replace just mark read
- * side for map helpers.
- */
- rcu_read_lock();
act = bpf_prog_run_xdp(prog, &xdp);
- rcu_read_unlock();
/* Recalculate, as XDP might have changed the headers */
*data_offset = xdp.data - xdp.data_hard_start;
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 17b8119c48e5..606750938b89 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -260,18 +260,14 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
s16 offset;
int err;
- rcu_read_lock();
- xdp_prog = rcu_dereference(efx->xdp_prog);
- if (!xdp_prog) {
- rcu_read_unlock();
+ xdp_prog = rcu_dereference_bh(efx->xdp_prog);
+ if (!xdp_prog)
return true;
- }
rx_queue = efx_channel_get_rx_queue(channel);
if (unlikely(channel->rx_pkt_n_frags > 1)) {
/* We can't do XDP on fragmented packets - drop. */
- rcu_read_unlock();
efx_free_rx_buffers(rx_queue, rx_buf,
channel->rx_pkt_n_frags);
if (net_ratelimit())
@@ -296,7 +292,6 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
rx_buf->len, false);
xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
- rcu_read_unlock();
offset = (u8 *)xdp.data - *ehp;
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index dfc85cc68173..20d148c019d8 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -958,7 +958,6 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
xdp_init_buff(&xdp, PAGE_SIZE, &dring->xdp_rxq);
- rcu_read_lock();
xdp_prog = READ_ONCE(priv->xdp_prog);
dma_dir = page_pool_get_dma_dir(dring->page_pool);
@@ -1069,8 +1068,6 @@ next:
}
netsec_finalize_xdp_rx(priv, xdp_act, xdp_xmit);
- rcu_read_unlock();
-
return done;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 16820873b01d..219535ab2c0c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4651,7 +4651,6 @@ static int stmmac_xdp_xmit_back(struct stmmac_priv *priv,
return res;
}
-/* This function assumes rcu_read_lock() is held by the caller. */
static int __stmmac_xdp_run_prog(struct stmmac_priv *priv,
struct bpf_prog *prog,
struct xdp_buff *xdp)
@@ -4693,17 +4692,14 @@ static struct sk_buff *stmmac_xdp_run_prog(struct stmmac_priv *priv,
struct bpf_prog *prog;
int res;
- rcu_read_lock();
-
prog = READ_ONCE(priv->xdp_prog);
if (!prog) {
res = STMMAC_XDP_PASS;
- goto unlock;
+ goto out;
}
res = __stmmac_xdp_run_prog(priv, prog, xdp);
-unlock:
- rcu_read_unlock();
+out:
return ERR_PTR(-res);
}
@@ -4973,10 +4969,8 @@ read_again:
buf->xdp->data_end = buf->xdp->data + buf1_len;
xsk_buff_dma_sync_for_cpu(buf->xdp, rx_q->xsk_pool);
- rcu_read_lock();
prog = READ_ONCE(priv->xdp_prog);
res = __stmmac_xdp_run_prog(priv, prog, buf->xdp);
- rcu_read_unlock();
switch (res) {
case STMMAC_XDP_PASS:
diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c
index 5862f0a4a975..ecc2a6b7e28f 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.c
+++ b/drivers/net/ethernet/ti/cpsw_priv.c
@@ -1328,13 +1328,9 @@ int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp,
struct bpf_prog *prog;
u32 act;
- rcu_read_lock();
-
prog = READ_ONCE(priv->xdp_prog);
- if (!prog) {
- ret = CPSW_XDP_PASS;
- goto out;
- }
+ if (!prog)
+ return CPSW_XDP_PASS;
act = bpf_prog_run_xdp(prog, xdp);
/* XDP prog might have changed packet data and boundaries */
@@ -1378,10 +1374,8 @@ int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp,
ndev->stats.rx_bytes += *len;
ndev->stats.rx_packets++;
out:
- rcu_read_unlock();
return ret;
drop:
- rcu_read_unlock();
page_pool_recycle_direct(cpsw->page_pool[ch], page);
return ret;
}
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 688856e0b28a..472f97074da0 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -763,11 +763,9 @@ DECLARE_BPF_DISPATCHER(xdp)
static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
struct xdp_buff *xdp)
{
- /* Caller needs to hold rcu_read_lock() (!), otherwise program
- * can be released while still running, or map elements could be
- * freed early while still having concurrent users. XDP fastpath
- * already takes rcu_read_lock() when fetching the program, so
- * it's not necessary here anymore.
+ /* Driver XDP hooks are invoked within a single NAPI poll cycle and thus
+ * under local_bh_disable(), which provides the needed RCU protection
+ * for accessing map entries.
*/
return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
}
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9455476c5ba2..d7895b81264e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -363,6 +363,20 @@ static inline void rcu_preempt_sleep_check(void) { }
#define rcu_check_sparse(p, space)
#endif /* #else #ifdef __CHECKER__ */
+/**
+ * unrcu_pointer - mark a pointer as not being RCU protected
+ * @p: pointer needing to lose its __rcu property
+ *
+ * Converts @p from an __rcu pointer to a __kernel pointer.
+ * This allows an __rcu pointer to be used with xchg() and friends.
+ */
+#define unrcu_pointer(p) \
+({ \
+ typeof(*p) *_________p1 = (typeof(*p) *__force)(p); \
+ rcu_check_sparse(p, __rcu); \
+ ((typeof(*p) __force __kernel *)(_________p1)); \
+})
+
#define __rcu_access_pointer(p, space) \
({ \
typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \
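A short usage sketch of the new unrcu_pointer() macro, mirroring the devmap/cpumap changes later in this series; the struct item, slot and clear_slot() names are illustrative, not part of the patch:

struct item {
	int val;
	struct rcu_head rcu;
};

static struct item __rcu *slot;

static void clear_slot(void)
{
	struct item *old;

	/* xchg() operates on the raw pointer value; unrcu_pointer() strips the
	 * __rcu address-space marking so the result can be used (and freed)
	 * as an ordinary kernel pointer without sparse warnings. */
	old = unrcu_pointer(xchg(&slot, NULL));
	if (old)
		kfree_rcu(old, rcu);
}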
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 9c0722c6d7ac..fff069d2ed1b 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -37,7 +37,7 @@ struct xdp_umem {
struct xsk_map {
struct bpf_map map;
spinlock_t lock; /* Synchronize map updates */
- struct xdp_sock *xsk_map[];
+ struct xdp_sock __rcu *xsk_map[];
};
struct xdp_sock {
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index a1a0c4e791c6..480e936c54d0 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -74,7 +74,7 @@ struct bpf_cpu_map_entry {
struct bpf_cpu_map {
struct bpf_map map;
/* Below members specific for map type */
- struct bpf_cpu_map_entry **cpu_map;
+ struct bpf_cpu_map_entry __rcu **cpu_map;
};
static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list);
@@ -469,7 +469,7 @@ static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
{
struct bpf_cpu_map_entry *old_rcpu;
- old_rcpu = xchg(&cmap->cpu_map[key_cpu], rcpu);
+ old_rcpu = unrcu_pointer(xchg(&cmap->cpu_map[key_cpu], RCU_INITIALIZER(rcpu)));
if (old_rcpu) {
call_rcu(&old_rcpu->rcu, __cpu_map_entry_free);
INIT_WORK(&old_rcpu->kthread_stop_wq, cpu_map_kthread_stop);
@@ -551,7 +551,7 @@ static void cpu_map_free(struct bpf_map *map)
for (i = 0; i < cmap->map.max_entries; i++) {
struct bpf_cpu_map_entry *rcpu;
- rcpu = READ_ONCE(cmap->cpu_map[i]);
+ rcpu = rcu_dereference_raw(cmap->cpu_map[i]);
if (!rcpu)
continue;
@@ -562,6 +562,10 @@ static void cpu_map_free(struct bpf_map *map)
kfree(cmap);
}
+/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
+ * by local_bh_disable() (from XDP calls inside NAPI). The
+ * rcu_read_lock_bh_held() below makes lockdep accept both.
+ */
static void *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
{
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
@@ -570,7 +574,8 @@ static void *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
if (key >= map->max_entries)
return NULL;
- rcpu = READ_ONCE(cmap->cpu_map[key]);
+ rcpu = rcu_dereference_check(cmap->cpu_map[key],
+ rcu_read_lock_bh_held());
return rcpu;
}
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 2a75e6c2d27d..2f6bd75cd682 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -73,7 +73,7 @@ struct bpf_dtab_netdev {
struct bpf_dtab {
struct bpf_map map;
- struct bpf_dtab_netdev **netdev_map; /* DEVMAP type only */
+ struct bpf_dtab_netdev __rcu **netdev_map; /* DEVMAP type only */
struct list_head list;
/* these are only used for DEVMAP_HASH type maps */
@@ -226,7 +226,7 @@ static void dev_map_free(struct bpf_map *map)
for (i = 0; i < dtab->map.max_entries; i++) {
struct bpf_dtab_netdev *dev;
- dev = dtab->netdev_map[i];
+ dev = rcu_dereference_raw(dtab->netdev_map[i]);
if (!dev)
continue;
@@ -259,6 +259,10 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
return 0;
}
+/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
+ * by local_bh_disable() (from XDP calls inside NAPI). The
+ * rcu_read_lock_bh_held() below makes lockdep accept both.
+ */
static void *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
@@ -410,15 +414,9 @@ out:
trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, cnt - sent, err);
}
-/* __dev_flush is called from xdp_do_flush() which _must_ be signaled
- * from the driver before returning from its napi->poll() routine. The poll()
- * routine is called either from busy_poll context or net_rx_action signaled
- * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
- * net device can be torn down. On devmap tear down we ensure the flush list
- * is empty before completing to ensure all flush operations have completed.
- * When drivers update the bpf program they may need to ensure any flush ops
- * are also complete. Using synchronize_rcu or call_rcu will suffice for this
- * because both wait for napi context to exit.
+/* __dev_flush is called from xdp_do_flush() which _must_ be signalled from the
+ * driver before returning from its napi->poll() routine. See the comment above
+ * xdp_do_flush() in filter.c.
*/
void __dev_flush(void)
{
@@ -433,9 +431,9 @@ void __dev_flush(void)
}
}
-/* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
- * update happens in parallel here a dev_put won't happen until after reading
- * the ifindex.
+/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
+ * by local_bh_disable() (from XDP calls inside NAPI). The
+ * rcu_read_lock_bh_held() below makes lockdep accept both.
*/
static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
{
@@ -445,12 +443,14 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
if (key >= map->max_entries)
return NULL;
- obj = READ_ONCE(dtab->netdev_map[key]);
+ obj = rcu_dereference_check(dtab->netdev_map[key],
+ rcu_read_lock_bh_held());
return obj;
}
-/* Runs under RCU-read-side, plus in softirq under NAPI protection.
- * Thus, safe percpu variable access.
+/* Runs in NAPI, i.e., softirq under local_bh_disable(). Thus, safe percpu
+ * variable access, and map elements stick around. See comment above
+ * xdp_do_flush() in filter.c.
*/
static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
struct net_device *dev_rx, struct bpf_prog *xdp_prog)
@@ -735,14 +735,7 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
if (k >= map->max_entries)
return -EINVAL;
- /* Use call_rcu() here to ensure any rcu critical sections have
- * completed as well as any flush operations because call_rcu
- * will wait for preempt-disable region to complete, NAPI in this
- * context. And additionally, the driver tear down ensures all
- * soft irqs are complete before removing the net device in the
- * case of dev_put equals zero.
- */
- old_dev = xchg(&dtab->netdev_map[k], NULL);
+ old_dev = unrcu_pointer(xchg(&dtab->netdev_map[k], NULL));
if (old_dev)
call_rcu(&old_dev->rcu, __dev_map_entry_free);
return 0;
@@ -851,7 +844,7 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
* Remembering the driver side flush operation will happen before the
* net device is removed.
*/
- old_dev = xchg(&dtab->netdev_map[i], dev);
+ old_dev = unrcu_pointer(xchg(&dtab->netdev_map[i], RCU_INITIALIZER(dev)));
if (old_dev)
call_rcu(&old_dev->rcu, __dev_map_entry_free);
@@ -1031,10 +1024,10 @@ static int dev_map_notification(struct notifier_block *notifier,
for (i = 0; i < dtab->map.max_entries; i++) {
struct bpf_dtab_netdev *dev, *odev;
- dev = READ_ONCE(dtab->netdev_map[i]);
+ dev = rcu_dereference(dtab->netdev_map[i]);
if (!dev || netdev != dev->dev)
continue;
- odev = cmpxchg(&dtab->netdev_map[i], dev, NULL);
+ odev = unrcu_pointer(cmpxchg(&dtab->netdev_map[i], RCU_INITIALIZER(dev), NULL));
if (dev == odev)
call_rcu(&dev->rcu,
__dev_map_entry_free);
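The comments added above __dev_map_lookup_elem() describe two legitimate read-side contexts. A hedged sketch of what such callers look like (the ifindex_from_*() wrappers are illustrative only; the lookup and the RCU/BH primitives are the real kernel APIs):

static int ifindex_from_syscall(struct bpf_map *map, u32 key)
{
	struct bpf_dtab_netdev *dev;
	int ifindex = 0;

	rcu_read_lock();		/* explicit RCU read-side section */
	dev = __dev_map_lookup_elem(map, key);
	if (dev)
		ifindex = dev->dev->ifindex;
	rcu_read_unlock();
	return ifindex;
}

static int ifindex_from_napi(struct bpf_map *map, u32 key)
{
	struct bpf_dtab_netdev *dev;
	int ifindex = 0;

	/* Inside napi->poll() BH is already disabled; the explicit calls here
	 * only make the protection visible in the sketch. */
	local_bh_disable();
	dev = __dev_map_lookup_elem(map, key);
	if (dev)
		ifindex = dev->dev->ifindex;
	local_bh_enable();
	return ifindex;
}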
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 6f6681b07364..72c58cc516a3 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -596,7 +596,8 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
struct htab_elem *l;
u32 hash, key_size;
- WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+ !rcu_read_lock_bh_held());
key_size = map->key_size;
@@ -989,7 +990,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
/* unknown flags */
return -EINVAL;
- WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+ !rcu_read_lock_bh_held());
key_size = map->key_size;
@@ -1082,7 +1084,8 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
/* unknown flags */
return -EINVAL;
- WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+ !rcu_read_lock_bh_held());
key_size = map->key_size;
@@ -1148,7 +1151,8 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
/* unknown flags */
return -EINVAL;
- WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+ !rcu_read_lock_bh_held());
key_size = map->key_size;
@@ -1202,7 +1206,8 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
/* unknown flags */
return -EINVAL;
- WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+ !rcu_read_lock_bh_held());
key_size = map->key_size;
@@ -1276,7 +1281,8 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
u32 hash, key_size;
int ret;
- WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+ !rcu_read_lock_bh_held());
key_size = map->key_size;
@@ -1311,7 +1317,8 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
u32 hash, key_size;
int ret;
- WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+ !rcu_read_lock_bh_held());
key_size = map->key_size;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a2f1f15ce432..62cf00383910 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -29,7 +29,7 @@
*/
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
return (unsigned long) map->ops->map_lookup_elem(map, key);
}
@@ -45,7 +45,7 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto = {
BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
void *, value, u64, flags)
{
- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
return map->ops->map_update_elem(map, key, value, flags);
}
@@ -62,7 +62,7 @@ const struct bpf_func_proto bpf_map_update_elem_proto = {
BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
return map->ops->map_delete_elem(map, key);
}
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 1b7b8a6f34ee..423549d2c52e 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -232,7 +232,8 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
/* Start walking the trie from the root node ... */
- for (node = rcu_dereference(trie->root); node;) {
+ for (node = rcu_dereference_check(trie->root, rcu_read_lock_bh_held());
+ node;) {
unsigned int next_bit;
size_t matchlen;
@@ -264,7 +265,8 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
* traverse down.
*/
next_bit = extract_bit(key->data, node->prefixlen);
- node = rcu_dereference(node->child[next_bit]);
+ node = rcu_dereference_check(node->child[next_bit],
+ rcu_read_lock_bh_held());
}
if (!found)
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 84b3b35fc0d0..9e0c10c6892a 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -8,6 +8,7 @@
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/poll.h>
+#include <linux/kmemleak.h>
#include <uapi/linux/btf.h>
#define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE)
@@ -105,6 +106,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node)
rb = vmap(pages, nr_meta_pages + 2 * nr_data_pages,
VM_ALLOC | VM_USERMAP, PAGE_KERNEL);
if (rb) {
+ kmemleak_not_leak(pages);
rb->pages = pages;
rb->nr_pages = nr_pages;
return rb;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 7a52bc172841..64bd2d84367f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1017,6 +1017,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#ifdef CONFIG_CGROUPS
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
+ case BPF_FUNC_get_current_ancestor_cgroup_id:
+ return &bpf_get_current_ancestor_cgroup_id_proto;
#endif
case BPF_FUNC_send_signal:
return &bpf_send_signal_proto;
diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c
index 05e1cfc1e5cd..291a92546246 100644
--- a/net/bpfilter/main.c
+++ b/net/bpfilter/main.c
@@ -57,7 +57,7 @@ int main(void)
{
debug_f = fopen("/dev/kmsg", "w");
setvbuf(debug_f, 0, _IOLBF, 0);
- fprintf(debug_f, "Started bpfilter\n");
+ fprintf(debug_f, "<5>Started bpfilter\n");
loop();
fclose(debug_f);
return 0;
diff --git a/net/core/filter.c b/net/core/filter.c
index 0b13d8157a8f..d22895caa164 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3235,15 +3235,12 @@ static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
return ret;
}
-static int bpf_skb_proto_4_to_6(struct sk_buff *skb, u64 flags)
+static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
{
const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
u32 off = skb_mac_header_len(skb);
int ret;
- if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
- return -ENOTSUPP;
-
ret = skb_cow(skb, len_diff);
if (unlikely(ret < 0))
return ret;
@@ -3255,21 +3252,11 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb, u64 flags)
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
- /* SKB_GSO_TCPV4 needs to be changed into
- * SKB_GSO_TCPV6.
- */
+ /* SKB_GSO_TCPV4 needs to be changed into SKB_GSO_TCPV6. */
if (shinfo->gso_type & SKB_GSO_TCPV4) {
shinfo->gso_type &= ~SKB_GSO_TCPV4;
shinfo->gso_type |= SKB_GSO_TCPV6;
}
-
- /* Due to IPv6 header, MSS needs to be downgraded. */
- if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
- skb_decrease_gso_size(shinfo, len_diff);
-
- /* Header must be checked, and gso_segs recomputed. */
- shinfo->gso_type |= SKB_GSO_DODGY;
- shinfo->gso_segs = 0;
}
skb->protocol = htons(ETH_P_IPV6);
@@ -3278,15 +3265,12 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb, u64 flags)
return 0;
}
-static int bpf_skb_proto_6_to_4(struct sk_buff *skb, u64 flags)
+static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
{
const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
u32 off = skb_mac_header_len(skb);
int ret;
- if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
- return -ENOTSUPP;
-
ret = skb_unclone(skb, GFP_ATOMIC);
if (unlikely(ret < 0))
return ret;
@@ -3298,21 +3282,11 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb, u64 flags)
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
- /* SKB_GSO_TCPV6 needs to be changed into
- * SKB_GSO_TCPV4.
- */
+ /* SKB_GSO_TCPV6 needs to be changed into SKB_GSO_TCPV4. */
if (shinfo->gso_type & SKB_GSO_TCPV6) {
shinfo->gso_type &= ~SKB_GSO_TCPV6;
shinfo->gso_type |= SKB_GSO_TCPV4;
}
-
- /* Due to IPv4 header, MSS can be upgraded. */
- if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
- skb_increase_gso_size(shinfo, len_diff);
-
- /* Header must be checked, and gso_segs recomputed. */
- shinfo->gso_type |= SKB_GSO_DODGY;
- shinfo->gso_segs = 0;
}
skb->protocol = htons(ETH_P_IP);
@@ -3321,17 +3295,17 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb, u64 flags)
return 0;
}
-static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto, u64 flags)
+static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
{
__be16 from_proto = skb->protocol;
if (from_proto == htons(ETH_P_IP) &&
to_proto == htons(ETH_P_IPV6))
- return bpf_skb_proto_4_to_6(skb, flags);
+ return bpf_skb_proto_4_to_6(skb);
if (from_proto == htons(ETH_P_IPV6) &&
to_proto == htons(ETH_P_IP))
- return bpf_skb_proto_6_to_4(skb, flags);
+ return bpf_skb_proto_6_to_4(skb);
return -ENOTSUPP;
}
@@ -3341,7 +3315,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
{
int ret;
- if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO)))
+ if (unlikely(flags))
return -EINVAL;
/* General idea is that this helper does the basic groundwork
@@ -3361,7 +3335,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
* that. For offloads, we mark packet as dodgy, so that headers
* need to be verified first.
*/
- ret = bpf_skb_proto_xlat(skb, proto, flags);
+ ret = bpf_skb_proto_xlat(skb, proto);
bpf_compute_data_pointers(skb);
return ret;
}
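For context on how the reworked helper is driven from BPF, a minimal tc program might look like the sketch below. This is illustrative only (not part of this patch); after this change the flags argument must be zero, and the program itself is responsible for writing the new IP header once the room has been made.

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("tc")
int nat46_sketch(struct __sk_buff *skb)
{
	/* Make room for (and switch skb->protocol to) an IPv6 header;
	 * flags must be 0 after this patch.
	 */
	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
		return TC_ACT_SHOT;

	/* ... the program must now store a valid IPv6 header, e.g. via
	 * bpf_skb_store_bytes(), before letting the packet continue ...
	 */
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";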
@@ -3923,6 +3897,34 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
.arg2_type = ARG_ANYTHING,
};
+/* XDP_REDIRECT works by a three-step process, implemented in the functions
+ * below:
+ *
+ * 1. The bpf_redirect() and bpf_redirect_map() helpers will look up the target
+ * of the redirect and store it (along with some other metadata) in a per-CPU
+ * struct bpf_redirect_info.
+ *
+ * 2. When the program returns the XDP_REDIRECT return code, the driver will
+ * call xdp_do_redirect() which will use the information in struct
+ * bpf_redirect_info to actually enqueue the frame into a map type-specific
+ * bulk queue structure.
+ *
+ * 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(),
+ * which will flush all the different bulk queues, thus completing the
+ * redirect.
+ *
+ * Pointers to the map entries will be kept around for this whole sequence of
+ * steps, protected by RCU. However, there is no top-level rcu_read_lock() in
+ * the core code; instead, the RCU protection relies on everything happening
+ * inside a single NAPI poll sequence, which means it's between a pair of calls
+ * to local_bh_disable()/local_bh_enable().
+ *
+ * The map entries are marked as __rcu and the map code makes sure to
+ * dereference those pointers with rcu_dereference_check() in a way that works
+ * for both sections that hold an rcu_read_lock() and sections that are
+ * called from NAPI without a separate rcu_read_lock(). The code below does not
+ * use RCU annotations, but relies on those in the map code.
+ */
void xdp_do_flush(void)
{
__dev_flush();
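As an illustration of the three-step sequence described in the comment above, a driver's NAPI poll loop has roughly the following shape. The mydrv_* names are hypothetical; only bpf_prog_run_xdp(), xdp_do_redirect() and xdp_do_flush() are the kernel APIs involved.

static int mydrv_napi_poll(struct napi_struct *napi, int budget)
{
	struct bpf_prog *prog = mydrv_xdp_prog(napi);	/* hypothetical accessor */
	int done = 0;

	while (done < budget) {
		struct xdp_buff xdp;

		if (!mydrv_next_rx_frame(napi, &xdp))	/* hypothetical */
			break;

		/* Step 1 happens inside the program, when it calls
		 * bpf_redirect()/bpf_redirect_map().
		 */
		switch (bpf_prog_run_xdp(prog, &xdp)) {
		case XDP_REDIRECT:
			/* Step 2: enqueue the frame in the map's bulk queue. */
			xdp_do_redirect(napi->dev, &xdp, prog);
			break;
		default:
			mydrv_handle_other_verdict(napi, &xdp);	/* hypothetical */
			break;
		}
		done++;
	}

	/* Step 3: flush all bulk queues before leaving the poll loop.
	 * Everything up to here ran with bottom halves disabled, which is
	 * what protects the map entries per the comment above.
	 */
	xdp_do_flush();

	return done;
}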
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 725d20f1b100..cc92ccb38432 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -113,8 +113,13 @@ static void mem_allocator_disconnect(void *allocator)
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
struct xdp_mem_allocator *xa;
+ int type = xdp_rxq->mem.type;
int id = xdp_rxq->mem.id;
+ /* Reset mem info to defaults */
+ xdp_rxq->mem.id = 0;
+ xdp_rxq->mem.type = 0;
+
if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
WARN(1, "Missing register, driver bug");
return;
@@ -123,7 +128,7 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
if (id == 0)
return;
- if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) {
+ if (type == MEM_TYPE_PAGE_POOL) {
rcu_read_lock();
xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
page_pool_destroy(xa->page_pool);
@@ -144,10 +149,6 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
xdp_rxq->dev = NULL;
-
- /* Reset mem info to defaults */
- xdp_rxq->mem.id = 0;
- xdp_rxq->mem.type = 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index e48e980c3b93..e409a0005717 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -43,7 +43,6 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
tcf_lastuse_update(&prog->tcf_tm);
bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
- rcu_read_lock();
filter = rcu_dereference(prog->filter);
if (at_ingress) {
__skb_push(skb, skb->mac_len);
@@ -56,7 +55,6 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
}
if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK)
skb_orphan(skb);
- rcu_read_unlock();
/* A BPF program may overwrite the default action opcode.
* Similarly as in cls_bpf, if filter_res == -1 we use the
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 6e3e63db0e01..fa739efa59f4 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -85,8 +85,6 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct cls_bpf_prog *prog;
int ret = -1;
- /* Needed here for accessing maps. */
- rcu_read_lock();
list_for_each_entry_rcu(prog, &head->plist, link) {
int filter_res;
@@ -131,7 +129,6 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
break;
}
- rcu_read_unlock();
return ret;
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index cd62d4ba87a9..996da915f520 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -749,7 +749,7 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
}
static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
- struct xdp_sock ***map_entry)
+ struct xdp_sock __rcu ***map_entry)
{
struct xsk_map *map = NULL;
struct xsk_map_node *node;
@@ -785,7 +785,7 @@ static void xsk_delete_from_maps(struct xdp_sock *xs)
* might be updates to the map between
* xsk_get_map_list_entry() and xsk_map_try_sock_delete().
*/
- struct xdp_sock **map_entry = NULL;
+ struct xdp_sock __rcu **map_entry = NULL;
struct xsk_map *map;
while ((map = xsk_get_map_list_entry(xs, &map_entry))) {
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
index edcf249ad1f1..a4bc4749faac 100644
--- a/net/xdp/xsk.h
+++ b/net/xdp/xsk.h
@@ -31,7 +31,7 @@ struct xdp_mmap_offsets_v1 {
struct xsk_map_node {
struct list_head node;
struct xsk_map *map;
- struct xdp_sock **map_entry;
+ struct xdp_sock __rcu **map_entry;
};
static inline struct xdp_sock *xdp_sk(struct sock *sk)
@@ -40,7 +40,7 @@ static inline struct xdp_sock *xdp_sk(struct sock *sk)
}
void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
- struct xdp_sock **map_entry);
+ struct xdp_sock __rcu **map_entry);
void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id);
int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
u16 queue_id);
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 9df75ea4a567..2e48d0e094d9 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -12,7 +12,7 @@
#include "xsk.h"
static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
- struct xdp_sock **map_entry)
+ struct xdp_sock __rcu **map_entry)
{
struct xsk_map_node *node;
@@ -42,7 +42,7 @@ static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
}
static void xsk_map_sock_delete(struct xdp_sock *xs,
- struct xdp_sock **map_entry)
+ struct xdp_sock __rcu **map_entry)
{
struct xsk_map_node *n, *tmp;
@@ -124,6 +124,10 @@ static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
return insn - insn_buf;
}
+/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
+ * by local_bh_disable() (from XDP calls inside NAPI). The
+ * rcu_read_lock_bh_held() below makes lockdep accept both.
+ */
static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
@@ -131,12 +135,11 @@ static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
if (key >= map->max_entries)
return NULL;
- return READ_ONCE(m->xsk_map[key]);
+ return rcu_dereference_check(m->xsk_map[key], rcu_read_lock_bh_held());
}
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
{
- WARN_ON_ONCE(!rcu_read_lock_held());
return __xsk_map_lookup_elem(map, *(u32 *)key);
}
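To show where this lookup runs in practice, a minimal AF_XDP redirect program (illustrative names, not part of this patch) stores sockets keyed by RX queue and lets bpf_redirect_map() resolve the entry from within the driver's NAPI context:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_XSKMAP);
	__uint(max_entries, 64);
	__type(key, __u32);
	__type(value, __u32);
} xsks_map SEC(".maps");

SEC("xdp")
int xsk_redirect_sketch(struct xdp_md *ctx)
{
	/* The lookup behind this call ends up in __xsk_map_lookup_elem(),
	 * protected by the NAPI bottom-half section rather than an explicit
	 * rcu_read_lock().
	 */
	return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, 0);
}

char _license[] SEC("license") = "GPL";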
@@ -149,7 +152,8 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
u64 map_flags)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct xdp_sock *xs, *old_xs, **map_entry;
+ struct xdp_sock __rcu **map_entry;
+ struct xdp_sock *xs, *old_xs;
u32 i = *(u32 *)key, fd = *(u32 *)value;
struct xsk_map_node *node;
struct socket *sock;
@@ -179,7 +183,7 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
}
spin_lock_bh(&m->lock);
- old_xs = READ_ONCE(*map_entry);
+ old_xs = rcu_dereference_protected(*map_entry, lockdep_is_held(&m->lock));
if (old_xs == xs) {
err = 0;
goto out;
@@ -191,7 +195,7 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
goto out;
}
xsk_map_sock_add(xs, node);
- WRITE_ONCE(*map_entry, xs);
+ rcu_assign_pointer(*map_entry, xs);
if (old_xs)
xsk_map_sock_delete(old_xs, map_entry);
spin_unlock_bh(&m->lock);
@@ -208,7 +212,8 @@ out:
static int xsk_map_delete_elem(struct bpf_map *map, void *key)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct xdp_sock *old_xs, **map_entry;
+ struct xdp_sock __rcu **map_entry;
+ struct xdp_sock *old_xs;
int k = *(u32 *)key;
if (k >= map->max_entries)
@@ -216,7 +221,7 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
spin_lock_bh(&m->lock);
map_entry = &m->xsk_map[k];
- old_xs = xchg(map_entry, NULL);
+ old_xs = unrcu_pointer(xchg(map_entry, NULL));
if (old_xs)
xsk_map_sock_delete(old_xs, map_entry);
spin_unlock_bh(&m->lock);
@@ -231,11 +236,11 @@ static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
}
void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
- struct xdp_sock **map_entry)
+ struct xdp_sock __rcu **map_entry)
{
spin_lock_bh(&map->lock);
- if (READ_ONCE(*map_entry) == xs) {
- WRITE_ONCE(*map_entry, NULL);
+ if (rcu_access_pointer(*map_entry) == xs) {
+ rcu_assign_pointer(*map_entry, NULL);
xsk_map_sock_delete(xs, map_entry);
}
spin_unlock_bh(&map->lock);
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index 41d705c3a1f7..93854e135134 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -130,7 +130,7 @@ int main(int argc, char **argv)
if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
xdp_flags |= XDP_FLAGS_DRV_MODE;
- if (optind == argc) {
+ if (optind + 2 != argc) {
printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
return 1;
}
@@ -213,5 +213,5 @@ int main(int argc, char **argv)
poll_stats(2, ifindex_out);
out:
- return 0;
+ return ret;
}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 48c0ade05ab1..1e04ce724240 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -4001,6 +4001,10 @@ bpf_object__probe_loading(struct bpf_object *obj)
ret = bpf_load_program_xattr(&attr, NULL, 0);
if (ret < 0) {
+ attr.prog_type = BPF_PROG_TYPE_TRACEPOINT;
+ ret = bpf_load_program_xattr(&attr, NULL, 0);
+ }
+ if (ret < 0) {
ret = errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index cf9381f03b16..39f25e09b51e 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -154,7 +154,7 @@ done:
return ret;
}
-static int libbpf_netlink_send_recv(struct nlmsghdr *nh,
+static int libbpf_netlink_send_recv(struct libbpf_nla_req *req,
__dump_nlmsg_t parse_msg,
libbpf_dump_nlmsg_t parse_attr,
void *cookie)
@@ -166,15 +166,15 @@ static int libbpf_netlink_send_recv(struct nlmsghdr *nh,
if (sock < 0)
return sock;
- nh->nlmsg_pid = 0;
- nh->nlmsg_seq = time(NULL);
+ req->nh.nlmsg_pid = 0;
+ req->nh.nlmsg_seq = time(NULL);
- if (send(sock, nh, nh->nlmsg_len, 0) < 0) {
+ if (send(sock, req, req->nh.nlmsg_len, 0) < 0) {
ret = -errno;
goto out;
}
- ret = libbpf_netlink_recv(sock, nl_pid, nh->nlmsg_seq,
+ ret = libbpf_netlink_recv(sock, nl_pid, req->nh.nlmsg_seq,
parse_msg, parse_attr, cookie);
out:
libbpf_netlink_close(sock);
@@ -186,11 +186,7 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
{
struct nlattr *nla;
int ret;
- struct {
- struct nlmsghdr nh;
- struct ifinfomsg ifinfo;
- char attrbuf[64];
- } req;
+ struct libbpf_nla_req req;
memset(&req, 0, sizeof(req));
req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
@@ -199,27 +195,26 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
req.ifinfo.ifi_family = AF_UNSPEC;
req.ifinfo.ifi_index = ifindex;
- nla = nlattr_begin_nested(&req.nh, sizeof(req), IFLA_XDP);
+ nla = nlattr_begin_nested(&req, IFLA_XDP);
if (!nla)
return -EMSGSIZE;
- ret = nlattr_add(&req.nh, sizeof(req), IFLA_XDP_FD, &fd, sizeof(fd));
+ ret = nlattr_add(&req, IFLA_XDP_FD, &fd, sizeof(fd));
if (ret < 0)
return ret;
if (flags) {
- ret = nlattr_add(&req.nh, sizeof(req), IFLA_XDP_FLAGS, &flags,
- sizeof(flags));
+ ret = nlattr_add(&req, IFLA_XDP_FLAGS, &flags, sizeof(flags));
if (ret < 0)
return ret;
}
if (flags & XDP_FLAGS_REPLACE) {
- ret = nlattr_add(&req.nh, sizeof(req), IFLA_XDP_EXPECTED_FD,
- &old_fd, sizeof(old_fd));
+ ret = nlattr_add(&req, IFLA_XDP_EXPECTED_FD, &old_fd,
+ sizeof(old_fd));
if (ret < 0)
return ret;
}
- nlattr_end_nested(&req.nh, nla);
+ nlattr_end_nested(&req, nla);
- return libbpf_netlink_send_recv(&req.nh, NULL, NULL, NULL);
+ return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
}
int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
@@ -314,14 +309,11 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
struct xdp_id_md xdp_id = {};
__u32 mask;
int ret;
- struct {
- struct nlmsghdr nh;
- struct ifinfomsg ifm;
- } req = {
- .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
- .nh.nlmsg_type = RTM_GETLINK,
- .nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
- .ifm.ifi_family = AF_PACKET,
+ struct libbpf_nla_req req = {
+ .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+ .nh.nlmsg_type = RTM_GETLINK,
+ .nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .ifinfo.ifi_family = AF_PACKET,
};
if (flags & ~XDP_FLAGS_MASK || !info_size)
@@ -336,7 +328,7 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
xdp_id.ifindex = ifindex;
xdp_id.flags = flags;
- ret = libbpf_netlink_send_recv(&req.nh, __dump_link_nlmsg,
+ ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
get_xdp_info, &xdp_id);
if (!ret) {
size_t sz = min(info_size, sizeof(xdp_id.info));
@@ -376,15 +368,14 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
return libbpf_err(ret);
}
-typedef int (*qdisc_config_t)(struct nlmsghdr *nh, struct tcmsg *t,
- size_t maxsz);
+typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
-static int clsact_config(struct nlmsghdr *nh, struct tcmsg *t, size_t maxsz)
+static int clsact_config(struct libbpf_nla_req *req)
{
- t->tcm_parent = TC_H_CLSACT;
- t->tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
+ req->tc.tcm_parent = TC_H_CLSACT;
+ req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
- return nlattr_add(nh, maxsz, TCA_KIND, "clsact", sizeof("clsact"));
+ return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact"));
}
static int attach_point_to_config(struct bpf_tc_hook *hook,
@@ -431,11 +422,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
{
qdisc_config_t config;
int ret;
- struct {
- struct nlmsghdr nh;
- struct tcmsg tc;
- char buf[256];
- } req;
+ struct libbpf_nla_req req;
ret = attach_point_to_config(hook, &config);
if (ret < 0)
@@ -448,11 +435,11 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
req.tc.tcm_family = AF_UNSPEC;
req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0);
- ret = config(&req.nh, &req.tc, sizeof(req));
+ ret = config(&req);
if (ret < 0)
return ret;
- return libbpf_netlink_send_recv(&req.nh, NULL, NULL, NULL);
+ return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
}
static int tc_qdisc_create_excl(struct bpf_tc_hook *hook)
@@ -537,14 +524,14 @@ static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
struct nlattr *tb[TCA_MAX + 1];
libbpf_nla_parse(tb, TCA_MAX,
- (struct nlattr *)((char *)tc + NLMSG_ALIGN(sizeof(*tc))),
+ (struct nlattr *)((void *)tc + NLMSG_ALIGN(sizeof(*tc))),
NLMSG_PAYLOAD(nh, sizeof(*tc)), NULL);
if (!tb[TCA_KIND])
return NL_CONT;
return __get_tc_info(cookie, tc, tb, nh->nlmsg_flags & NLM_F_ECHO);
}
-static int tc_add_fd_and_name(struct nlmsghdr *nh, size_t maxsz, int fd)
+static int tc_add_fd_and_name(struct libbpf_nla_req *req, int fd)
{
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
@@ -555,7 +542,7 @@ static int tc_add_fd_and_name(struct nlmsghdr *nh, size_t maxsz, int fd)
if (ret < 0)
return ret;
- ret = nlattr_add(nh, maxsz, TCA_BPF_FD, &fd, sizeof(fd));
+ ret = nlattr_add(req, TCA_BPF_FD, &fd, sizeof(fd));
if (ret < 0)
return ret;
len = snprintf(name, sizeof(name), "%s:[%u]", info.name, info.id);
@@ -563,7 +550,7 @@ static int tc_add_fd_and_name(struct nlmsghdr *nh, size_t maxsz, int fd)
return -errno;
if (len >= sizeof(name))
return -ENAMETOOLONG;
- return nlattr_add(nh, maxsz, TCA_BPF_NAME, name, len + 1);
+ return nlattr_add(req, TCA_BPF_NAME, name, len + 1);
}
int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
@@ -571,12 +558,8 @@ int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
__u32 protocol, bpf_flags, handle, priority, parent, prog_id, flags;
int ret, ifindex, attach_point, prog_fd;
struct bpf_cb_ctx info = {};
+ struct libbpf_nla_req req;
struct nlattr *nla;
- struct {
- struct nlmsghdr nh;
- struct tcmsg tc;
- char buf[256];
- } req;
if (!hook || !opts ||
!OPTS_VALID(hook, bpf_tc_hook) ||
@@ -618,25 +601,24 @@ int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
return libbpf_err(ret);
req.tc.tcm_parent = parent;
- ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf"));
+ ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
if (ret < 0)
return libbpf_err(ret);
- nla = nlattr_begin_nested(&req.nh, sizeof(req), TCA_OPTIONS);
+ nla = nlattr_begin_nested(&req, TCA_OPTIONS);
if (!nla)
return libbpf_err(-EMSGSIZE);
- ret = tc_add_fd_and_name(&req.nh, sizeof(req), prog_fd);
+ ret = tc_add_fd_and_name(&req, prog_fd);
if (ret < 0)
return libbpf_err(ret);
bpf_flags = TCA_BPF_FLAG_ACT_DIRECT;
- ret = nlattr_add(&req.nh, sizeof(req), TCA_BPF_FLAGS, &bpf_flags,
- sizeof(bpf_flags));
+ ret = nlattr_add(&req, TCA_BPF_FLAGS, &bpf_flags, sizeof(bpf_flags));
if (ret < 0)
return libbpf_err(ret);
- nlattr_end_nested(&req.nh, nla);
+ nlattr_end_nested(&req, nla);
info.opts = opts;
- ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info);
+ ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info);
if (ret < 0)
return libbpf_err(ret);
if (!info.processed)
@@ -650,11 +632,7 @@ static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
{
__u32 protocol = 0, handle, priority, parent, prog_id, flags;
int ret, ifindex, attach_point, prog_fd;
- struct {
- struct nlmsghdr nh;
- struct tcmsg tc;
- char buf[256];
- } req;
+ struct libbpf_nla_req req;
if (!hook ||
!OPTS_VALID(hook, bpf_tc_hook) ||
@@ -701,13 +679,12 @@ static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
req.tc.tcm_parent = parent;
if (!flush) {
- ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND,
- "bpf", sizeof("bpf"));
+ ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
if (ret < 0)
return ret;
}
- return libbpf_netlink_send_recv(&req.nh, NULL, NULL, NULL);
+ return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
}
int bpf_tc_detach(const struct bpf_tc_hook *hook,
@@ -727,11 +704,7 @@ int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
__u32 protocol, handle, priority, parent, prog_id, flags;
int ret, ifindex, attach_point, prog_fd;
struct bpf_cb_ctx info = {};
- struct {
- struct nlmsghdr nh;
- struct tcmsg tc;
- char buf[256];
- } req;
+ struct libbpf_nla_req req;
if (!hook || !opts ||
!OPTS_VALID(hook, bpf_tc_hook) ||
@@ -770,13 +743,13 @@ int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
return libbpf_err(ret);
req.tc.tcm_parent = parent;
- ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf"));
+ ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
if (ret < 0)
return libbpf_err(ret);
info.opts = opts;
- ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info);
+ ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info);
if (ret < 0)
return libbpf_err(ret);
if (!info.processed)
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index b607fa9852b1..f57e77a6e40f 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -27,7 +27,7 @@ static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
int totlen = NLA_ALIGN(nla->nla_len);
*remaining -= totlen;
- return (struct nlattr *) ((char *) nla + totlen);
+ return (struct nlattr *)((void *)nla + totlen);
}
static int nla_ok(const struct nlattr *nla, int remaining)
diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h
index 3c780ab6d022..4d15ae2ff812 100644
--- a/tools/lib/bpf/nlattr.h
+++ b/tools/lib/bpf/nlattr.h
@@ -13,6 +13,7 @@
#include <string.h>
#include <errno.h>
#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
/* avoid multiple definition of netlink features */
#define __LINUX_NETLINK_H
@@ -52,6 +53,15 @@ struct libbpf_nla_policy {
uint16_t maxlen;
};
+struct libbpf_nla_req {
+ struct nlmsghdr nh;
+ union {
+ struct ifinfomsg ifinfo;
+ struct tcmsg tc;
+ };
+ char buf[128];
+};
+
/**
* @ingroup attr
* Iterate over a stream of attributes
@@ -71,7 +81,7 @@ struct libbpf_nla_policy {
*/
static inline void *libbpf_nla_data(const struct nlattr *nla)
{
- return (char *) nla + NLA_HDRLEN;
+ return (void *)nla + NLA_HDRLEN;
}
static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla)
@@ -108,47 +118,47 @@ int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh);
static inline struct nlattr *nla_data(struct nlattr *nla)
{
- return (struct nlattr *)((char *)nla + NLA_HDRLEN);
+ return (struct nlattr *)((void *)nla + NLA_HDRLEN);
}
-static inline struct nlattr *nh_tail(struct nlmsghdr *nh)
+static inline struct nlattr *req_tail(struct libbpf_nla_req *req)
{
- return (struct nlattr *)((char *)nh + NLMSG_ALIGN(nh->nlmsg_len));
+ return (struct nlattr *)((void *)req + NLMSG_ALIGN(req->nh.nlmsg_len));
}
-static inline int nlattr_add(struct nlmsghdr *nh, size_t maxsz, int type,
+static inline int nlattr_add(struct libbpf_nla_req *req, int type,
const void *data, int len)
{
struct nlattr *nla;
- if (NLMSG_ALIGN(nh->nlmsg_len) + NLA_ALIGN(NLA_HDRLEN + len) > maxsz)
+ if (NLMSG_ALIGN(req->nh.nlmsg_len) + NLA_ALIGN(NLA_HDRLEN + len) > sizeof(*req))
return -EMSGSIZE;
if (!!data != !!len)
return -EINVAL;
- nla = nh_tail(nh);
+ nla = req_tail(req);
nla->nla_type = type;
nla->nla_len = NLA_HDRLEN + len;
if (data)
memcpy(nla_data(nla), data, len);
- nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + NLA_ALIGN(nla->nla_len);
+ req->nh.nlmsg_len = NLMSG_ALIGN(req->nh.nlmsg_len) + NLA_ALIGN(nla->nla_len);
return 0;
}
-static inline struct nlattr *nlattr_begin_nested(struct nlmsghdr *nh,
- size_t maxsz, int type)
+static inline struct nlattr *nlattr_begin_nested(struct libbpf_nla_req *req, int type)
{
struct nlattr *tail;
- tail = nh_tail(nh);
- if (nlattr_add(nh, maxsz, type | NLA_F_NESTED, NULL, 0))
+ tail = req_tail(req);
+ if (nlattr_add(req, type | NLA_F_NESTED, NULL, 0))
return NULL;
return tail;
}
-static inline void nlattr_end_nested(struct nlmsghdr *nh, struct nlattr *tail)
+static inline void nlattr_end_nested(struct libbpf_nla_req *req,
+ struct nlattr *tail)
{
- tail->nla_len = (char *)nh_tail(nh) - (char *)tail;
+ tail->nla_len = (void *)req_tail(req) - (void *)tail;
}
#endif /* __LIBBPF_NLATTR_H */
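A usage sketch of the reworked helpers (function name and message type are illustrative; the pattern mirrors __bpf_set_link_xdp_fd_replace() above, and assumes the same includes as netlink.c): a request is composed by opening a nested attribute, appending payload attributes, and closing the nest so the tail length covers everything added in between.

static int example_build_xdp_req(struct libbpf_nla_req *req, int ifindex, int fd)
{
	struct nlattr *nla;
	int ret;

	memset(req, 0, sizeof(*req));
	req->nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
	req->nh.nlmsg_type = RTM_SETLINK;
	req->nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	req->ifinfo.ifi_family = AF_UNSPEC;
	req->ifinfo.ifi_index = ifindex;

	/* Open the nested IFLA_XDP attribute, remembering its header. */
	nla = nlattr_begin_nested(req, IFLA_XDP);
	if (!nla)
		return -EMSGSIZE;

	/* Payload attributes land inside the nest. */
	ret = nlattr_add(req, IFLA_XDP_FD, &fd, sizeof(fd));
	if (ret < 0)
		return ret;

	/* Fix up nla->nla_len to cover everything appended since begin. */
	nlattr_end_nested(req, nla);

	return 0;
}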
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index a01788090c31..4706cee84360 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -100,7 +100,7 @@ void test_ringbuf(void)
if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
goto cleanup;
- rb_fd = bpf_map__fd(skel->maps.ringbuf);
+ rb_fd = skel->maps.ringbuf.map_fd;
/* good read/write cons_pos */
mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos");