summaryrefslogtreecommitdiff
path: root/kernel/bpf/ringbuf.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-06-27 10:05:35 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-06-27 10:05:35 -0700
commitfd19d4a492af77b1e8fb0439781a3048d1d1f554 (patch)
treec293c1a1218fe87c4b6712938352dbc349d0b68e /kernel/bpf/ringbuf.c
parent3c1d29e53d34537063e60f5eafe0482780a1735a (diff)
parentb62cb6a7e83622783100182d9b70e9c70393cfbe (diff)
Merge tag 'net-6.10-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni: "Including fixes from can, bpf and netfilter. There are a bunch of regressions addressed here, but hopefully nothing spectacular. We are still waiting the driver fix from Intel, mentioned by Jakub in the previous networking pull. Current release - regressions: - core: add softirq safety to netdev_rename_lock - tcp: fix tcp_rcv_fastopen_synack() to enter TCP_CA_Loss for failed TFO - batman-adv: fix RCU race at module unload time Previous releases - regressions: - openvswitch: get related ct labels from its master if it is not confirmed - eth: bonding: fix incorrect software timestamping report - eth: mlxsw: fix memory corruptions on spectrum-4 systems - eth: ionic: use dev_consume_skb_any outside of napi Previous releases - always broken: - netfilter: fully validate NFT_DATA_VALUE on store to data registers - unix: several fixes for OoB data - tcp: fix race for duplicate reqsk on identical SYN - bpf: - fix may_goto with negative offset - fix the corner case with may_goto and jump to the 1st insn - fix overrunning reservations in ringbuf - can: - j1939: recover socket queue on CAN bus error during BAM transmission - mcp251xfd: fix infinite loop when xmit fails - dsa: microchip: monitor potential faults in half-duplex mode - eth: vxlan: pull inner IP header in vxlan_xmit_one() - eth: ionic: fix kernel panic due to multi-buffer handling Misc: - selftest: unix tests refactor and a lot of new cases added" * tag 'net-6.10-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (61 commits) net: mana: Fix possible double free in error handling path selftest: af_unix: Check SIOCATMARK after every send()/recv() in msg_oob.c. af_unix: Fix wrong ioctl(SIOCATMARK) when consumed OOB skb is at the head. selftest: af_unix: Check EPOLLPRI after every send()/recv() in msg_oob.c selftest: af_unix: Check SIGURG after every send() in msg_oob.c selftest: af_unix: Add SO_OOBINLINE test cases in msg_oob.c af_unix: Don't stop recv() at consumed ex-OOB skb. selftest: af_unix: Add non-TCP-compliant test cases in msg_oob.c. af_unix: Don't stop recv(MSG_DONTWAIT) if consumed OOB skb is at the head. af_unix: Stop recv(MSG_PEEK) at consumed OOB skb. selftest: af_unix: Add msg_oob.c. selftest: af_unix: Remove test_unix_oob.c. tracing/net_sched: NULL pointer dereference in perf_trace_qdisc_reset() netfilter: nf_tables: fully validate NFT_DATA_VALUE on store to data registers net: usb: qmi_wwan: add Telit FN912 compositions tcp: fix tcp_rcv_fastopen_synack() to enter TCP_CA_Loss for failed TFO ionic: use dev_consume_skb_any outside of napi net: dsa: microchip: fix wrong register write when masking interrupt Fix race for duplicate reqsk on identical SYN ibmvnic: Add tx check to prevent skb leak ...
Diffstat (limited to 'kernel/bpf/ringbuf.c')
-rw-r--r--kernel/bpf/ringbuf.c31
1 files changed, 25 insertions, 6 deletions
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 0ee653a936ea..e20b90c36131 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -51,7 +51,8 @@ struct bpf_ringbuf {
* This prevents a user-space application from modifying the
* position and ruining in-kernel tracking. The permissions of the
* pages depend on who is producing samples: user-space or the
- * kernel.
+ * kernel. Note that the pending counter is placed in the same
+ * page as the producer, so that it shares the same cache line.
*
* Kernel-producer
* ---------------
@@ -70,6 +71,7 @@ struct bpf_ringbuf {
*/
unsigned long consumer_pos __aligned(PAGE_SIZE);
unsigned long producer_pos __aligned(PAGE_SIZE);
+ unsigned long pending_pos;
char data[] __aligned(PAGE_SIZE);
};
@@ -179,6 +181,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
rb->mask = data_sz - 1;
rb->consumer_pos = 0;
rb->producer_pos = 0;
+ rb->pending_pos = 0;
return rb;
}
@@ -404,9 +407,9 @@ bpf_ringbuf_restore_from_rec(struct bpf_ringbuf_hdr *hdr)
static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
{
- unsigned long cons_pos, prod_pos, new_prod_pos, flags;
- u32 len, pg_off;
+ unsigned long cons_pos, prod_pos, new_prod_pos, pend_pos, flags;
struct bpf_ringbuf_hdr *hdr;
+ u32 len, pg_off, tmp_size, hdr_len;
if (unlikely(size > RINGBUF_MAX_RECORD_SZ))
return NULL;
@@ -424,13 +427,29 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
spin_lock_irqsave(&rb->spinlock, flags);
}
+ pend_pos = rb->pending_pos;
prod_pos = rb->producer_pos;
new_prod_pos = prod_pos + len;
- /* check for out of ringbuf space by ensuring producer position
- * doesn't advance more than (ringbuf_size - 1) ahead
+ while (pend_pos < prod_pos) {
+ hdr = (void *)rb->data + (pend_pos & rb->mask);
+ hdr_len = READ_ONCE(hdr->len);
+ if (hdr_len & BPF_RINGBUF_BUSY_BIT)
+ break;
+ tmp_size = hdr_len & ~BPF_RINGBUF_DISCARD_BIT;
+ tmp_size = round_up(tmp_size + BPF_RINGBUF_HDR_SZ, 8);
+ pend_pos += tmp_size;
+ }
+ rb->pending_pos = pend_pos;
+
+ /* check for out of ringbuf space:
+ * - by ensuring producer position doesn't advance more than
+ * (ringbuf_size - 1) ahead
+ * - by ensuring oldest not yet committed record until newest
+ * record does not span more than (ringbuf_size - 1)
*/
- if (new_prod_pos - cons_pos > rb->mask) {
+ if (new_prod_pos - cons_pos > rb->mask ||
+ new_prod_pos - pend_pos > rb->mask) {
spin_unlock_irqrestore(&rb->spinlock, flags);
return NULL;
}