author     Jakub Kicinski <kuba@kernel.org>  2023-08-03 15:34:36 -0700
committer  Jakub Kicinski <kuba@kernel.org>  2023-08-03 15:34:36 -0700
commit     d07b7b32da6f678d42d96a8b9824cf0a181ce140 (patch)
tree       606829d4b33a57dbe0f0e825ca8505e0b5fcb759 /include
parent     35b1b1fd96388d5e3cf179bf36bd8a4153baf4a3 (diff)
parent     648880e9331c68b2008430fd90f3648d1795399d (diff)
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Martin KaFai Lau says:

====================
pull-request: bpf-next 2023-08-03

We've added 54 non-merge commits during the last 10 day(s) which contain
a total of 84 files changed, 4026 insertions(+), 562 deletions(-).

The main changes are:

1) Add SO_REUSEPORT support for TC bpf_sk_assign,
   from Lorenz Bauer, Daniel Borkmann.

2) Support new insns from cpu v4,
   from Yonghong Song.

3) Non-atomically allocate freelist during prefill,
   from YiFei Zhu.

4) Support defragmenting IPv(4|6) packets in BPF,
   from Daniel Xu.

5) Add tracepoint to xdp attaching failure,
   from Leon Hwang.

6) struct netdev_rx_queue and xdp.h reshuffling to reduce
   rebuild time, from Jakub Kicinski.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (54 commits)
  net: invert the netdevice.h vs xdp.h dependency
  net: move struct netdev_rx_queue out of netdevice.h
  eth: add missing xdp.h includes in drivers
  selftests/bpf: Add testcase for xdp attaching failure tracepoint
  bpf, xdp: Add tracepoint to xdp attaching failure
  selftests/bpf: fix static assert compilation issue for test_cls_*.c
  bpf: fix bpf_probe_read_kernel prototype mismatch
  riscv, bpf: Adapt bpf trampoline to optimized riscv ftrace framework
  libbpf: fix typos in Makefile
  tracing: bpf: use struct trace_entry in struct syscall_tp_t
  bpf, devmap: Remove unused dtab field from bpf_dtab_netdev
  bpf, cpumap: Remove unused cmap field from bpf_cpu_map_entry
  netfilter: bpf: Only define get_proto_defrag_hook() if necessary
  bpf: Fix an array-index-out-of-bounds issue in disasm.c
  net: remove duplicate INDIRECT_CALLABLE_DECLARE of udp[6]_ehashfn
  docs/bpf: Fix malformed documentation
  bpf: selftests: Add defrag selftests
  bpf: selftests: Support custom type and proto for client sockets
  bpf: selftests: Support not connecting client socket
  netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link
  ...
====================

Link: https://lore.kernel.org/r/20230803174845.825419-1-martin.lau@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--  include/linux/bpf.h             |  12
-rw-r--r--  include/linux/filter.h          |  34
-rw-r--r--  include/linux/netdevice.h       |  55
-rw-r--r--  include/linux/netfilter.h       |  10
-rw-r--r--  include/net/busy_poll.h         |   1
-rw-r--r--  include/net/inet6_hashtables.h  |  81
-rw-r--r--  include/net/inet_hashtables.h   |  74
-rw-r--r--  include/net/mana/mana.h         |   2
-rw-r--r--  include/net/netdev_rx_queue.h   |  53
-rw-r--r--  include/net/sock.h              |   7
-rw-r--r--  include/net/xdp.h               |  29
-rw-r--r--  include/trace/events/xdp.h      |  18
-rw-r--r--  include/uapi/linux/bpf.h        |   9
13 files changed, 293 insertions(+), 92 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ceaa8c23287f..abe75063630b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2661,6 +2661,18 @@ static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
}
#endif /* CONFIG_BPF_SYSCALL */
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
+{
+ int ret = -EFAULT;
+
+ if (IS_ENABLED(CONFIG_BPF_EVENTS))
+ ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+ return ret;
+}
+
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
struct btf_mod_pair *used_btfs, u32 len);
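
[For context, a minimal sketch of the tracing helper this common body is meant to back; the BPF_CALL_3 wrapper mirrors the style of kernel/trace/bpf_trace.c and is illustrative, not part of this header diff.]

	BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
		   const void *, unsafe_ptr)
	{
		/* zero-fills dst on failure, per the common helper above */
		return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
	}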
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f69114083ec7..2d6fe30bad5f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -69,6 +69,9 @@ struct ctl_table_header;
/* unused opcode to mark special load instruction. Same as BPF_ABS */
#define BPF_PROBE_MEM 0x20
+/* unused opcode to mark special ldsx instruction. Same as BPF_IND */
+#define BPF_PROBE_MEMSX 0x40
+
/* unused opcode to mark call to interpreter with arguments */
#define BPF_CALL_ARGS 0xe0
@@ -90,22 +93,28 @@ struct ctl_table_header;
/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
-#define BPF_ALU64_REG(OP, DST, SRC) \
+#define BPF_ALU64_REG_OFF(OP, DST, SRC, OFF) \
((struct bpf_insn) { \
.code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
- .off = 0, \
+ .off = OFF, \
.imm = 0 })
-#define BPF_ALU32_REG(OP, DST, SRC) \
+#define BPF_ALU64_REG(OP, DST, SRC) \
+ BPF_ALU64_REG_OFF(OP, DST, SRC, 0)
+
+#define BPF_ALU32_REG_OFF(OP, DST, SRC, OFF) \
((struct bpf_insn) { \
.code = BPF_ALU | BPF_OP(OP) | BPF_X, \
.dst_reg = DST, \
.src_reg = SRC, \
- .off = 0, \
+ .off = OFF, \
.imm = 0 })
+#define BPF_ALU32_REG(OP, DST, SRC) \
+ BPF_ALU32_REG_OFF(OP, DST, SRC, 0)
+
/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
#define BPF_ALU64_IMM(OP, DST, IMM) \
@@ -765,23 +774,6 @@ DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
u32 xdp_master_redirect(struct xdp_buff *xdp);
-static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
- struct xdp_buff *xdp)
-{
- /* Driver XDP hooks are invoked within a single NAPI poll cycle and thus
- * under local_bh_disable(), which provides the needed RCU protection
- * for accessing map entries.
- */
- u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
-
- if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
- if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
- act = xdp_master_redirect(xdp);
- }
-
- return act;
-}
-
void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
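
[The new _OFF macro variants exist because cpu v4 repurposes the previously-always-zero off field of ALU instructions. As a hedged illustration, cpu v4 encodes signed division as BPF_DIV with off == 1, which the macro above can now express; register choices are arbitrary.]

	/* dst_reg = (s64)dst_reg / (s64)src_reg -- cpu v4 signed division,
	 * selected by off == 1 on an otherwise ordinary BPF_DIV insn.
	 */
	struct bpf_insn sdiv = BPF_ALU64_REG_OFF(BPF_DIV, BPF_REG_0, BPF_REG_1, 1);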
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4176a738177b..0896aaa91dd7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -40,7 +40,6 @@
#include <net/dcbnl.h>
#endif
#include <net/netprio_cgroup.h>
-#include <net/xdp.h>
#include <linux/netdev_features.h>
#include <linux/neighbour.h>
@@ -77,8 +76,12 @@ struct udp_tunnel_nic_info;
struct udp_tunnel_nic;
struct bpf_prog;
struct xdp_buff;
+struct xdp_frame;
+struct xdp_metadata_ops;
struct xdp_md;
+typedef u32 xdp_features_t;
+
void synchronize_net(void);
void netdev_set_default_ethtool_ops(struct net_device *dev,
const struct ethtool_ops *ops);
@@ -783,32 +786,6 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
#endif
#endif /* CONFIG_RPS */
-/* This structure contains an instance of an RX queue. */
-struct netdev_rx_queue {
- struct xdp_rxq_info xdp_rxq;
-#ifdef CONFIG_RPS
- struct rps_map __rcu *rps_map;
- struct rps_dev_flow_table __rcu *rps_flow_table;
-#endif
- struct kobject kobj;
- struct net_device *dev;
- netdevice_tracker dev_tracker;
-
-#ifdef CONFIG_XDP_SOCKETS
- struct xsk_buff_pool *pool;
-#endif
-} ____cacheline_aligned_in_smp;
-
-/*
- * RX queue sysfs structures and functions.
- */
-struct rx_queue_attribute {
- struct attribute attr;
- ssize_t (*show)(struct netdev_rx_queue *queue, char *buf);
- ssize_t (*store)(struct netdev_rx_queue *queue,
- const char *buf, size_t len);
-};
-
/* XPS map type and offset of the xps map within net_device->xps_maps[]. */
enum xps_map_type {
XPS_CPUS = 0,
@@ -1670,12 +1647,6 @@ struct net_device_ops {
struct netlink_ext_ack *extack);
};
-struct xdp_metadata_ops {
- int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
- int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
- enum xdp_rss_hash_type *rss_type);
-};
-
/**
* enum netdev_priv_flags - &struct net_device priv_flags
*
@@ -3851,24 +3822,6 @@ static inline int netif_set_real_num_rx_queues(struct net_device *dev,
int netif_set_real_num_queues(struct net_device *dev,
unsigned int txq, unsigned int rxq);
-static inline struct netdev_rx_queue *
-__netif_get_rx_queue(struct net_device *dev, unsigned int rxq)
-{
- return dev->_rx + rxq;
-}
-
-#ifdef CONFIG_SYSFS
-static inline unsigned int get_netdev_rx_queue_index(
- struct netdev_rx_queue *queue)
-{
- struct net_device *dev = queue->dev;
- int index = queue - dev->_rx;
-
- BUG_ON(index >= dev->num_rx_queues);
- return index;
-}
-#endif
-
int netif_get_num_default_rss_queues(void);
void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason);
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index d4fed4c508ca..d68644b7c299 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -11,6 +11,7 @@
#include <linux/wait.h>
#include <linux/list.h>
#include <linux/static_key.h>
+#include <linux/module.h>
#include <linux/netfilter_defs.h>
#include <linux/netdevice.h>
#include <linux/sockptr.h>
@@ -481,6 +482,15 @@ struct nfnl_ct_hook {
};
extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook;
+struct nf_defrag_hook {
+ struct module *owner;
+ int (*enable)(struct net *net);
+ void (*disable)(struct net *net);
+};
+
+extern const struct nf_defrag_hook __rcu *nf_defrag_v4_hook;
+extern const struct nf_defrag_hook __rcu *nf_defrag_v6_hook;
+
/*
* nf_skb_duplicated - TEE target has sent a packet
*
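
[A hedged sketch of how a defrag module could publish one of the new hooks; nf_defrag_ipv4_enable()/nf_defrag_ipv4_disable() are assumed to live in net/ipv4/netfilter, and the registration shown is illustrative rather than lifted from the actual patch.]

	static const struct nf_defrag_hook defrag_hook = {
		.owner	 = THIS_MODULE,
		.enable	 = nf_defrag_ipv4_enable,
		.disable = nf_defrag_ipv4_disable,
	};

	/* published under RCU so the BPF netfilter link can find it */
	rcu_assign_pointer(nf_defrag_v4_hook, &defrag_hook);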
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index f90f0021f5f2..4dabeb6c76d3 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -16,6 +16,7 @@
#include <linux/sched/clock.h>
#include <linux/sched/signal.h>
#include <net/ip.h>
+#include <net/xdp.h>
/* 0 - Reserved to indicate value not set
* 1..NR_CPUS - Reserved for sender_cpu
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 56f1286583d3..284b5ce7205d 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -48,6 +48,22 @@ struct sock *__inet6_lookup_established(struct net *net,
const u16 hnum, const int dif,
const int sdif);
+typedef u32 (inet6_ehashfn_t)(const struct net *net,
+ const struct in6_addr *laddr, const u16 lport,
+ const struct in6_addr *faddr, const __be16 fport);
+
+inet6_ehashfn_t inet6_ehashfn;
+
+INDIRECT_CALLABLE_DECLARE(inet6_ehashfn_t udp6_ehashfn);
+
+struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned short hnum,
+ inet6_ehashfn_t *ehashfn);
+
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -57,6 +73,15 @@ struct sock *inet6_lookup_listener(struct net *net,
const unsigned short hnum,
const int dif, const int sdif);
+struct sock *inet6_lookup_run_sk_lookup(struct net *net,
+ int protocol,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 hnum, const int dif,
+ inet6_ehashfn_t *ehashfn);
+
static inline struct sock *__inet6_lookup(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -78,6 +103,46 @@ static inline struct sock *__inet6_lookup(struct net *net,
daddr, hnum, dif, sdif);
}
+static inline
+struct sock *inet6_steal_sock(struct net *net, struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr, const __be16 sport,
+ const struct in6_addr *daddr, const __be16 dport,
+ bool *refcounted, inet6_ehashfn_t *ehashfn)
+{
+ struct sock *sk, *reuse_sk;
+ bool prefetched;
+
+ sk = skb_steal_sock(skb, refcounted, &prefetched);
+ if (!sk)
+ return NULL;
+
+ if (!prefetched)
+ return sk;
+
+ if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_state != TCP_LISTEN)
+ return sk;
+ } else if (sk->sk_protocol == IPPROTO_UDP) {
+ if (sk->sk_state != TCP_CLOSE)
+ return sk;
+ } else {
+ return sk;
+ }
+
+ reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, ntohs(dport),
+ ehashfn);
+ if (!reuse_sk)
+ return sk;
+
+ /* We've chosen a new reuseport sock which is never refcounted. This
+ * implies that sk also isn't refcounted.
+ */
+ WARN_ON_ONCE(*refcounted);
+
+ return reuse_sk;
+}
+
static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be16 sport,
@@ -85,14 +150,20 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
int iif, int sdif,
bool *refcounted)
{
- struct sock *sk = skb_steal_sock(skb, refcounted);
-
+ struct net *net = dev_net(skb_dst(skb)->dev);
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct sock *sk;
+
+ sk = inet6_steal_sock(net, skb, doff, &ip6h->saddr, sport, &ip6h->daddr, dport,
+ refcounted, inet6_ehashfn);
+ if (IS_ERR(sk))
+ return NULL;
if (sk)
return sk;
- return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
- doff, &ipv6_hdr(skb)->saddr, sport,
- &ipv6_hdr(skb)->daddr, ntohs(dport),
+ return __inet6_lookup(net, hashinfo, skb,
+ doff, &ip6h->saddr, sport,
+ &ip6h->daddr, ntohs(dport),
iif, sdif, refcounted);
}
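
[The prefetched branch exercised above is populated by a TC program calling bpf_sk_assign(). A minimal, hedged sketch of such a program follows (includes of bpf/bpf_helpers.h etc. omitted; in real use the tuple is filled from the packet headers).]

	SEC("tc")
	int steal_to_listener(struct __sk_buff *skb)
	{
		struct bpf_sock_tuple tuple = {}; /* fill from headers in real use */
		struct bpf_sock *sk;

		sk = bpf_skc_lookup_tcp(skb, &tuple, sizeof(tuple.ipv6),
					BPF_F_CURRENT_NETNS, 0);
		if (!sk)
			return TC_ACT_OK;
		/* On success skb->sk becomes a "prefetched" socket, which
		 * inet6_steal_sock() will re-run reuseport selection on.
		 */
		bpf_sk_assign(skb, sk, 0);
		bpf_sk_release(sk);
		return TC_ACT_OK;
	}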
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 99bd823e97f6..1177effabed3 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -379,6 +379,27 @@ struct sock *__inet_lookup_established(struct net *net,
const __be32 daddr, const u16 hnum,
const int dif, const int sdif);
+typedef u32 (inet_ehashfn_t)(const struct net *net,
+ const __be32 laddr, const __u16 lport,
+ const __be32 faddr, const __be16 fport);
+
+inet_ehashfn_t inet_ehashfn;
+
+INDIRECT_CALLABLE_DECLARE(inet_ehashfn_t udp_ehashfn);
+
+struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned short hnum,
+ inet_ehashfn_t *ehashfn);
+
+struct sock *inet_lookup_run_sk_lookup(struct net *net,
+ int protocol,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, u16 hnum, const int dif,
+ inet_ehashfn_t *ehashfn);
+
static inline struct sock *
inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
@@ -428,6 +449,46 @@ static inline struct sock *inet_lookup(struct net *net,
return sk;
}
+static inline
+struct sock *inet_steal_sock(struct net *net, struct sk_buff *skb, int doff,
+ const __be32 saddr, const __be16 sport,
+ const __be32 daddr, const __be16 dport,
+ bool *refcounted, inet_ehashfn_t *ehashfn)
+{
+ struct sock *sk, *reuse_sk;
+ bool prefetched;
+
+ sk = skb_steal_sock(skb, refcounted, &prefetched);
+ if (!sk)
+ return NULL;
+
+ if (!prefetched)
+ return sk;
+
+ if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_state != TCP_LISTEN)
+ return sk;
+ } else if (sk->sk_protocol == IPPROTO_UDP) {
+ if (sk->sk_state != TCP_CLOSE)
+ return sk;
+ } else {
+ return sk;
+ }
+
+ reuse_sk = inet_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, ntohs(dport),
+ ehashfn);
+ if (!reuse_sk)
+ return sk;
+
+ /* We've chosen a new reuseport sock which is never refcounted. This
+ * implies that sk also isn't refcounted.
+ */
+ WARN_ON_ONCE(*refcounted);
+
+ return reuse_sk;
+}
+
static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb,
int doff,
@@ -436,22 +497,23 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
const int sdif,
bool *refcounted)
{
- struct sock *sk = skb_steal_sock(skb, refcounted);
+ struct net *net = dev_net(skb_dst(skb)->dev);
const struct iphdr *iph = ip_hdr(skb);
+ struct sock *sk;
+ sk = inet_steal_sock(net, skb, doff, iph->saddr, sport, iph->daddr, dport,
+ refcounted, inet_ehashfn);
+ if (IS_ERR(sk))
+ return NULL;
if (sk)
return sk;
- return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
+ return __inet_lookup(net, hashinfo, skb,
doff, iph->saddr, sport,
iph->daddr, dport, inet_iif(skb), sdif,
refcounted);
}
-u32 inet6_ehashfn(const struct net *net,
- const struct in6_addr *laddr, const u16 lport,
- const struct in6_addr *faddr, const __be16 fport);
-
static inline void sk_daddr_set(struct sock *sk, __be32 addr)
{
sk->sk_daddr = addr; /* alias of inet_daddr */
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 024ad8ddb27e..1ccdca03e166 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -4,6 +4,8 @@
#ifndef _MANA_H
#define _MANA_H
+#include <net/xdp.h>
+
#include "gdma.h"
#include "hw_channel.h"
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
new file mode 100644
index 000000000000..cdcafb30d437
--- /dev/null
+++ b/include/net/netdev_rx_queue.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_NETDEV_RX_QUEUE_H
+#define _LINUX_NETDEV_RX_QUEUE_H
+
+#include <linux/kobject.h>
+#include <linux/netdevice.h>
+#include <linux/sysfs.h>
+#include <net/xdp.h>
+
+/* This structure contains an instance of an RX queue. */
+struct netdev_rx_queue {
+ struct xdp_rxq_info xdp_rxq;
+#ifdef CONFIG_RPS
+ struct rps_map __rcu *rps_map;
+ struct rps_dev_flow_table __rcu *rps_flow_table;
+#endif
+ struct kobject kobj;
+ struct net_device *dev;
+ netdevice_tracker dev_tracker;
+
+#ifdef CONFIG_XDP_SOCKETS
+ struct xsk_buff_pool *pool;
+#endif
+} ____cacheline_aligned_in_smp;
+
+/*
+ * RX queue sysfs structures and functions.
+ */
+struct rx_queue_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct netdev_rx_queue *queue, char *buf);
+ ssize_t (*store)(struct netdev_rx_queue *queue,
+ const char *buf, size_t len);
+};
+
+static inline struct netdev_rx_queue *
+__netif_get_rx_queue(struct net_device *dev, unsigned int rxq)
+{
+ return dev->_rx + rxq;
+}
+
+#ifdef CONFIG_SYSFS
+static inline unsigned int
+get_netdev_rx_queue_index(struct netdev_rx_queue *queue)
+{
+ struct net_device *dev = queue->dev;
+ int index = queue - dev->_rx;
+
+ BUG_ON(index >= dev->num_rx_queues);
+ return index;
+}
+#endif
+#endif
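
[Anything that previously picked struct netdev_rx_queue up via netdevice.h must now include this header directly; a trivial, hypothetical consumer:]

	#include <net/netdev_rx_queue.h>

	static void example_use_rxq(struct net_device *dev)
	{
		/* queue 0 is arbitrary; bounds are the caller's responsibility */
		struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, 0);

		(void)rxq->xdp_rxq; /* e.g. hand to XDP registration helpers */
	}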
diff --git a/include/net/sock.h b/include/net/sock.h
index 7ae44bf866af..74cbfb15d289 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2815,20 +2815,23 @@ sk_is_refcounted(struct sock *sk)
* skb_steal_sock - steal a socket from an sk_buff
* @skb: sk_buff to steal the socket from
* @refcounted: is set to true if the socket is reference-counted
+ * @prefetched: is set to true if the socket was assigned from bpf
*/
static inline struct sock *
-skb_steal_sock(struct sk_buff *skb, bool *refcounted)
+skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched)
{
if (skb->sk) {
struct sock *sk = skb->sk;
*refcounted = true;
- if (skb_sk_is_prefetched(skb))
+ *prefetched = skb_sk_is_prefetched(skb);
+ if (*prefetched)
*refcounted = sk_is_refcounted(sk);
skb->destructor = NULL;
skb->sk = NULL;
return sk;
}
+ *prefetched = false;
*refcounted = false;
return NULL;
}
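
[Callers of skb_steal_sock() gain one out-parameter; a hedged sketch of the updated calling convention (the surrounding function is hypothetical):]

	bool refcounted = false, prefetched = false;
	struct sock *sk = skb_steal_sock(skb, &refcounted, &prefetched);

	if (sk && prefetched) {
		/* sk came from bpf_sk_assign(); a listener or unconnected
		 * UDP socket may still need the reuseport re-lookup done by
		 * inet{,6}_steal_sock() above.
		 */
	}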
diff --git a/include/net/xdp.h b/include/net/xdp.h
index d1c5381fc95f..de08c8e0d134 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -6,9 +6,10 @@
#ifndef __LINUX_NET_XDP_H__
#define __LINUX_NET_XDP_H__
-#include <linux/skbuff.h> /* skb_shared_info */
-#include <uapi/linux/netdev.h>
#include <linux/bitfield.h>
+#include <linux/filter.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h> /* skb_shared_info */
/**
* DOC: XDP RX-queue information
@@ -45,8 +46,6 @@ enum xdp_mem_type {
MEM_TYPE_MAX,
};
-typedef u32 xdp_features_t;
-
/* XDP flags for ndo_xdp_xmit */
#define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */
#define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH
@@ -443,6 +442,12 @@ enum xdp_rss_hash_type {
XDP_RSS_TYPE_L4_IPV6_SCTP_EX = XDP_RSS_TYPE_L4_IPV6_SCTP | XDP_RSS_L3_DYNHDR,
};
+struct xdp_metadata_ops {
+ int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
+ int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
+ enum xdp_rss_hash_type *rss_type);
+};
+
#ifdef CONFIG_NET
u32 bpf_xdp_metadata_kfunc_id(int id);
bool bpf_dev_bound_kfunc_id(u32 btf_id);
@@ -474,4 +479,20 @@ static inline void xdp_clear_features_flag(struct net_device *dev)
xdp_set_features_flag(dev, 0);
}
+static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
+ struct xdp_buff *xdp)
+{
+ /* Driver XDP hooks are invoked within a single NAPI poll cycle and thus
+ * under local_bh_disable(), which provides the needed RCU protection
+ * for accessing map entries.
+ */
+ u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+
+ if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
+ if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
+ act = xdp_master_redirect(xdp);
+ }
+
+ return act;
+}
#endif /* __LINUX_NET_XDP_H__ */
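
[bpf_prog_run_xdp() is moved, not changed; for reference, a condensed sketch of the usual driver-side pattern around it in NAPI context, with action handling abbreviated and dev/prog/xdp assumed to be the driver's locals:]

	u32 act = bpf_prog_run_xdp(prog, &xdp);

	switch (act) {
	case XDP_PASS:
		break;			/* build an skb, pass it up the stack */
	case XDP_TX:
	case XDP_REDIRECT:
		break;			/* transmit or redirect the frame */
	default:
		bpf_warn_invalid_xdp_action(dev, prog, act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(dev, prog, act);
		fallthrough;
	case XDP_DROP:
		break;			/* recycle the buffer */
	}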
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index c40fc97f9417..9adc2bdf2f94 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -9,6 +9,7 @@
#include <linux/filter.h>
#include <linux/tracepoint.h>
#include <linux/bpf.h>
+#include <net/xdp.h>
#define __XDP_ACT_MAP(FN) \
FN(ABORTED) \
@@ -404,6 +405,23 @@ TRACE_EVENT(mem_return_failed,
)
);
+TRACE_EVENT(bpf_xdp_link_attach_failed,
+
+ TP_PROTO(const char *msg),
+
+ TP_ARGS(msg),
+
+ TP_STRUCT__entry(
+ __string(msg, msg)
+ ),
+
+ TP_fast_assign(
+ __assign_str(msg, msg);
+ ),
+
+ TP_printk("errmsg=%s", __get_str(msg))
+);
+
#endif /* _TRACE_XDP_H */
#include <trace/define_trace.h>
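
[The new tracepoint is emitted from the XDP attach path in net/core/dev.c; paraphrased rather than verbatim, the call site looks roughly like:]

	if (err < 0 && extack->_msg)
		trace_bpf_xdp_link_attach_failed(extack->_msg);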
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 739c15906a65..70da85200695 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -19,6 +19,7 @@
/* ld/ldx fields */
#define BPF_DW 0x18 /* double word (64-bit) */
+#define BPF_MEMSX 0x80 /* load with sign extension */
#define BPF_ATOMIC 0xc0 /* atomic memory ops - op type in immediate */
#define BPF_XADD 0xc0 /* exclusive add - legacy name */
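
[BPF_MEMSX is the cpu v4 sign-extending load mode. As a hedged illustration using the BPF_RAW_INSN macro from linux/filter.h, dst = *(s8 *)(src + 0) encodes as:]

	/* BPF_LDX | BPF_MEMSX | BPF_B: load one byte, sign-extend into dst */
	struct bpf_insn ldsx =
		BPF_RAW_INSN(BPF_LDX | BPF_MEMSX | BPF_B, BPF_REG_0, BPF_REG_1, 0, 0);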
@@ -1187,6 +1188,11 @@ enum bpf_perf_event_type {
*/
#define BPF_F_KPROBE_MULTI_RETURN (1U << 0)
+/* link_create.netfilter.flags used in LINK_CREATE command for
+ * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
+ */
+#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
*
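
[Returning to BPF_F_NETFILTER_IP_DEFRAG above: from userspace the flag is passed at link creation. A hedged sketch using libbpf's bpf_program__attach_netfilter(), with field names per libbpf's bpf_netfilter_opts and an arbitrary priority value:]

	LIBBPF_OPTS(bpf_netfilter_opts, opts,
		.pf	  = NFPROTO_IPV4,
		.hooknum  = NF_INET_PRE_ROUTING,
		.priority = -128,
		.flags	  = BPF_F_NETFILTER_IP_DEFRAG);

	struct bpf_link *link = bpf_program__attach_netfilter(prog, &opts);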
@@ -4198,9 +4204,6 @@ union bpf_attr {
* **-EOPNOTSUPP** if the operation is not supported, for example
* a call from outside of TC ingress.
*
- * **-ESOCKTNOSUPPORT** if the socket type is not supported
- * (reuseport).
- *
* long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags)
* Description
* Helper is overloaded depending on BPF program type. This