summary | refs | log | tree | commit | diff
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r-- include/net/addrconf.h | 2
-rw-r--r-- include/net/dropreason-core.h | 26
-rw-r--r-- include/net/eee.h | 38
-rw-r--r-- include/net/genetlink.h | 2
-rw-r--r-- include/net/gro.h | 39
-rw-r--r-- include/net/hotdata.h | 52
-rw-r--r-- include/net/if_inet6.h | 4
-rw-r--r-- include/net/inet_sock.h | 1
-rw-r--r-- include/net/ioam6.h | 4
-rw-r--r-- include/net/ip6_route.h | 2
-rw-r--r-- include/net/ip_fib.h | 1
-rw-r--r-- include/net/ipv6.h | 8
-rw-r--r-- include/net/mctp.h | 1
-rw-r--r-- include/net/netdev_queues.h | 56
-rw-r--r-- include/net/nexthop.h | 31
-rw-r--r-- include/net/nfc/nfc.h | 2
-rw-r--r-- include/net/protocol.h | 3
-rw-r--r-- include/net/rps.h | 125
-rw-r--r-- include/net/rtnetlink.h | 1
-rw-r--r-- include/net/sch_generic.h | 7
-rw-r--r-- include/net/sock.h | 40
-rw-r--r-- include/net/tcp.h | 6
22 files changed, 372 insertions, 79 deletions
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 30d6f1e84e46..9d06eb945509 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -417,7 +417,7 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev)
if (unlikely(!idev))
return true;
- return !!idev->cnf.ignore_routes_with_linkdown;
+ return !!READ_ONCE(idev->cnf.ignore_routes_with_linkdown);
}
void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp);
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 6d3a20163260..9707ab54fdd5 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -30,6 +30,7 @@
FN(TCP_AOFAILURE) \
FN(SOCKET_BACKLOG) \
FN(TCP_FLAGS) \
+ FN(TCP_ABORT_ON_DATA) \
FN(TCP_ZEROWINDOW) \
FN(TCP_OLD_DATA) \
FN(TCP_OVERWINDOW) \
@@ -37,6 +38,7 @@
FN(TCP_RFC7323_PAWS) \
FN(TCP_OLD_SEQUENCE) \
FN(TCP_INVALID_SEQUENCE) \
+ FN(TCP_INVALID_ACK_SEQUENCE) \
FN(TCP_RESET) \
FN(TCP_INVALID_SYN) \
FN(TCP_CLOSE) \
@@ -54,6 +56,7 @@
FN(NEIGH_QUEUEFULL) \
FN(NEIGH_DEAD) \
FN(TC_EGRESS) \
+ FN(SECURITY_HOOK) \
FN(QDISC_DROP) \
FN(CPU_BACKLOG) \
FN(XDP) \
@@ -105,7 +108,13 @@ enum skb_drop_reason {
SKB_CONSUMED,
/** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */
SKB_DROP_REASON_NOT_SPECIFIED,
- /** @SKB_DROP_REASON_NO_SOCKET: socket not found */
+ /**
+ * @SKB_DROP_REASON_NO_SOCKET: no valid socket that can be used.
+ * Reason could be one of three cases:
+ * 1) no established/listening socket found during lookup process
+ * 2) no valid request socket during 3WHS process
+ * 3) no valid child socket during 3WHS process
+ */
SKB_DROP_REASON_NO_SOCKET,
/** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */
SKB_DROP_REASON_PKT_TOO_SMALL,
@@ -198,6 +207,11 @@ enum skb_drop_reason {
/** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */
SKB_DROP_REASON_TCP_FLAGS,
/**
+ * @SKB_DROP_REASON_TCP_ABORT_ON_DATA: abort on data, corresponding to
+ * LINUX_MIB_TCPABORTONDATA
+ */
+ SKB_DROP_REASON_TCP_ABORT_ON_DATA,
+ /**
* @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero,
* see LINUX_MIB_TCPZEROWINDOWDROP
*/
@@ -221,13 +235,19 @@ enum skb_drop_reason {
SKB_DROP_REASON_TCP_OFOMERGE,
/**
* @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to
- * LINUX_MIB_PAWSESTABREJECTED
+ * LINUX_MIB_PAWSESTABREJECTED, LINUX_MIB_PAWSACTIVEREJECTED
*/
SKB_DROP_REASON_TCP_RFC7323_PAWS,
/** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */
SKB_DROP_REASON_TCP_OLD_SEQUENCE,
/** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
SKB_DROP_REASON_TCP_INVALID_SEQUENCE,
+ /**
+ * @SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE: Not acceptable ACK SEQ
+ * field because ack sequence is not in the window between snd_una
+ * and snd_nxt
+ */
+ SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE,
/** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */
SKB_DROP_REASON_TCP_RESET,
/**
@@ -271,6 +291,8 @@ enum skb_drop_reason {
SKB_DROP_REASON_NEIGH_DEAD,
/** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */
SKB_DROP_REASON_TC_EGRESS,
+ /** @SKB_DROP_REASON_SECURITY_HOOK: dropped due to security HOOK */
+ SKB_DROP_REASON_SECURITY_HOOK,
/**
* @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting (
* failed to enqueue to current qdisc)
diff --git a/include/net/eee.h b/include/net/eee.h
new file mode 100644
index 000000000000..84837aba3cd9
--- /dev/null
+++ b/include/net/eee.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _EEE_H
+#define _EEE_H
+
+#include <linux/types.h>
+
+struct eee_config {
+ u32 tx_lpi_timer;
+ bool tx_lpi_enabled;
+ bool eee_enabled;
+};
+
+static inline bool eeecfg_mac_can_tx_lpi(const struct eee_config *eeecfg)
+{
+ /* eee_enabled is the master on/off */
+ if (!eeecfg->eee_enabled || !eeecfg->tx_lpi_enabled)
+ return false;
+
+ return true;
+}
+
+static inline void eeecfg_to_eee(struct ethtool_keee *eee,
+ const struct eee_config *eeecfg)
+{
+ eee->tx_lpi_timer = eeecfg->tx_lpi_timer;
+ eee->tx_lpi_enabled = eeecfg->tx_lpi_enabled;
+ eee->eee_enabled = eeecfg->eee_enabled;
+}
+
+static inline void eee_to_eeecfg(struct eee_config *eeecfg,
+ const struct ethtool_keee *eee)
+{
+ eeecfg->tx_lpi_timer = eee->tx_lpi_timer;
+ eeecfg->tx_lpi_enabled = eee->tx_lpi_enabled;
+ eeecfg->eee_enabled = eee->eee_enabled;
+}
+
+#endif
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index ecadba836ae5..9ece6e5a3ea8 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -153,7 +153,7 @@ static inline void *genl_info_userhdr(const struct genl_info *info)
/* Report that a root attribute is missing */
#define GENL_REQ_ATTR_CHECK(info, attr) ({ \
- struct genl_info *__info = (info); \
+ const struct genl_info *__info = (info); \
\
NL_REQ_ATTR_CHECK(__info->extack, NULL, __info->attrs, (attr)); \
})
diff --git a/include/net/gro.h b/include/net/gro.h
index b435f0ddbf64..d6fc8fbd3730 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -9,6 +9,7 @@
#include <net/ip6_checksum.h>
#include <linux/skbuff.h>
#include <net/udp.h>
+#include <net/hotdata.h>
struct napi_gro_cb {
union {
@@ -139,21 +140,16 @@ static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
NAPI_GRO_CB(skb)->data_offset += len;
}
-static inline void *skb_gro_header_fast(struct sk_buff *skb,
+static inline void *skb_gro_header_fast(const struct sk_buff *skb,
unsigned int offset)
{
return NAPI_GRO_CB(skb)->frag0 + offset;
}
-static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen)
+static inline bool skb_gro_may_pull(const struct sk_buff *skb,
+ unsigned int hlen)
{
- return NAPI_GRO_CB(skb)->frag0_len < hlen;
-}
-
-static inline void skb_gro_frag0_invalidate(struct sk_buff *skb)
-{
- NAPI_GRO_CB(skb)->frag0 = NULL;
- NAPI_GRO_CB(skb)->frag0_len = 0;
+ return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len);
}
static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
@@ -162,28 +158,30 @@ static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
if (!pskb_may_pull(skb, hlen))
return NULL;
- skb_gro_frag0_invalidate(skb);
return skb->data + offset;
}
-static inline void *skb_gro_header(struct sk_buff *skb,
- unsigned int hlen, unsigned int offset)
+static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen,
+ unsigned int offset)
{
void *ptr;
ptr = skb_gro_header_fast(skb, offset);
- if (skb_gro_header_hard(skb, hlen))
+ if (!skb_gro_may_pull(skb, hlen))
ptr = skb_gro_header_slow(skb, hlen, offset);
return ptr;
}
-static inline void *skb_gro_network_header(struct sk_buff *skb)
+static inline void *skb_gro_network_header(const struct sk_buff *skb)
{
- return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) +
- skb_network_offset(skb);
+ if (skb_gro_may_pull(skb, skb_gro_offset(skb)))
+ return skb_gro_header_fast(skb, skb_network_offset(skb));
+
+ return skb_network_header(skb);
}
-static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
+static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb,
+ int proto)
{
const struct iphdr *iph = skb_gro_network_header(skb);
@@ -421,7 +419,8 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
return uh;
}
-static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto)
+static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb,
+ int proto)
{
const struct ipv6hdr *iph = skb_gro_network_header(skb);
@@ -448,7 +447,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
{
list_add_tail(&skb->list, &napi->rx_list);
napi->rx_count += segs;
- if (napi->rx_count >= READ_ONCE(gro_normal_batch))
+ if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
gro_normal_list(napi);
}
@@ -495,6 +494,4 @@ static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *
#endif
}
-extern struct list_head offload_base;
-
#endif /* _NET_IPV6_GRO_H */
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
new file mode 100644
index 000000000000..003667a1efd6
--- /dev/null
+++ b/include/net/hotdata.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_HOTDATA_H
+#define _NET_HOTDATA_H
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <net/protocol.h>
+
+/* Read mostly data used in network fast paths. */
+struct net_hotdata {
+#if IS_ENABLED(CONFIG_INET)
+ struct packet_offload ip_packet_offload;
+ struct net_offload tcpv4_offload;
+ struct net_protocol tcp_protocol;
+ struct net_offload udpv4_offload;
+ struct net_protocol udp_protocol;
+ struct packet_offload ipv6_packet_offload;
+ struct net_offload tcpv6_offload;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_protocol tcpv6_protocol;
+ struct inet6_protocol udpv6_protocol;
+#endif
+ struct net_offload udpv6_offload;
+#endif
+ struct list_head offload_base;
+ struct list_head ptype_all;
+ struct kmem_cache *skbuff_cache;
+ struct kmem_cache *skbuff_fclone_cache;
+ struct kmem_cache *skb_small_head_cache;
+#ifdef CONFIG_RPS
+ struct rps_sock_flow_table __rcu *rps_sock_flow_table;
+ u32 rps_cpu_mask;
+#endif
+ int gro_normal_batch;
+ int netdev_budget;
+ int netdev_budget_usecs;
+ int tstamp_prequeue;
+ int max_backlog;
+ int dev_tx_weight;
+ int dev_rx_weight;
+};
+
+#define inet_ehash_secret net_hotdata.tcp_protocol.secret
+#define udp_ehash_secret net_hotdata.udp_protocol.secret
+#define inet6_ehash_secret net_hotdata.tcpv6_protocol.secret
+#define tcp_ipv6_hash_secret net_hotdata.tcpv6_offload.secret
+#define udp6_ehash_secret net_hotdata.udpv6_protocol.secret
+#define udp_ipv6_hash_secret net_hotdata.udpv6_offload.secret
+
+extern struct net_hotdata net_hotdata;
+
+#endif /* _NET_HOTDATA_H */
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index f07642264c1e..238ad3349456 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -144,7 +144,7 @@ struct ipv6_ac_socklist {
struct ifacaddr6 {
struct in6_addr aca_addr;
struct fib6_info *aca_rt;
- struct ifacaddr6 *aca_next;
+ struct ifacaddr6 __rcu *aca_next;
struct hlist_node aca_addr_lst;
int aca_users;
refcount_t aca_refcnt;
@@ -196,7 +196,7 @@ struct inet6_dev {
spinlock_t mc_report_lock; /* mld query report lock */
struct mutex mc_lock; /* mld global lock */
- struct ifacaddr6 *ac_list;
+ struct ifacaddr6 __rcu *ac_list;
rwlock_t lock;
refcount_t refcnt;
__u32 if_flags;
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index d94c242eb3ed..f9ddd47dc4f8 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -274,6 +274,7 @@ enum {
INET_FLAGS_REPFLOW = 27,
INET_FLAGS_RTALERT_ISOLATE = 28,
INET_FLAGS_SNDFLOW = 29,
+ INET_FLAGS_RTALERT = 30,
};
/* cmsg flags for inet */
diff --git a/include/net/ioam6.h b/include/net/ioam6.h
index 781d2d8b2f29..2cbbee6e806a 100644
--- a/include/net/ioam6.h
+++ b/include/net/ioam6.h
@@ -12,6 +12,7 @@
#include <linux/net.h>
#include <linux/ipv6.h>
#include <linux/ioam6.h>
+#include <linux/ioam6_genl.h>
#include <linux/rhashtable-types.h>
struct ioam6_namespace {
@@ -65,4 +66,7 @@ void ioam6_exit(void);
int ioam6_iptunnel_init(void);
void ioam6_iptunnel_exit(void);
+void ioam6_event(enum ioam6_event_type type, struct net *net, gfp_t gfp,
+ void *opt, unsigned int opt_len);
+
#endif /* _NET_IOAM6_H */
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 52a51c69aa9d..a30c6aa9e5cf 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -332,7 +332,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst
rcu_read_lock();
idev = __in6_dev_get(dst->dev);
if (idev)
- mtu = idev->cnf.mtu6;
+ mtu = READ_ONCE(idev->cnf.mtu6);
rcu_read_unlock();
out:
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index d4667b7797e3..9b2f69ba5e49 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -264,6 +264,7 @@ struct fib_dump_filter {
bool filter_set;
bool dump_routes;
bool dump_exceptions;
+ bool rtnl_held;
unsigned char protocol;
unsigned char rt_type;
unsigned int flags;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index cf25ea21d770..88a8e554f7a1 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -534,13 +534,15 @@ static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb)
return 0;
}
-static inline bool ipv6_accept_ra(struct inet6_dev *idev)
+static inline bool ipv6_accept_ra(const struct inet6_dev *idev)
{
+ s32 accept_ra = READ_ONCE(idev->cnf.accept_ra);
+
/* If forwarding is enabled, RA are not accepted unless the special
* hybrid mode (accept_ra=2) is enabled.
*/
- return idev->cnf.forwarding ? idev->cnf.accept_ra == 2 :
- idev->cnf.accept_ra;
+ return READ_ONCE(idev->cnf.forwarding) ? accept_ra == 2 :
+ accept_ra;
}
#define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 0dfae6f51a32..7b17c52e8ce2 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -250,6 +250,7 @@ struct mctp_route {
struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
mctp_eid_t daddr);
+/* always takes ownership of skb */
int mctp_local_output(struct sock *sk, struct mctp_route *rt,
struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index 8b8ed4e13d74..1ec408585373 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -4,6 +4,62 @@
#include <linux/netdevice.h>
+/* See the netdev.yaml spec for definition of each statistic */
+struct netdev_queue_stats_rx {
+ u64 bytes;
+ u64 packets;
+ u64 alloc_fail;
+};
+
+struct netdev_queue_stats_tx {
+ u64 bytes;
+ u64 packets;
+};
+
+/**
+ * struct netdev_stat_ops - netdev ops for fine grained stats
+ * @get_queue_stats_rx: get stats for a given Rx queue
+ * @get_queue_stats_tx: get stats for a given Tx queue
+ * @get_base_stats: get base stats (not belonging to any live instance)
+ *
+ * Query stats for a given object. The values of the statistics are undefined
+ * on entry (specifically they are *not* zero-initialized). Drivers should
+ * assign values only to the statistics they collect. Statistics which are not
+ * collected must be left undefined.
+ *
+ * Queue objects are not necessarily persistent, and only currently active
+ * queues are queried by the per-queue callbacks. This means that per-queue
+ * statistics will not generally add up to the total number of events for
+ * the device. The @get_base_stats callback allows filling in the delta
+ * between events for currently live queues and overall device history.
+ * When the statistics for the entire device are queried, first @get_base_stats
+ * is issued to collect the delta, and then a series of per-queue callbacks.
+ * Only statistics which are set in @get_base_stats will be reported
+ * at the device level, meaning that unlike in queue callbacks, setting
+ * a statistic to zero in @get_base_stats is a legitimate thing to do.
+ * This is because @get_base_stats has a second function of designating which
+ * statistics are in fact correct for the entire device (e.g. when history
+ * for some of the events is not maintained, and reliable "total" cannot
+ * be provided).
+ *
+ * Device drivers can assume that when collecting total device stats,
+ * the @get_base_stats and subsequent per-queue calls are performed
+ * "atomically" (without releasing the rtnl_lock).
+ *
+ * Device drivers are encouraged to reset the per-queue statistics when
+ * number of queues change. This is because the primary use case for
+ * per-queue statistics is currently to detect traffic imbalance.
+ */
+struct netdev_stat_ops {
+ void (*get_queue_stats_rx)(struct net_device *dev, int idx,
+ struct netdev_queue_stats_rx *stats);
+ void (*get_queue_stats_tx)(struct net_device *dev, int idx,
+ struct netdev_queue_stats_tx *stats);
+ void (*get_base_stats)(struct net_device *dev,
+ struct netdev_queue_stats_rx *rx,
+ struct netdev_queue_stats_tx *tx);
+};
+
/**
* DOC: Lockless queue stopping / waking helpers.
*
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 6647ad509faa..7ec9cc80f11c 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -47,6 +47,8 @@ struct nh_config {
bool nh_grp_res_has_idle_timer;
bool nh_grp_res_has_unbalanced_timer;
+ bool nh_hw_stats;
+
struct nlattr *nh_encap;
u16 nh_encap_type;
@@ -95,8 +97,14 @@ struct nh_res_table {
struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets);
};
+struct nh_grp_entry_stats {
+ u64_stats_t packets;
+ struct u64_stats_sync syncp;
+};
+
struct nh_grp_entry {
struct nexthop *nh;
+ struct nh_grp_entry_stats __percpu *stats;
u8 weight;
union {
@@ -114,6 +122,7 @@ struct nh_grp_entry {
struct list_head nh_list;
struct nexthop *nh_parent; /* nexthop of group with this entry */
+ u64 packets_hw;
};
struct nh_group {
@@ -124,6 +133,7 @@ struct nh_group {
bool resilient;
bool fdb_nh;
bool has_v4;
+ bool hw_stats;
struct nh_res_table __rcu *res_table;
struct nh_grp_entry nh_entries[] __counted_by(num_nh);
@@ -157,6 +167,7 @@ enum nexthop_event_type {
NEXTHOP_EVENT_REPLACE,
NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
NEXTHOP_EVENT_BUCKET_REPLACE,
+ NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
};
enum nh_notifier_info_type {
@@ -164,6 +175,7 @@ enum nh_notifier_info_type {
NH_NOTIFIER_INFO_TYPE_GRP,
NH_NOTIFIER_INFO_TYPE_RES_TABLE,
NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
+ NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS,
};
struct nh_notifier_single_info {
@@ -187,6 +199,7 @@ struct nh_notifier_grp_entry_info {
struct nh_notifier_grp_info {
u16 num_nh;
bool is_fdb;
+ bool hw_stats;
struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh);
};
@@ -200,9 +213,21 @@ struct nh_notifier_res_bucket_info {
struct nh_notifier_res_table_info {
u16 num_nh_buckets;
+ bool hw_stats;
struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets);
};
+struct nh_notifier_grp_hw_stats_entry_info {
+ u32 id;
+ u64 packets;
+};
+
+struct nh_notifier_grp_hw_stats_info {
+ u16 num_nh;
+ bool hw_stats_used;
+ struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh);
+};
+
struct nh_notifier_info {
struct net *net;
struct netlink_ext_ack *extack;
@@ -213,6 +238,7 @@ struct nh_notifier_info {
struct nh_notifier_grp_info *nh_grp;
struct nh_notifier_res_table_info *nh_res_table;
struct nh_notifier_res_bucket_info *nh_res_bucket;
+ struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats;
};
};
@@ -225,6 +251,9 @@ void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
bool offload, bool trap);
void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
unsigned long *activity);
+void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
+ unsigned int nh_idx,
+ u64 delta_packets);
/* caller is holding rcu or rtnl; no reference taken to nexthop */
struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
@@ -317,7 +346,7 @@ static inline
int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
u8 rt_family)
{
- struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
int i;
for (i = 0; i < nhg->num_nh; i++) {
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 5dee575fbe86..3d07abacf08b 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -196,7 +196,7 @@ struct nfc_dev {
};
#define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev)
-extern struct class nfc_class;
+extern const struct class nfc_class;
struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops,
u32 supported_protocols,
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 6aef8cb11cc8..b2499f88f8f8 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -46,6 +46,7 @@ struct net_protocol {
* socket lookup?
*/
icmp_strict_tag_validation:1;
+ u32 secret;
};
#if IS_ENABLED(CONFIG_IPV6)
@@ -59,6 +60,7 @@ struct inet6_protocol {
__be32 info);
unsigned int flags; /* INET6_PROTO_xxx */
+ u32 secret;
};
#define INET6_PROTO_NOPOLICY 0x1
@@ -68,6 +70,7 @@ struct inet6_protocol {
struct net_offload {
struct offload_callbacks callbacks;
unsigned int flags; /* Flags used by IPv6 for now */
+ u32 secret;
};
/* This should be set for any extension header which is compatible with GSO. */
#define INET6_PROTO_GSO_EXTHDR 0x1
diff --git a/include/net/rps.h b/include/net/rps.h
new file mode 100644
index 000000000000..7660243e905b
--- /dev/null
+++ b/include/net/rps.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_RPS_H
+#define _NET_RPS_H
+
+#include <linux/types.h>
+#include <linux/static_key.h>
+#include <net/sock.h>
+#include <net/hotdata.h>
+
+#ifdef CONFIG_RPS
+
+extern struct static_key_false rps_needed;
+extern struct static_key_false rfs_needed;
+
+/*
+ * This structure holds an RPS map which can be of variable length. The
+ * map is an array of CPUs.
+ */
+struct rps_map {
+ unsigned int len;
+ struct rcu_head rcu;
+ u16 cpus[];
+};
+#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
+
+/*
+ * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
+ * tail pointer for that CPU's input queue at the time of last enqueue, and
+ * a hardware filter index.
+ */
+struct rps_dev_flow {
+ u16 cpu;
+ u16 filter;
+ unsigned int last_qtail;
+};
+#define RPS_NO_FILTER 0xffff
+
+/*
+ * The rps_dev_flow_table structure contains a table of flow mappings.
+ */
+struct rps_dev_flow_table {
+ unsigned int mask;
+ struct rcu_head rcu;
+ struct rps_dev_flow flows[];
+};
+#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
+ ((_num) * sizeof(struct rps_dev_flow)))
+
+/*
+ * The rps_sock_flow_table contains mappings of flows to the last CPU
+ * on which they were processed by the application (set in recvmsg).
+ * Each entry is a 32bit value. Upper part is the high-order bits
+ * of flow hash, lower part is CPU number.
+ * rps_cpu_mask is used to partition the space, depending on number of
+ * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
+ * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
+ * meaning we use 32-6=26 bits for the hash.
+ */
+struct rps_sock_flow_table {
+ u32 mask;
+
+ u32 ents[] ____cacheline_aligned_in_smp;
+};
+#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
+
+#define RPS_NO_CPU 0xffff
+
+static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
+ u32 hash)
+{
+ unsigned int index = hash & table->mask;
+ u32 val = hash & ~net_hotdata.rps_cpu_mask;
+
+ /* We only give a hint, preemption can change CPU under us */
+ val |= raw_smp_processor_id();
+
+ /* The following WRITE_ONCE() is paired with the READ_ONCE()
+ * here, and another one in get_rps_cpu().
+ */
+ if (READ_ONCE(table->ents[index]) != val)
+ WRITE_ONCE(table->ents[index], val);
+}
+
+#endif /* CONFIG_RPS */
+
+static inline void sock_rps_record_flow_hash(__u32 hash)
+{
+#ifdef CONFIG_RPS
+ struct rps_sock_flow_table *sock_flow_table;
+
+ if (!hash)
+ return;
+ rcu_read_lock();
+ sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table);
+ if (sock_flow_table)
+ rps_record_sock_flow(sock_flow_table, hash);
+ rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+ if (static_branch_unlikely(&rfs_needed)) {
+ /* Reading sk->sk_rxhash might incur an expensive cache line
+ * miss.
+ *
+ * TCP_ESTABLISHED does cover almost all states where RFS
+ * might be useful, and is cheaper [1] than testing :
+ * IPv4: inet_sk(sk)->inet_daddr
+ * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
+ * OR an additional socket flag
+ * [1] : sk_state and sk_prot are in the same cache line.
+ */
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* This READ_ONCE() is paired with the WRITE_ONCE()
+ * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
+ */
+ sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
+ }
+ }
+#endif
+}
+
+#endif /* _NET_RPS_H */
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 6506221c5fe3..3bfb80bad173 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -12,6 +12,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
enum rtnl_link_flags {
RTNL_FLAG_DOIT_UNLOCKED = BIT(0),
RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1),
+ RTNL_FLAG_DUMP_UNLOCKED = BIT(2),
};
enum rtnl_kinds {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 934fdb977551..cefe0c4bdae3 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -238,12 +238,7 @@ static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq)
{
-#ifdef CONFIG_BQL
- /* Non-BQL migrated drivers will return 0, too. */
- return dql_avail(&txq->dql);
-#else
- return 0;
-#endif
+ return netdev_queue_dql_avail(txq);
}
struct Qdisc_class_ops {
diff --git a/include/net/sock.h b/include/net/sock.h
index 796a902cf4c1..b5e00702acc1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1117,41 +1117,6 @@ static inline void sk_incoming_cpu_update(struct sock *sk)
WRITE_ONCE(sk->sk_incoming_cpu, cpu);
}
-static inline void sock_rps_record_flow_hash(__u32 hash)
-{
-#ifdef CONFIG_RPS
- struct rps_sock_flow_table *sock_flow_table;
-
- rcu_read_lock();
- sock_flow_table = rcu_dereference(rps_sock_flow_table);
- rps_record_sock_flow(sock_flow_table, hash);
- rcu_read_unlock();
-#endif
-}
-
-static inline void sock_rps_record_flow(const struct sock *sk)
-{
-#ifdef CONFIG_RPS
- if (static_branch_unlikely(&rfs_needed)) {
- /* Reading sk->sk_rxhash might incur an expensive cache line
- * miss.
- *
- * TCP_ESTABLISHED does cover almost all states where RFS
- * might be useful, and is cheaper [1] than testing :
- * IPv4: inet_sk(sk)->inet_daddr
- * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
- * OR an additional socket flag
- * [1] : sk_state and sk_prot are in the same cache line.
- */
- if (sk->sk_state == TCP_ESTABLISHED) {
- /* This READ_ONCE() is paired with the WRITE_ONCE()
- * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
- */
- sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
- }
- }
-#endif
-}
static inline void sock_rps_save_rxhash(struct sock *sk,
const struct sk_buff *skb)
@@ -1443,6 +1408,7 @@ sk_memory_allocated(const struct sock *sk)
/* 1 MB per cpu, in page units */
#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
+extern int sysctl_mem_pcpu_rsv;
static inline void
sk_memory_allocated_add(struct sock *sk, int amt)
@@ -1451,7 +1417,7 @@ sk_memory_allocated_add(struct sock *sk, int amt)
preempt_disable();
local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
- if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
+ if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) {
__this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
}
@@ -1465,7 +1431,7 @@ sk_memory_allocated_sub(struct sock *sk, int amt)
preempt_disable();
local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
- if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
+ if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) {
__this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
}
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 33bf92dff0af..6ae35199d3b3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -348,7 +348,7 @@ void tcp_wfree(struct sk_buff *skb);
void tcp_write_timer_handler(struct sock *sk);
void tcp_delack_timer_handler(struct sock *sk);
int tcp_ioctl(struct sock *sk, int cmd, int *karg);
-int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
+enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_space_adjust(struct sock *sk);
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
@@ -396,8 +396,8 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req, bool fastopen,
bool *lost_race);
-int tcp_child_process(struct sock *parent, struct sock *child,
- struct sk_buff *skb);
+enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child,
+ struct sk_buff *skb);
void tcp_enter_loss(struct sock *sk);
void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag);
void tcp_clear_retrans(struct tcp_sock *tp);