diff options
Diffstat (limited to 'include/net')
-rw-r--r-- | include/net/addrconf.h | 2 | ||||
-rw-r--r-- | include/net/dropreason-core.h | 26 | ||||
-rw-r--r-- | include/net/eee.h | 38 | ||||
-rw-r--r-- | include/net/genetlink.h | 2 | ||||
-rw-r--r-- | include/net/gro.h | 39 | ||||
-rw-r--r-- | include/net/hotdata.h | 52 | ||||
-rw-r--r-- | include/net/if_inet6.h | 4 | ||||
-rw-r--r-- | include/net/inet_sock.h | 1 | ||||
-rw-r--r-- | include/net/ioam6.h | 4 | ||||
-rw-r--r-- | include/net/ip6_route.h | 2 | ||||
-rw-r--r-- | include/net/ip_fib.h | 1 | ||||
-rw-r--r-- | include/net/ipv6.h | 8 | ||||
-rw-r--r-- | include/net/mctp.h | 1 | ||||
-rw-r--r-- | include/net/netdev_queues.h | 56 | ||||
-rw-r--r-- | include/net/nexthop.h | 31 | ||||
-rw-r--r-- | include/net/nfc/nfc.h | 2 | ||||
-rw-r--r-- | include/net/protocol.h | 3 | ||||
-rw-r--r-- | include/net/rps.h | 125 | ||||
-rw-r--r-- | include/net/rtnetlink.h | 1 | ||||
-rw-r--r-- | include/net/sch_generic.h | 7 | ||||
-rw-r--r-- | include/net/sock.h | 40 | ||||
-rw-r--r-- | include/net/tcp.h | 6 |
22 files changed, 372 insertions, 79 deletions
diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 30d6f1e84e46..9d06eb945509 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -417,7 +417,7 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev) if (unlikely(!idev)) return true; - return !!idev->cnf.ignore_routes_with_linkdown; + return !!READ_ONCE(idev->cnf.ignore_routes_with_linkdown); } void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 6d3a20163260..9707ab54fdd5 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -30,6 +30,7 @@ FN(TCP_AOFAILURE) \ FN(SOCKET_BACKLOG) \ FN(TCP_FLAGS) \ + FN(TCP_ABORT_ON_DATA) \ FN(TCP_ZEROWINDOW) \ FN(TCP_OLD_DATA) \ FN(TCP_OVERWINDOW) \ @@ -37,6 +38,7 @@ FN(TCP_RFC7323_PAWS) \ FN(TCP_OLD_SEQUENCE) \ FN(TCP_INVALID_SEQUENCE) \ + FN(TCP_INVALID_ACK_SEQUENCE) \ FN(TCP_RESET) \ FN(TCP_INVALID_SYN) \ FN(TCP_CLOSE) \ @@ -54,6 +56,7 @@ FN(NEIGH_QUEUEFULL) \ FN(NEIGH_DEAD) \ FN(TC_EGRESS) \ + FN(SECURITY_HOOK) \ FN(QDISC_DROP) \ FN(CPU_BACKLOG) \ FN(XDP) \ @@ -105,7 +108,13 @@ enum skb_drop_reason { SKB_CONSUMED, /** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */ SKB_DROP_REASON_NOT_SPECIFIED, - /** @SKB_DROP_REASON_NO_SOCKET: socket not found */ + /** + * @SKB_DROP_REASON_NO_SOCKET: no valid socket that can be used. + * Reason could be one of three cases: + * 1) no established/listening socket found during lookup process + * 2) no valid request socket during 3WHS process + * 3) no valid child socket during 3WHS process + */ SKB_DROP_REASON_NO_SOCKET, /** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */ SKB_DROP_REASON_PKT_TOO_SMALL, @@ -198,6 +207,11 @@ enum skb_drop_reason { /** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */ SKB_DROP_REASON_TCP_FLAGS, /** + * @SKB_DROP_REASON_TCP_ABORT_ON_DATA: abort on data, corresponding to + * LINUX_MIB_TCPABORTONDATA + */ + SKB_DROP_REASON_TCP_ABORT_ON_DATA, + /** * @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero, * see LINUX_MIB_TCPZEROWINDOWDROP */ @@ -221,13 +235,19 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_OFOMERGE, /** * @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to - * LINUX_MIB_PAWSESTABREJECTED + * LINUX_MIB_PAWSESTABREJECTED, LINUX_MIB_PAWSACTIVEREJECTED */ SKB_DROP_REASON_TCP_RFC7323_PAWS, /** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */ SKB_DROP_REASON_TCP_OLD_SEQUENCE, /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */ SKB_DROP_REASON_TCP_INVALID_SEQUENCE, + /** + * @SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE: Not acceptable ACK SEQ + * field because ack sequence is not in the window between snd_una + * and snd_nxt + */ + SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE, /** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */ SKB_DROP_REASON_TCP_RESET, /** @@ -271,6 +291,8 @@ enum skb_drop_reason { SKB_DROP_REASON_NEIGH_DEAD, /** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */ SKB_DROP_REASON_TC_EGRESS, + /** @SKB_DROP_REASON_SECURITY_HOOK: dropped due to security HOOK */ + SKB_DROP_REASON_SECURITY_HOOK, /** * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting ( * failed to enqueue to current qdisc) diff --git a/include/net/eee.h b/include/net/eee.h new file mode 100644 index 000000000000..84837aba3cd9 --- /dev/null +++ b/include/net/eee.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _EEE_H +#define _EEE_H + +#include <linux/types.h> + +struct eee_config { + u32 tx_lpi_timer; + bool tx_lpi_enabled; + bool eee_enabled; +}; + +static inline bool eeecfg_mac_can_tx_lpi(const struct eee_config *eeecfg) +{ + /* eee_enabled is the master on/off */ + if (!eeecfg->eee_enabled || !eeecfg->tx_lpi_enabled) + return false; + + return true; +} + +static inline void eeecfg_to_eee(struct ethtool_keee *eee, + const struct eee_config *eeecfg) +{ + eee->tx_lpi_timer = eeecfg->tx_lpi_timer; + eee->tx_lpi_enabled = eeecfg->tx_lpi_enabled; + eee->eee_enabled = eeecfg->eee_enabled; +} + +static inline void eee_to_eeecfg(struct eee_config *eeecfg, + const struct ethtool_keee *eee) +{ + eeecfg->tx_lpi_timer = eee->tx_lpi_timer; + eeecfg->tx_lpi_enabled = eee->tx_lpi_enabled; + eeecfg->eee_enabled = eee->eee_enabled; +} + +#endif diff --git a/include/net/genetlink.h b/include/net/genetlink.h index ecadba836ae5..9ece6e5a3ea8 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -153,7 +153,7 @@ static inline void *genl_info_userhdr(const struct genl_info *info) /* Report that a root attribute is missing */ #define GENL_REQ_ATTR_CHECK(info, attr) ({ \ - struct genl_info *__info = (info); \ + const struct genl_info *__info = (info); \ \ NL_REQ_ATTR_CHECK(__info->extack, NULL, __info->attrs, (attr)); \ }) diff --git a/include/net/gro.h b/include/net/gro.h index b435f0ddbf64..d6fc8fbd3730 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -9,6 +9,7 @@ #include <net/ip6_checksum.h> #include <linux/skbuff.h> #include <net/udp.h> +#include <net/hotdata.h> struct napi_gro_cb { union { @@ -139,21 +140,16 @@ static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) NAPI_GRO_CB(skb)->data_offset += len; } -static inline void *skb_gro_header_fast(struct sk_buff *skb, +static inline void *skb_gro_header_fast(const struct sk_buff *skb, unsigned int offset) { return NAPI_GRO_CB(skb)->frag0 + offset; } -static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) +static inline bool skb_gro_may_pull(const struct sk_buff *skb, + unsigned int hlen) { - return NAPI_GRO_CB(skb)->frag0_len < hlen; -} - -static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) -{ - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; + return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len); } static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, @@ -162,28 +158,30 @@ static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, if (!pskb_may_pull(skb, hlen)) return NULL; - skb_gro_frag0_invalidate(skb); return skb->data + offset; } -static inline void *skb_gro_header(struct sk_buff *skb, - unsigned int hlen, unsigned int offset) +static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen, + unsigned int offset) { void *ptr; ptr = skb_gro_header_fast(skb, offset); - if (skb_gro_header_hard(skb, hlen)) + if (!skb_gro_may_pull(skb, hlen)) ptr = skb_gro_header_slow(skb, hlen, offset); return ptr; } -static inline void *skb_gro_network_header(struct sk_buff *skb) +static inline void *skb_gro_network_header(const struct sk_buff *skb) { - return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + - skb_network_offset(skb); + if (skb_gro_may_pull(skb, skb_gro_offset(skb))) + return skb_gro_header_fast(skb, skb_network_offset(skb)); + + return skb_network_header(skb); } -static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) +static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb, + int proto) { const struct iphdr *iph = skb_gro_network_header(skb); @@ -421,7 +419,8 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) return uh; } -static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) +static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb, + int proto) { const struct ipv6hdr *iph = skb_gro_network_header(skb); @@ -448,7 +447,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, { list_add_tail(&skb->list, &napi->rx_list); napi->rx_count += segs; - if (napi->rx_count >= READ_ONCE(gro_normal_batch)) + if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch)) gro_normal_list(napi); } @@ -495,6 +494,4 @@ static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int * #endif } -extern struct list_head offload_base; - #endif /* _NET_IPV6_GRO_H */ diff --git a/include/net/hotdata.h b/include/net/hotdata.h new file mode 100644 index 000000000000..003667a1efd6 --- /dev/null +++ b/include/net/hotdata.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_HOTDATA_H +#define _NET_HOTDATA_H + +#include <linux/types.h> +#include <linux/netdevice.h> +#include <net/protocol.h> + +/* Read mostly data used in network fast paths. */ +struct net_hotdata { +#if IS_ENABLED(CONFIG_INET) + struct packet_offload ip_packet_offload; + struct net_offload tcpv4_offload; + struct net_protocol tcp_protocol; + struct net_offload udpv4_offload; + struct net_protocol udp_protocol; + struct packet_offload ipv6_packet_offload; + struct net_offload tcpv6_offload; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_protocol tcpv6_protocol; + struct inet6_protocol udpv6_protocol; +#endif + struct net_offload udpv6_offload; +#endif + struct list_head offload_base; + struct list_head ptype_all; + struct kmem_cache *skbuff_cache; + struct kmem_cache *skbuff_fclone_cache; + struct kmem_cache *skb_small_head_cache; +#ifdef CONFIG_RPS + struct rps_sock_flow_table __rcu *rps_sock_flow_table; + u32 rps_cpu_mask; +#endif + int gro_normal_batch; + int netdev_budget; + int netdev_budget_usecs; + int tstamp_prequeue; + int max_backlog; + int dev_tx_weight; + int dev_rx_weight; +}; + +#define inet_ehash_secret net_hotdata.tcp_protocol.secret +#define udp_ehash_secret net_hotdata.udp_protocol.secret +#define inet6_ehash_secret net_hotdata.tcpv6_protocol.secret +#define tcp_ipv6_hash_secret net_hotdata.tcpv6_offload.secret +#define udp6_ehash_secret net_hotdata.udpv6_protocol.secret +#define udp_ipv6_hash_secret net_hotdata.udpv6_offload.secret + +extern struct net_hotdata net_hotdata; + +#endif /* _NET_HOTDATA_H */ diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index f07642264c1e..238ad3349456 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -144,7 +144,7 @@ struct ipv6_ac_socklist { struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; - struct ifacaddr6 *aca_next; + struct ifacaddr6 __rcu *aca_next; struct hlist_node aca_addr_lst; int aca_users; refcount_t aca_refcnt; @@ -196,7 +196,7 @@ struct inet6_dev { spinlock_t mc_report_lock; /* mld query report lock */ struct mutex mc_lock; /* mld global lock */ - struct ifacaddr6 *ac_list; + struct ifacaddr6 __rcu *ac_list; rwlock_t lock; refcount_t refcnt; __u32 if_flags; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index d94c242eb3ed..f9ddd47dc4f8 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -274,6 +274,7 @@ enum { INET_FLAGS_REPFLOW = 27, INET_FLAGS_RTALERT_ISOLATE = 28, INET_FLAGS_SNDFLOW = 29, + INET_FLAGS_RTALERT = 30, }; /* cmsg flags for inet */ diff --git a/include/net/ioam6.h b/include/net/ioam6.h index 781d2d8b2f29..2cbbee6e806a 100644 --- a/include/net/ioam6.h +++ b/include/net/ioam6.h @@ -12,6 +12,7 @@ #include <linux/net.h> #include <linux/ipv6.h> #include <linux/ioam6.h> +#include <linux/ioam6_genl.h> #include <linux/rhashtable-types.h> struct ioam6_namespace { @@ -65,4 +66,7 @@ void ioam6_exit(void); int ioam6_iptunnel_init(void); void ioam6_iptunnel_exit(void); +void ioam6_event(enum ioam6_event_type type, struct net *net, gfp_t gfp, + void *opt, unsigned int opt_len); + #endif /* _NET_IOAM6_H */ diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 52a51c69aa9d..a30c6aa9e5cf 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -332,7 +332,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst rcu_read_lock(); idev = __in6_dev_get(dst->dev); if (idev) - mtu = idev->cnf.mtu6; + mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); out: diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index d4667b7797e3..9b2f69ba5e49 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -264,6 +264,7 @@ struct fib_dump_filter { bool filter_set; bool dump_routes; bool dump_exceptions; + bool rtnl_held; unsigned char protocol; unsigned char rt_type; unsigned int flags; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index cf25ea21d770..88a8e554f7a1 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -534,13 +534,15 @@ static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb) return 0; } -static inline bool ipv6_accept_ra(struct inet6_dev *idev) +static inline bool ipv6_accept_ra(const struct inet6_dev *idev) { + s32 accept_ra = READ_ONCE(idev->cnf.accept_ra); + /* If forwarding is enabled, RA are not accepted unless the special * hybrid mode (accept_ra=2) is enabled. */ - return idev->cnf.forwarding ? idev->cnf.accept_ra == 2 : - idev->cnf.accept_ra; + return READ_ONCE(idev->cnf.forwarding) ? accept_ra == 2 : + accept_ra; } #define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */ diff --git a/include/net/mctp.h b/include/net/mctp.h index 0dfae6f51a32..7b17c52e8ce2 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -250,6 +250,7 @@ struct mctp_route { struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, mctp_eid_t daddr); +/* always takes ownership of skb */ int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 8b8ed4e13d74..1ec408585373 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -4,6 +4,62 @@ #include <linux/netdevice.h> +/* See the netdev.yaml spec for definition of each statistic */ +struct netdev_queue_stats_rx { + u64 bytes; + u64 packets; + u64 alloc_fail; +}; + +struct netdev_queue_stats_tx { + u64 bytes; + u64 packets; +}; + +/** + * struct netdev_stat_ops - netdev ops for fine grained stats + * @get_queue_stats_rx: get stats for a given Rx queue + * @get_queue_stats_tx: get stats for a given Tx queue + * @get_base_stats: get base stats (not belonging to any live instance) + * + * Query stats for a given object. The values of the statistics are undefined + * on entry (specifically they are *not* zero-initialized). Drivers should + * assign values only to the statistics they collect. Statistics which are not + * collected must be left undefined. + * + * Queue objects are not necessarily persistent, and only currently active + * queues are queried by the per-queue callbacks. This means that per-queue + * statistics will not generally add up to the total number of events for + * the device. The @get_base_stats callback allows filling in the delta + * between events for currently live queues and overall device history. + * When the statistics for the entire device are queried, first @get_base_stats + * is issued to collect the delta, and then a series of per-queue callbacks. + * Only statistics which are set in @get_base_stats will be reported + * at the device level, meaning that unlike in queue callbacks, setting + * a statistic to zero in @get_base_stats is a legitimate thing to do. + * This is because @get_base_stats has a second function of designating which + * statistics are in fact correct for the entire device (e.g. when history + * for some of the events is not maintained, and reliable "total" cannot + * be provided). + * + * Device drivers can assume that when collecting total device stats, + * the @get_base_stats and subsequent per-queue calls are performed + * "atomically" (without releasing the rtnl_lock). + * + * Device drivers are encouraged to reset the per-queue statistics when + * number of queues change. This is because the primary use case for + * per-queue statistics is currently to detect traffic imbalance. + */ +struct netdev_stat_ops { + void (*get_queue_stats_rx)(struct net_device *dev, int idx, + struct netdev_queue_stats_rx *stats); + void (*get_queue_stats_tx)(struct net_device *dev, int idx, + struct netdev_queue_stats_tx *stats); + void (*get_base_stats)(struct net_device *dev, + struct netdev_queue_stats_rx *rx, + struct netdev_queue_stats_tx *tx); +}; + /** * DOC: Lockless queue stopping / waking helpers. * diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 6647ad509faa..7ec9cc80f11c 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -47,6 +47,8 @@ struct nh_config { bool nh_grp_res_has_idle_timer; bool nh_grp_res_has_unbalanced_timer; + bool nh_hw_stats; + struct nlattr *nh_encap; u16 nh_encap_type; @@ -95,8 +97,14 @@ struct nh_res_table { struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets); }; +struct nh_grp_entry_stats { + u64_stats_t packets; + struct u64_stats_sync syncp; +}; + struct nh_grp_entry { struct nexthop *nh; + struct nh_grp_entry_stats __percpu *stats; u8 weight; union { @@ -114,6 +122,7 @@ struct nh_grp_entry { struct list_head nh_list; struct nexthop *nh_parent; /* nexthop of group with this entry */ + u64 packets_hw; }; struct nh_group { @@ -124,6 +133,7 @@ struct nh_group { bool resilient; bool fdb_nh; bool has_v4; + bool hw_stats; struct nh_res_table __rcu *res_table; struct nh_grp_entry nh_entries[] __counted_by(num_nh); @@ -157,6 +167,7 @@ enum nexthop_event_type { NEXTHOP_EVENT_REPLACE, NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, NEXTHOP_EVENT_BUCKET_REPLACE, + NEXTHOP_EVENT_HW_STATS_REPORT_DELTA, }; enum nh_notifier_info_type { @@ -164,6 +175,7 @@ enum nh_notifier_info_type { NH_NOTIFIER_INFO_TYPE_GRP, NH_NOTIFIER_INFO_TYPE_RES_TABLE, NH_NOTIFIER_INFO_TYPE_RES_BUCKET, + NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS, }; struct nh_notifier_single_info { @@ -187,6 +199,7 @@ struct nh_notifier_grp_entry_info { struct nh_notifier_grp_info { u16 num_nh; bool is_fdb; + bool hw_stats; struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh); }; @@ -200,9 +213,21 @@ struct nh_notifier_res_bucket_info { struct nh_notifier_res_table_info { u16 num_nh_buckets; + bool hw_stats; struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets); }; +struct nh_notifier_grp_hw_stats_entry_info { + u32 id; + u64 packets; +}; + +struct nh_notifier_grp_hw_stats_info { + u16 num_nh; + bool hw_stats_used; + struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh); +}; + struct nh_notifier_info { struct net *net; struct netlink_ext_ack *extack; @@ -213,6 +238,7 @@ struct nh_notifier_info { struct nh_notifier_grp_info *nh_grp; struct nh_notifier_res_table_info *nh_res_table; struct nh_notifier_res_bucket_info *nh_res_bucket; + struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats; }; }; @@ -225,6 +251,9 @@ void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, bool offload, bool trap); void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, unsigned long *activity); +void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info, + unsigned int nh_idx, + u64 delta_packets); /* caller is holding rcu or rtnl; no reference taken to nexthop */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id); @@ -317,7 +346,7 @@ static inline int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, u8 rt_family) { - struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); int i; for (i = 0; i < nhg->num_nh; i++) { diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 5dee575fbe86..3d07abacf08b 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -196,7 +196,7 @@ struct nfc_dev { }; #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev) -extern struct class nfc_class; +extern const struct class nfc_class; struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops, u32 supported_protocols, diff --git a/include/net/protocol.h b/include/net/protocol.h index 6aef8cb11cc8..b2499f88f8f8 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -46,6 +46,7 @@ struct net_protocol { * socket lookup? */ icmp_strict_tag_validation:1; + u32 secret; }; #if IS_ENABLED(CONFIG_IPV6) @@ -59,6 +60,7 @@ struct inet6_protocol { __be32 info); unsigned int flags; /* INET6_PROTO_xxx */ + u32 secret; }; #define INET6_PROTO_NOPOLICY 0x1 @@ -68,6 +70,7 @@ struct inet6_protocol { struct net_offload { struct offload_callbacks callbacks; unsigned int flags; /* Flags used by IPv6 for now */ + u32 secret; }; /* This should be set for any extension header which is compatible with GSO. */ #define INET6_PROTO_GSO_EXTHDR 0x1 diff --git a/include/net/rps.h b/include/net/rps.h new file mode 100644 index 000000000000..7660243e905b --- /dev/null +++ b/include/net/rps.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_RPS_H +#define _NET_RPS_H + +#include <linux/types.h> +#include <linux/static_key.h> +#include <net/sock.h> +#include <net/hotdata.h> + +#ifdef CONFIG_RPS + +extern struct static_key_false rps_needed; +extern struct static_key_false rfs_needed; + +/* + * This structure holds an RPS map which can be of variable length. The + * map is an array of CPUs. + */ +struct rps_map { + unsigned int len; + struct rcu_head rcu; + u16 cpus[]; +}; +#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) + +/* + * The rps_dev_flow structure contains the mapping of a flow to a CPU, the + * tail pointer for that CPU's input queue at the time of last enqueue, and + * a hardware filter index. + */ +struct rps_dev_flow { + u16 cpu; + u16 filter; + unsigned int last_qtail; +}; +#define RPS_NO_FILTER 0xffff + +/* + * The rps_dev_flow_table structure contains a table of flow mappings. + */ +struct rps_dev_flow_table { + unsigned int mask; + struct rcu_head rcu; + struct rps_dev_flow flows[]; +}; +#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ + ((_num) * sizeof(struct rps_dev_flow))) + +/* + * The rps_sock_flow_table contains mappings of flows to the last CPU + * on which they were processed by the application (set in recvmsg). + * Each entry is a 32bit value. Upper part is the high-order bits + * of flow hash, lower part is CPU number. + * rps_cpu_mask is used to partition the space, depending on number of + * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 + * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, + * meaning we use 32-6=26 bits for the hash. + */ +struct rps_sock_flow_table { + u32 mask; + + u32 ents[] ____cacheline_aligned_in_smp; +}; +#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) + +#define RPS_NO_CPU 0xffff + +static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, + u32 hash) +{ + unsigned int index = hash & table->mask; + u32 val = hash & ~net_hotdata.rps_cpu_mask; + + /* We only give a hint, preemption can change CPU under us */ + val |= raw_smp_processor_id(); + + /* The following WRITE_ONCE() is paired with the READ_ONCE() + * here, and another one in get_rps_cpu(). + */ + if (READ_ONCE(table->ents[index]) != val) + WRITE_ONCE(table->ents[index], val); +} + +#endif /* CONFIG_RPS */ + +static inline void sock_rps_record_flow_hash(__u32 hash) +{ +#ifdef CONFIG_RPS + struct rps_sock_flow_table *sock_flow_table; + + if (!hash) + return; + rcu_read_lock(); + sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table); + if (sock_flow_table) + rps_record_sock_flow(sock_flow_table, hash); + rcu_read_unlock(); +#endif +} + +static inline void sock_rps_record_flow(const struct sock *sk) +{ +#ifdef CONFIG_RPS + if (static_branch_unlikely(&rfs_needed)) { + /* Reading sk->sk_rxhash might incur an expensive cache line + * miss. + * + * TCP_ESTABLISHED does cover almost all states where RFS + * might be useful, and is cheaper [1] than testing : + * IPv4: inet_sk(sk)->inet_daddr + * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) + * OR an additional socket flag + * [1] : sk_state and sk_prot are in the same cache line. + */ + if (sk->sk_state == TCP_ESTABLISHED) { + /* This READ_ONCE() is paired with the WRITE_ONCE() + * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). + */ + sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); + } + } +#endif +} + +#endif /* _NET_RPS_H */ diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 6506221c5fe3..3bfb80bad173 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -12,6 +12,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); enum rtnl_link_flags { RTNL_FLAG_DOIT_UNLOCKED = BIT(0), RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1), + RTNL_FLAG_DUMP_UNLOCKED = BIT(2), }; enum rtnl_kinds { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 934fdb977551..cefe0c4bdae3 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -238,12 +238,7 @@ static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq) { -#ifdef CONFIG_BQL - /* Non-BQL migrated drivers will return 0, too. */ - return dql_avail(&txq->dql); -#else - return 0; -#endif + return netdev_queue_dql_avail(txq); } struct Qdisc_class_ops { diff --git a/include/net/sock.h b/include/net/sock.h index 796a902cf4c1..b5e00702acc1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1117,41 +1117,6 @@ static inline void sk_incoming_cpu_update(struct sock *sk) WRITE_ONCE(sk->sk_incoming_cpu, cpu); } -static inline void sock_rps_record_flow_hash(__u32 hash) -{ -#ifdef CONFIG_RPS - struct rps_sock_flow_table *sock_flow_table; - - rcu_read_lock(); - sock_flow_table = rcu_dereference(rps_sock_flow_table); - rps_record_sock_flow(sock_flow_table, hash); - rcu_read_unlock(); -#endif -} - -static inline void sock_rps_record_flow(const struct sock *sk) -{ -#ifdef CONFIG_RPS - if (static_branch_unlikely(&rfs_needed)) { - /* Reading sk->sk_rxhash might incur an expensive cache line - * miss. - * - * TCP_ESTABLISHED does cover almost all states where RFS - * might be useful, and is cheaper [1] than testing : - * IPv4: inet_sk(sk)->inet_daddr - * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) - * OR an additional socket flag - * [1] : sk_state and sk_prot are in the same cache line. - */ - if (sk->sk_state == TCP_ESTABLISHED) { - /* This READ_ONCE() is paired with the WRITE_ONCE() - * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). - */ - sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); - } - } -#endif -} static inline void sock_rps_save_rxhash(struct sock *sk, const struct sk_buff *skb) @@ -1443,6 +1408,7 @@ sk_memory_allocated(const struct sock *sk) /* 1 MB per cpu, in page units */ #define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) +extern int sysctl_mem_pcpu_rsv; static inline void sk_memory_allocated_add(struct sock *sk, int amt) @@ -1451,7 +1417,7 @@ sk_memory_allocated_add(struct sock *sk, int amt) preempt_disable(); local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve >= SK_MEMORY_PCPU_RESERVE) { + if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) { __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); } @@ -1465,7 +1431,7 @@ sk_memory_allocated_sub(struct sock *sk, int amt) preempt_disable(); local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) { + if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) { __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); } diff --git a/include/net/tcp.h b/include/net/tcp.h index 33bf92dff0af..6ae35199d3b3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -348,7 +348,7 @@ void tcp_wfree(struct sk_buff *skb); void tcp_write_timer_handler(struct sock *sk); void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, int *karg); -int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); +enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); @@ -396,8 +396,8 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, bool fastopen, bool *lost_race); -int tcp_child_process(struct sock *parent, struct sock *child, - struct sk_buff *skb); +enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb); void tcp_enter_loss(struct sock *sk); void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag); void tcp_clear_retrans(struct tcp_sock *tp); |