summaryrefslogtreecommitdiff
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/bluetooth/hci_core.h7
-rw-r--r--include/net/gro.h43
-rw-r--r--include/net/inet_sock.h9
-rw-r--r--include/net/ip.h17
-rw-r--r--include/net/ipv6.h8
-rw-r--r--include/net/netfilter/nf_tables.h121
-rw-r--r--include/net/route.h4
-rw-r--r--include/net/rtnetlink.h4
-rw-r--r--include/net/sock.h11
-rw-r--r--include/net/tcp.h31
-rw-r--r--include/net/vxlan.h17
-rw-r--r--include/net/xfrm.h1
12 files changed, 164 insertions, 109 deletions
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 9654567cfae3..e01d52cb668c 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -593,9 +593,7 @@ struct hci_dev {
const char *fw_info;
struct dentry *debugfs;
-#ifdef CONFIG_DEV_COREDUMP
struct hci_devcoredump dump;
-#endif
struct device dev;
@@ -822,6 +820,7 @@ struct hci_conn_params {
struct hci_conn *conn;
bool explicit_connect;
+ /* Accessed without hdev->lock: */
hci_conn_flags_t flags;
u8 privacy_mode;
};
@@ -1573,7 +1572,11 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
bdaddr_t *addr, u8 addr_type);
void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type);
void hci_conn_params_clear_disabled(struct hci_dev *hdev);
+void hci_conn_params_free(struct hci_conn_params *param);
+void hci_pend_le_list_del_init(struct hci_conn_params *param);
+void hci_pend_le_list_add(struct hci_conn_params *param,
+ struct list_head *list);
struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
bdaddr_t *addr,
u8 addr_type);
diff --git a/include/net/gro.h b/include/net/gro.h
index 75efa6fb8441..88644b3ca660 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -452,6 +452,49 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
gro_normal_list(napi);
}
+/* This function is the alternative of 'inet_iif' and 'inet_sdif'
+ * functions in case we can not rely on fields of IPCB.
+ *
+ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
+ * The caller must hold the RCU read lock.
+ */
+static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
+{
+ *iif = inet_iif(skb) ?: skb->dev->ifindex;
+ *sdif = 0;
+
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (netif_is_l3_slave(skb->dev)) {
+ struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
+
+ *sdif = *iif;
+ *iif = master ? master->ifindex : 0;
+ }
+#endif
+}
+
+/* This function is the alternative of 'inet6_iif' and 'inet6_sdif'
+ * functions in case we can not rely on fields of IP6CB.
+ *
+ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
+ * The caller must hold the RCU read lock.
+ */
+static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
+{
+ /* using skb->dev->ifindex because skb_dst(skb) is not initialized */
+ *iif = skb->dev->ifindex;
+ *sdif = 0;
+
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (netif_is_l3_slave(skb->dev)) {
+ struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
+
+ *sdif = *iif;
+ *iif = master ? master->ifindex : 0;
+ }
+#endif
+}
+
extern struct list_head offload_base;
#endif /* _NET_IPV6_GRO_H */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index caa20a905531..491ceb7ebe5d 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -107,11 +107,12 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
{
- if (!sk->sk_mark &&
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
+ u32 mark = READ_ONCE(sk->sk_mark);
+
+ if (!mark && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
return skb->mark;
- return sk->sk_mark;
+ return mark;
}
static inline int inet_request_bound_dev_if(const struct sock *sk,
@@ -221,8 +222,8 @@ struct inet_sock {
__s16 uc_ttl;
__u16 cmsg_flags;
struct ip_options_rcu __rcu *inet_opt;
+ atomic_t inet_id;
__be16 inet_sport;
- __u16 inet_id;
__u8 tos;
__u8 min_ttl;
diff --git a/include/net/ip.h b/include/net/ip.h
index 50d435855ae2..19adacd5ece0 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -93,7 +93,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
{
ipcm_init(ipcm);
- ipcm->sockc.mark = inet->sk.sk_mark;
+ ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
ipcm->sockc.tsflags = inet->sk.sk_tsflags;
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
@@ -538,8 +538,19 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb,
* generator as much as we can.
*/
if (sk && inet_sk(sk)->inet_daddr) {
- iph->id = htons(inet_sk(sk)->inet_id);
- inet_sk(sk)->inet_id += segs;
+ int val;
+
+ /* avoid atomic operations for TCP,
+ * as we hold socket lock at this point.
+ */
+ if (sk_is_tcp(sk)) {
+ sock_owned_by_me(sk);
+ val = atomic_read(&inet_sk(sk)->inet_id);
+ atomic_set(&inet_sk(sk)->inet_id, val + segs);
+ } else {
+ val = atomic_add_return(segs, &inet_sk(sk)->inet_id);
+ }
+ iph->id = htons(val);
return;
}
if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7332296eca44..2acc4c808d45 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -752,12 +752,8 @@ static inline u32 ipv6_addr_hash(const struct in6_addr *a)
/* more secured version of ipv6_addr_hash() */
static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval)
{
- u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1];
-
- return jhash_3words(v,
- (__force u32)a->s6_addr32[2],
- (__force u32)a->s6_addr32[3],
- initval);
+ return jhash2((__force const u32 *)a->s6_addr32,
+ ARRAY_SIZE(a->s6_addr32), initval);
}
static inline bool ipv6_addr_loopback(const struct in6_addr *a)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 640441a2f926..e9ae567c037d 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -512,6 +512,7 @@ struct nft_set_elem_expr {
*
* @list: table set list node
* @bindings: list of set bindings
+ * @refs: internal refcounting for async set destruction
* @table: table this set belongs to
* @net: netnamespace this set belongs to
* @name: name of the set
@@ -533,6 +534,7 @@ struct nft_set_elem_expr {
* @expr: stateful expression
* @ops: set ops
* @flags: set flags
+ * @dead: set will be freed, never cleared
* @genmask: generation mask
* @klen: key length
* @dlen: data length
@@ -541,6 +543,7 @@ struct nft_set_elem_expr {
struct nft_set {
struct list_head list;
struct list_head bindings;
+ refcount_t refs;
struct nft_table *table;
possible_net_t net;
char *name;
@@ -562,7 +565,8 @@ struct nft_set {
struct list_head pending_update;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
- u16 flags:14,
+ u16 flags:13,
+ dead:1,
genmask:2;
u8 klen;
u8 dlen;
@@ -596,7 +600,6 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
const struct nft_set *set);
-void *nft_set_catchall_gc(const struct nft_set *set);
static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
{
@@ -813,62 +816,6 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
const struct nft_set *set, void *elem);
-/**
- * struct nft_set_gc_batch_head - nf_tables set garbage collection batch
- *
- * @rcu: rcu head
- * @set: set the elements belong to
- * @cnt: count of elements
- */
-struct nft_set_gc_batch_head {
- struct rcu_head rcu;
- const struct nft_set *set;
- unsigned int cnt;
-};
-
-#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \
- sizeof(struct nft_set_gc_batch_head)) / \
- sizeof(void *))
-
-/**
- * struct nft_set_gc_batch - nf_tables set garbage collection batch
- *
- * @head: GC batch head
- * @elems: garbage collection elements
- */
-struct nft_set_gc_batch {
- struct nft_set_gc_batch_head head;
- void *elems[NFT_SET_GC_BATCH_SIZE];
-};
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
- gfp_t gfp);
-void nft_set_gc_batch_release(struct rcu_head *rcu);
-
-static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
-{
- if (gcb != NULL)
- call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
-}
-
-static inline struct nft_set_gc_batch *
-nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
- gfp_t gfp)
-{
- if (gcb != NULL) {
- if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
- return gcb;
- nft_set_gc_batch_complete(gcb);
- }
- return nft_set_gc_batch_alloc(set, gfp);
-}
-
-static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
- void *elem)
-{
- gcb->elems[gcb->head.cnt++] = elem;
-}
-
struct nft_expr_ops;
/**
* struct nft_expr_type - nf_tables expression type
@@ -1557,39 +1504,30 @@ static inline void nft_set_elem_change_active(const struct net *net,
#endif /* IS_ENABLED(CONFIG_NF_TABLES) */
-/*
- * We use a free bit in the genmask field to indicate the element
- * is busy, meaning it is currently being processed either by
- * the netlink API or GC.
- *
- * Even though the genmask is only a single byte wide, this works
- * because the extension structure if fully constant once initialized,
- * so there are no non-atomic write accesses unless it is already
- * marked busy.
- */
-#define NFT_SET_ELEM_BUSY_MASK (1 << 2)
+#define NFT_SET_ELEM_DEAD_MASK (1 << 2)
#if defined(__LITTLE_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT 2
+#define NFT_SET_ELEM_DEAD_BIT 2
#elif defined(__BIG_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
+#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
#else
#error
#endif
-static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
+static inline void nft_set_elem_dead(struct nft_set_ext *ext)
{
unsigned long *word = (unsigned long *)ext;
BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
- return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word);
+ set_bit(NFT_SET_ELEM_DEAD_BIT, word);
}
-static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
+static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
{
unsigned long *word = (unsigned long *)ext;
- clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
+ BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
+ return test_bit(NFT_SET_ELEM_DEAD_BIT, word);
}
/**
@@ -1732,6 +1670,38 @@ struct nft_trans_flowtable {
#define nft_trans_flowtable_flags(trans) \
(((struct nft_trans_flowtable *)trans->data)->flags)
+#define NFT_TRANS_GC_BATCHCOUNT 256
+
+struct nft_trans_gc {
+ struct list_head list;
+ struct net *net;
+ struct nft_set *set;
+ u32 seq;
+ u8 count;
+ void *priv[NFT_TRANS_GC_BATCHCOUNT];
+ struct rcu_head rcu;
+};
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+ unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_destroy(struct nft_trans_gc *trans);
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc);
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp);
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans);
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv);
+
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+ unsigned int gc_seq);
+
+void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
int __init nft_chain_filter_init(void);
void nft_chain_filter_fini(void);
@@ -1758,6 +1728,7 @@ struct nftables_pernet {
struct mutex commit_mutex;
u64 table_handle;
unsigned int base_seq;
+ unsigned int gc_seq;
};
extern unsigned int nf_tables_net_id;
diff --git a/include/net/route.h b/include/net/route.h
index 5a5c726472bd..8c2a8e7d8f8e 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -168,7 +168,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
__be16 dport, __be16 sport,
__u8 proto, __u8 tos, int oif)
{
- flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos,
+ flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos,
RT_SCOPE_UNIVERSE, proto,
sk ? inet_sk_flowi_flags(sk) : 0,
daddr, saddr, dport, sport, sock_net_uid(net, sk));
@@ -301,7 +301,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
if (inet_sk(sk)->transparent)
flow_flags |= FLOWI_FLAG_ANYSRC;
- flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk),
+ flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk),
ip_sock_rt_scope(sk), protocol, flow_flags, dst,
src, dport, sport, sk->sk_uid);
}
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index d9076a7a430c..6506221c5fe3 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -190,8 +190,8 @@ int rtnl_delete_link(struct net_device *dev, u32 portid, const struct nlmsghdr *
int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm,
u32 portid, const struct nlmsghdr *nlh);
-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
- struct netlink_ext_ack *exterr);
+int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
+ struct netlink_ext_ack *exterr);
struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid);
#define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind)
diff --git a/include/net/sock.h b/include/net/sock.h
index 2eb916d1ff64..690e22139543 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1323,6 +1323,7 @@ struct proto {
/*
* Pressure flag: try to collapse.
* Technical note: it is used by multiple contexts non atomically.
+ * Make sure to use READ_ONCE()/WRITE_ONCE() for all reads/writes.
* All the __sk_mem_schedule() is of this nature: accounting
* is strict, actions are advisory and have some latency.
*/
@@ -1420,6 +1421,12 @@ static inline bool sk_has_memory_pressure(const struct sock *sk)
return sk->sk_prot->memory_pressure != NULL;
}
+static inline bool sk_under_global_memory_pressure(const struct sock *sk)
+{
+ return sk->sk_prot->memory_pressure &&
+ !!READ_ONCE(*sk->sk_prot->memory_pressure);
+}
+
static inline bool sk_under_memory_pressure(const struct sock *sk)
{
if (!sk->sk_prot->memory_pressure)
@@ -1429,7 +1436,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
mem_cgroup_under_socket_pressure(sk->sk_memcg))
return true;
- return !!*sk->sk_prot->memory_pressure;
+ return !!READ_ONCE(*sk->sk_prot->memory_pressure);
}
static inline long
@@ -1506,7 +1513,7 @@ proto_memory_pressure(struct proto *prot)
{
if (!prot->memory_pressure)
return false;
- return !!*prot->memory_pressure;
+ return !!READ_ONCE(*prot->memory_pressure);
}
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 226bce6d1e8c..0ca972ebd3dd 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1509,25 +1509,38 @@ void tcp_leave_memory_pressure(struct sock *sk);
static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
+ int val;
- return tp->keepalive_intvl ? :
- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
+ /* Paired with WRITE_ONCE() in tcp_sock_set_keepintvl()
+ * and do_tcp_setsockopt().
+ */
+ val = READ_ONCE(tp->keepalive_intvl);
+
+ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
}
static inline int keepalive_time_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
+ int val;
- return tp->keepalive_time ? :
- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
+ /* Paired with WRITE_ONCE() in tcp_sock_set_keepidle_locked() */
+ val = READ_ONCE(tp->keepalive_time);
+
+ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}
static inline int keepalive_probes(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
+ int val;
- return tp->keepalive_probes ? :
- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
+ /* Paired with WRITE_ONCE() in tcp_sock_set_keepcnt()
+ * and do_tcp_setsockopt().
+ */
+ val = READ_ONCE(tp->keepalive_probes);
+
+ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
}
static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
@@ -2048,7 +2061,11 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
+ u32 val;
+
+ val = READ_ONCE(tp->notsent_lowat);
+
+ return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
}
bool tcp_stream_memory_free(const struct sock *sk, int wake);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 0be91ca78d3a..6a9f8a5f387c 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -386,10 +386,15 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
return features;
}
-/* IP header + UDP + VXLAN + Ethernet header */
-#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
-/* IPv6 header + UDP + VXLAN + Ethernet header */
-#define VXLAN6_HEADROOM (40 + 8 + 8 + 14)
+static inline int vxlan_headroom(u32 flags)
+{
+ /* VXLAN: IP4/6 header + UDP + VXLAN + Ethernet header */
+ /* VXLAN-GPE: IP4/6 header + UDP + VXLAN */
+ return (flags & VXLAN_F_IPV6 ? sizeof(struct ipv6hdr) :
+ sizeof(struct iphdr)) +
+ sizeof(struct udphdr) + sizeof(struct vxlanhdr) +
+ (flags & VXLAN_F_GPE ? 0 : ETH_HLEN);
+}
static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb)
{
@@ -551,12 +556,12 @@ static inline void vxlan_flag_attr_error(int attrtype,
}
static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh,
- int hash,
+ u32 hash,
struct vxlan_rdst *rdst)
{
struct fib_nh_common *nhc;
- nhc = nexthop_path_fdb_result(nh, hash);
+ nhc = nexthop_path_fdb_result(nh, hash >> 1);
if (unlikely(!nhc))
return false;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 151ca95dd08d..363c7d510554 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1984,6 +1984,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
if (dev->xfrmdev_ops->xdo_dev_state_free)
dev->xfrmdev_ops->xdo_dev_state_free(x);
xso->dev = NULL;
+ xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
netdev_put(dev, &xso->dev_tracker);
}
}