From c9f1f58dc2eba550f208809d272bf0b14f41edba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Dec 2017 06:34:19 -0800 Subject: net: sk_pacing_shift_update() helper In commit 3a9b76fd0db9 ("tcp: allow drivers to tweak TSQ logic") I gave a code sample to set sk->sk_pacing_shift that was not complete. Better add a helper that can be used by drivers without worries, and maybe amended in the future. A wifi driver might use it from its ndo_start_xmit(). The following call would set up TCP to allow up to ~8ms of queued data per flow. sk_pacing_shift_update(skb->sk, 7); Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 9155da422692..9a9047268d37 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2407,4 +2407,15 @@ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) return *proto->sysctl_rmem; } +/* Default TCP Small queue budget is ~1 ms of data (1sec >> 10) + * Some wifi drivers need to tweak it to get more chunks. + * They can use this helper from their ndo_start_xmit() + */ +static inline void sk_pacing_shift_update(struct sock *sk, int val) +{ + if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val) + return; + sk->sk_pacing_shift = val; +} + #endif /* _SOCK_H */ -- cgit From 648845ab7e200993dccd3948c719c858368c91e7 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Thu, 14 Dec 2017 05:51:58 -0800 Subject: sock: Move the socket inuse to namespace. In some cases, we want to know how many sockets are in use in different _net_ namespaces. It's a key resource metric. This patch adds a member to struct netns_core. This is a counter for socket-inuse in the _net_ namespace. The patch will increment/decrement the counter in sk_alloc, sk_clone_lock and __sk_free. This patch will not count sockets created in the kernel. 
It's not very useful for userspace to know how many kernel sockets we created. The main reasons for doing this are that: 1. When Linux calls 'do_exit' for a process to exit, the functions 'exit_task_namespaces' and 'exit_task_work' will be called sequentially. 'exit_task_namespaces' may have destroyed the _net_ namespace, but 'sock_release' called in 'exit_task_work' may use the _net_ namespace if we count the socket-inuse in sock_release. 2. socket and sock come in pairs. More importantly, sock holds the _net_ namespace. We count the socket-inuse in sock, to avoid holding the _net_ namespace again in socket. It's an easy way to maintain the code. Signed-off-by: Martin Zhang Signed-off-by: Tonghao Zhang Signed-off-by: David S. Miller --- include/net/sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 9a9047268d37..0a32f3ce381c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1262,6 +1262,7 @@ proto_memory_pressure(struct proto *prot) /* Called with local bh disabled */ void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); int sock_prot_inuse_get(struct net *net, struct proto *proto); +int sock_inuse_get(struct net *net); #else static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc) -- cgit From 986ffdfd08dbaae721e82720e6bfc2c307e732dd Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 20 Dec 2017 11:12:52 +0800 Subject: net: sock: replace sk_state_load with inet_sk_state_load and remove sk_state_store sk_state_load is only used by AF_INET/AF_INET6, so rename it to inet_sk_state_load and move it into inet_sock.h. sk_state_store is removed as it is not used any more. Signed-off-by: Yafang Shao Signed-off-by: David S. 
Miller --- include/net/sock.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 0a32f3ce381c..6c1db823f8b9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2333,31 +2333,6 @@ static inline bool sk_listener(const struct sock *sk) return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); } -/** - * sk_state_load - read sk->sk_state for lockless contexts - * @sk: socket pointer - * - * Paired with sk_state_store(). Used in places we do not hold socket lock : - * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ... - */ -static inline int sk_state_load(const struct sock *sk) -{ - return smp_load_acquire(&sk->sk_state); -} - -/** - * sk_state_store - update sk->sk_state - * @sk: socket pointer - * @newstate: new state - * - * Paired with sk_state_load(). Should be used in contexts where - * state change might impact lockless readers. - */ -static inline void sk_state_store(struct sock *sk, int newstate) -{ - smp_store_release(&sk->sk_state, newstate); -} - void sock_enable_timestamp(struct sock *sk, int flag); int sock_get_timestamp(struct sock *, struct timeval __user *); int sock_get_timestampns(struct sock *, struct timespec __user *); -- cgit From 54dc3e3324829d346c959ff774626d9c6c9a65b5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 4 Jan 2018 14:03:54 -0800 Subject: net: ipv6: Allow connect to linklocal address from socket bound to vrf Allow a process bound to a VRF to connect to a linklocal address. Currently, this fails because of a mismatch between the scope of the linklocal address and the sk_bound_dev_if inherited by the VRF binding: $ ssh -6 fe80::70b8:cff:fedd:ead8%eth1 ssh: connect to host fe80::70b8:cff:fedd:ead8%eth1 port 22: Invalid argument Relax the scope check to allow the socket to be bound to the same L3 device as the scope id. 
This makes ipv6 linklocal consistent with other relaxed checks enabled by commits 1ff23beebdd3 ("net: l3mdev: Allow send on enslaved interface") and 7bb387c5ab12a ("net: Allow IP_MULTICAST_IF to set index to L3 slave"). Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/sock.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 66fd3951e6f3..73b7830b0bb8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -72,6 +72,7 @@ #include #include #include +#include <net/l3mdev.h> /* * This structure really needs to be cleaned up. @@ -2399,4 +2400,23 @@ static inline void sk_pacing_shift_update(struct sock *sk, int val) sk->sk_pacing_shift = val; } +/* if a socket is bound to a device, check that the given device + * index is either the same or that the socket is bound to an L3 + * master device and the given device index is also enslaved to + * that L3 master + */ +static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif) +{ + int mdif; + + if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif) + return true; + + mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif); + if (mdif && mdif == sk->sk_bound_dev_if) + return true; + + return false; +} + #endif /* _SOCK_H */ -- cgit