diff options
author | David S. Miller <davem@davemloft.net> | 2017-12-03 10:18:28 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-12-03 10:18:28 -0500 |
commit | c6d3c96f8d52b8f16de73e778717641e659c4ccc (patch) | |
tree | 192e4e3b567edec06ed12c2e7d7ac715ee7ba485 /include | |
parent | fb7516d42478ebc8e2f00efb76ef96f7b68fd8d3 (diff) | |
parent | 27da6d37e207e7ad9c692d3d0924f09e63a98e38 (diff) |
Merge branch 'tcp-2nd-listener-hash'
Martin KaFai Lau says:
====================
tcp: Add a 2nd listener hashtable (port+addr)
This patch set adds a 2nd listener hashtable. It is to resolve
the performance issue when a process is listening at many IP
addresses with the same port (e.g. [IP1]:443, [IP2]:443... [IPN]:443)
v2:
- Move the new lhash2 and lhash2_mask before the existing
listening_hash to avoid adding another cacheline
to inet_hashinfo (Suggested by Eric Dumazet, Thanks!)
- I take this chance to plug an existing 4 bytes hole while
adding 'unsigned int lhash2_mask'.
- Add some comments about lhash2 in inet_hashtables.h
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- | include/net/inet_connection_sock.h | 2 | ||||
-rw-r--r-- | include/net/inet_hashtables.h | 29 | ||||
-rw-r--r-- | include/net/ip.h | 9 | ||||
-rw-r--r-- | include/net/ipv6.h | 17 |
4 files changed, 51 insertions, 6 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 0358745ea059..8e1bf9ae4a5e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -77,6 +77,7 @@ struct inet_connection_sock_af_ops { * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ulp_ops Pluggable ULP control hook * @icsk_ulp_data ULP private data + * @icsk_listen_portaddr_node hash to the portaddr listener hashtable * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event @@ -101,6 +102,7 @@ struct inet_connection_sock { const struct inet_connection_sock_af_ops *icsk_af_ops; const struct tcp_ulp_ops *icsk_ulp_ops; void *icsk_ulp_data; + struct hlist_node icsk_listen_portaddr_node; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:6, icsk_ca_setsockopt:1, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 2dbbbff5e1e3..9141e95529e7 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -111,6 +111,7 @@ struct inet_bind_hashbucket { */ struct inet_listen_hashbucket { spinlock_t lock; + unsigned int count; struct hlist_head head; }; @@ -132,12 +133,13 @@ struct inet_hashinfo { /* Ok, let's try this, I give up, we do need a local binding * TCP hash as well as the others for fast bind/connect. */ + struct kmem_cache *bind_bucket_cachep; struct inet_bind_hashbucket *bhash; - unsigned int bhash_size; - /* 4 bytes hole on 64 bit */ - struct kmem_cache *bind_bucket_cachep; + /* The 2nd listener table hashed by local port and address */ + unsigned int lhash2_mask; + struct inet_listen_hashbucket *lhash2; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. @@ -145,14 +147,25 @@ struct inet_hashinfo { * Now align to a new cache line as all the following members * might be often dirty. */ - /* All sockets in TCP_LISTEN state will be in here. This is the only - * table where wildcard'd TCP sockets can exist. Hash function here - * is just local port number. + /* All sockets in TCP_LISTEN state will be in listening_hash. + * This is the only table where wildcard'd TCP sockets can + * exist. listening_hash is only hashed by local port number. + * If lhash2 is initialized, the same socket will also be hashed + * to lhash2 by port and address. */ struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] ____cacheline_aligned_in_smp; }; +#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \ + hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node) + +static inline struct inet_listen_hashbucket * +inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash) +{ + return &h->lhash2[hash & h->lhash2_mask]; +} + static inline struct inet_ehash_bucket *inet_ehash_bucket( struct inet_hashinfo *hashinfo, unsigned int hash) @@ -208,6 +221,10 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child); void inet_put_port(struct sock *sk); void inet_hashinfo_init(struct inet_hashinfo *h); +void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, + unsigned long numentries, int scale, + unsigned long low_limit, + unsigned long high_limit); bool inet_ehash_insert(struct sock *sk, struct sock *osk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); diff --git a/include/net/ip.h b/include/net/ip.h index 9896f46cbbf1..fc9bf1b1fe2c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -26,12 +26,14 @@ #include <linux/ip.h> #include <linux/in.h> #include <linux/skbuff.h> +#include <linux/jhash.h> #include <net/inet_sock.h> #include <net/route.h> #include <net/snmp.h> #include <net/flow.h> #include <net/flow_dissector.h> +#include <net/netns/hash.h> #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ @@ -521,6 +523,13 @@ static inline unsigned int ipv4_addr_hash(__be32 ip) return (__force unsigned int) ip; } +static inline u32 ipv4_portaddr_hash(const struct net *net, + __be32 saddr, + unsigned int port) +{ + return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; +} + bool ip_call_ra_chain(struct sk_buff *skb); /* diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f73797e2fa60..25be4715578c 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -22,6 +22,7 @@ #include <net/flow.h> #include <net/flow_dissector.h> #include <net/snmp.h> +#include <net/netns/hash.h> #define SIN6_LEN_RFC2133 24 @@ -673,6 +674,22 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) cpu_to_be32(0x0000ffff))) == 0UL; } +static inline u32 ipv6_portaddr_hash(const struct net *net, + const struct in6_addr *addr6, + unsigned int port) +{ + unsigned int hash, mix = net_hash_mix(net); + + if (ipv6_addr_any(addr6)) + hash = jhash_1word(0, mix); + else if (ipv6_addr_v4mapped(addr6)) + hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix); + else + hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix); + + return hash ^ port; +} + /* * Check for a RFC 4843 ORCHID address * (Overlay Routable Cryptographic Hash Identifiers) |