diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-24 10:01:50 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-24 10:01:50 -0700 |
commit | 3c4cfadef6a1665d9cd02a543782d03d3e6740c6 (patch) | |
tree | 3df72faaacd494d5ac8c9668df4f529b1b5e4457 /net/unix/af_unix.c | |
parent | e017507f37d5cb8b541df165a824958bc333bec3 (diff) | |
parent | 320f5ea0cedc08ef65d67e056bcb9d181386ef2c (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David S Miller:
1) Remove the ipv4 routing cache. Now lookups go directly into the FIB
trie and use prebuilt routes cached there.
No more garbage collection, no more rDOS attacks on the routing
cache. Instead we now get predictable and consistent performance,
no matter what the pattern of traffic we service.
This has been almost 2 years in the making. Special thanks to
Julian Anastasov, Eric Dumazet, Steffen Klassert, and others who
have helped along the way.
I'm sure that with a change of this magnitude there will be some
kind of fallout, but such things ought the be simple to fix at this
point. Luckily I'm not European so I'll be around all of August to
fix things :-)
The major stages of this work here are each fronted by a forced
merge commit whose commit message contains a top-level description
of the motivations and implementation issues.
2) Pre-demux of established ipv4 TCP sockets, saves a route demux on
input.
3) TCP SYN/ACK performance tweaks from Eric Dumazet.
4) Add namespace support for netfilter L4 conntrack helpers, from Gao
Feng.
5) Add config mechanism for Energy Efficient Ethernet to ethtool, from
Yuval Mintz.
6) Remove quadratic behavior from /proc/net/unix, from Eric Dumazet.
7) Support for connection tracker helpers in userspace, from Pablo
Neira Ayuso.
8) Allow userspace driven TX load balancing functions in TEAM driver,
from Jiri Pirko.
9) Kill off NLMSG_PUT and RTA_PUT macros, more gross stuff with
embedded gotos.
10) TCP Small Queues, essentially minimize the amount of TCP data queued
up in the packet scheduler layer. Whereas the existing BQL (Byte
Queue Limits) limits the pkt_sched --> netdevice queuing levels,
this controls the TCP --> pkt_sched queueing levels.
From Eric Dumazet.
11) Reduce the number of get_page/put_page ops done on SKB fragments,
from Alexander Duyck.
12) Implement protection against blind resets in TCP (RFC 5961), from
Eric Dumazet.
13) Support the client side of TCP Fast Open, basically the ability to
send data in the SYN exchange, from Yuchung Cheng.
Basically, the sender queues up data with a sendmsg() call using
MSG_FASTOPEN, then they do the connect() which emits the queued up
fastopen data.
14) Avoid all the problems we get into in TCP when timers or PMTU events
hit a locked socket. The TCP Small Queues changes added a
tcp_release_cb() that allows us to queue work up to the
release_sock() caller, and that's what we use here too. From Eric
Dumazet.
15) Zero copy on TX support for TUN driver, from Michael S. Tsirkin.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1870 commits)
genetlink: define lockdep_genl_is_held() when CONFIG_LOCKDEP
r8169: revert "add byte queue limit support".
ipv4: Change rt->rt_iif encoding.
net: Make skb->skb_iif always track skb->dev
ipv4: Prepare for change of rt->rt_iif encoding.
ipv4: Remove all RTCF_DIRECTSRC handliing.
ipv4: Really ignore ICMP address requests/replies.
decnet: Don't set RTCF_DIRECTSRC.
net/ipv4/ip_vti.c: Fix __rcu warnings detected by sparse.
ipv4: Remove redundant assignment
rds: set correct msg_namelen
openvswitch: potential NULL deref in sample()
tcp: dont drop MTU reduction indications
bnx2x: Add new 57840 device IDs
tcp: avoid oops in tcp_metrics and reset tcpm_stamp
niu: Change niu_rbr_fill() to use unlikely() to check niu_rbr_add_page() return value
niu: Fix to check for dma mapping errors.
net: Fix references to out-of-scope variables in put_cmsg_compat()
net: ethernet: davinci_emac: add pm_runtime support
net: ethernet: davinci_emac: Remove unnecessary #include
...
Diffstat (limited to 'net/unix/af_unix.c')
-rw-r--r-- | net/unix/af_unix.c | 110 |
1 files changed, 62 insertions, 48 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 641f2e47f165..79981d97bc9c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -115,15 +115,24 @@ #include <net/checksum.h> #include <linux/security.h> -struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; +struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); DEFINE_SPINLOCK(unix_table_lock); EXPORT_SYMBOL_GPL(unix_table_lock); static atomic_long_t unix_nr_socks; -#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) -#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) +static struct hlist_head *unix_sockets_unbound(void *addr) +{ + unsigned long hash = (unsigned long)addr; + + hash ^= hash >> 16; + hash ^= hash >> 8; + hash %= UNIX_HASH_SIZE; + return &unix_socket_table[UNIX_HASH_SIZE + hash]; +} + +#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE) #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) @@ -645,7 +654,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); - unix_insert_socket(unix_sockets_unbound, sk); + unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) atomic_long_dec(&unix_nr_socks); @@ -2239,47 +2248,54 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, } #ifdef CONFIG_PROC_FS -static struct sock *first_unix_socket(int *i) + +#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) + +static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) { - for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); + unsigned long offset = get_offset(*pos); + unsigned long bucket = get_bucket(*pos); + struct sock *sk; + unsigned long count = 0; + + for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { + if (sock_net(sk) != seq_file_net(seq)) + continue; + if (++count == offset) + break; } - return NULL; + + return sk; } -static struct sock *next_unix_socket(int *i, struct sock *s) +static struct sock *unix_next_socket(struct seq_file *seq, + struct sock *sk, + loff_t *pos) { - struct sock *next = sk_next(s); - /* More in this chain? */ - if (next) - return next; - /* Look for next non-empty chain. */ - for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); + unsigned long bucket; + + while (sk > (struct sock *)SEQ_START_TOKEN) { + sk = sk_next(sk); + if (!sk) + goto next_bucket; + if (sock_net(sk) == seq_file_net(seq)) + return sk; } - return NULL; -} -struct unix_iter_state { - struct seq_net_private p; - int i; -}; + do { + sk = unix_from_bucket(seq, pos); + if (sk) + return sk; -static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos) -{ - struct unix_iter_state *iter = seq->private; - loff_t off = 0; - struct sock *s; +next_bucket: + bucket = get_bucket(*pos) + 1; + *pos = set_bucket_offset(bucket, 1); + } while (bucket < ARRAY_SIZE(unix_socket_table)); - for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { - if (sock_net(s) != seq_file_net(seq)) - continue; - if (off == pos) - return s; - ++off; - } return NULL; } @@ -2287,22 +2303,20 @@ static void *unix_seq_start(struct seq_file *seq, loff_t *pos) __acquires(unix_table_lock) { spin_lock(&unix_table_lock); - return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN; + + if (!*pos) + return SEQ_START_TOKEN; + + if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) + return NULL; + + return unix_next_socket(seq, NULL, pos); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct unix_iter_state *iter = seq->private; - struct sock *sk = v; ++*pos; - - if (v == SEQ_START_TOKEN) - sk = first_unix_socket(&iter->i); - else - sk = next_unix_socket(&iter->i, sk); - while (sk && (sock_net(sk) != seq_file_net(seq))) - sk = next_unix_socket(&iter->i, sk); - return sk; + return unix_next_socket(seq, v, pos); } static void unix_seq_stop(struct seq_file *seq, void *v) @@ -2365,7 +2379,7 @@ static const struct seq_operations unix_seq_ops = { static int unix_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &unix_seq_ops, - sizeof(struct unix_iter_state)); + sizeof(struct seq_net_private)); } static const struct file_operations unix_seq_fops = { |