From ae1d48b23d5e79efbcf0cef4f0ebb9742361af59 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Tue, 11 Oct 2011 10:54:35 +0900 Subject: IPVS netns shutdown/startup dead-lock ip_vs_mutext is used by both netns shutdown code and startup and both implicit uses sk_lock-AF_INET mutex. cleanup CPU-1 startup CPU-2 ip_vs_dst_event() ip_vs_genl_set_cmd() sk_lock-AF_INET __ip_vs_mutex sk_lock-AF_INET __ip_vs_mutex * DEAD LOCK * A new mutex placed in ip_vs netns struct called sync_mutex is added. Comments from Julian and Simon added. This patch has been running for more than 3 month now and it seems to work. Ver. 3 IP_VS_SO_GET_DAEMON in do_ip_vs_get_ctl protected by sync_mutex instead of __ip_vs_mutex as sugested by Julian. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 1aaf915656f3..8fa4430f99c1 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -900,6 +900,7 @@ struct netns_ipvs { volatile int sync_state; volatile int master_syncid; volatile int backup_syncid; + struct mutex sync_mutex; /* multicast interface name */ char master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; -- cgit From f36c23bb9f822904dacf83a329518d0a5fde7968 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 17 Oct 2011 19:07:30 -0400 Subject: udplite: fast-path computation of checksum coverage Commit 903ab86d195cca295379699299c5fc10beba31c7 of 1 March this year ("udp: Add lockless transmit path") introduced a new fast TX path that broke the checksum coverage computation of UDP-lite, which so far depended on up->len (only set if the socket is locked and 0 in the fast path). Fixed by providing both fast- and slow-path computation of checksum coverage. The latter can be removed when UDP(-lite)v6 also uses a lockless transmit path. Reported-by: Thomas Volkert Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/udplite.h | 63 +++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/net/udplite.h b/include/net/udplite.h index 673a024c6b2a..5f097ca7d5c5 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -66,40 +66,34 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) return 0; } -static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh) +/* Slow-path computation of checksum. Socket is locked. */ +static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) { + const struct udp_sock *up = udp_sk(skb->sk); int cscov = up->len; + __wsum csum = 0; - /* - * Sender has set `partial coverage' option on UDP-Lite socket - */ - if (up->pcflag & UDPLITE_SEND_CC) { + if (up->pcflag & UDPLITE_SEND_CC) { + /* + * Sender has set `partial coverage' option on UDP-Lite socket. + * The special case "up->pcslen == 0" signifies full coverage. + */ if (up->pcslen < up->len) { - /* up->pcslen == 0 means that full coverage is required, - * partial coverage only if 0 < up->pcslen < up->len */ - if (0 < up->pcslen) { - cscov = up->pcslen; - } - uh->len = htons(up->pcslen); + if (0 < up->pcslen) + cscov = up->pcslen; + udp_hdr(skb)->len = htons(up->pcslen); } - /* - * NOTE: Causes for the error case `up->pcslen > up->len': - * (i) Application error (will not be penalized). - * (ii) Payload too big for send buffer: data is split - * into several packets, each with its own header. - * In this case (e.g. last segment), coverage may - * exceed packet length. - * Since packets with coverage length > packet length are - * illegal, we fall back to the defaults here. - */ + /* + * NOTE: Causes for the error case `up->pcslen > up->len': + * (i) Application error (will not be penalized). + * (ii) Payload too big for send buffer: data is split + * into several packets, each with its own header. + * In this case (e.g. last segment), coverage may + * exceed packet length. + * Since packets with coverage length > packet length are + * illegal, we fall back to the defaults here. + */ } - return cscov; -} - -static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) -{ - int cscov = udplite_sender_cscov(udp_sk(sk), udp_hdr(skb)); - __wsum csum = 0; skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ @@ -115,16 +109,21 @@ static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) return csum; } +/* Fast-path computation of checksum. Socket may not be locked. */ static inline __wsum udplite_csum(struct sk_buff *skb) { - struct sock *sk = skb->sk; - int cscov = udplite_sender_cscov(udp_sk(sk), udp_hdr(skb)); + const struct udp_sock *up = udp_sk(skb->sk); const int off = skb_transport_offset(skb); - const int len = skb->len - off; + int len = skb->len - off; + if ((up->pcflag & UDPLITE_SEND_CC) && up->pcslen < len) { + if (0 < up->pcslen) + len = up->pcslen; + udp_hdr(skb)->len = htons(up->pcslen); + } skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ - return skb_checksum(skb, off, min(cscov, len), 0); + return skb_checksum(skb, off, len, 0); } extern void udplite4_register(void); -- cgit From da92b194cc36b5dc1fbd85206aeeffd80bee0c39 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Fri, 21 Oct 2011 00:49:15 +0000 Subject: net: hold sock reference while processing tx timestamps The pair of functions, * skb_clone_tx_timestamp() * skb_complete_tx_timestamp() were designed to allow timestamping in PHY devices. The first function, called during the MAC driver's hard_xmit method, identifies PTP protocol packets, clones them, and gives them to the PHY device driver. The PHY driver may hold onto the packet and deliver it at a later time using the second function, which adds the packet to the socket's error queue. As pointed out by Johannes, nothing prevents the socket from disappearing while the cloned packet is sitting in the PHY driver awaiting a timestamp. This patch fixes the issue by taking a reference on the socket for each such packet. In addition, the comments regarding the usage of these function are expanded to highlight the rule that PHY drivers must use skb_complete_tx_timestamp() to release the packet, in order to release the socket reference, too. These functions first appeared in v2.6.36. Reported-by: Johannes Berg Signed-off-by: Richard Cochran Cc: Signed-off-by: Eric Dumazet Reviewed-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/phy.h | 2 +- include/linux/skbuff.h | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 54fc4138955f..79f337c47388 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -420,7 +420,7 @@ struct phy_driver { /* * Requests a Tx timestamp for 'skb'. The phy driver promises - * to deliver it to the socket's error queue as soon as a + * to deliver it using skb_complete_tx_timestamp() as soon as a * timestamp becomes available. One of the PTP_CLASS_ values * is passed in 'type'. */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8bd383caa363..0f966460a345 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2020,8 +2020,13 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) /** * skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps * + * PHY drivers may accept clones of transmitted packets for + * timestamping via their phy_driver.txtstamp method. These drivers + * must call this function to return the skb back to the stack, with + * or without a timestamp. + * * @skb: clone of the the original outgoing packet - * @hwtstamps: hardware time stamps + * @hwtstamps: hardware time stamps, may be NULL if not available * */ void skb_complete_tx_timestamp(struct sk_buff *skb, -- cgit