summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-05-24 08:27:32 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-05-24 08:27:32 -0700
commiteb3d33900aa953bcdfe132a71bb03ee592ebbb47 (patch)
tree020bf904e5d5e9b3106228514f925b041492df28 /net/ipv4
parent514e250f67d2b2a8ab08dc9c3650af19a411c926 (diff)
parent950e2958a5e96406e6e5ff4190a638a54769f89b (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: "It's been a while since my last pull request so quite a few fixes have piled up." Indeed. 1) Fix nf_{log,queue} compilation with PROC_FS disabled, from Pablo Neira Ayuso. 2) Fix data corruption on some tg3 chips with TSO enabled, from Michael Chan. 3) Fix double insertion of VLAN tags in be2net driver, from Sarveshwar Bandi. 4) Don't have TCP's MD5 support pass > PAGE_SIZE page offsets in scatter-gather entries into the crypto layer, the crypto layer can't handle that. From Eric Dumazet. 5) Fix lockdep splat in 802.1Q MRP code, also from Eric Dumazet. 6) Fix OOPS in netfilter log module when called from conntrack, from Hans Schillstrom. 7) FEC driver needs to use netif_tx_{lock,unlock}_bh() rather than the non-BH disabling variants. From Fabio Estevam. 8) TCP GSO can generate out-of-order packets, fix from Eric Dumazet. 9) vxlan driver doesn't update 'used' field of fdb entries when it should, from Sridhar Samudrala. 10) ipv6 should use kzalloc() to allocate inet6 socket cork options, otherwise we can OOPS in ip6_cork_release(). From Eric Dumazet. 11) Fix races in bonding set mode, from Nikolay Aleksandrov. 12) Fix checksum generation regression added by "r8169: fix 8168evl frame padding.", from Francois Romieu. 13) ip_gre can look at stale SKB data pointer, fix from Eric Dumazet. 14) Fix checksum handling when GSO is enabled in bnx2x driver with certain chips, from Yuval Mintz. 15) Fix double free in batman-adv, from Martin Hundebøll. 16) Fix device startup synchronization with firmware in tg3 driver, from Nithin Sujit. 17) perf networking dropmonitor doesn't work at all due to mixed up trace parameter ordering, from Ben Hutchings. 18) Fix proportional rate reduction handling in tcp_ack(), from Nandita Dukkipati. 19) IPSEC layer doesn't return an error when a valid state is detected, causing an OOPS. Fix from Timo Teräs. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (85 commits) be2net: bug fix on returning an invalid nic descriptor tcp: xps: fix reordering issues net: Revert unused variable changes. xfrm: properly handle invalid states as an error virtio_net: enable napi for all possible queues during open tcp: bug fix in proportional rate reduction. net: ethernet: sun: drop unused variable net: ethernet: korina: drop unused variable net: ethernet: apple: drop unused variable qmi_wwan: Added support for Cinterion's PLxx WWAN Interface perf: net_dropmonitor: Remove progress indicator perf: net_dropmonitor: Use bisection in symbol lookup perf: net_dropmonitor: Do not assume ordering of dictionaries perf: net_dropmonitor: Fix symbol-relative addresses perf: net_dropmonitor: Fix trace parameter order net: fec: use a more proper compatible string for MVF type device qlcnic: Fix updating netdev->features qlcnic: remove netdev->trans_start updates within the driver qlcnic: Return proper error codes from probe failure paths tg3: Update version to 3.132 ...
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/ip_gre.c3
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c13
-rw-r--r--net/ipv4/tcp.c29
-rw-r--r--net/ipv4/tcp_input.c23
-rw-r--r--net/ipv4/tcp_output.c10
5 files changed, 55 insertions, 23 deletions
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index c625e4dad4b0..2a83591492dd 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -235,7 +235,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
*/
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn;
- const struct iphdr *iph = (const struct iphdr *)skb->data;
+ const struct iphdr *iph;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
@@ -281,6 +281,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
else
itn = net_generic(net, ipgre_net_id);
+ iph = (const struct iphdr *)skb->data;
t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
iph->daddr, iph->saddr, tpi.key);
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index f8a222cb6448..cf08218ddbcf 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -162,7 +162,8 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
return skb;
}
-static void ipt_ulog_packet(unsigned int hooknum,
+static void ipt_ulog_packet(struct net *net,
+ unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -174,7 +175,6 @@ static void ipt_ulog_packet(unsigned int hooknum,
size_t size, copy_len;
struct nlmsghdr *nlh;
struct timeval tv;
- struct net *net = dev_net(in ? in : out);
struct ulog_net *ulog = ulog_pernet(net);
/* ffs == find first bit set, necessary because userspace
@@ -291,12 +291,15 @@ alloc_failure:
static unsigned int
ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
- ipt_ulog_packet(par->hooknum, skb, par->in, par->out,
+ struct net *net = dev_net(par->in ? par->in : par->out);
+
+ ipt_ulog_packet(net, par->hooknum, skb, par->in, par->out,
par->targinfo, NULL);
return XT_CONTINUE;
}
-static void ipt_logfn(u_int8_t pf,
+static void ipt_logfn(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
@@ -318,7 +321,7 @@ static void ipt_logfn(u_int8_t pf,
strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
}
- ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
+ ipt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix);
}
static int ulog_tg_check(const struct xt_tgchk_param *par)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dcb116dde216..ab450c099aa4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2887,6 +2887,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
unsigned int mss;
struct sk_buff *gso_skb = skb;
__sum16 newcheck;
+ bool ooo_okay, copy_destructor;
if (!pskb_may_pull(skb, sizeof(*th)))
goto out;
@@ -2927,10 +2928,18 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
goto out;
}
+ copy_destructor = gso_skb->destructor == tcp_wfree;
+ ooo_okay = gso_skb->ooo_okay;
+ /* All segments but the first should have ooo_okay cleared */
+ skb->ooo_okay = 0;
+
segs = skb_segment(skb, features);
if (IS_ERR(segs))
goto out;
+ /* Only first segment might have ooo_okay set */
+ segs->ooo_okay = ooo_okay;
+
delta = htonl(oldlen + (thlen + mss));
skb = segs;
@@ -2950,6 +2959,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
thlen, skb->csum));
seq += mss;
+ if (copy_destructor) {
+ skb->destructor = gso_skb->destructor;
+ skb->sk = gso_skb->sk;
+ /* {tcp|sock}_wfree() use exact truesize accounting :
+ * sum(skb->truesize) MUST be exactly be gso_skb->truesize
+ * So we account mss bytes of 'true size' for each segment.
+ * The last segment will contain the remaining.
+ */
+ skb->truesize = mss;
+ gso_skb->truesize -= mss;
+ }
skb = skb->next;
th = tcp_hdr(skb);
@@ -2962,7 +2982,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
* is freed at TX completion, and not right now when gso_skb
* is freed by GSO engine
*/
- if (gso_skb->destructor == tcp_wfree) {
+ if (copy_destructor) {
swap(gso_skb->sk, skb->sk);
swap(gso_skb->destructor, skb->destructor);
swap(gso_skb->truesize, skb->truesize);
@@ -3269,8 +3289,11 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
for (i = 0; i < shi->nr_frags; ++i) {
const struct skb_frag_struct *f = &shi->frags[i];
- struct page *page = skb_frag_page(f);
- sg_set_page(&sg, page, skb_frag_size(f), f->page_offset);
+ unsigned int offset = f->page_offset;
+ struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
+
+ sg_set_page(&sg, page, skb_frag_size(f),
+ offset_in_page(offset));
if (crypto_hash_update(desc, &sg, skb_frag_size(f)))
return 1;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 08bbe6096528..9c6225780bd5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2743,8 +2743,8 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
- int prior_sacked, bool is_dupack,
- int flag)
+ int prior_sacked, int prior_packets,
+ bool is_dupack, int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -2804,7 +2804,8 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
tcp_add_reno_sack(sk);
} else
do_lost = tcp_try_undo_partial(sk, pkts_acked);
- newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
+ newly_acked_sacked = prior_packets - tp->packets_out +
+ tp->sacked_out - prior_sacked;
break;
case TCP_CA_Loss:
tcp_process_loss(sk, flag, is_dupack);
@@ -2818,7 +2819,8 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
if (is_dupack)
tcp_add_reno_sack(sk);
}
- newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
+ newly_acked_sacked = prior_packets - tp->packets_out +
+ tp->sacked_out - prior_sacked;
if (icsk->icsk_ca_state <= TCP_CA_Disorder)
tcp_try_undo_dsack(sk);
@@ -3330,9 +3332,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
bool is_dupack = false;
u32 prior_in_flight;
u32 prior_fackets;
- int prior_packets;
+ int prior_packets = tp->packets_out;
int prior_sacked = tp->sacked_out;
int pkts_acked = 0;
+ int previous_packets_out = 0;
/* If the ack is older than previous acks
* then we can probably ignore it.
@@ -3403,14 +3406,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
sk->sk_err_soft = 0;
icsk->icsk_probes_out = 0;
tp->rcv_tstamp = tcp_time_stamp;
- prior_packets = tp->packets_out;
if (!prior_packets)
goto no_queue;
/* See if we can take anything off of the retransmit queue. */
+ previous_packets_out = tp->packets_out;
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
- pkts_acked = prior_packets - tp->packets_out;
+ pkts_acked = previous_packets_out - tp->packets_out;
if (tcp_ack_is_dubious(sk, flag)) {
/* Advance CWND, if state allows this. */
@@ -3418,7 +3421,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_cong_avoid(sk, ack, prior_in_flight);
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
- is_dupack, flag);
+ prior_packets, is_dupack, flag);
} else {
if (flag & FLAG_DATA_ACKED)
tcp_cong_avoid(sk, ack, prior_in_flight);
@@ -3441,7 +3444,7 @@ no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
- is_dupack, flag);
+ prior_packets, is_dupack, flag);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
@@ -3464,7 +3467,7 @@ old_ack:
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
- is_dupack, flag);
+ prior_packets, is_dupack, flag);
}
SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 536d40929ba6..ec335fabd5cc 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -874,11 +874,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
&md5);
tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
- if (tcp_packets_in_flight(tp) == 0) {
+ if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
- skb->ooo_okay = 1;
- } else
- skb->ooo_okay = 0;
+
+ /* if no packet is in qdisc/device queue, then allow XPS to select
+ * another queue.
+ */
+ skb->ooo_okay = sk_wmem_alloc_get(sk) == 0;
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);