From 1a2449a87bb7606113b1aa1a9d3c3e78ef189a1c Mon Sep 17 00:00:00 2001
From: Chris Leech <christopher.leech@intel.com>
Date: Tue, 23 May 2006 18:05:53 -0700
Subject: [I/OAT]: TCP recv offload to I/OAT

Locks down user pages and sets up for DMA in tcp_recvmsg, then calls
dma_async_try_early_copy in tcp_v4_do_rcv

Signed-off-by: Chris Leech <christopher.leech@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 301eee726b0f..a50eb306e9e2 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1218,8 +1218,16 @@ process:
 	bh_lock_sock(sk);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
-		if (!tcp_prequeue(sk, skb))
-			ret = tcp_v6_do_rcv(sk, skb);
+#ifdef CONFIG_NET_DMA
+                struct tcp_sock *tp = tcp_sk(sk);
+                if (tp->ucopy.dma_chan)
+                        ret = tcp_v6_do_rcv(sk, skb);
+                else
+#endif
+		{
+			if (!tcp_prequeue(sk, skb))
+				ret = tcp_v6_do_rcv(sk, skb);
+		}
 	} else
 		sk_add_backlog(sk, skb);
 	bh_unlock_sock(sk);
-- 
cgit 


From 546be2405be119ef55467aace45f337a16e5d424 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 27 May 2006 23:03:58 -0700
Subject: [IPSEC] xfrm: Undo afinfo lock proliferation

The number of locks used to manage afinfo structures can easily be reduced
down to one each for policy and state respectively.  This is based on the
observation that the write locks are only held by module insertion/removal
which are very rare events so there is no need to further differentiate
between the insertion of modules like ipv6 versus esp6.

The removal of the read locks in xfrm4_policy.c/xfrm6_policy.c might look
suspicious at first.  However, after you realise that nobody ever takes
the corresponding write lock you'll feel better :)

As far as I can gather it's an attempt to guard against the removal of
the corresponding modules.  Since neither module can be unloaded at all
we can leave it to whoever fixes up IPv6 unloading :)

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/xfrm6_policy.c | 6 ------
 net/ipv6/xfrm6_state.c  | 1 -
 2 files changed, 7 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 88c840f1beb6..ee715f2691e9 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -23,8 +23,6 @@
 static struct dst_ops xfrm6_dst_ops;
 static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
 
-static struct xfrm_type_map xfrm6_type_map = { .lock = RW_LOCK_UNLOCKED };
-
 static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
 {
 	int err = 0;
@@ -249,9 +247,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 
 static inline int xfrm6_garbage_collect(void)
 {
-	read_lock(&xfrm6_policy_afinfo.lock);
 	xfrm6_policy_afinfo.garbage_collect();
-	read_unlock(&xfrm6_policy_afinfo.lock);
 	return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2);
 }
 
@@ -311,8 +307,6 @@ static struct dst_ops xfrm6_dst_ops = {
 
 static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
 	.family =		AF_INET6,
-	.lock = 		RW_LOCK_UNLOCKED,
-	.type_map = 		&xfrm6_type_map,
 	.dst_ops =		&xfrm6_dst_ops,
 	.dst_lookup =		xfrm6_dst_lookup,
 	.find_bundle =		__xfrm6_find_bundle,
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index a5723024d3b3..b33296b3f6de 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -135,7 +135,6 @@ __xfrm6_find_acq(u8 mode, u32 reqid, u8 proto,
 
 static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
-	.lock			= RW_LOCK_UNLOCKED,
 	.init_tempsel		= __xfrm6_init_tempsel,
 	.state_lookup		= __xfrm6_state_lookup,
 	.find_acq		= __xfrm6_find_acq,
-- 
cgit 


From b59f45d0b2878ab76f8053b0973654e6621828ee Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 27 May 2006 23:05:54 -0700
Subject: [IPSEC] xfrm: Abstract out encapsulation modes

This patch adds the structure xfrm_mode.  It is meant to represent
the operations carried out by transport/tunnel modes.

By doing this we allow additional encapsulation modes to be added
without clogging up the xfrm_input/xfrm_output paths.

Candidate modes include 4-to-6 tunnel mode, 6-to-4 tunnel mode, and
BEET modes.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/Kconfig                |  20 +++++++
 net/ipv6/Makefile               |   2 +
 net/ipv6/ip6_output.c           |   2 +
 net/ipv6/xfrm6_input.c          |  29 +---------
 net/ipv6/xfrm6_mode_transport.c |  73 ++++++++++++++++++++++++
 net/ipv6/xfrm6_mode_tunnel.c    | 121 ++++++++++++++++++++++++++++++++++++++++
 net/ipv6/xfrm6_output.c         |  63 +--------------------
 7 files changed, 224 insertions(+), 86 deletions(-)
 create mode 100644 net/ipv6/xfrm6_mode_transport.c
 create mode 100644 net/ipv6/xfrm6_mode_tunnel.c

(limited to 'net/ipv6')

diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index f8a107ab5592..e923d4dea418 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -106,6 +106,26 @@ config INET6_TUNNEL
 	tristate
 	default n
 
+config INET6_XFRM_MODE_TRANSPORT
+	tristate "IPv6: IPsec transport mode"
+	depends on IPV6
+	default IPV6
+	select XFRM
+	---help---
+	  Support for IPsec transport mode.
+
+	  If unsure, say Y.
+
+config INET6_XFRM_MODE_TUNNEL
+	tristate "IPv6: IPsec tunnel mode"
+	depends on IPV6
+	default IPV6
+	select XFRM
+	---help---
+	  Support for IPsec tunnel mode.
+
+	  If unsure, say Y.
+
 config IPV6_TUNNEL
 	tristate "IPv6: IPv6-in-IPv6 tunnel"
 	select INET6_TUNNEL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index a760b0988fbb..386e0a626948 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -20,6 +20,8 @@ obj-$(CONFIG_INET6_ESP) += esp6.o
 obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o
 obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o
 obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
+obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
+obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
 obj-$(CONFIG_NETFILTER)	+= netfilter/
 
 obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e46048974f37..416f6e428a0a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -39,6 +39,7 @@
 #include <linux/in6.h>
 #include <linux/tcp.h>
 #include <linux/route.h>
+#include <linux/module.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
@@ -488,6 +489,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 
 	return offset;
 }
+EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);
 
 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 00cfdee18dca..0405d74ff910 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -13,21 +13,9 @@
 #include <linux/string.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
-#include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/xfrm.h>
 
-static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
-{
-	struct ipv6hdr *outer_iph = skb->nh.ipv6h;
-	struct ipv6hdr *inner_iph = skb->h.ipv6h;
-
-	if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
-		IP6_ECN_set_ce(inner_iph);
-}
-
 int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi)
 {
 	int err;
@@ -81,21 +69,10 @@ int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi)
 
 		xfrm_vec[xfrm_nr++] = x;
 
+		if (x->mode->input(x, skb))
+			goto drop;
+
 		if (x->props.mode) { /* XXX */
-			if (nexthdr != IPPROTO_IPV6)
-				goto drop;
-			if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
-				goto drop;
-			if (skb_cloned(skb) &&
-			    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-				goto drop;
-			if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-				ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h);
-			if (!(x->props.flags & XFRM_STATE_NOECN))
-				ipip6_ecn_decapsulate(skb);
-			skb->mac.raw = memmove(skb->data - skb->mac_len,
-					       skb->mac.raw, skb->mac_len);
-			skb->nh.raw = skb->data;
 			decaps = 1;
 			break;
 		}
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
new file mode 100644
index 000000000000..5efbbae08ef0
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -0,0 +1,73 @@
+/*
+ * xfrm6_mode_transport.c - Transport mode encapsulation for IPv6.
+ *
+ * Copyright (C) 2002 USAGI/WIDE Project
+ * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/dst.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+/* Add encapsulation header.
+ *
+ * The IP header and mutable extension headers will be moved forward to make
+ * space for the encapsulation header.
+ *
+ * On exit, skb->h will be set to the start of the encapsulation header to be
+ * filled in by x->type->output and skb->nh will be set to the nextheader field
+ * of the extension header directly preceding the encapsulation header, or in
+ * its absence, that of the top IP header.  The value of skb->data will always
+ * point to the top IP header.
+ */
+static int xfrm6_transport_output(struct sk_buff *skb)
+{
+	struct xfrm_state *x = skb->dst->xfrm;
+	struct ipv6hdr *iph;
+	u8 *prevhdr;
+	int hdr_len;
+
+	skb_push(skb, x->props.header_len);
+	iph = skb->nh.ipv6h;
+
+	hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
+	skb->nh.raw = prevhdr - x->props.header_len;
+	skb->h.raw = skb->data + hdr_len;
+	memmove(skb->data, iph, hdr_len);
+	return 0;
+}
+
+static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	return 0;
+}
+
+static struct xfrm_mode xfrm6_transport_mode = {
+	.input = xfrm6_transport_input,
+	.output = xfrm6_transport_output,
+	.owner = THIS_MODULE,
+	.encap = XFRM_MODE_TRANSPORT,
+};
+
+static int __init xfrm6_transport_init(void)
+{
+	return xfrm_register_mode(&xfrm6_transport_mode, AF_INET6);
+}
+
+static void __exit xfrm6_transport_exit(void)
+{
+	int err;
+
+	err = xfrm_unregister_mode(&xfrm6_transport_mode, AF_INET6);
+	BUG_ON(err);
+}
+
+module_init(xfrm6_transport_init);
+module_exit(xfrm6_transport_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TRANSPORT);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
new file mode 100644
index 000000000000..8af79be2edca
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -0,0 +1,121 @@
+/*
+ * xfrm6_mode_tunnel.c - Tunnel mode encapsulation for IPv6.
+ *
+ * Copyright (C) 2002 USAGI/WIDE Project
+ * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/dsfield.h>
+#include <net/dst.h>
+#include <net/inet_ecn.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
+{
+	struct ipv6hdr *outer_iph = skb->nh.ipv6h;
+	struct ipv6hdr *inner_iph = skb->h.ipv6h;
+
+	if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
+		IP6_ECN_set_ce(inner_iph);
+}
+
+/* Add encapsulation header.
+ *
+ * The top IP header will be constructed per RFC 2401.  The following fields
+ * in it shall be filled in by x->type->output:
+ *	payload_len
+ *
+ * On exit, skb->h will be set to the start of the encapsulation header to be
+ * filled in by x->type->output and skb->nh will be set to the nextheader field
+ * of the extension header directly preceding the encapsulation header, or in
+ * its absence, that of the top IP header.  The value of skb->data will always
+ * point to the top IP header.
+ */
+static int xfrm6_tunnel_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct xfrm_state *x = dst->xfrm;
+	struct ipv6hdr *iph, *top_iph;
+	int dsfield;
+
+	skb_push(skb, x->props.header_len);
+	iph = skb->nh.ipv6h;
+
+	skb->nh.raw = skb->data;
+	top_iph = skb->nh.ipv6h;
+	skb->nh.raw = &top_iph->nexthdr;
+	skb->h.ipv6h = top_iph + 1;
+
+	top_iph->version = 6;
+	top_iph->priority = iph->priority;
+	top_iph->flow_lbl[0] = iph->flow_lbl[0];
+	top_iph->flow_lbl[1] = iph->flow_lbl[1];
+	top_iph->flow_lbl[2] = iph->flow_lbl[2];
+	dsfield = ipv6_get_dsfield(top_iph);
+	dsfield = INET_ECN_encapsulate(dsfield, dsfield);
+	if (x->props.flags & XFRM_STATE_NOECN)
+		dsfield &= ~INET_ECN_MASK;
+	ipv6_change_dsfield(top_iph, 0, dsfield);
+	top_iph->nexthdr = IPPROTO_IPV6; 
+	top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT);
+	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
+	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
+	return 0;
+}
+
+static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err = -EINVAL;
+
+	if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6)
+		goto out;
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+		goto out;
+
+	if (skb_cloned(skb) &&
+	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+		goto out;
+
+	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+		ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h);
+	if (!(x->props.flags & XFRM_STATE_NOECN))
+		ipip6_ecn_decapsulate(skb);
+	skb->mac.raw = memmove(skb->data - skb->mac_len,
+			       skb->mac.raw, skb->mac_len);
+	skb->nh.raw = skb->data;
+	err = 0;
+
+out:
+	return err;
+}
+
+static struct xfrm_mode xfrm6_tunnel_mode = {
+	.input = xfrm6_tunnel_input,
+	.output = xfrm6_tunnel_output,
+	.owner = THIS_MODULE,
+	.encap = XFRM_MODE_TUNNEL,
+};
+
+static int __init xfrm6_tunnel_init(void)
+{
+	return xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6);
+}
+
+static void __exit xfrm6_tunnel_exit(void)
+{
+	int err;
+
+	err = xfrm_unregister_mode(&xfrm6_tunnel_mode, AF_INET6);
+	BUG_ON(err);
+}
+
+module_init(xfrm6_tunnel_init);
+module_exit(xfrm6_tunnel_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 80242172a5df..16e84254a252 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -14,68 +14,9 @@
 #include <linux/spinlock.h>
 #include <linux/icmpv6.h>
 #include <linux/netfilter_ipv6.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
 #include <net/ipv6.h>
 #include <net/xfrm.h>
 
-/* Add encapsulation header.
- *
- * In transport mode, the IP header and mutable extension headers will be moved
- * forward to make space for the encapsulation header.
- *
- * In tunnel mode, the top IP header will be constructed per RFC 2401.
- * The following fields in it shall be filled in by x->type->output:
- *	payload_len
- *
- * On exit, skb->h will be set to the start of the encapsulation header to be
- * filled in by x->type->output and skb->nh will be set to the nextheader field
- * of the extension header directly preceding the encapsulation header, or in
- * its absence, that of the top IP header.  The value of skb->data will always
- * point to the top IP header.
- */
-static void xfrm6_encap(struct sk_buff *skb)
-{
-	struct dst_entry *dst = skb->dst;
-	struct xfrm_state *x = dst->xfrm;
-	struct ipv6hdr *iph, *top_iph;
-	int dsfield;
-
-	skb_push(skb, x->props.header_len);
-	iph = skb->nh.ipv6h;
-
-	if (!x->props.mode) {
-		u8 *prevhdr;
-		int hdr_len;
-
-		hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
-		skb->nh.raw = prevhdr - x->props.header_len;
-		skb->h.raw = skb->data + hdr_len;
-		memmove(skb->data, iph, hdr_len);
-		return;
-	}
-
-	skb->nh.raw = skb->data;
-	top_iph = skb->nh.ipv6h;
-	skb->nh.raw = &top_iph->nexthdr;
-	skb->h.ipv6h = top_iph + 1;
-
-	top_iph->version = 6;
-	top_iph->priority = iph->priority;
-	top_iph->flow_lbl[0] = iph->flow_lbl[0];
-	top_iph->flow_lbl[1] = iph->flow_lbl[1];
-	top_iph->flow_lbl[2] = iph->flow_lbl[2];
-	dsfield = ipv6_get_dsfield(top_iph);
-	dsfield = INET_ECN_encapsulate(dsfield, dsfield);
-	if (x->props.flags & XFRM_STATE_NOECN)
-		dsfield &= ~INET_ECN_MASK;
-	ipv6_change_dsfield(top_iph, 0, dsfield);
-	top_iph->nexthdr = IPPROTO_IPV6; 
-	top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT);
-	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
-	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
-}
-
 static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 {
 	int mtu, ret = 0;
@@ -118,7 +59,9 @@ static int xfrm6_output_one(struct sk_buff *skb)
 		if (err)
 			goto error;
 
-		xfrm6_encap(skb);
+		err = x->mode->output(skb);
+		if (err)
+			goto error;
 
 		err = x->type->output(x, skb);
 		if (err)
-- 
cgit 


From 31a4ab93025719e62e7cf7ce899f71c34ecde5a0 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 27 May 2006 23:06:13 -0700
Subject: [IPSEC] proto: Move transport mode input path into
 xfrm_mode_transport

Now that we have xfrm_mode objects we can move the transport mode specific
input decapsulation code into xfrm_mode_transport.  This removes duplicate
code as well as unnecessary header movement in case of tunnel mode SAs
since we will discard the original IP header immediately.

This also fixes a minor bug for transport-mode ESP where the IP payload
length is set to the correct value minus the header length (with extension
headers for IPv6).

Of course the other neat thing is that we no longer have to allocate
temporary buffers to hold the IP headers for ESP and IPComp.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ah6.c                  | 10 +++-------
 net/ipv6/esp6.c                 | 20 ++++----------------
 net/ipv6/ipcomp6.c              | 27 +++++----------------------
 net/ipv6/xfrm6_mode_transport.c | 15 +++++++++++++++
 4 files changed, 27 insertions(+), 45 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 6778173a3dda..d31c0d6c0448 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -292,7 +292,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 		memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
 		memset(ah->auth_data, 0, ahp->icv_trunc_len);
-		skb_push(skb, skb->data - skb->nh.raw);
+		skb_push(skb, hdr_len);
 		ahp->icv(ahp, skb, ah->auth_data);
 		if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
 			LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n");
@@ -301,12 +301,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 		}
 	}
 
-	skb->nh.raw = skb_pull(skb, ah_hlen);
-	memcpy(skb->nh.raw, tmp_hdr, hdr_len);
-	skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
-	skb_pull(skb, hdr_len);
-	skb->h.raw = skb->data;
-
+	skb->h.raw = memcpy(skb->nh.raw += ah_hlen, tmp_hdr, hdr_len);
+	__skb_pull(skb, ah_hlen + hdr_len);
 
 	kfree(tmp_hdr);
 
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 22f046079037..a15a6f320f70 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -142,25 +142,17 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	int hdr_len = skb->h.raw - skb->nh.raw;
 	int nfrags;
-	unsigned char *tmp_hdr = NULL;
 	int ret = 0;
 
 	if (!pskb_may_pull(skb, sizeof(struct ipv6_esp_hdr))) {
 		ret = -EINVAL;
-		goto out_nofree;
+		goto out;
 	}
 
 	if (elen <= 0 || (elen & (blksize-1))) {
 		ret = -EINVAL;
-		goto out_nofree;
-	}
-
-	tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
-	if (!tmp_hdr) {
-		ret = -ENOMEM;
-		goto out_nofree;
+		goto out;
 	}
-	memcpy(tmp_hdr, skb->nh.raw, hdr_len);
 
 	/* If integrity check is required, do this. */
         if (esp->auth.icv_full_len) {
@@ -222,16 +214,12 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 		/* ... check padding bits here. Silly. :-) */ 
 
 		pskb_trim(skb, skb->len - alen - padlen - 2);
-		skb->h.raw = skb_pull(skb, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen);
-		skb->nh.raw += sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
-		memcpy(skb->nh.raw, tmp_hdr, hdr_len);
-		skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 		ret = nexthdr[1];
 	}
 
+	skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - hdr_len;
+
 out:
-	kfree(tmp_hdr);
-out_nofree:
 	return ret;
 }
 
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 48636436028a..cec3be544b69 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -66,10 +66,8 @@ static LIST_HEAD(ipcomp6_tfms_list);
 static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err = 0;
-	u8 nexthdr = 0;
-	int hdr_len = skb->h.raw - skb->nh.raw;
-	unsigned char *tmp_hdr = NULL;
 	struct ipv6hdr *iph;
+	struct ipv6_comp_hdr *ipch;
 	int plen, dlen;
 	struct ipcomp_data *ipcd = x->data;
 	u8 *start, *scratch;
@@ -86,17 +84,9 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	/* Remove ipcomp header and decompress original payload */
 	iph = skb->nh.ipv6h;
-	tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
-	if (!tmp_hdr)
-		goto out;
-	memcpy(tmp_hdr, iph, hdr_len);
-	nexthdr = *(u8 *)skb->data;
-	skb_pull(skb, sizeof(struct ipv6_comp_hdr)); 
-	skb->nh.raw += sizeof(struct ipv6_comp_hdr);
-	memcpy(skb->nh.raw, tmp_hdr, hdr_len);
-	iph = skb->nh.ipv6h;
-	iph->payload_len = htons(ntohs(iph->payload_len) - sizeof(struct ipv6_comp_hdr));
-	skb->h.raw = skb->data;
+	ipch = (void *)skb->data;
+	skb->h.raw = skb->nh.raw + sizeof(*ipch);
+	__skb_pull(skb, sizeof(*ipch));
 
 	/* decompression */
 	plen = skb->len;
@@ -125,18 +115,11 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb_put(skb, dlen - plen);
 	memcpy(skb->data, scratch, dlen);
+	err = ipch->nexthdr;
 
-	iph = skb->nh.ipv6h;
-	iph->payload_len = htons(skb->len);
-	
 out_put_cpu:
 	put_cpu();
 out:
-	kfree(tmp_hdr);
-	if (err)
-		goto error_out;
-	return nexthdr;
-error_out:
 	return err;
 }
 
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 5efbbae08ef0..711d713e36d8 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -42,8 +42,23 @@ static int xfrm6_transport_output(struct sk_buff *skb)
 	return 0;
 }
 
+/* Remove encapsulation header.
+ *
+ * The IP header will be moved over the top of the encapsulation header.
+ *
+ * On entry, skb->h shall point to where the IP header should be and skb->nh
+ * shall be set to where the IP header currently is.  skb->data shall point
+ * to the start of the payload.
+ */
 static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 {
+	int ihl = skb->data - skb->h.raw;
+
+	if (skb->h.raw != skb->nh.raw)
+		skb->nh.raw = memmove(skb->h.raw, skb->nh.raw, ihl);
+	skb->nh.ipv6h->payload_len = htons(skb->len + ihl -
+					   sizeof(struct ipv6hdr));
+	skb->h.raw = skb->data;
 	return 0;
 }
 
-- 
cgit 


From 6442f1cf897643d4ca597f2f7d3464b765bae960 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 29 May 2006 18:21:53 -0700
Subject: [NETFILTER]: conntrack: don't call helpers for related ICMP messages

None of the existing helpers expects to get called for related ICMP
packets and some even drop them if they can't parse them.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 93bae36f2663..2a71c3b669f1 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -189,7 +189,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
 
 	/* This is where we call the helper: as the packet goes out. */
 	ct = nf_ct_get(*pskb, &ctinfo);
-	if (!ct)
+	if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
 		goto out;
 
 	help = nfct_help(ct);
-- 
cgit 


From 39a27a35c5c1b5be499a0576a35c45a011788bf8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 29 May 2006 18:23:54 -0700
Subject: [NETFILTER]: conntrack: add sysctl to disable checksumming

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 86c6703265d0..ef18a7b7014b 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -233,7 +233,7 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff,
 		return -NF_ACCEPT;
 	}
 
-	if (hooknum == NF_IP6_PRE_ROUTING &&
+	if (nf_conntrack_checksum && hooknum == NF_IP6_PRE_ROUTING &&
 	    nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
 		nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
 			      "nf_ct_icmpv6: ICMPv6 checksum failed\n");
-- 
cgit 


From 984bc16cc92ea3c247bf34ad667cfb95331b9d3c Mon Sep 17 00:00:00 2001
From: James Morris <jmorris@namei.org>
Date: Fri, 9 Jun 2006 00:29:17 -0700
Subject: [SECMARK]: Add secmark support to core networking.

Add a secmark field to the skbuff structure, to allow security subsystems to
place security markings on network packets.  This is similar to the nfmark
field, except is intended for implementing security policy, rather than than
networking policy.

This patch was already acked in principle by Dave Miller.

Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 416f6e428a0a..d29620f4910e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -459,6 +459,7 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	nf_bridge_get(to->nf_bridge);
 #endif
 #endif
+	skb_copy_secmark(to, from);
 }
 
 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
-- 
cgit 


From 364c6badde0dd62a0a38e5ed67f85d87d6665780 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 9 Jun 2006 16:10:40 -0700
Subject: [NET]: Clean up skb_linearize

The linearisation operation doesn't need to be super-optimised.  So we can
replace __skb_linearize with __pskb_pull_tail which does the same thing but
is more general.

Also, most users of skb_linearize end up testing whether the skb is linear
or not so it helps to make skb_linearize do just that.

Some callers of skb_linearize also use it to copy cloned data, so it's
useful to have a new function skb_linearize_cow to copy the data if it's
either non-linear or cloned.

Last but not least, I've removed the gfp argument since nobody uses it
anymore.  If it's ever needed we can easily add it back.

Misc bugs fixed by this patch:

* via-velocity error handling (also, no SG => no frags)

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ipcomp6.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index cec3be544b69..f28cd37feed3 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -65,7 +65,7 @@ static LIST_HEAD(ipcomp6_tfms_list);
 
 static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	int err = 0;
+	int err = -ENOMEM;
 	struct ipv6hdr *iph;
 	struct ipv6_comp_hdr *ipch;
 	int plen, dlen;
@@ -74,11 +74,8 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	struct crypto_tfm *tfm;
 	int cpu;
 
-	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
-		skb_linearize(skb, GFP_ATOMIC) != 0) {
-		err = -ENOMEM;
+	if (skb_linearize_cow(skb))
 		goto out;
-	}
 
 	skb->ip_summed = CHECKSUM_NONE;
 
@@ -142,10 +139,8 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
 		goto out_ok;
 	}
 
-	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
-		skb_linearize(skb, GFP_ATOMIC) != 0) {
+	if (skb_linearize_cow(skb))
 		goto out_ok;
-	}
 
 	/* compression */
 	plen = skb->len - hdr_len;
-- 
cgit 


From b38dfee3d616ffadb58d4215e3ff9d1d7921031e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 9 Jun 2006 16:13:01 -0700
Subject: [NET]: skb_trim audit

I found a few more spots where pskb_trim_rcsum could be used but were not.
This patch changes them to use it.

Also, sk_filter can get paged skb data.  Therefore we must use pskb_trim
instead of skb_trim.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/nf_conntrack_reasm.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 3e319035f82d..c32a029e43f0 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -456,13 +456,9 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 		DEBUGP("queue: message is too short.\n");
 		goto err;
 	}
-	if (end-offset < skb->len) {
-		if (pskb_trim(skb, end - offset)) {
-			DEBUGP("Can't trim\n");
-			goto err;
-		}
-		if (skb->ip_summed != CHECKSUM_UNNECESSARY)
-			skb->ip_summed = CHECKSUM_NONE;
+	if (pskb_trim_rcsum(skb, end - offset)) {
+		DEBUGP("Can't trim\n");
+		goto err;
 	}
 
 	/* Find out which fragments are in front and at the back of us
-- 
cgit 


From c5396a31b20991c856facbce18a2a56d1a14e8d0 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sat, 17 Jun 2006 22:48:48 -0700
Subject: [IPV6]: Sum real space for RTAs.

This patch fixes RTNLGRP_IPV6_IFINFO netlink notifications.  Issue
pointed out by Patrick McHardy <kaber@trash.net>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 445006ee4522..c2c26fa0943d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2860,6 +2860,11 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
 }
 
+/* Maximum length of ifa_cacheinfo attributes */
+#define INET6_IFADDR_RTA_SPACE \
+		RTA_SPACE(16) /* IFA_ADDRESS */ + \
+		RTA_SPACE(sizeof(struct ifa_cacheinfo)) /* CACHEINFO */
+
 static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 			     u32 pid, u32 seq, int event, unsigned int flags)
 {
@@ -3092,7 +3097,7 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
 static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128);
+	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE);
 
 	skb = alloc_skb(size, GFP_ATOMIC);
 	if (!skb) {
@@ -3142,6 +3147,17 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
 #endif
 }
 
+/* Maximum length of ifinfomsg attributes */
+#define INET6_IFINFO_RTA_SPACE \
+		RTA_SPACE(IFNAMSIZ) /* IFNAME */ + \
+		RTA_SPACE(MAX_ADDR_LEN) /* ADDRESS */ +	\
+		RTA_SPACE(sizeof(u32)) /* MTU */ + \
+		RTA_SPACE(sizeof(int)) /* LINK */ + \
+		RTA_SPACE(0) /* PROTINFO */ + \
+		RTA_SPACE(sizeof(u32)) /* FLAGS */ + \
+		RTA_SPACE(sizeof(struct ifla_cacheinfo)) /* CACHEINFO */ + \
+		RTA_SPACE(sizeof(__s32[DEVCONF_MAX])) /* CONF */
+
 static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, 
 			     u32 pid, u32 seq, int event, unsigned int flags)
 {
@@ -3235,8 +3251,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
 {
 	struct sk_buff *skb;
-	/* 128 bytes ?? */
-	int size = NLMSG_SPACE(sizeof(struct ifinfomsg)+128);
+	int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE);
 	
 	skb = alloc_skb(size, GFP_ATOMIC);
 	if (!skb) {
@@ -3252,6 +3267,11 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
 	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC);
 }
 
+/* Maximum length of prefix_cacheinfo attributes */
+#define INET6_PREFIX_RTA_SPACE \
+		RTA_SPACE(sizeof(((struct prefix_info *)NULL)->prefix)) /* ADDRESS */ + \
+		RTA_SPACE(sizeof(struct prefix_cacheinfo)) /* CACHEINFO */
+
 static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
 			struct prefix_info *pinfo, u32 pid, u32 seq, 
 			int event, unsigned int flags)
@@ -3296,7 +3316,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 			 struct prefix_info *pinfo)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct prefixmsg)+128);
+	int size = NLMSG_SPACE(sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE);
 
 	skb = alloc_skb(size, GFP_ATOMIC);
 	if (!skb) {
-- 
cgit