From f6d8bd051c391c1c0458a30b2a7abcd939329259 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Apr 2011 09:45:37 +0000 Subject: inet: add RCU protection to inet->opt We lack proper synchronization to manipulate inet->opt ip_options Problem is ip_make_skb() calls ip_setup_cork() and ip_setup_cork() possibly makes a copy of ipc->opt (struct ip_options), without any protection against another thread manipulating inet->opt. Another thread can change inet->opt pointer and free old one under us. Use RCU to protect inet->opt (changed to inet->inet_opt). Instead of handling atomic refcounts, just copy ip_options when necessary, to avoid cache line dirtying. We cant insert an rcu_head in struct ip_options since its included in skb->cb[], so this patch is large because I had to introduce a new ip_options_rcu structure. Signed-off-by: Eric Dumazet Cc: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 2391b24e8251..01fc40965848 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -36,7 +36,7 @@ * saddr is address of outgoing interface. */ -void ip_options_build(struct sk_buff * skb, struct ip_options * opt, +void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag) { unsigned char *iph = skb_network_header(skb); @@ -83,9 +83,9 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt, * NOTE: dopt cannot point to skb. */ -int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) +int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) { - struct ip_options *sopt; + const struct ip_options *sopt; unsigned char *sptr, *dptr; int soffset, doffset; int optlen; @@ -95,10 +95,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) sopt = &(IPCB(skb)->opt); - if (sopt->optlen == 0) { - dopt->optlen = 0; + if (sopt->optlen == 0) return 0; - } sptr = skb_network_header(skb); dptr = dopt->__data; @@ -157,7 +155,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) dopt->optlen += optlen; } if (sopt->srr) { - unsigned char * start = sptr+sopt->srr; + unsigned char *start = sptr+sopt->srr; __be32 faddr; optlen = start[1]; @@ -499,19 +497,19 @@ void ip_options_undo(struct ip_options * opt) } } -static struct ip_options *ip_options_get_alloc(const int optlen) +static struct ip_options_rcu *ip_options_get_alloc(const int optlen) { - return kzalloc(sizeof(struct ip_options) + ((optlen + 3) & ~3), + return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3), GFP_KERNEL); } -static int ip_options_get_finish(struct net *net, struct ip_options **optp, - struct ip_options *opt, int optlen) +static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp, + struct ip_options_rcu *opt, int optlen) { while (optlen & 3) - opt->__data[optlen++] = IPOPT_END; - opt->optlen = optlen; - if (optlen && ip_options_compile(net, opt, NULL)) { + opt->opt.__data[optlen++] = IPOPT_END; + opt->opt.optlen = optlen; + if (optlen && ip_options_compile(net, &opt->opt, NULL)) { kfree(opt); return -EINVAL; } @@ -520,29 +518,29 @@ static int ip_options_get_finish(struct net *net, struct ip_options **optp, return 0; } -int ip_options_get_from_user(struct net *net, struct ip_options **optp, +int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp, unsigned char __user *data, int optlen) { - struct ip_options *opt = ip_options_get_alloc(optlen); + struct ip_options_rcu *opt = ip_options_get_alloc(optlen); if (!opt) return -ENOMEM; - if (optlen && copy_from_user(opt->__data, data, optlen)) { + if (optlen && copy_from_user(opt->opt.__data, data, optlen)) { kfree(opt); return -EFAULT; } return ip_options_get_finish(net, optp, opt, optlen); } -int ip_options_get(struct net *net, struct ip_options **optp, +int ip_options_get(struct net *net, struct ip_options_rcu **optp, unsigned char *data, int optlen) { - struct ip_options *opt = ip_options_get_alloc(optlen); + struct ip_options_rcu *opt = ip_options_get_alloc(optlen); if (!opt) return -ENOMEM; if (optlen) - memcpy(opt->__data, data, optlen); + memcpy(opt->opt.__data, data, optlen); return ip_options_get_finish(net, optp, opt, optlen); } -- cgit From 10949550bd1e50cc91c0f5085f7080a44b0871fe Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 May 2011 19:26:57 -0400 Subject: ipv4: Kill spurious opt->srr check in ip_options_rcv_srr(). All call sites conditionalize the call to ip_options_rcv_srr() with a check of opt->srr, so no need to check it again there. Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 01fc40965848..e82c304806bb 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -601,7 +601,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) unsigned long orefdst; int err; - if (!opt->srr || !rt) + if (!rt) return 0; if (skb->pkt_type != PACKET_HOST) -- cgit From c30883bdff0b3544900a5c4aba18b8985436878f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 May 2011 19:30:58 -0400 Subject: ipv4: Simplify iph->daddr overwrite in ip_options_rcv_srr(). We already copy the 4-byte nexthop from the options block into local variable "nexthop" for the route lookup. Re-use that variable instead of memcpy()'ing again when assigning to iph->daddr after the route lookup succeeds. Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index e82c304806bb..c5c26192b057 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -635,7 +635,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) if (rt2->rt_type != RTN_LOCAL) break; /* Superfast 8) loopback forward */ - memcpy(&iph->daddr, &optptr[srrptr-1], 4); + iph->daddr = nexthop; opt->is_changed = 1; } if (srrptr <= srrspace) { -- cgit From 0374d9ceb02eb12fcd65be9dd5df9c911ef93424 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 13 May 2011 17:15:50 -0400 Subject: ipv4: Kill spurious write to iph->daddr in ip_forward_options(). This code block executes when opt->srr_is_hit is set. It will be set only by ip_options_rcv_srr(). ip_options_rcv_srr() walks until it hits a matching nexthop in the SRR option addresses, and when it matches one 1) looks up the route for that nexthop and 2) on route lookup success it writes that nexthop value into iph->daddr. ip_forward_options() runs later, and again walks the SRR option addresses looking for the option matching the destination of the route stored in skb_rtable(). This route will be the same exact one looked up for the nexthop by ip_options_rcv_srr(). Therefore "rt->rt_dst == iph->daddr" must be true. All it really needs to do is record the route's source address in the matching SRR option adddress. It need not write iph->daddr again, since that has already been done by ip_options_rcv_srr() as detailed above. Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index c5c26192b057..c6474cd1cbfa 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -573,7 +573,6 @@ void ip_forward_options(struct sk_buff *skb) if (srrptr + 3 <= srrspace) { opt->is_changed = 1; ip_rt_get_source(&optptr[srrptr-1], rt); - ip_hdr(skb)->daddr = rt->rt_dst; optptr[2] = srrptr+4; } else if (net_ratelimit()) printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); -- cgit From 8e36360ae876995e92d3a7538dda70548e64e685 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 13 May 2011 17:29:41 -0400 Subject: ipv4: Remove route key identity dependencies in ip_rt_get_source(). Pass in the sk_buff so that we can fetch the necessary keys from the packet header when working with input routes. Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index c6474cd1cbfa..89268baabc87 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -37,7 +37,7 @@ */ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, - __be32 daddr, struct rtable *rt, int is_frag) + __be32 daddr, struct rtable *rt, int is_frag) { unsigned char *iph = skb_network_header(skb); @@ -50,9 +50,9 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, if (!is_frag) { if (opt->rr_needaddr) - ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt); + ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, skb, rt); if (opt->ts_needaddr) - ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt); + ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, skb, rt); if (opt->ts_needtime) { struct timespec tv; __be32 midtime; @@ -553,7 +553,7 @@ void ip_forward_options(struct sk_buff *skb) if (opt->rr_needaddr) { optptr = (unsigned char *)raw + opt->rr; - ip_rt_get_source(&optptr[optptr[2]-5], rt); + ip_rt_get_source(&optptr[optptr[2]-5], skb, rt); opt->is_changed = 1; } if (opt->srr_is_hit) { @@ -572,13 +572,13 @@ void ip_forward_options(struct sk_buff *skb) } if (srrptr + 3 <= srrspace) { opt->is_changed = 1; - ip_rt_get_source(&optptr[srrptr-1], rt); + ip_rt_get_source(&optptr[srrptr-1], skb, rt); optptr[2] = srrptr+4; } else if (net_ratelimit()) printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); if (opt->ts_needaddr) { optptr = raw + opt->ts; - ip_rt_get_source(&optptr[optptr[2]-9], rt); + ip_rt_get_source(&optptr[optptr[2]-9], skb, rt); opt->is_changed = 1; } } -- cgit From 7be799a70ba3dd90a59e8d2c72bbe06020005b3f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 13 May 2011 17:31:02 -0400 Subject: ipv4: Remove rt->rt_dst reference from ip_forward_options(). At this point iph->daddr equals what rt->rt_dst would hold. Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/ip_options.c') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 89268baabc87..c3118e1cd3bb 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -567,7 +567,7 @@ void ip_forward_options(struct sk_buff *skb) ) { if (srrptr + 3 > srrspace) break; - if (memcmp(&rt->rt_dst, &optptr[srrptr-1], 4) == 0) + if (memcmp(&ip_hdr(skb)->daddr, &optptr[srrptr-1], 4) == 0) break; } if (srrptr + 3 <= srrspace) { -- cgit