diff options
Diffstat (limited to 'net/ipv6/raw.c')
| -rw-r--r-- | net/ipv6/raw.c | 678 |
1 files changed, 326 insertions, 352 deletions
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index c45f7a5c36e9..b4cd05dba9b6 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * RAW sockets for IPv6 * Linux INET6 implementation @@ -11,11 +12,6 @@ * Hideaki YOSHIFUJI : sin6_scope_id support * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance) * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/errno.h> @@ -32,7 +28,7 @@ #include <linux/netfilter_ipv6.h> #include <linux/skbuff.h> #include <linux/compat.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/ioctls.h> #include <net/net_namespace.h> @@ -63,44 +59,32 @@ #include <linux/seq_file.h> #include <linux/export.h> -static struct raw_hashinfo raw_v6_hashinfo = { - .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock), -}; - -static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, - unsigned short num, const struct in6_addr *loc_addr, - const struct in6_addr *rmt_addr, int dif) -{ - bool is_multicast = ipv6_addr_is_multicast(loc_addr); - - sk_for_each_from(sk) - if (inet_sk(sk)->inet_num == num) { - struct ipv6_pinfo *np = inet6_sk(sk); +#define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */ - if (!net_eq(sock_net(sk), net)) - continue; +struct raw_hashinfo raw_v6_hashinfo; +EXPORT_SYMBOL_GPL(raw_v6_hashinfo); - if (!ipv6_addr_any(&np->daddr) && - !ipv6_addr_equal(&np->daddr, rmt_addr)) - continue; - - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) - continue; - - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) - goto found; - if (is_multicast && - inet6_mc_check(sk, loc_addr, rmt_addr)) - goto found; - continue; - } - goto found; - } - sk = NULL; -found: - return sk; +bool raw_v6_match(struct net *net, const struct sock *sk, unsigned short num, + const struct in6_addr *loc_addr, + const struct in6_addr *rmt_addr, int dif, int sdif) +{ + if (inet_sk(sk)->inet_num != num || + !net_eq(sock_net(sk), net) || + (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || + !raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, + dif, sdif)) + return false; + + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) || + ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr) || + (ipv6_addr_is_multicast(loc_addr) && + inet6_mc_check(sk, loc_addr, rmt_addr))) + return true; + + return false; } +EXPORT_SYMBOL_GPL(raw_v6_match); /* * 0 - deliver @@ -108,11 +92,14 @@ found: */ static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) { - struct icmp6hdr *_hdr; + struct icmp6hdr _hdr; const struct icmp6hdr *hdr; + /* We require only the four bytes of the ICMPv6 header, not any + * additional bytes of message body in "struct icmp6hdr". + */ hdr = skb_header_pointer(skb, skb_transport_offset(skb), - sizeof(_hdr), &_hdr); + ICMPV6_HDRLEN, &_hdr); if (hdr) { const __u32 *data = &raw6_sk(sk)->filter.data[0]; unsigned int type = hdr->icmp6_type; @@ -153,29 +140,32 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister); */ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) { + struct net *net = dev_net(skb->dev); const struct in6_addr *saddr; const struct in6_addr *daddr; + struct hlist_head *hlist; struct sock *sk; bool delivered = false; __u8 hash; - struct net *net; saddr = &ipv6_hdr(skb)->saddr; daddr = saddr + 1; - hash = nexthdr & (RAW_HTABLE_SIZE - 1); - - read_lock(&raw_v6_hashinfo.lock); - sk = sk_head(&raw_v6_hashinfo.ht[hash]); - - if (sk == NULL) - goto out; + hash = raw_hashfunc(net, nexthdr); + hlist = &raw_v6_hashinfo.ht[hash]; + rcu_read_lock(); + sk_for_each_rcu(sk, hlist) { + int filtered; - net = dev_net(skb->dev); - sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); + if (!raw_v6_match(net, sk, nexthdr, daddr, saddr, + inet6_iif(skb), inet6_sdif(skb))) + continue; - while (sk) { - int filtered; + if (atomic_read(&sk->sk_rmem_alloc) >= + READ_ONCE(sk->sk_rcvbuf)) { + sk_drops_inc(sk); + continue; + } delivered = true; switch (nexthdr) { @@ -210,32 +200,22 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ - if (clone) { - nf_reset(clone); + if (clone) rawv6_rcv(sk, clone); - } } - sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr, - IP6CB(skb)->iif); } -out: - read_unlock(&raw_v6_hashinfo.lock); + rcu_read_unlock(); return delivered; } bool raw6_local_deliver(struct sk_buff *skb, int nexthdr) { - struct sock *raw_sk; - - raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]); - if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) - raw_sk = NULL; - - return raw_sk != NULL; + return ipv6_raw_deliver(skb, nexthdr); } /* This cleans up af_inet6 a bit. -DaveM */ -static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int rawv6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, + int addr_len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -246,6 +226,10 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; + + if (addr->sin6_family != AF_INET6) + return -EINVAL; + addr_type = ipv6_addr_type(&addr->sin6_addr); /* Raw sockets are IPv6 only */ @@ -275,7 +259,9 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* Binding to link-local address requires an interface */ if (!sk->sk_bound_dev_if) goto out_unlock; + } + if (sk->sk_bound_dev_if) { err = -ENODEV; dev = dev_get_by_index_rcu(sock_net(sk), sk->sk_bound_dev_if); @@ -287,7 +273,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) * unspecified and mapped address have a v4 equivalent. */ v4addr = LOOPBACK4_IPV6; - if (!(addr_type & IPV6_ADDR_MULTICAST)) { + if (!(addr_type & IPV6_ADDR_MULTICAST) && + !ipv6_can_nonlocal_bind(sock_net(sk), inet)) { err = -EADDRNOTAVAIL; if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr, dev, 0)) { @@ -297,7 +284,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) } inet->inet_rcv_saddr = inet->inet_saddr = v4addr; - np->rcv_saddr = addr->sin6_addr; + sk->sk_v6_rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; err = 0; @@ -309,10 +296,9 @@ out: } static void rawv6_err(struct sock *sk, struct sk_buff *skb, - struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { - struct inet_sock *inet = inet_sk(sk); + bool recverr = inet6_test_bit(RECVERR6, sk); struct ipv6_pinfo *np = inet6_sk(sk); int err; int harderr; @@ -322,71 +308,69 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, 2. Socket is connected (otherwise the error indication is useless without recverr and error is hard. */ - if (!np->recverr && sk->sk_state != TCP_ESTABLISHED) + if (!recverr && sk->sk_state != TCP_ESTABLISHED) return; harderr = icmpv6_err_convert(type, code, &err); if (type == ICMPV6_PKT_TOOBIG) { ip6_sk_update_pmtu(skb, sk, info); - harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); + harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO); } - if (type == NDISC_REDIRECT) + if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); - if (np->recverr) { + return; + } + if (recverr) { u8 *payload = skb->data; - if (!inet->hdrincl) + if (!inet_test_bit(HDRINCL, sk)) payload += offset; ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload); } - if (np->recverr || harderr) { + if (recverr || harderr) { sk->sk_err = err; - sk->sk_error_report(sk); + sk_error_report(sk); } } void raw6_icmp_error(struct sk_buff *skb, int nexthdr, u8 type, u8 code, int inner_offset, __be32 info) { + struct net *net = dev_net(skb->dev); + struct hlist_head *hlist; struct sock *sk; int hash; - const struct in6_addr *saddr, *daddr; - struct net *net; - - hash = nexthdr & (RAW_HTABLE_SIZE - 1); - read_lock(&raw_v6_hashinfo.lock); - sk = sk_head(&raw_v6_hashinfo.ht[hash]); - if (sk != NULL) { + hash = raw_hashfunc(net, nexthdr); + hlist = &raw_v6_hashinfo.ht[hash]; + rcu_read_lock(); + sk_for_each_rcu(sk, hlist) { /* Note: ipv6_hdr(skb) != skb->data */ const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data; - saddr = &ip6h->saddr; - daddr = &ip6h->daddr; - net = dev_net(skb->dev); - - while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr, - IP6CB(skb)->iif))) { - rawv6_err(sk, skb, NULL, type, code, - inner_offset, info); - sk = sk_next(sk); - } + + if (!raw_v6_match(net, sk, nexthdr, &ip6h->saddr, &ip6h->daddr, + inet6_iif(skb), inet6_iif(skb))) + continue; + rawv6_err(sk, skb, type, code, inner_offset, info); } - read_unlock(&raw_v6_hashinfo.lock); + rcu_read_unlock(); } static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb) { + enum skb_drop_reason reason; + if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) && skb_checksum_complete(skb)) { - atomic_inc(&sk->sk_drops); - kfree_skb(skb); + sk_drops_inc(sk); + sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM); return NET_RX_DROP; } /* Charge it to the socket. */ skb_dst_drop(skb); - if (sock_queue_rcv_skb(sk, skb) < 0) { - kfree_skb(skb); + if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { + sk_skb_reason_drop(sk, skb, reason); return NET_RX_DROP; } @@ -406,10 +390,11 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) struct raw6_sock *rp = raw6_sk(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { - atomic_inc(&sk->sk_drops); - kfree_skb(skb); + sk_drops_inc(sk); + sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_XFRM_POLICY); return NET_RX_DROP; } + nf_reset_ct(skb); if (!rp->checksum) skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -428,10 +413,10 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) skb->len, inet->inet_num, 0)); - if (inet->hdrincl) { + if (inet_test_bit(HDRINCL, sk)) { if (skb_checksum_complete(skb)) { - atomic_inc(&sk->sk_drops); - kfree_skb(skb); + sk_drops_inc(sk); + sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM); return NET_RX_DROP; } } @@ -446,12 +431,11 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) * we return it, otherwise we block. */ -static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) +static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct sk_buff *skb; size_t copied; int err; @@ -459,16 +443,13 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, if (flags & MSG_OOB) return -EOPNOTSUPP; - if (addr_len) - *addr_len=sizeof(*sin6); - if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); - if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + if (np->rxopt.bits.rxpmtu && READ_ONCE(np->rxpmtu)) + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; @@ -479,13 +460,13 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, } if (skb_csum_unnecessary(skb)) { - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); } else if (msg->msg_flags&MSG_TRUNC) { if (__skb_checksum_complete(skb)) goto csum_copy_err; - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); } else { - err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov); + err = skb_copy_and_csum_datagram_msg(skb, 0, msg); if (err == -EINVAL) goto csum_copy_err; } @@ -499,10 +480,11 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, sin6->sin6_addr = ipv6_hdr(skb)->saddr; sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, - IP6CB(skb)->iif); + inet6_iif(skb)); + *addr_len = sizeof(*sin6); } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (np->rxopt.all) ip6_datagram_recv_ctl(sk, msg, skb); @@ -529,6 +511,7 @@ csum_copy_err: static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct raw6_sock *rp) { + struct ipv6_txoptions *opt; struct sk_buff *skb; int err = 0; int offset; @@ -540,11 +523,15 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, if (!rp->checksum) goto send; - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) + skb = skb_peek(&sk->sk_write_queue); + if (!skb) goto out; offset = rp->offset; total_len = inet_sk(sk)->cork.base.length; + opt = inet6_sk(sk)->cork.opt; + total_len -= opt ? opt->opt_flen : 0; + if (offset >= total_len - 1) { err = -EINVAL; ip6_flush_pending_frames(sk); @@ -580,8 +567,11 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, } offset += skb_transport_offset(skb); - if (skb_copy_bits(skb, offset, &csum, 2)) - BUG(); + err = skb_copy_bits(skb, offset, &csum, 2); + if (err < 0) { + ip6_flush_pending_frames(sk); + goto out; + } /* in case cksum was not initialized */ if (unlikely(csum)) @@ -593,8 +583,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP) csum = CSUM_MANGLED_0; - if (skb_store_bits(skb, offset, &csum, 2)) - BUG(); + BUG_ON(skb_store_bits(skb, offset, &csum, 2)); send: err = ip6_push_pending_frames(sk); @@ -602,15 +591,15 @@ out: return err; } -static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, +static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct flowi6 *fl6, struct dst_entry **dstp, - unsigned int flags) + unsigned int flags, const struct sockcm_cookie *sockc) { - struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); struct ipv6hdr *iph; struct sk_buff *skb; int err; - struct rt6_info *rt = (struct rt6_info *)*dstp; + struct rt6_info *rt = dst_rt6_info(*dstp); int hlen = LL_RESERVED_SPACE(rt->dst.dev); int tlen = rt->dst.dev->needed_tailroom; @@ -618,20 +607,22 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu); return -EMSGSIZE; } + if (length < sizeof(struct ipv6hdr)) + return -EINVAL; if (flags&MSG_PROBE) goto out; skb = sock_alloc_send_skb(sk, length + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); - if (skb == NULL) + if (!skb) goto error; skb_reserve(skb, hlen); - skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; - skb_dst_set(skb, &rt->dst); - *dstp = NULL; + skb->protocol = htons(ETH_P_IPV6); + skb->priority = sockc->priority; + skb->mark = sockc->mark; + skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); skb_put(skb, length); skb_reset_network_header(skb); @@ -639,97 +630,119 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, skb->ip_summed = CHECKSUM_NONE; + skb_setup_tx_timestamp(skb, sockc); + + if (flags & MSG_CONFIRM) + skb_set_dst_pending_confirm(skb, 1); + skb->transport_header = skb->network_header; - err = memcpy_fromiovecend((void *)iph, from, 0, length); - if (err) - goto error_fault; + err = memcpy_from_msg(iph, msg, length); + if (err) { + err = -EFAULT; + kfree_skb(skb); + goto error; + } - IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - rt->dst.dev, dst_output); + skb_dst_set(skb, &rt->dst); + *dstp = NULL; + + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out(sk, skb); + if (unlikely(!skb)) + return 0; + + /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev + * in the error path. Since skb has been freed, the dst could + * have been queued for deletion. + */ + rcu_read_lock(); + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, + NULL, rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); - if (err) - goto error; + if (err) { + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); + goto error_check; + } + rcu_read_unlock(); out: return 0; -error_fault: - err = -EFAULT; - kfree_skb(skb); error: - IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); - if (err == -ENOBUFS && !np->recverr) + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); +error_check: + if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) err = 0; return err; } -static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg) +struct raw6_frag_vec { + struct msghdr *msg; + int hlen; + char c[4]; +}; + +static int rawv6_probe_proto_opt(struct raw6_frag_vec *rfv, struct flowi6 *fl6) { - struct iovec *iov; - u8 __user *type = NULL; - u8 __user *code = NULL; - u8 len = 0; - int probed = 0; - int i; - - if (!msg->msg_iov) - return 0; + int err = 0; + switch (fl6->flowi6_proto) { + case IPPROTO_ICMPV6: + rfv->hlen = 2; + err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen); + if (!err) { + fl6->fl6_icmp_type = rfv->c[0]; + fl6->fl6_icmp_code = rfv->c[1]; + } + break; + case IPPROTO_MH: + rfv->hlen = 4; + err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen); + if (!err) + fl6->fl6_mh_type = rfv->c[2]; + } + return err; +} - for (i = 0; i < msg->msg_iovlen; i++) { - iov = &msg->msg_iov[i]; - if (!iov) - continue; +static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, + struct sk_buff *skb) +{ + struct raw6_frag_vec *rfv = from; - switch (fl6->flowi6_proto) { - case IPPROTO_ICMPV6: - /* check if one-byte field is readable or not. */ - if (iov->iov_base && iov->iov_len < 1) - break; - - if (!type) { - type = iov->iov_base; - /* check if code field is readable or not. */ - if (iov->iov_len > 1) - code = type + 1; - } else if (!code) - code = iov->iov_base; - - if (type && code) { - if (get_user(fl6->fl6_icmp_type, type) || - get_user(fl6->fl6_icmp_code, code)) - return -EFAULT; - probed = 1; - } - break; - case IPPROTO_MH: - if (iov->iov_base && iov->iov_len < 1) - break; - /* check if type field is readable or not. */ - if (iov->iov_len > 2 - len) { - u8 __user *p = iov->iov_base; - if (get_user(fl6->fl6_mh_type, &p[2 - len])) - return -EFAULT; - probed = 1; - } else - len += iov->iov_len; + if (offset < rfv->hlen) { + int copy = min(rfv->hlen - offset, len); - break; - default: - probed = 1; - break; - } - if (probed) - break; + if (skb->ip_summed == CHECKSUM_PARTIAL) + memcpy(to, rfv->c + offset, copy); + else + skb->csum = csum_block_add( + skb->csum, + csum_partial_copy_nocheck(rfv->c + offset, + to, copy), + odd); + + odd = 0; + offset += copy; + to += copy; + len -= copy; + + if (!len) + return 0; } - return 0; + + offset -= rfv->hlen; + + return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } -static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { + struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions opt_space; - struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -737,11 +750,11 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; + struct raw6_frag_vec rfv; struct flowi6 fl6; + struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; - int hlimit = -1; - int tclass = -1; - int dontfrag = -1; + int hdrincl; u16 proto; int err; @@ -755,12 +768,17 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; + hdrincl = inet_test_bit(HDRINCL, sk); + + ipcm6_init_sk(&ipc6, sk); + /* * Get and verify the address. */ memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = ipc6.sockc.mark; + fl6.flowi6_uid = sk_uid(sk); if (sin6) { if (addr_len < SIN6_LEN_RFC2133) @@ -774,20 +792,20 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (!proto) proto = inet->inet_num; - else if (proto != inet->inet_num) + else if (proto != inet->inet_num && + inet->inet_num != IPPROTO_RAW) return -EINVAL; if (proto > 255) return -EINVAL; daddr = &sin6->sin6_addr; - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (IS_ERR(flowlabel)) return -EINVAL; - daddr = &flowlabel->dst; } } @@ -796,8 +814,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && - ipv6_addr_equal(daddr, &np->daddr)) - daddr = &np->daddr; + ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) + daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && @@ -808,7 +826,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, return -EDESTADDRREQ; proto = inet->inet_num; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; fl6.flowlabel = np->flow_label; } @@ -819,31 +837,39 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); + ipc6.opt = opt; - err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6); if (err < 0) { fl6_sock_release(flowlabel); return err; } if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (IS_ERR(flowlabel)) return -EINVAL; } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } - if (opt == NULL) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); fl6.flowi6_proto = proto; - err = rawv6_probe_proto_opt(&fl6, msg); - if (err) - goto out; + fl6.flowi6_mark = ipc6.sockc.mark; + + if (!hdrincl) { + rfv.msg = msg; + rfv.hlen = 0; + err = rawv6_probe_proto_opt(&rfv, &fl6); + if (err) + goto out; + } if (!ipv6_addr_any(daddr)) fl6.daddr = *daddr; @@ -855,42 +881,37 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, final_p = fl6_update_dst(&fl6, opt, &final); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) - fl6.flowi6_oif = np->mcast_oif; + fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + fl6.flowi6_oif = READ_ONCE(np->ucast_oif); + security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + if (hdrincl) + fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH; + + fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); + + dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto out; } - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); - } - - if (tclass < 0) - tclass = np->tclass; - - if (dontfrag < 0) - dontfrag = np->dontfrag; + if (ipc6.hlimit < 0) + ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: - if (inet->hdrincl) - err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags); + if (hdrincl) + err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, + msg->msg_flags, &ipc6.sockc); else { + ipc6.opt = opt; lock_sock(sk); - err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, - len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info*)dst, - msg->msg_flags, dontfrag); + err = ip6_append_data(sk, raw6_getfrag, &rfv, + len, 0, &ipc6, &fl6, dst_rt6_info(dst), + msg->msg_flags); if (err) ip6_flush_pending_frames(sk); @@ -902,23 +923,25 @@ done: dst_release(dst); out: fl6_sock_release(flowlabel); - return err<0?err:len; + txopt_put(opt_to_free); + return err < 0 ? err : len; do_confirm: - dst_confirm(dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(dst, &fl6.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; goto done; } -static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, - char __user *optval, int optlen) +static int rawv6_seticmpfilter(struct sock *sk, int optname, + sockptr_t optval, int optlen) { switch (optname) { case ICMPV6_FILTER: if (optlen > sizeof(struct icmp6_filter)) optlen = sizeof(struct icmp6_filter); - if (copy_from_user(&raw6_sk(sk)->filter, optval, optlen)) + if (copy_from_sockptr(&raw6_sk(sk)->filter, optval, optlen)) return -EFAULT; return 0; default: @@ -928,7 +951,7 @@ static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, return 0; } -static int rawv6_geticmpfilter(struct sock *sk, int level, int optname, +static int rawv6_geticmpfilter(struct sock *sk, int optname, char __user *optval, int __user *optlen) { int len; @@ -955,15 +978,23 @@ static int rawv6_geticmpfilter(struct sock *sk, int level, int optname, static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct raw6_sock *rp = raw6_sk(sk); int val; - if (get_user(val, (int __user *)optval)) + if (optlen < sizeof(val)) + return -EINVAL; + + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; switch (optname) { + case IPV6_HDRINCL: + if (sk->sk_type != SOCK_RAW) + return -EINVAL; + inet_assign_bit(HDRINCL, sk, val); + return 0; case IPV6_CHECKSUM: if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 && level == IPPROTO_IPV6) { @@ -997,7 +1028,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, } static int rawv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { switch (level) { case SOL_RAW: @@ -1006,10 +1037,12 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, case SOL_ICMPV6: if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; - return rawv6_seticmpfilter(sk, level, optname, optval, optlen); + return rawv6_seticmpfilter(sk, optname, optval, optlen); case SOL_IPV6: - if (optname == IPV6_CHECKSUM) + if (optname == IPV6_CHECKSUM || + optname == IPV6_HDRINCL) break; + fallthrough; default: return ipv6_setsockopt(sk, level, optname, optval, optlen); } @@ -1017,38 +1050,19 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, return do_rawv6_setsockopt(sk, level, optname, optval, optlen); } -#ifdef CONFIG_COMPAT -static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - switch (level) { - case SOL_RAW: - break; - case SOL_ICMPV6: - if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) - return -EOPNOTSUPP; - return rawv6_seticmpfilter(sk, level, optname, optval, optlen); - case SOL_IPV6: - if (optname == IPV6_CHECKSUM) - break; - default: - return compat_ipv6_setsockopt(sk, level, optname, - optval, optlen); - } - return do_rawv6_setsockopt(sk, level, optname, optval, optlen); -} -#endif - static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { struct raw6_sock *rp = raw6_sk(sk); int val, len; - if (get_user(len,optlen)) + if (get_user(len, optlen)) return -EFAULT; switch (optname) { + case IPV6_HDRINCL: + val = inet_test_bit(HDRINCL, sk); + break; case IPV6_CHECKSUM: /* * We allow getsockopt() for IPPROTO_IPV6-level @@ -1069,7 +1083,7 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval,&val,len)) + if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } @@ -1084,10 +1098,12 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, case SOL_ICMPV6: if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; - return rawv6_geticmpfilter(sk, level, optname, optval, optlen); + return rawv6_geticmpfilter(sk, optname, optval, optlen); case SOL_IPV6: - if (optname == IPV6_CHECKSUM) + if (optname == IPV6_CHECKSUM || + optname == IPV6_HDRINCL) break; + fallthrough; default: return ipv6_getsockopt(sk, level, optname, optval, optlen); } @@ -1095,52 +1111,29 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, return do_rawv6_getsockopt(sk, level, optname, optval, optlen); } -#ifdef CONFIG_COMPAT -static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - switch (level) { - case SOL_RAW: - break; - case SOL_ICMPV6: - if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) - return -EOPNOTSUPP; - return rawv6_geticmpfilter(sk, level, optname, optval, optlen); - case SOL_IPV6: - if (optname == IPV6_CHECKSUM) - break; - default: - return compat_ipv6_getsockopt(sk, level, optname, - optval, optlen); - } - return do_rawv6_getsockopt(sk, level, optname, optval, optlen); -} -#endif - -static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) +static int rawv6_ioctl(struct sock *sk, int cmd, int *karg) { switch (cmd) { case SIOCOUTQ: { - int amount = sk_wmem_alloc_get(sk); - - return put_user(amount, (int __user *)arg); + *karg = sk_wmem_alloc_get(sk); + return 0; } case SIOCINQ: { struct sk_buff *skb; - int amount = 0; spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) - amount = skb_tail_pointer(skb) - - skb_transport_header(skb); + if (skb) + *karg = skb->len; + else + *karg = 0; spin_unlock_bh(&sk->sk_receive_queue.lock); - return put_user(amount, (int __user *)arg); + return 0; } default: #ifdef CONFIG_IPV6_MROUTE - return ip6mr_ioctl(sk, cmd, (void __user *)arg); + return ip6mr_ioctl(sk, cmd, karg); #else return -ENOIOCTLCMD; #endif @@ -1177,14 +1170,13 @@ static void raw6_destroy(struct sock *sk) lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); - - inet6_destroy_sock(sk); } static int rawv6_init_sk(struct sock *sk) { struct raw6_sock *rp = raw6_sk(sk); + sk->sk_drop_counters = &rp->drop_counters; switch (inet_sk(sk)->inet_num) { case IPPROTO_ICMPV6: rp->checksum = 1; @@ -1205,8 +1197,8 @@ struct proto rawv6_prot = { .owner = THIS_MODULE, .close = rawv6_close, .destroy = raw6_destroy, - .connect = ip6_datagram_connect, - .disconnect = udp_disconnect, + .connect = ip6_datagram_connect_v6_only, + .disconnect = __udp_disconnect, .ioctl = rawv6_ioctl, .init = rawv6_init_sk, .setsockopt = rawv6_setsockopt, @@ -1218,12 +1210,14 @@ struct proto rawv6_prot = { .hash = raw_hash_sk, .unhash = raw_unhash_sk, .obj_size = sizeof(struct raw6_sock), + .ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6), + .useroffset = offsetof(struct raw6_sock, filter), + .usersize = sizeof_field(struct raw6_sock, filter), .h.raw_hash = &raw_v6_hashinfo, #ifdef CONFIG_COMPAT - .compat_setsockopt = compat_rawv6_setsockopt, - .compat_getsockopt = compat_rawv6_getsockopt, .compat_ioctl = compat_rawv6_ioctl, #endif + .diag_destroy = raw_abort, }; #ifdef CONFIG_PROC_FS @@ -1247,22 +1241,10 @@ static const struct seq_operations raw6_seq_ops = { .show = raw6_seq_show, }; -static int raw6_seq_open(struct inode *inode, struct file *file) -{ - return raw_seq_open(inode, file, &raw_v6_hashinfo, &raw6_seq_ops); -} - -static const struct file_operations raw6_seq_fops = { - .owner = THIS_MODULE, - .open = raw6_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - static int __net_init raw6_init_net(struct net *net) { - if (!proc_create("raw6", S_IRUGO, net->proc_net, &raw6_seq_fops)) + if (!proc_create_net_data("raw6", 0444, net->proc_net, &raw6_seq_ops, + sizeof(struct raw_iter_state), &raw_v6_hashinfo)) return -ENOMEM; return 0; @@ -1290,7 +1272,7 @@ void raw6_proc_exit(void) #endif /* CONFIG_PROC_FS */ /* Same as inet6_dgram_ops, sans udp_poll. */ -static const struct proto_ops inet6_sockraw_ops = { +const struct proto_ops inet6_sockraw_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, @@ -1301,6 +1283,7 @@ static const struct proto_ops inet6_sockraw_ops = { .getname = inet6_getname, .poll = datagram_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ + .gettstamp = sock_gettstamp, .listen = sock_no_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ .setsockopt = sock_common_setsockopt, /* ok */ @@ -1308,10 +1291,8 @@ static const struct proto_ops inet6_sockraw_ops = { .sendmsg = inet_sendmsg, /* ok */ .recvmsg = sock_common_recvmsg, /* ok */ .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, + .compat_ioctl = inet6_compat_ioctl, #endif }; @@ -1320,19 +1301,12 @@ static struct inet_protosw rawv6_protosw = { .protocol = IPPROTO_IP, /* wild card */ .prot = &rawv6_prot, .ops = &inet6_sockraw_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }; int __init rawv6_init(void) { - int ret; - - ret = inet6_register_protosw(&rawv6_protosw); - if (ret) - goto out; -out: - return ret; + return inet6_register_protosw(&rawv6_protosw); } void rawv6_exit(void) |
