diff options
Diffstat (limited to 'net/ipv4/ip_sockglue.c')
| -rw-r--r-- | net/ipv4/ip_sockglue.c | 1294 |
1 files changed, 623 insertions, 671 deletions
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 84ec3703c909..6d9c5c20b1c4 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -47,8 +47,6 @@ #include <linux/errqueue.h> #include <linux/uaccess.h> -#include <linux/bpfilter.h> - /* * SOL_IP control messages. */ @@ -130,20 +128,20 @@ static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) { - char *secdata; - u32 seclen, secid; + struct lsm_context ctx; + u32 secid; int err; err = security_socket_getpeersec_dgram(NULL, skb, &secid); if (err) return; - err = security_secid_to_secctx(secid, &secdata, &seclen); - if (err) + err = security_secid_to_secctx(secid, &ctx); + if (err < 0) return; - put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata); - security_release_secctx(secdata, seclen); + put_cmsg(msg, SOL_IP, SCM_SECURITY, ctx.len, ctx.context); + security_release_secctx(&ctx); } static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) @@ -171,8 +169,10 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset) { - struct inet_sock *inet = inet_sk(sk); - unsigned int flags = inet->cmsg_flags; + unsigned long flags = inet_cmsg_flags(inet_sk(sk)); + + if (!flags) + return; /* Ordered by supposed usage frequency */ if (flags & IP_CMSG_PKTINFO) { @@ -267,7 +267,7 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, } #endif if (cmsg->cmsg_level == SOL_SOCKET) { - err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc); + err = __sock_cmsg_send(sk, cmsg, &ipc->sockc); if (err) return err; continue; @@ -280,7 +280,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, err = cmsg->cmsg_len - sizeof(struct cmsghdr); /* Our caller is responsible for freeing ipc->opt */ - err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), + err = ip_options_get(net, &ipc->opt, + KERNEL_SOCKPTR(CMSG_DATA(cmsg)), err < 40 ? err : 40); if (err) return err; @@ -314,9 +315,16 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, if (val < 0 || val > 255) return -EINVAL; ipc->tos = val; - ipc->priority = rt_tos2priority(ipc->tos); + ipc->sockc.priority = rt_tos2priority(ipc->tos); + break; + case IP_PROTOCOL: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) + return -EINVAL; + val = *(int *)CMSG_DATA(cmsg); + if (val < 1 || val > 255) + return -EINVAL; + ipc->protocol = val; break; - default: return -EINVAL; } @@ -389,6 +397,18 @@ int ip_ra_control(struct sock *sk, unsigned char on, return 0; } +static void ipv4_icmp_error_rfc4884(const struct sk_buff *skb, + struct sock_ee_data_rfc4884 *out) +{ + switch (icmp_hdr(skb)->type) { + case ICMP_DEST_UNREACH: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + ip_icmp_error_rfc4884(skb, out, sizeof(struct icmphdr), + icmp_hdr(skb)->un.reserved[1] * 4); + } +} + void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { @@ -411,21 +431,24 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, serr->port = port; if (skb_pull(skb, payload - skb->data)) { + if (inet_test_bit(RECVERR_RFC4884, sk)) + ipv4_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884); + skb_reset_transport_header(skb); if (sock_queue_err_skb(sk, skb) == 0) return; } kfree_skb(skb); } +EXPORT_SYMBOL_GPL(ip_icmp_error); void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info) { - struct inet_sock *inet = inet_sk(sk); struct sock_exterr_skb *serr; struct iphdr *iph; struct sk_buff *skb; - if (!inet->recverr) + if (!inet_test_bit(RECVERR, sk)) return; skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC); @@ -486,7 +509,7 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk, * or without payload (SOF_TIMESTAMPING_OPT_TSONLY). */ info = PKTINFO_SKB_CB(skb); - if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) || + if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) || !info->ipi_ifindex) return false; @@ -544,7 +567,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; - if (inet_sk(sk)->cmsg_flags) + if (inet_cmsg_flags(inet_sk(sk))) ip_cmsg_recv(msg, skb); } @@ -560,40 +583,38 @@ out: return err; } -static void __ip_sock_set_tos(struct sock *sk, int val) +void __ip_sock_set_tos(struct sock *sk, int val) { + u8 old_tos = inet_sk(sk)->tos; + if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; - val |= inet_sk(sk)->tos & INET_ECN_MASK; + val |= old_tos & INET_ECN_MASK; } - if (inet_sk(sk)->tos != val) { - inet_sk(sk)->tos = val; - sk->sk_priority = rt_tos2priority(val); + if (old_tos != val) { + WRITE_ONCE(inet_sk(sk)->tos, val); + WRITE_ONCE(sk->sk_priority, rt_tos2priority(val)); sk_dst_reset(sk); } } void ip_sock_set_tos(struct sock *sk, int val) { - lock_sock(sk); + sockopt_lock_sock(sk); __ip_sock_set_tos(sk, val); - release_sock(sk); + sockopt_release_sock(sk); } EXPORT_SYMBOL(ip_sock_set_tos); void ip_sock_set_freebind(struct sock *sk) { - lock_sock(sk); - inet_sk(sk)->freebind = true; - release_sock(sk); + inet_set_bit(FREEBIND, sk); } EXPORT_SYMBOL(ip_sock_set_freebind); void ip_sock_set_recverr(struct sock *sk) { - lock_sock(sk); - inet_sk(sk)->recverr = true; - release_sock(sk); + inet_set_bit(RECVERR, sk); } EXPORT_SYMBOL(ip_sock_set_recverr); @@ -601,18 +622,14 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val) { if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) return -EINVAL; - lock_sock(sk); - inet_sk(sk)->pmtudisc = val; - release_sock(sk); + WRITE_ONCE(inet_sk(sk)->pmtudisc, val); return 0; } EXPORT_SYMBOL(ip_sock_set_mtu_discover); void ip_sock_set_pktinfo(struct sock *sk) { - lock_sock(sk); - inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO; - release_sock(sk); + inet_set_bit(PKTINFO, sk); } EXPORT_SYMBOL(ip_sock_set_pktinfo); @@ -647,12 +664,11 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex, struct sockaddr_storage *group, struct sockaddr_storage *list) { - int msize = IP_MSFILTER_SIZE(numsrc); struct ip_msfilter *msf; struct sockaddr_in *psin; int err, i; - msf = kmalloc(msize, GFP_KERNEL); + msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL); if (!msf) return -ENOBUFS; @@ -668,7 +684,7 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex, if (psin->sin_family != AF_INET) goto Eaddrnotavail; - msf->imsf_slist[i] = psin->sin_addr.s_addr; + msf->imsf_slist_flex[i] = psin->sin_addr.s_addr; } err = ip_mc_msfilter(sk, msf, ifindex); kfree(msf); @@ -679,20 +695,48 @@ Eaddrnotavail: return -EADDRNOTAVAIL; } +static int copy_group_source_from_sockptr(struct group_source_req *greqs, + sockptr_t optval, int optlen) +{ + if (in_compat_syscall()) { + struct compat_group_source_req gr32; + + if (optlen != sizeof(gr32)) + return -EINVAL; + if (copy_from_sockptr(&gr32, optval, sizeof(gr32))) + return -EFAULT; + greqs->gsr_interface = gr32.gsr_interface; + greqs->gsr_group = gr32.gsr_group; + greqs->gsr_source = gr32.gsr_source; + } else { + if (optlen != sizeof(*greqs)) + return -EINVAL; + if (copy_from_sockptr(greqs, optval, sizeof(*greqs))) + return -EFAULT; + } + + return 0; +} + static int do_mcast_group_source(struct sock *sk, int optname, - struct group_source_req *greqs) + sockptr_t optval, int optlen) { + struct group_source_req greqs; struct ip_mreq_source mreqs; struct sockaddr_in *psin; int omode, add, err; - if (greqs->gsr_group.ss_family != AF_INET || - greqs->gsr_source.ss_family != AF_INET) + err = copy_group_source_from_sockptr(&greqs, optval, optlen); + if (err) + return err; + + if (greqs.gsr_group.ss_family != AF_INET || + greqs.gsr_source.ss_family != AF_INET) return -EADDRNOTAVAIL; - psin = (struct sockaddr_in *)&greqs->gsr_group; + psin = (struct sockaddr_in *)&greqs.gsr_group; mreqs.imr_multiaddr = psin->sin_addr.s_addr; - psin = (struct sockaddr_in *)&greqs->gsr_source; + psin = (struct sockaddr_in *)&greqs.gsr_source; mreqs.imr_sourceaddr = psin->sin_addr.s_addr; mreqs.imr_interface = 0; /* use index for mc_source */ @@ -705,29 +749,152 @@ static int do_mcast_group_source(struct sock *sk, int optname, } else if (optname == MCAST_JOIN_SOURCE_GROUP) { struct ip_mreqn mreq; - psin = (struct sockaddr_in *)&greqs->gsr_group; + psin = (struct sockaddr_in *)&greqs.gsr_group; mreq.imr_multiaddr = psin->sin_addr; mreq.imr_address.s_addr = 0; - mreq.imr_ifindex = greqs->gsr_interface; + mreq.imr_ifindex = greqs.gsr_interface; err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE); if (err && err != -EADDRINUSE) return err; - greqs->gsr_interface = mreq.imr_ifindex; + greqs.gsr_interface = mreq.imr_ifindex; omode = MCAST_INCLUDE; add = 1; } else /* MCAST_LEAVE_SOURCE_GROUP */ { omode = MCAST_INCLUDE; add = 0; } - return ip_mc_source(add, omode, sk, &mreqs, greqs->gsr_interface); + return ip_mc_source(add, omode, sk, &mreqs, greqs.gsr_interface); +} + +static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) +{ + struct group_filter *gsf = NULL; + int err; + + if (optlen < GROUP_FILTER_SIZE(0)) + return -EINVAL; + if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max)) + return -ENOBUFS; + + gsf = memdup_sockptr(optval, optlen); + if (IS_ERR(gsf)) + return PTR_ERR(gsf); + + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + err = -ENOBUFS; + if (gsf->gf_numsrc >= 0x1ffffff || + gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf)) + goto out_free_gsf; + + err = -EINVAL; + if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) + goto out_free_gsf; + + err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc, + gsf->gf_fmode, &gsf->gf_group, + gsf->gf_slist_flex); +out_free_gsf: + kfree(gsf); + return err; +} + +static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, + int optlen) +{ + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); + struct compat_group_filter *gf32; + unsigned int n; + void *p; + int err; + + if (optlen < size0) + return -EINVAL; + if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4) + return -ENOBUFS; + + p = kmalloc(optlen + 4, GFP_KERNEL); + if (!p) + return -ENOMEM; + gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */ + + err = -EFAULT; + if (copy_from_sockptr(gf32, optval, optlen)) + goto out_free_gsf; + + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + n = gf32->gf_numsrc; + err = -ENOBUFS; + if (n >= 0x1ffffff) + goto out_free_gsf; + + err = -EINVAL; + if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen) + goto out_free_gsf; + + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + err = -ENOBUFS; + if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf)) + goto out_free_gsf; + err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode, + &gf32->gf_group, gf32->gf_slist_flex); +out_free_gsf: + kfree(p); + return err; +} + +static int ip_mcast_join_leave(struct sock *sk, int optname, + sockptr_t optval, int optlen) +{ + struct ip_mreqn mreq = { }; + struct sockaddr_in *psin; + struct group_req greq; + + if (optlen < sizeof(struct group_req)) + return -EINVAL; + if (copy_from_sockptr(&greq, optval, sizeof(greq))) + return -EFAULT; + + psin = (struct sockaddr_in *)&greq.gr_group; + if (psin->sin_family != AF_INET) + return -EINVAL; + mreq.imr_multiaddr = psin->sin_addr; + mreq.imr_ifindex = greq.gr_interface; + if (optname == MCAST_JOIN_GROUP) + return ip_mc_join_group(sk, &mreq); + return ip_mc_leave_group(sk, &mreq); +} + +static int compat_ip_mcast_join_leave(struct sock *sk, int optname, + sockptr_t optval, int optlen) +{ + struct compat_group_req greq; + struct ip_mreqn mreq = { }; + struct sockaddr_in *psin; + + if (optlen < sizeof(struct compat_group_req)) + return -EINVAL; + if (copy_from_sockptr(&greq, optval, sizeof(greq))) + return -EFAULT; + + psin = (struct sockaddr_in *)&greq.gr_group; + if (psin->sin_family != AF_INET) + return -EINVAL; + mreq.imr_multiaddr = psin->sin_addr; + mreq.imr_ifindex = greq.gr_interface; + + if (optname == MCAST_JOIN_GROUP) + return ip_mc_join_group(sk, &mreq); + return ip_mc_leave_group(sk, &mreq); } -static int do_ip_setsockopt(struct sock *sk, int level, - int optname, char __user *optval, unsigned int optlen) +DEFINE_STATIC_KEY_FALSE(ip4_min_ttl); + +int do_ip_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen) { struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); - int val = 0, err; + int val = 0, err, retv; bool needs_rtnl = setsockopt_needs_rtnl(optname); switch (optname) { @@ -755,13 +922,15 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_RECVORIGDSTADDR: case IP_CHECKSUM: case IP_RECVFRAGSIZE: + case IP_RECVERR_RFC4884: + case IP_LOCAL_PORT_RANGE: if (optlen >= sizeof(int)) { - if (get_user(val, (int __user *) optval)) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; } else if (optlen >= sizeof(char)) { unsigned char ucval; - if (get_user(ucval, (unsigned char __user *) optval)) + if (copy_from_sockptr(&ucval, optval, sizeof(ucval))) return -EFAULT; val = (int) ucval; } @@ -769,15 +938,144 @@ static int do_ip_setsockopt(struct sock *sk, int level, /* If optlen==0, it is equivalent to val == 0 */ - if (optname == IP_ROUTER_ALERT) - return ip_ra_control(sk, val ? 1 : 0, NULL); + if (optname == IP_ROUTER_ALERT) { + retv = ip_ra_control(sk, val ? 1 : 0, NULL); + if (retv == 0) + inet_assign_bit(RTALERT, sk, val); + return retv; + } if (ip_mroute_opt(optname)) return ip_mroute_setsockopt(sk, optname, optval, optlen); + /* Handle options that can be set without locking the socket. */ + switch (optname) { + case IP_PKTINFO: + inet_assign_bit(PKTINFO, sk, val); + return 0; + case IP_RECVTTL: + inet_assign_bit(TTL, sk, val); + return 0; + case IP_RECVTOS: + inet_assign_bit(TOS, sk, val); + return 0; + case IP_RECVOPTS: + inet_assign_bit(RECVOPTS, sk, val); + return 0; + case IP_RETOPTS: + inet_assign_bit(RETOPTS, sk, val); + return 0; + case IP_PASSSEC: + inet_assign_bit(PASSSEC, sk, val); + return 0; + case IP_RECVORIGDSTADDR: + inet_assign_bit(ORIGDSTADDR, sk, val); + return 0; + case IP_RECVFRAGSIZE: + if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM) + return -EINVAL; + inet_assign_bit(RECVFRAGSIZE, sk, val); + return 0; + case IP_RECVERR: + inet_assign_bit(RECVERR, sk, val); + if (!val) + skb_errqueue_purge(&sk->sk_error_queue); + return 0; + case IP_RECVERR_RFC4884: + if (val < 0 || val > 1) + return -EINVAL; + inet_assign_bit(RECVERR_RFC4884, sk, val); + return 0; + case IP_FREEBIND: + if (optlen < 1) + return -EINVAL; + inet_assign_bit(FREEBIND, sk, val); + return 0; + case IP_HDRINCL: + if (sk->sk_type != SOCK_RAW) + return -ENOPROTOOPT; + inet_assign_bit(HDRINCL, sk, val); + return 0; + case IP_MULTICAST_LOOP: + if (optlen < 1) + return -EINVAL; + inet_assign_bit(MC_LOOP, sk, val); + return 0; + case IP_MULTICAST_ALL: + if (optlen < 1) + return -EINVAL; + if (val != 0 && val != 1) + return -EINVAL; + inet_assign_bit(MC_ALL, sk, val); + return 0; + case IP_TRANSPARENT: + if (!!val && !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && + !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if (optlen < 1) + return -EINVAL; + inet_assign_bit(TRANSPARENT, sk, val); + return 0; + case IP_NODEFRAG: + if (sk->sk_type != SOCK_RAW) + return -ENOPROTOOPT; + inet_assign_bit(NODEFRAG, sk, val); + return 0; + case IP_BIND_ADDRESS_NO_PORT: + inet_assign_bit(BIND_ADDRESS_NO_PORT, sk, val); + return 0; + case IP_TTL: + if (optlen < 1) + return -EINVAL; + if (val != -1 && (val < 1 || val > 255)) + return -EINVAL; + WRITE_ONCE(inet->uc_ttl, val); + return 0; + case IP_MINTTL: + if (optlen < 1) + return -EINVAL; + if (val < 0 || val > 255) + return -EINVAL; + + if (val) + static_branch_enable(&ip4_min_ttl); + + WRITE_ONCE(inet->min_ttl, val); + return 0; + case IP_MULTICAST_TTL: + if (sk->sk_type == SOCK_STREAM) + return -EINVAL; + if (optlen < 1) + return -EINVAL; + if (val == -1) + val = 1; + if (val < 0 || val > 255) + return -EINVAL; + WRITE_ONCE(inet->mc_ttl, val); + return 0; + case IP_MTU_DISCOVER: + return ip_sock_set_mtu_discover(sk, val); + case IP_TOS: /* This sets both TOS and Precedence */ + ip_sock_set_tos(sk, val); + return 0; + case IP_LOCAL_PORT_RANGE: + { + u16 lo = val; + u16 hi = val >> 16; + + if (optlen != sizeof(u32)) + return -EINVAL; + if (lo != 0 && hi != 0 && lo > hi) + return -EINVAL; + + WRITE_ONCE(inet->local_port_range, val); + return 0; + } + } + err = 0; if (needs_rtnl) rtnl_lock(); - lock_sock(sk); + sockopt_lock_sock(sk); switch (optname) { case IP_OPTIONS: @@ -786,13 +1084,12 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (optlen > 40) goto e_inval; - err = ip_options_get_from_user(sock_net(sk), &opt, - optval, optlen); + err = ip_options_get(sock_net(sk), &opt, optval, optlen); if (err) break; old = rcu_dereference_protected(inet->inet_opt, lockdep_sock_is_held(sk)); - if (inet->is_icsk) { + if (inet_test_bit(IS_ICSK, sk)) { struct inet_connection_sock *icsk = inet_csk(sk); #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET || @@ -814,122 +1111,19 @@ static int do_ip_setsockopt(struct sock *sk, int level, kfree_rcu(old, rcu); break; } - case IP_PKTINFO: - if (val) - inet->cmsg_flags |= IP_CMSG_PKTINFO; - else - inet->cmsg_flags &= ~IP_CMSG_PKTINFO; - break; - case IP_RECVTTL: - if (val) - inet->cmsg_flags |= IP_CMSG_TTL; - else - inet->cmsg_flags &= ~IP_CMSG_TTL; - break; - case IP_RECVTOS: - if (val) - inet->cmsg_flags |= IP_CMSG_TOS; - else - inet->cmsg_flags &= ~IP_CMSG_TOS; - break; - case IP_RECVOPTS: - if (val) - inet->cmsg_flags |= IP_CMSG_RECVOPTS; - else - inet->cmsg_flags &= ~IP_CMSG_RECVOPTS; - break; - case IP_RETOPTS: - if (val) - inet->cmsg_flags |= IP_CMSG_RETOPTS; - else - inet->cmsg_flags &= ~IP_CMSG_RETOPTS; - break; - case IP_PASSSEC: - if (val) - inet->cmsg_flags |= IP_CMSG_PASSSEC; - else - inet->cmsg_flags &= ~IP_CMSG_PASSSEC; - break; - case IP_RECVORIGDSTADDR: - if (val) - inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR; - else - inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR; - break; case IP_CHECKSUM: if (val) { - if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) { + if (!(inet_test_bit(CHECKSUM, sk))) { inet_inc_convert_csum(sk); - inet->cmsg_flags |= IP_CMSG_CHECKSUM; + inet_set_bit(CHECKSUM, sk); } } else { - if (inet->cmsg_flags & IP_CMSG_CHECKSUM) { + if (inet_test_bit(CHECKSUM, sk)) { inet_dec_convert_csum(sk); - inet->cmsg_flags &= ~IP_CMSG_CHECKSUM; + inet_clear_bit(CHECKSUM, sk); } } break; - case IP_RECVFRAGSIZE: - if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM) - goto e_inval; - if (val) - inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE; - else - inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE; - break; - case IP_TOS: /* This sets both TOS and Precedence */ - __ip_sock_set_tos(sk, val); - break; - case IP_TTL: - if (optlen < 1) - goto e_inval; - if (val != -1 && (val < 1 || val > 255)) - goto e_inval; - inet->uc_ttl = val; - break; - case IP_HDRINCL: - if (sk->sk_type != SOCK_RAW) { - err = -ENOPROTOOPT; - break; - } - inet->hdrincl = val ? 1 : 0; - break; - case IP_NODEFRAG: - if (sk->sk_type != SOCK_RAW) { - err = -ENOPROTOOPT; - break; - } - inet->nodefrag = val ? 1 : 0; - break; - case IP_BIND_ADDRESS_NO_PORT: - inet->bind_address_no_port = val ? 1 : 0; - break; - case IP_MTU_DISCOVER: - if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) - goto e_inval; - inet->pmtudisc = val; - break; - case IP_RECVERR: - inet->recverr = !!val; - if (!val) - skb_queue_purge(&sk->sk_error_queue); - break; - case IP_MULTICAST_TTL: - if (sk->sk_type == SOCK_STREAM) - goto e_inval; - if (optlen < 1) - goto e_inval; - if (val == -1) - val = 1; - if (val < 0 || val > 255) - goto e_inval; - inet->mc_ttl = val; - break; - case IP_MULTICAST_LOOP: - if (optlen < 1) - goto e_inval; - inet->mc_loop = !!val; - break; case IP_UNICAST_IF: { struct net_device *dev = NULL; @@ -941,7 +1135,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, ifindex = (__force int)ntohl((__force __be32)val); if (ifindex == 0) { - inet->uc_index = 0; + WRITE_ONCE(inet->uc_index, 0); err = 0; break; } @@ -955,11 +1149,10 @@ static int do_ip_setsockopt(struct sock *sk, int level, dev_put(dev); err = -EINVAL; - if (sk->sk_bound_dev_if && - (!midx || midx != sk->sk_bound_dev_if)) + if (sk->sk_bound_dev_if && midx != sk->sk_bound_dev_if) break; - inet->uc_index = ifindex; + WRITE_ONCE(inet->uc_index, ifindex); err = 0; break; } @@ -980,25 +1173,25 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EFAULT; if (optlen >= sizeof(struct ip_mreqn)) { - if (copy_from_user(&mreq, optval, sizeof(mreq))) + if (copy_from_sockptr(&mreq, optval, sizeof(mreq))) break; } else { memset(&mreq, 0, sizeof(mreq)); if (optlen >= sizeof(struct ip_mreq)) { - if (copy_from_user(&mreq, optval, - sizeof(struct ip_mreq))) + if (copy_from_sockptr(&mreq, optval, + sizeof(struct ip_mreq))) break; } else if (optlen >= sizeof(struct in_addr)) { - if (copy_from_user(&mreq.imr_address, optval, - sizeof(struct in_addr))) + if (copy_from_sockptr(&mreq.imr_address, optval, + sizeof(struct in_addr))) break; } } if (!mreq.imr_ifindex) { if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) { - inet->mc_index = 0; - inet->mc_addr = 0; + WRITE_ONCE(inet->mc_index, 0); + WRITE_ONCE(inet->mc_addr, 0); err = 0; break; } @@ -1020,11 +1213,11 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EINVAL; if (sk->sk_bound_dev_if && mreq.imr_ifindex != sk->sk_bound_dev_if && - (!midx || midx != sk->sk_bound_dev_if)) + midx != sk->sk_bound_dev_if) break; - inet->mc_index = mreq.imr_ifindex; - inet->mc_addr = mreq.imr_address.s_addr; + WRITE_ONCE(inet->mc_index, mreq.imr_ifindex); + WRITE_ONCE(inet->mc_addr, mreq.imr_address.s_addr); err = 0; break; } @@ -1035,18 +1228,19 @@ static int do_ip_setsockopt(struct sock *sk, int level, struct ip_mreqn mreq; err = -EPROTO; - if (inet_sk(sk)->is_icsk) + if (inet_test_bit(IS_ICSK, sk)) break; if (optlen < sizeof(struct ip_mreq)) goto e_inval; err = -EFAULT; if (optlen >= sizeof(struct ip_mreqn)) { - if (copy_from_user(&mreq, optval, sizeof(mreq))) + if (copy_from_sockptr(&mreq, optval, sizeof(mreq))) break; } else { memset(&mreq, 0, sizeof(mreq)); - if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq))) + if (copy_from_sockptr(&mreq, optval, + sizeof(struct ip_mreq))) break; } @@ -1062,18 +1256,18 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (optlen < IP_MSFILTER_SIZE(0)) goto e_inval; - if (optlen > sysctl_optmem_max) { + if (optlen > READ_ONCE(net->core.sysctl_optmem_max)) { err = -ENOBUFS; break; } - msf = memdup_user(optval, optlen); + msf = memdup_sockptr(optval, optlen); if (IS_ERR(msf)) { err = PTR_ERR(msf); break; } /* numsrc >= (1G-4) overflow in 32 bits */ if (msf->imsf_numsrc >= 0x3ffffffcU || - msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) { + msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) { kfree(msf); err = -ENOBUFS; break; @@ -1097,7 +1291,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (optlen != sizeof(struct ip_mreq_source)) goto e_inval; - if (copy_from_user(&mreqs, optval, sizeof(mreqs))) { + if (copy_from_sockptr(&mreqs, optval, sizeof(mreqs))) { err = -EFAULT; break; } @@ -1127,129 +1321,43 @@ static int do_ip_setsockopt(struct sock *sk, int level, } case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: - { - struct group_req greq; - struct sockaddr_in *psin; - struct ip_mreqn mreq; - - if (optlen < sizeof(struct group_req)) - goto e_inval; - err = -EFAULT; - if (copy_from_user(&greq, optval, sizeof(greq))) - break; - psin = (struct sockaddr_in *)&greq.gr_group; - if (psin->sin_family != AF_INET) - goto e_inval; - memset(&mreq, 0, sizeof(mreq)); - mreq.imr_multiaddr = psin->sin_addr; - mreq.imr_ifindex = greq.gr_interface; - - if (optname == MCAST_JOIN_GROUP) - err = ip_mc_join_group(sk, &mreq); + if (in_compat_syscall()) + err = compat_ip_mcast_join_leave(sk, optname, optval, + optlen); else - err = ip_mc_leave_group(sk, &mreq); + err = ip_mcast_join_leave(sk, optname, optval, optlen); break; - } case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: - { - struct group_source_req greqs; - - if (optlen != sizeof(struct group_source_req)) - goto e_inval; - if (copy_from_user(&greqs, optval, sizeof(greqs))) { - err = -EFAULT; - break; - } - err = do_mcast_group_source(sk, optname, &greqs); + err = do_mcast_group_source(sk, optname, optval, optlen); break; - } case MCAST_MSFILTER: - { - struct group_filter *gsf = NULL; - - if (optlen < GROUP_FILTER_SIZE(0)) - goto e_inval; - if (optlen > sysctl_optmem_max) { - err = -ENOBUFS; - break; - } - gsf = memdup_user(optval, optlen); - if (IS_ERR(gsf)) { - err = PTR_ERR(gsf); - break; - } - /* numsrc >= (4G-140)/128 overflow in 32 bits */ - if (gsf->gf_numsrc >= 0x1ffffff || - gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) { - err = -ENOBUFS; - goto mc_msf_out; - } - if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) { - err = -EINVAL; - goto mc_msf_out; - } - err = set_mcast_msfilter(sk, gsf->gf_interface, - gsf->gf_numsrc, gsf->gf_fmode, - &gsf->gf_group, gsf->gf_slist); -mc_msf_out: - kfree(gsf); - break; - } - case IP_MULTICAST_ALL: - if (optlen < 1) - goto e_inval; - if (val != 0 && val != 1) - goto e_inval; - inet->mc_all = val; - break; - - case IP_FREEBIND: - if (optlen < 1) - goto e_inval; - inet->freebind = !!val; + if (in_compat_syscall()) + err = compat_ip_set_mcast_msfilter(sk, optval, optlen); + else + err = ip_set_mcast_msfilter(sk, optval, optlen); break; - case IP_IPSEC_POLICY: case IP_XFRM_POLICY: err = -EPERM; - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) break; err = xfrm_user_policy(sk, optname, optval, optlen); break; - case IP_TRANSPARENT: - if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && - !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { - err = -EPERM; - break; - } - if (optlen < 1) - goto e_inval; - inet->transparent = !!val; - break; - - case IP_MINTTL: - if (optlen < 1) - goto e_inval; - if (val < 0 || val > 255) - goto e_inval; - inet->min_ttl = val; - break; - default: err = -ENOPROTOOPT; break; } - release_sock(sk); + sockopt_release_sock(sk); if (needs_rtnl) rtnl_unlock(); return err; e_inval: - release_sock(sk); + sockopt_release_sock(sk); if (needs_rtnl) rtnl_unlock(); return -EINVAL; @@ -1259,15 +1367,16 @@ e_inval: * ipv4_pktinfo_prepare - transfer some info from rtable to skb * @sk: socket * @skb: buffer + * @drop_dst: if true, drops skb dst * * To support IP_CMSG_PKTINFO option, we store rt_iif and specific * destination in skb->cb[] before dst drop. * This way, receiver doesn't make cache line misses to read rtable. */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst) { struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); - bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) || + bool prepare = inet_test_bit(PKTINFO, sk) || ipv6_sk_rxinfo(sk); if (prepare && skb_rtable(skb)) { @@ -1293,11 +1402,12 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + if (drop_dst) + skb_dst_drop(skb); } -int ip_setsockopt(struct sock *sk, int level, - int optname, char __user *optval, unsigned int optlen) +int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen) { int err; @@ -1305,11 +1415,6 @@ int ip_setsockopt(struct sock *sk, int level, return -ENOPROTOOPT; err = do_ip_setsockopt(sk, level, optname, optval, optlen); -#if IS_ENABLED(CONFIG_BPFILTER_UMH) - if (optname >= BPFILTER_IPT_SO_SET_REPLACE && - optname < BPFILTER_IPT_SET_MAX) - err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen); -#endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_HDRINCL && @@ -1322,138 +1427,6 @@ int ip_setsockopt(struct sock *sk, int level, } EXPORT_SYMBOL(ip_setsockopt); -#ifdef CONFIG_COMPAT -int compat_ip_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - int err; - - if (level != SOL_IP) - return -ENOPROTOOPT; - - switch (optname) { - case MCAST_JOIN_GROUP: - case MCAST_LEAVE_GROUP: - { - struct compat_group_req __user *gr32 = (void __user *)optval; - struct group_req greq; - struct sockaddr_in *psin = (struct sockaddr_in *)&greq.gr_group; - struct ip_mreqn mreq; - - if (optlen < sizeof(struct compat_group_req)) - return -EINVAL; - - if (get_user(greq.gr_interface, &gr32->gr_interface) || - copy_from_user(&greq.gr_group, &gr32->gr_group, - sizeof(greq.gr_group))) - return -EFAULT; - - if (psin->sin_family != AF_INET) - return -EINVAL; - - memset(&mreq, 0, sizeof(mreq)); - mreq.imr_multiaddr = psin->sin_addr; - mreq.imr_ifindex = greq.gr_interface; - - rtnl_lock(); - lock_sock(sk); - if (optname == MCAST_JOIN_GROUP) - err = ip_mc_join_group(sk, &mreq); - else - err = ip_mc_leave_group(sk, &mreq); - release_sock(sk); - rtnl_unlock(); - return err; - } - case MCAST_JOIN_SOURCE_GROUP: - case MCAST_LEAVE_SOURCE_GROUP: - case MCAST_BLOCK_SOURCE: - case MCAST_UNBLOCK_SOURCE: - { - struct compat_group_source_req __user *gsr32 = (void __user *)optval; - struct group_source_req greqs; - - if (optlen != sizeof(struct compat_group_source_req)) - return -EINVAL; - - if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) || - copy_from_user(&greqs.gsr_group, &gsr32->gsr_group, - sizeof(greqs.gsr_group)) || - copy_from_user(&greqs.gsr_source, &gsr32->gsr_source, - sizeof(greqs.gsr_source))) - return -EFAULT; - - rtnl_lock(); - lock_sock(sk); - err = do_mcast_group_source(sk, optname, &greqs); - release_sock(sk); - rtnl_unlock(); - return err; - } - case MCAST_MSFILTER: - { - const int size0 = offsetof(struct compat_group_filter, gf_slist); - struct compat_group_filter *gf32; - unsigned int n; - void *p; - - if (optlen < size0) - return -EINVAL; - if (optlen > sysctl_optmem_max - 4) - return -ENOBUFS; - - p = kmalloc(optlen + 4, GFP_KERNEL); - if (!p) - return -ENOMEM; - gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ - if (copy_from_user(gf32, optval, optlen)) { - err = -EFAULT; - goto mc_msf_out; - } - - n = gf32->gf_numsrc; - /* numsrc >= (4G-140)/128 overflow in 32 bits */ - if (n >= 0x1ffffff) { - err = -ENOBUFS; - goto mc_msf_out; - } - if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) { - err = -EINVAL; - goto mc_msf_out; - } - - rtnl_lock(); - lock_sock(sk); - /* numsrc >= (4G-140)/128 overflow in 32 bits */ - if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) - err = -ENOBUFS; - else - err = set_mcast_msfilter(sk, gf32->gf_interface, - n, gf32->gf_fmode, - &gf32->gf_group, gf32->gf_slist); - release_sock(sk); - rtnl_unlock(); -mc_msf_out: - kfree(p); - return err; - } - } - - err = do_ip_setsockopt(sk, level, optname, optval, optlen); -#ifdef CONFIG_NETFILTER - /* we need to exclude all possible ENOPROTOOPTs except default case */ - if (err == -ENOPROTOOPT && optname != IP_HDRINCL && - optname != IP_IPSEC_POLICY && - optname != IP_XFRM_POLICY && - !ip_mroute_opt(optname)) - err = compat_nf_setsockopt(sk, PF_INET, optname, optval, - optlen); -#endif - return err; -} -EXPORT_SYMBOL(compat_ip_setsockopt); -#endif - /* * Get the options. Note for future reference. The GET of IP options gets * the _received_ ones. The set sets the _sent_ ones. @@ -1469,8 +1442,70 @@ static bool getsockopt_needs_rtnl(int optname) return false; } -static int do_ip_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen, unsigned int flags) +static int ip_get_mcast_msfilter(struct sock *sk, sockptr_t optval, + sockptr_t optlen, int len) +{ + const int size0 = offsetof(struct group_filter, gf_slist_flex); + struct group_filter gsf; + int num, gsf_size; + int err; + + if (len < size0) + return -EINVAL; + if (copy_from_sockptr(&gsf, optval, size0)) + return -EFAULT; + + num = gsf.gf_numsrc; + err = ip_mc_gsfget(sk, &gsf, optval, + offsetof(struct group_filter, gf_slist_flex)); + if (err) + return err; + if (gsf.gf_numsrc < num) + num = gsf.gf_numsrc; + gsf_size = GROUP_FILTER_SIZE(num); + if (copy_to_sockptr(optlen, &gsf_size, sizeof(int)) || + copy_to_sockptr(optval, &gsf, size0)) + return -EFAULT; + return 0; +} + +static int compat_ip_get_mcast_msfilter(struct sock *sk, sockptr_t optval, + sockptr_t optlen, int len) +{ + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); + struct compat_group_filter gf32; + struct group_filter gf; + int num; + int err; + + if (len < size0) + return -EINVAL; + if (copy_from_sockptr(&gf32, optval, size0)) + return -EFAULT; + + gf.gf_interface = gf32.gf_interface; + gf.gf_fmode = gf32.gf_fmode; + num = gf.gf_numsrc = gf32.gf_numsrc; + gf.gf_group = gf32.gf_group; + + err = ip_mc_gsfget(sk, &gf, optval, + offsetof(struct compat_group_filter, gf_slist_flex)); + if (err) + return err; + if (gf.gf_numsrc < num) + num = gf.gf_numsrc; + len = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32)); + if (copy_to_sockptr(optlen, &len, sizeof(int)) || + copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_fmode), + &gf.gf_fmode, sizeof(gf.gf_fmode)) || + copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_numsrc), + &gf.gf_numsrc, sizeof(gf.gf_numsrc))) + return -EFAULT; + return 0; +} + +int do_ip_getsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, sockptr_t optlen) { struct inet_sock *inet = inet_sk(sk); bool needs_rtnl = getsockopt_needs_rtnl(optname); @@ -1483,93 +1518,116 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, if (ip_mroute_opt(optname)) return ip_mroute_getsockopt(sk, optname, optval, optlen); - if (get_user(len, optlen)) + if (copy_from_sockptr(&len, optlen, sizeof(int))) return -EFAULT; if (len < 0) return -EINVAL; - if (needs_rtnl) - rtnl_lock(); - lock_sock(sk); - + /* Handle options that can be read without locking the socket. */ switch (optname) { + case IP_PKTINFO: + val = inet_test_bit(PKTINFO, sk); + goto copyval; + case IP_RECVTTL: + val = inet_test_bit(TTL, sk); + goto copyval; + case IP_RECVTOS: + val = inet_test_bit(TOS, sk); + goto copyval; + case IP_RECVOPTS: + val = inet_test_bit(RECVOPTS, sk); + goto copyval; + case IP_RETOPTS: + val = inet_test_bit(RETOPTS, sk); + goto copyval; + case IP_PASSSEC: + val = inet_test_bit(PASSSEC, sk); + goto copyval; + case IP_RECVORIGDSTADDR: + val = inet_test_bit(ORIGDSTADDR, sk); + goto copyval; + case IP_CHECKSUM: + val = inet_test_bit(CHECKSUM, sk); + goto copyval; + case IP_RECVFRAGSIZE: + val = inet_test_bit(RECVFRAGSIZE, sk); + goto copyval; + case IP_RECVERR: + val = inet_test_bit(RECVERR, sk); + goto copyval; + case IP_RECVERR_RFC4884: + val = inet_test_bit(RECVERR_RFC4884, sk); + goto copyval; + case IP_FREEBIND: + val = inet_test_bit(FREEBIND, sk); + goto copyval; + case IP_HDRINCL: + val = inet_test_bit(HDRINCL, sk); + goto copyval; + case IP_MULTICAST_LOOP: + val = inet_test_bit(MC_LOOP, sk); + goto copyval; + case IP_MULTICAST_ALL: + val = inet_test_bit(MC_ALL, sk); + goto copyval; + case IP_TRANSPARENT: + val = inet_test_bit(TRANSPARENT, sk); + goto copyval; + case IP_NODEFRAG: + val = inet_test_bit(NODEFRAG, sk); + goto copyval; + case IP_BIND_ADDRESS_NO_PORT: + val = inet_test_bit(BIND_ADDRESS_NO_PORT, sk); + goto copyval; + case IP_ROUTER_ALERT: + val = inet_test_bit(RTALERT, sk); + goto copyval; + case IP_TTL: + val = READ_ONCE(inet->uc_ttl); + if (val < 0) + val = READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_default_ttl); + goto copyval; + case IP_MINTTL: + val = READ_ONCE(inet->min_ttl); + goto copyval; + case IP_MULTICAST_TTL: + val = READ_ONCE(inet->mc_ttl); + goto copyval; + case IP_MTU_DISCOVER: + val = READ_ONCE(inet->pmtudisc); + goto copyval; + case IP_TOS: + val = READ_ONCE(inet->tos); + goto copyval; case IP_OPTIONS: { unsigned char optbuf[sizeof(struct ip_options)+40]; struct ip_options *opt = (struct ip_options *)optbuf; struct ip_options_rcu *inet_opt; - inet_opt = rcu_dereference_protected(inet->inet_opt, - lockdep_sock_is_held(sk)); + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); opt->optlen = 0; if (inet_opt) memcpy(optbuf, &inet_opt->opt, sizeof(struct ip_options) + inet_opt->opt.optlen); - release_sock(sk); + rcu_read_unlock(); - if (opt->optlen == 0) - return put_user(0, optlen); + if (opt->optlen == 0) { + len = 0; + return copy_to_sockptr(optlen, &len, sizeof(int)); + } ip_options_undo(opt); len = min_t(unsigned int, len, opt->optlen); - if (put_user(len, optlen)) + if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; - if (copy_to_user(optval, opt->__data, len)) + if (copy_to_sockptr(optval, opt->__data, len)) return -EFAULT; return 0; } - case IP_PKTINFO: - val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0; - break; - case IP_RECVTTL: - val = (inet->cmsg_flags & IP_CMSG_TTL) != 0; - break; - case IP_RECVTOS: - val = (inet->cmsg_flags & IP_CMSG_TOS) != 0; - break; - case IP_RECVOPTS: - val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0; - break; - case IP_RETOPTS: - val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0; - break; - case IP_PASSSEC: - val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0; - break; - case IP_RECVORIGDSTADDR: - val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; - break; - case IP_CHECKSUM: - val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0; - break; - case IP_RECVFRAGSIZE: - val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0; - break; - case IP_TOS: - val = inet->tos; - break; - case IP_TTL: - { - struct net *net = sock_net(sk); - val = (inet->uc_ttl == -1 ? - net->ipv4.sysctl_ip_default_ttl : - inet->uc_ttl); - break; - } - case IP_HDRINCL: - val = inet->hdrincl; - break; - case IP_NODEFRAG: - val = inet->nodefrag; - break; - case IP_BIND_ADDRESS_NO_PORT: - val = inet->bind_address_no_port; - break; - case IP_MTU_DISCOVER: - val = inet->pmtudisc; - break; case IP_MTU: { struct dst_entry *dst; @@ -1579,37 +1637,72 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, val = dst_mtu(dst); dst_release(dst); } - if (!val) { - release_sock(sk); + if (!val) return -ENOTCONN; + goto copyval; + } + case IP_PKTOPTIONS: + { + struct msghdr msg; + + if (sk->sk_type != SOCK_STREAM) + return -ENOPROTOOPT; + + if (optval.is_kernel) { + msg.msg_control_is_user = false; + msg.msg_control = optval.kernel; + } else { + msg.msg_control_is_user = true; + msg.msg_control_user = optval.user; } - break; + msg.msg_controllen = len; + msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0; + + if (inet_test_bit(PKTINFO, sk)) { + struct in_pktinfo info; + + info.ipi_addr.s_addr = READ_ONCE(inet->inet_rcv_saddr); + info.ipi_spec_dst.s_addr = READ_ONCE(inet->inet_rcv_saddr); + info.ipi_ifindex = READ_ONCE(inet->mc_index); + put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); + } + if (inet_test_bit(TTL, sk)) { + int hlim = READ_ONCE(inet->mc_ttl); + + put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); + } + if (inet_test_bit(TOS, sk)) { + int tos = READ_ONCE(inet->rcv_tos); + put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos); + } + len -= msg.msg_controllen; + return copy_to_sockptr(optlen, &len, sizeof(int)); } - case IP_RECVERR: - val = inet->recverr; - break; - case IP_MULTICAST_TTL: - val = inet->mc_ttl; - break; - case IP_MULTICAST_LOOP: - val = inet->mc_loop; - break; case IP_UNICAST_IF: - val = (__force int)htonl((__u32) inet->uc_index); - break; + val = (__force int)htonl((__u32) READ_ONCE(inet->uc_index)); + goto copyval; case IP_MULTICAST_IF: { struct in_addr addr; len = min_t(unsigned int, len, sizeof(struct in_addr)); - addr.s_addr = inet->mc_addr; - release_sock(sk); + addr.s_addr = READ_ONCE(inet->mc_addr); - if (put_user(len, optlen)) + if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; - if (copy_to_user(optval, &addr, len)) + if (copy_to_sockptr(optval, &addr, len)) return -EFAULT; return 0; } + case IP_LOCAL_PORT_RANGE: + val = READ_ONCE(inet->local_port_range); + goto copyval; + } + + if (needs_rtnl) + rtnl_lock(); + sockopt_lock_sock(sk); + + switch (optname) { case IP_MSFILTER: { struct ip_msfilter msf; @@ -1618,109 +1711,47 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, err = -EINVAL; goto out; } - if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) { + if (copy_from_sockptr(&msf, optval, IP_MSFILTER_SIZE(0))) { err = -EFAULT; goto out; } - err = ip_mc_msfget(sk, &msf, - (struct ip_msfilter __user *)optval, optlen); + err = ip_mc_msfget(sk, &msf, optval, optlen); goto out; } case MCAST_MSFILTER: - { - struct group_filter __user *p = (void __user *)optval; - struct group_filter gsf; - const int size0 = offsetof(struct group_filter, gf_slist); - int num; - - if (len < size0) { - err = -EINVAL; - goto out; - } - if (copy_from_user(&gsf, p, size0)) { - err = -EFAULT; - goto out; - } - num = gsf.gf_numsrc; - err = ip_mc_gsfget(sk, &gsf, p->gf_slist); - if (err) - goto out; - if (gsf.gf_numsrc < num) - num = gsf.gf_numsrc; - if (put_user(GROUP_FILTER_SIZE(num), optlen) || - copy_to_user(p, &gsf, size0)) - err = -EFAULT; + if (in_compat_syscall()) + err = compat_ip_get_mcast_msfilter(sk, optval, optlen, + len); + else + err = ip_get_mcast_msfilter(sk, optval, optlen, len); goto out; - } - case IP_MULTICAST_ALL: - val = inet->mc_all; - break; - case IP_PKTOPTIONS: - { - struct msghdr msg; - - release_sock(sk); - - if (sk->sk_type != SOCK_STREAM) - return -ENOPROTOOPT; - - msg.msg_control_is_user = true; - msg.msg_control_user = optval; - msg.msg_controllen = len; - msg.msg_flags = flags; - - if (inet->cmsg_flags & IP_CMSG_PKTINFO) { - struct in_pktinfo info; - - info.ipi_addr.s_addr = inet->inet_rcv_saddr; - info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr; - info.ipi_ifindex = inet->mc_index; - put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); - } - if (inet->cmsg_flags & IP_CMSG_TTL) { - int hlim = inet->mc_ttl; - put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); - } - if (inet->cmsg_flags & IP_CMSG_TOS) { - int tos = inet->rcv_tos; - put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos); - } - len -= msg.msg_controllen; - return put_user(len, optlen); - } - case IP_FREEBIND: - val = inet->freebind; - break; - case IP_TRANSPARENT: - val = inet->transparent; - break; - case IP_MINTTL: - val = inet->min_ttl; + case IP_PROTOCOL: + val = inet_sk(sk)->inet_num; break; default: - release_sock(sk); + sockopt_release_sock(sk); return -ENOPROTOOPT; } - release_sock(sk); - + sockopt_release_sock(sk); +copyval: if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { unsigned char ucval = (unsigned char)val; len = 1; - if (put_user(len, optlen)) + if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; - if (copy_to_user(optval, &ucval, 1)) + if (copy_to_sockptr(optval, &ucval, 1)) return -EFAULT; } else { len = min_t(unsigned int, sizeof(int), len); - if (put_user(len, optlen)) + if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; - if (copy_to_user(optval, &val, len)) + if (copy_to_sockptr(optval, &val, len)) return -EFAULT; } return 0; out: - release_sock(sk); + sockopt_release_sock(sk); if (needs_rtnl) rtnl_unlock(); return err; @@ -1731,12 +1762,9 @@ int ip_getsockopt(struct sock *sk, int level, { int err; - err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); -#if IS_ENABLED(CONFIG_BPFILTER_UMH) - if (optname >= BPFILTER_IPT_SO_GET_INFO && - optname < BPFILTER_IPT_GET_MAX) - err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); -#endif + err = do_ip_getsockopt(sk, level, optname, + USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); + #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && @@ -1755,79 +1783,3 @@ int ip_getsockopt(struct sock *sk, int level, return err; } EXPORT_SYMBOL(ip_getsockopt); - -#ifdef CONFIG_COMPAT -int compat_ip_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - int err; - - if (optname == MCAST_MSFILTER) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); - struct compat_group_filter __user *p = (void __user *)optval; - struct compat_group_filter gf32; - struct group_filter gf; - int ulen, err; - int num; - - if (level != SOL_IP) - return -EOPNOTSUPP; - - if (get_user(ulen, optlen)) - return -EFAULT; - - if (ulen < size0) - return -EINVAL; - - if (copy_from_user(&gf32, p, size0)) - return -EFAULT; - - gf.gf_interface = gf32.gf_interface; - gf.gf_fmode = gf32.gf_fmode; - num = gf.gf_numsrc = gf32.gf_numsrc; - gf.gf_group = gf32.gf_group; - - rtnl_lock(); - lock_sock(sk); - err = ip_mc_gsfget(sk, &gf, p->gf_slist); - release_sock(sk); - rtnl_unlock(); - if (err) - return err; - if (gf.gf_numsrc < num) - num = gf.gf_numsrc; - ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32)); - if (put_user(ulen, optlen) || - put_user(gf.gf_fmode, &p->gf_fmode) || - put_user(gf.gf_numsrc, &p->gf_numsrc)) - return -EFAULT; - return 0; - } - - err = do_ip_getsockopt(sk, level, optname, optval, optlen, - MSG_CMSG_COMPAT); - -#if IS_ENABLED(CONFIG_BPFILTER_UMH) - if (optname >= BPFILTER_IPT_SO_GET_INFO && - optname < BPFILTER_IPT_GET_MAX) - err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); -#endif -#ifdef CONFIG_NETFILTER - /* we need to exclude all possible ENOPROTOOPTs except default case */ - if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && - !ip_mroute_opt(optname)) { - int len; - - if (get_user(len, optlen)) - return -EFAULT; - - err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len); - if (err >= 0) - err = put_user(len, optlen); - return err; - } -#endif - return err; -} -EXPORT_SYMBOL(compat_ip_getsockopt); -#endif |
