diff options
Diffstat (limited to 'net/socket.c')
| -rw-r--r-- | net/socket.c | 1788 |
1 files changed, 892 insertions, 896 deletions
diff --git a/net/socket.c b/net/socket.c index 976426d03f09..136b98c54fb3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -52,9 +52,12 @@ * Based upon Swansea University Computer Society NET3.039 */ +#include <linux/bpf-cgroup.h> +#include <linux/ethtool.h> #include <linux/mm.h> #include <linux/socket.h> #include <linux/file.h> +#include <linux/splice.h> #include <linux/net.h> #include <linux/interrupt.h> #include <linux/thread_info.h> @@ -64,7 +67,6 @@ #include <linux/seq_file.h> #include <linux/mutex.h> #include <linux/if_bridge.h> -#include <linux/if_frad.h> #include <linux/if_vlan.h> #include <linux/ptp_classify.h> #include <linux/init.h> @@ -86,6 +88,7 @@ #include <linux/xattr.h> #include <linux/nospec.h> #include <linux/indirect_call_wrapper.h> +#include <linux/io_uring/net.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -104,6 +107,10 @@ #include <linux/sockios.h> #include <net/busy_poll.h> #include <linux/errqueue.h> +#include <linux/ptp_clock_kernel.h> +#include <trace/events/sock.h> + +#include "core/dev.h" #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sysctl_net_busy_read __read_mostly; @@ -123,19 +130,19 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #endif static int sock_fasync(int fd, struct file *filp, int on); -static ssize_t sock_sendpage(struct file *file, struct page *page, - int offset, size_t size, loff_t *ppos, int more); static ssize_t sock_splice_read(struct file *file, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); +static void sock_splice_eof(struct file *file); #ifdef CONFIG_PROC_FS static void sock_show_fdinfo(struct seq_file *m, struct file *f) { struct socket *sock = f->private_data; + const struct proto_ops *ops = READ_ONCE(sock->ops); - if (sock->ops->show_fdinfo) - sock->ops->show_fdinfo(m, sock); + if (ops->show_fdinfo) + ops->show_fdinfo(m, sock); } #else #define sock_show_fdinfo NULL @@ -148,7 +155,6 @@ static void sock_show_fdinfo(struct seq_file *m, struct file *f) static const struct file_operations socket_file_ops = { .owner = THIS_MODULE, - .llseek = no_llseek, .read_iter = sock_read_iter, .write_iter = sock_write_iter, .poll = sock_poll, @@ -156,15 +162,65 @@ static const struct file_operations socket_file_ops = { #ifdef CONFIG_COMPAT .compat_ioctl = compat_sock_ioctl, #endif + .uring_cmd = io_uring_cmd_sock, .mmap = sock_mmap, .release = sock_close, .fasync = sock_fasync, - .sendpage = sock_sendpage, - .splice_write = generic_splice_sendpage, + .splice_write = splice_to_socket, .splice_read = sock_splice_read, + .splice_eof = sock_splice_eof, .show_fdinfo = sock_show_fdinfo, }; +static const char * const pf_family_names[] = { + [PF_UNSPEC] = "PF_UNSPEC", + [PF_UNIX] = "PF_UNIX/PF_LOCAL", + [PF_INET] = "PF_INET", + [PF_AX25] = "PF_AX25", + [PF_IPX] = "PF_IPX", + [PF_APPLETALK] = "PF_APPLETALK", + [PF_NETROM] = "PF_NETROM", + [PF_BRIDGE] = "PF_BRIDGE", + [PF_ATMPVC] = "PF_ATMPVC", + [PF_X25] = "PF_X25", + [PF_INET6] = "PF_INET6", + [PF_ROSE] = "PF_ROSE", + [PF_DECnet] = "PF_DECnet", + [PF_NETBEUI] = "PF_NETBEUI", + [PF_SECURITY] = "PF_SECURITY", + [PF_KEY] = "PF_KEY", + [PF_NETLINK] = "PF_NETLINK/PF_ROUTE", + [PF_PACKET] = "PF_PACKET", + [PF_ASH] = "PF_ASH", + [PF_ECONET] = "PF_ECONET", + [PF_ATMSVC] = "PF_ATMSVC", + [PF_RDS] = "PF_RDS", + [PF_SNA] = "PF_SNA", + [PF_IRDA] = "PF_IRDA", + [PF_PPPOX] = "PF_PPPOX", + [PF_WANPIPE] = "PF_WANPIPE", + [PF_LLC] = "PF_LLC", + [PF_IB] = "PF_IB", + [PF_MPLS] = "PF_MPLS", + [PF_CAN] = "PF_CAN", + [PF_TIPC] = "PF_TIPC", + [PF_BLUETOOTH] = "PF_BLUETOOTH", + [PF_IUCV] = "PF_IUCV", + [PF_RXRPC] = "PF_RXRPC", + [PF_ISDN] = "PF_ISDN", + [PF_PHONET] = "PF_PHONET", + [PF_IEEE802154] = "PF_IEEE802154", + [PF_CAIF] = "PF_CAIF", + [PF_ALG] = "PF_ALG", + [PF_NFC] = "PF_NFC", + [PF_VSOCK] = "PF_VSOCK", + [PF_KCM] = "PF_KCM", + [PF_QIPCRTR] = "PF_QIPCRTR", + [PF_SMC] = "PF_SMC", + [PF_XDP] = "PF_XDP", + [PF_MCTP] = "PF_MCTP", +}; + /* * The protocol list. Each protocol is registered in here. */ @@ -220,28 +276,41 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *k static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, void __user *uaddr, int __user *ulen) { - int err; int len; BUG_ON(klen > sizeof(struct sockaddr_storage)); - err = get_user(len, ulen); - if (err) - return err; + + if (can_do_masked_user_access()) + ulen = masked_user_access_begin(ulen); + else if (!user_access_begin(ulen, 4)) + return -EFAULT; + + unsafe_get_user(len, ulen, efault_end); + if (len > klen) len = klen; - if (len < 0) - return -EINVAL; + /* + * "fromlen shall refer to the value before truncation.." + * 1003.1g + */ + if (len >= 0) + unsafe_put_user(klen, ulen, efault_end); + + user_access_end(); + if (len) { + if (len < 0) + return -EINVAL; if (audit_sockaddr(klen, kaddr)) return -ENOMEM; if (copy_to_user(uaddr, kaddr, len)) return -EFAULT; } - /* - * "fromlen shall refer to the value before truncation.." - * 1003.1g - */ - return __put_user(klen, ulen); + return 0; + +efault_end: + user_access_end(); + return -EFAULT; } static struct kmem_cache *sock_inode_cachep __ro_after_init; @@ -250,7 +319,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb) { struct socket_alloc *ei; - ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); + ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL); if (!ei) return NULL; init_waitqueue_head(&ei->socket.wq.wait); @@ -288,7 +357,7 @@ static void init_inodecache(void) 0, (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | - SLAB_MEM_SPREAD | SLAB_ACCOUNT), + SLAB_ACCOUNT), init_once); BUG_ON(sock_inode_cachep == NULL); } @@ -304,7 +373,7 @@ static const struct super_operations sockfs_ops = { */ static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) { - return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", + return dynamic_dname(buffer, buflen, "socket:[%lu]", d_inode(dentry)->i_ino); } @@ -334,6 +403,7 @@ static const struct xattr_handler sockfs_xattr_handler = { }; static int sockfs_security_xattr_set(const struct xattr_handler *handler, + struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *suffix, const void *value, size_t size, int flags) @@ -347,7 +417,7 @@ static const struct xattr_handler sockfs_security_xattr_handler = { .set = sockfs_security_xattr_set, }; -static const struct xattr_handler *sockfs_xattr_handlers[] = { +static const struct xattr_handler * const sockfs_xattr_handlers[] = { &sockfs_xattr_handler, &sockfs_security_xattr_handler, NULL @@ -397,7 +467,9 @@ static struct file_system_type sock_fs_type = { * * Returns the &file bound with @sock, implicitly storing it * in sock->file. If dname is %NULL, sets to "". - * On failure the return is a ERR pointer (see linux/err.h). + * + * On failure @sock is released, and an ERR pointer is returned. + * * This function uses GFP_KERNEL internally. */ @@ -416,9 +488,15 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) return file; } + file->f_mode |= FMODE_NOWAIT; sock->file = file; file->private_data = sock; stream_open(SOCK_INODE(sock), file); + /* + * Disable permission and pre-content events, but enable legacy + * inotify events for legacy users. + */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); return file; } EXPORT_SYMBOL(sock_alloc_file); @@ -445,17 +523,15 @@ static int sock_map_fd(struct socket *sock, int flags) /** * sock_from_file - Return the &socket bounded to @file. * @file: file - * @err: pointer to an error code return * - * On failure returns %NULL and assigns -ENOTSOCK to @err. + * On failure returns %NULL. */ -struct socket *sock_from_file(struct file *file, int *err) +struct socket *sock_from_file(struct file *file) { - if (file->f_op == &socket_file_ops) - return file->private_data; /* set in sock_map_fd */ + if (likely(file->f_op == &socket_file_ops)) + return file->private_data; /* set in sock_alloc_file */ - *err = -ENOTSOCK; return NULL; } EXPORT_SYMBOL(sock_from_file); @@ -484,30 +560,15 @@ struct socket *sockfd_lookup(int fd, int *err) return NULL; } - sock = sock_from_file(file, err); - if (!sock) + sock = sock_from_file(file); + if (!sock) { + *err = -ENOTSOCK; fput(file); + } return sock; } EXPORT_SYMBOL(sockfd_lookup); -static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) -{ - struct fd f = fdget(fd); - struct socket *sock; - - *err = -EBADF; - if (f.file) { - sock = sock_from_file(f.file, err); - if (likely(sock)) { - *fput_needed = f.flags; - return sock; - } - fdput(f); - } - return NULL; -} - static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, size_t size) { @@ -536,17 +597,20 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, return used; } -static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) +static int sockfs_setattr(struct mnt_idmap *idmap, + struct dentry *dentry, struct iattr *iattr) { - int err = simple_setattr(dentry, iattr); + int err = simple_setattr(&nop_mnt_idmap, dentry, iattr); if (!err && (iattr->ia_valid & ATTR_UID)) { struct socket *sock = SOCKET_I(d_inode(dentry)); - if (sock->sk) - sock->sk->sk_uid = iattr->ia_uid; - else + if (sock->sk) { + /* Paired with READ_ONCE() in sk_uid() */ + WRITE_ONCE(sock->sk->sk_uid, iattr->ia_uid); + } else { err = -ENOENT; + } } return err; @@ -586,23 +650,16 @@ struct socket *sock_alloc(void) } EXPORT_SYMBOL(sock_alloc); -/** - * sock_release - close a socket - * @sock: socket to close - * - * The socket is released from the protocol stack if it has a release - * callback, and the inode is then released if the socket is bound to - * an inode not a file. - */ - static void __sock_release(struct socket *sock, struct inode *inode) { - if (sock->ops) { - struct module *owner = sock->ops->owner; + const struct proto_ops *ops = READ_ONCE(sock->ops); + + if (ops) { + struct module *owner = ops->owner; if (inode) inode_lock(inode); - sock->ops->release(sock); + ops->release(sock); sock->sk = NULL; if (inode) inode_unlock(inode); @@ -620,18 +677,26 @@ static void __sock_release(struct socket *sock, struct inode *inode) sock->file = NULL; } +/** + * sock_release - close a socket + * @sock: socket to close + * + * The socket is released from the protocol stack if it has a release + * callback, and the inode is then released if the socket is bound to + * an inode not a file. + */ void sock_release(struct socket *sock) { __sock_release(sock, NULL); } EXPORT_SYMBOL(sock_release); -void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) +void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags) { u8 flags = *tx_flags; if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) - flags |= SKBTX_HW_TSTAMP; + flags |= SKBTX_HW_TSTAMP_NOBPF; if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) flags |= SKBTX_SW_TSTAMP; @@ -639,6 +704,9 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) if (tsflags & SOF_TIMESTAMPING_TX_SCHED) flags |= SKBTX_SCHED_TSTAMP; + if (tsflags & SOF_TIMESTAMPING_TX_COMPLETION) + flags |= SKBTX_COMPLETION_TSTAMP; + *tx_flags = flags; } EXPORT_SYMBOL(__sock_tx_timestamp); @@ -647,15 +715,33 @@ INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *, size_t)); INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *, size_t)); + +static noinline void call_trace_sock_send_length(struct sock *sk, int ret, + int flags) +{ + trace_sock_send_length(sk, ret, 0); +} + static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) { - int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg, + int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->sendmsg, inet6_sendmsg, inet_sendmsg, sock, msg, msg_data_left(msg)); BUG_ON(ret == -EIOCBQUEUED); + + if (trace_sock_send_length_enabled()) + call_trace_sock_send_length(sock->sk, ret, 0); return ret; } +static int __sock_sendmsg(struct socket *sock, struct msghdr *msg) +{ + int err = security_socket_sendmsg(sock, msg, + msg_data_left(msg)); + + return err ?: sock_sendmsg_nosec(sock, msg); +} + /** * sock_sendmsg - send a message through @sock * @sock: socket @@ -666,10 +752,21 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) */ int sock_sendmsg(struct socket *sock, struct msghdr *msg) { - int err = security_socket_sendmsg(sock, msg, - msg_data_left(msg)); + struct sockaddr_storage *save_addr = (struct sockaddr_storage *)msg->msg_name; + struct sockaddr_storage address; + int save_len = msg->msg_namelen; + int ret; - return err ?: sock_sendmsg_nosec(sock, msg); + if (msg->msg_name) { + memcpy(&address, msg->msg_name, msg->msg_namelen); + msg->msg_name = &address; + } + + ret = __sock_sendmsg(sock, msg); + msg->msg_name = save_addr; + msg->msg_namelen = save_len; + + return ret; } EXPORT_SYMBOL(sock_sendmsg); @@ -688,38 +785,11 @@ EXPORT_SYMBOL(sock_sendmsg); int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { - iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size); + iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size); return sock_sendmsg(sock, msg); } EXPORT_SYMBOL(kernel_sendmsg); -/** - * kernel_sendmsg_locked - send a message through @sock (kernel-space) - * @sk: sock - * @msg: message header - * @vec: output s/g array - * @num: output s/g array length - * @size: total message data size - * - * Builds the message data with @vec and sends it through @sock. - * Returns the number of bytes sent, or an error code. - * Caller must hold @sk. - */ - -int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, - struct kvec *vec, size_t num, size_t size) -{ - struct socket *sock = sk->sk_socket; - - if (!sock->ops->sendmsg_locked) - return sock_no_sendmsg_locked(sk, msg, size); - - iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size); - - return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg)); -} -EXPORT_SYMBOL(kernel_sendmsg_locked); - static bool skb_is_err_queue(const struct sk_buff *skb) { /* pkt_type of skbs enqueued on the error queue are set to @@ -743,7 +813,28 @@ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp) return skb->tstamp && !false_tstamp && skb_is_err_queue(skb); } -static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb) +static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index) +{ + bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC; + struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); + struct net_device *orig_dev; + ktime_t hwtstamp; + + rcu_read_lock(); + orig_dev = dev_get_by_napi_id(skb_napi_id(skb)); + if (orig_dev) { + *if_index = orig_dev->ifindex; + hwtstamp = netdev_get_tstamp(orig_dev, shhwtstamps, cycles); + } else { + hwtstamp = shhwtstamps->hwtstamp; + } + rcu_read_unlock(); + + return hwtstamp; +} + +static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb, + int if_index) { struct scm_ts_pktinfo ts_pktinfo; struct net_device *orig_dev; @@ -753,17 +844,66 @@ static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb) memset(&ts_pktinfo, 0, sizeof(ts_pktinfo)); - rcu_read_lock(); - orig_dev = dev_get_by_napi_id(skb_napi_id(skb)); - if (orig_dev) - ts_pktinfo.if_index = orig_dev->ifindex; - rcu_read_unlock(); + if (!if_index) { + rcu_read_lock(); + orig_dev = dev_get_by_napi_id(skb_napi_id(skb)); + if (orig_dev) + if_index = orig_dev->ifindex; + rcu_read_unlock(); + } + ts_pktinfo.if_index = if_index; ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb); put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO, sizeof(ts_pktinfo), &ts_pktinfo); } +bool skb_has_tx_timestamp(struct sk_buff *skb, const struct sock *sk) +{ + const struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); + u32 tsflags = READ_ONCE(sk->sk_tsflags); + + if (serr->ee.ee_errno != ENOMSG || + serr->ee.ee_origin != SO_EE_ORIGIN_TIMESTAMPING) + return false; + + /* software time stamp available and wanted */ + if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) && skb->tstamp) + return true; + /* hardware time stamps available and wanted */ + return (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && + skb_hwtstamps(skb)->hwtstamp; +} + +int skb_get_tx_timestamp(struct sk_buff *skb, struct sock *sk, + struct timespec64 *ts) +{ + u32 tsflags = READ_ONCE(sk->sk_tsflags); + ktime_t hwtstamp; + int if_index = 0; + + if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) && + ktime_to_timespec64_cond(skb->tstamp, ts)) + return SOF_TIMESTAMPING_TX_SOFTWARE; + + if (!(tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) || + skb_is_swtx_tstamp(skb, false)) + return -ENOENT; + + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV) + hwtstamp = get_timestamp(sk, skb, &if_index); + else + hwtstamp = skb_hwtstamps(skb)->hwtstamp; + + if (tsflags & SOF_TIMESTAMPING_BIND_PHC) + hwtstamp = ptp_convert_timestamp(&hwtstamp, + READ_ONCE(sk->sk_bind_phc)); + if (!ktime_to_timespec64_cond(hwtstamp, ts)) + return -ENOENT; + + return SOF_TIMESTAMPING_TX_HARDWARE; +} + /* * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) */ @@ -773,10 +913,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); struct scm_timestamping_internal tss; - int empty = 1, false_tstamp = 0; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); + int if_index; + ktime_t hwtstamp; + u32 tsflags; /* Race occurred between timestamp enabling and packet receiving. Fill in the current time for now. */ @@ -818,17 +960,36 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, } memset(&tss, 0, sizeof(tss)); - if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) && + tsflags = READ_ONCE(sk->sk_tsflags); + if ((tsflags & SOF_TIMESTAMPING_SOFTWARE && + (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE || + skb_is_err_queue(skb) || + !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) && ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0)) empty = 0; if (shhwtstamps && - (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && - !skb_is_swtx_tstamp(skb, false_tstamp) && - ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) { - empty = 0; - if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && - !skb_is_err_queue(skb)) - put_ts_pktinfo(msg, skb); + (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE && + (tsflags & SOF_TIMESTAMPING_RX_HARDWARE || + skb_is_err_queue(skb) || + !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) && + !skb_is_swtx_tstamp(skb, false_tstamp)) { + if_index = 0; + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV) + hwtstamp = get_timestamp(sk, skb, &if_index); + else + hwtstamp = shhwtstamps->hwtstamp; + + if (tsflags & SOF_TIMESTAMPING_BIND_PHC) + hwtstamp = ptp_convert_timestamp(&hwtstamp, + READ_ONCE(sk->sk_bind_phc)); + + if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) { + empty = 0; + + if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && + !skb_is_err_queue(skb)) + put_ts_pktinfo(msg, skb, if_index); + } } if (!empty) { if (sock_flag(sk, SOCK_TSTAMP_NEW)) @@ -844,6 +1005,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(__sock_recv_timestamp); +#ifdef CONFIG_WIRELESS void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { @@ -859,6 +1021,7 @@ void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack); } EXPORT_SYMBOL_GPL(__sock_recv_wifi_status); +#endif static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) @@ -868,24 +1031,57 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount); } -void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb) +static void sock_recv_mark(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + if (sock_flag(sk, SOCK_RCVMARK) && skb) { + /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */ + __u32 mark = skb->mark; + + put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), &mark); + } +} + +static void sock_recv_priority(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + if (sock_flag(sk, SOCK_RCVPRIORITY) && skb) { + __u32 priority = skb->priority; + + put_cmsg(msg, SOL_SOCKET, SO_PRIORITY, sizeof(__u32), &priority); + } +} + +void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) { sock_recv_timestamp(msg, sk, skb); sock_recv_drops(msg, sk, skb); + sock_recv_mark(msg, sk, skb); + sock_recv_priority(msg, sk, skb); } -EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); +EXPORT_SYMBOL_GPL(__sock_recv_cmsgs); INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *, size_t, int)); INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *, size_t, int)); + +static noinline void call_trace_sock_recv_length(struct sock *sk, int ret, int flags) +{ + trace_sock_recv_length(sk, ret, flags); +} + static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, int flags) { - return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg, - inet_recvmsg, sock, msg, msg_data_left(msg), - flags); + int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->recvmsg, + inet6_recvmsg, + inet_recvmsg, sock, msg, + msg_data_left(msg), flags); + if (trace_sock_recv_length_enabled()) + call_trace_sock_recv_length(sock->sk, ret, flags); + return ret; } /** @@ -925,36 +1121,33 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags) { msg->msg_control_is_user = false; - iov_iter_kvec(&msg->msg_iter, READ, vec, num, size); + iov_iter_kvec(&msg->msg_iter, ITER_DEST, vec, num, size); return sock_recvmsg(sock, msg, flags); } EXPORT_SYMBOL(kernel_recvmsg); -static ssize_t sock_sendpage(struct file *file, struct page *page, - int offset, size_t size, loff_t *ppos, int more) +static ssize_t sock_splice_read(struct file *file, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) { - struct socket *sock; - int flags; - - sock = file->private_data; + struct socket *sock = file->private_data; + const struct proto_ops *ops; - flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; - /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */ - flags |= more; + ops = READ_ONCE(sock->ops); + if (unlikely(!ops->splice_read)) + return copy_splice_read(file, ppos, pipe, len, flags); - return kernel_sendpage(sock, page, offset, size, flags); + return ops->splice_read(sock, ppos, pipe, len, flags); } -static ssize_t sock_splice_read(struct file *file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) +static void sock_splice_eof(struct file *file) { struct socket *sock = file->private_data; + const struct proto_ops *ops; - if (unlikely(!sock->ops->splice_read)) - return generic_file_splice_read(file, ppos, pipe, len, flags); - - return sock->ops->splice_read(sock, ppos, pipe, len, flags); + ops = READ_ONCE(sock->ops); + if (ops->splice_eof) + ops->splice_eof(sock); } static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -996,7 +1189,10 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) if (sock->type == SOCK_SEQPACKET) msg.msg_flags |= MSG_EOR; - res = sock_sendmsg(sock, &msg); + if (iocb->ki_flags & IOCB_NOSIGNAL) + msg.msg_flags |= MSG_NOSIGNAL; + + res = __sock_sendmsg(sock, &msg); *from = msg.msg_iter; return res; } @@ -1007,9 +1203,11 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg); +static int (*br_ioctl_hook)(struct net *net, unsigned int cmd, + void __user *uarg); -void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) +void brioctl_set(int (*hook)(struct net *net, unsigned int cmd, + void __user *uarg)) { mutex_lock(&br_ioctl_mutex); br_ioctl_hook = hook; @@ -1017,6 +1215,21 @@ void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) } EXPORT_SYMBOL(brioctl_set); +int br_ioctl_call(struct net *net, unsigned int cmd, void __user *uarg) +{ + int err = -ENOPKG; + + if (!br_ioctl_hook) + request_module("bridge"); + + mutex_lock(&br_ioctl_mutex); + if (br_ioctl_hook) + err = br_ioctl_hook(net, cmd, uarg); + mutex_unlock(&br_ioctl_mutex); + + return err; +} + static DEFINE_MUTEX(vlan_ioctl_mutex); static int (*vlan_ioctl_hook) (struct net *, void __user *arg); @@ -1028,24 +1241,17 @@ void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) } EXPORT_SYMBOL(vlan_ioctl_set); -static DEFINE_MUTEX(dlci_ioctl_mutex); -static int (*dlci_ioctl_hook) (unsigned int, void __user *); - -void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) -{ - mutex_lock(&dlci_ioctl_mutex); - dlci_ioctl_hook = hook; - mutex_unlock(&dlci_ioctl_mutex); -} -EXPORT_SYMBOL(dlci_ioctl_set); - static long sock_do_ioctl(struct net *net, struct socket *sock, unsigned int cmd, unsigned long arg) { + const struct proto_ops *ops = READ_ONCE(sock->ops); + struct ifreq ifr; + bool need_copyout; int err; void __user *argp = (void __user *)arg; + void __user *data; - err = sock->ops->ioctl(sock, cmd, arg); + err = ops->ioctl(sock, cmd, arg); /* * If this ioctl is unknown try to hand it down @@ -1054,25 +1260,16 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, if (err != -ENOIOCTLCMD) return err; - if (cmd == SIOCGIFCONF) { - struct ifconf ifc; - if (copy_from_user(&ifc, argp, sizeof(struct ifconf))) - return -EFAULT; - rtnl_lock(); - err = dev_ifconf(net, &ifc, sizeof(struct ifreq)); - rtnl_unlock(); - if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf))) - err = -EFAULT; - } else { - struct ifreq ifr; - bool need_copyout; - if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + if (!is_socket_ioctl_cmd(cmd)) + return -ENOTTY; + + if (get_user_ifreq(&ifr, &data, argp)) + return -EFAULT; + err = dev_ioctl(net, cmd, &ifr, data, &need_copyout); + if (!err && need_copyout) + if (put_user_ifreq(&ifr, argp)) return -EFAULT; - err = dev_ioctl(net, cmd, &ifr, &need_copyout); - if (!err && need_copyout) - if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) - return -EFAULT; - } + return err; } @@ -1081,21 +1278,9 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * what to do with it - that's up to the protocol still. */ -/** - * get_net_ns - increment the refcount of the network namespace - * @ns: common namespace (net) - * - * Returns the net's common namespace. - */ - -struct ns_common *get_net_ns(struct ns_common *ns) -{ - return &get_net(container_of(ns, struct net, ns))->ns; -} -EXPORT_SYMBOL_GPL(get_net_ns); - static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { + const struct proto_ops *ops; struct socket *sock; struct sock *sk; void __user *argp = (void __user *)arg; @@ -1103,16 +1288,18 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) struct net *net; sock = file->private_data; + ops = READ_ONCE(sock->ops); sk = sock->sk; net = sock_net(sk); if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) { struct ifreq ifr; + void __user *data; bool need_copyout; - if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + if (get_user_ifreq(&ifr, &data, argp)) return -EFAULT; - err = dev_ioctl(net, cmd, &ifr, &need_copyout); + err = dev_ioctl(net, cmd, &ifr, data, &need_copyout); if (!err && need_copyout) - if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) + if (put_user_ifreq(&ifr, argp)) return -EFAULT; } else #ifdef CONFIG_WEXT_CORE @@ -1137,14 +1324,9 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) case SIOCSIFBR: case SIOCBRADDBR: case SIOCBRDELBR: - err = -ENOPKG; - if (!br_ioctl_hook) - request_module("bridge"); - - mutex_lock(&br_ioctl_mutex); - if (br_ioctl_hook) - err = br_ioctl_hook(net, cmd, argp); - mutex_unlock(&br_ioctl_mutex); + case SIOCBRADDIF: + case SIOCBRDELIF: + err = br_ioctl_call(net, cmd, argp); break; case SIOCGIFVLAN: case SIOCSIFVLAN: @@ -1157,17 +1339,6 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = vlan_ioctl_hook(net, argp); mutex_unlock(&vlan_ioctl_mutex); break; - case SIOCADDDLCI: - case SIOCDELDLCI: - err = -ENOPKG; - if (!dlci_ioctl_hook) - request_module("dlci"); - - mutex_lock(&dlci_ioctl_mutex); - if (dlci_ioctl_hook) - err = dlci_ioctl_hook(cmd, argp); - mutex_unlock(&dlci_ioctl_mutex); - break; case SIOCGSKNS: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) @@ -1177,24 +1348,29 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) break; case SIOCGSTAMP_OLD: case SIOCGSTAMPNS_OLD: - if (!sock->ops->gettstamp) { + if (!ops->gettstamp) { err = -ENOIOCTLCMD; break; } - err = sock->ops->gettstamp(sock, argp, - cmd == SIOCGSTAMP_OLD, - !IS_ENABLED(CONFIG_64BIT)); + err = ops->gettstamp(sock, argp, + cmd == SIOCGSTAMP_OLD, + !IS_ENABLED(CONFIG_64BIT)); break; case SIOCGSTAMP_NEW: case SIOCGSTAMPNS_NEW: - if (!sock->ops->gettstamp) { + if (!ops->gettstamp) { err = -ENOIOCTLCMD; break; } - err = sock->ops->gettstamp(sock, argp, - cmd == SIOCGSTAMP_NEW, - false); + err = ops->gettstamp(sock, argp, + cmd == SIOCGSTAMP_NEW, + false); break; + + case SIOCGIFCONF: + err = dev_ifconf(net, argp); + break; + default: err = sock_do_ioctl(net, sock, cmd, arg); break; @@ -1249,9 +1425,10 @@ EXPORT_SYMBOL(sock_create_lite); static __poll_t sock_poll(struct file *file, poll_table *wait) { struct socket *sock = file->private_data; + const struct proto_ops *ops = READ_ONCE(sock->ops); __poll_t events = poll_requested_events(wait), flag = 0; - if (!sock->ops->poll) + if (!ops->poll) return 0; if (sk_can_busy_loop(sock->sk)) { @@ -1263,14 +1440,14 @@ static __poll_t sock_poll(struct file *file, poll_table *wait) flag = POLL_BUSY_LOOP; } - return sock->ops->poll(file, sock, wait) | flag; + return ops->poll(file, sock, wait) | flag; } static int sock_mmap(struct file *file, struct vm_area_struct *vma) { struct socket *sock = file->private_data; - return sock->ops->mmap(file, sock, vma); + return READ_ONCE(sock->ops)->mmap(file, sock, vma); } static int sock_close(struct inode *inode, struct file *filp) @@ -1326,7 +1503,7 @@ int sock_wake_async(struct socket_wq *wq, int how, int band) case SOCK_WAKE_SPACE: if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags)) break; - /* fall through */ + fallthrough; case SOCK_WAKE_IO: call_kill: kill_fasync(&wq->fasync_list, SIGIO, band); @@ -1426,8 +1603,15 @@ int __sock_create(struct net *net, int family, int type, int protocol, rcu_read_unlock(); err = pf->create(net, sock, protocol, kern); - if (err < 0) + if (err < 0) { + /* ->create should release the allocated sock->sk object on error + * and make sure sock->sk is set to NULL to avoid use-after-free + */ + DEBUG_NET_WARN_ONCE(sock->sk, + "%ps must clear sock->sk on failure, family: %d, type: %d, protocol: %d\n", + pf->create, family, type, protocol); goto out_module_put; + } /* * Now to bump the refcnt of the [loadable] module that owns this @@ -1498,11 +1682,10 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct } EXPORT_SYMBOL(sock_create_kern); -int __sys_socket(int family, int type, int protocol) +static struct socket *__sys_socket_create(int family, int type, int protocol) { - int retval; struct socket *sock; - int flags; + int retval; /* Check the SOCK_* constants for consistency. */ BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); @@ -1510,17 +1693,65 @@ int __sys_socket(int family, int type, int protocol) BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); - flags = type & ~SOCK_TYPE_MASK; - if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) - return -EINVAL; + if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return ERR_PTR(-EINVAL); type &= SOCK_TYPE_MASK; + retval = sock_create(family, type, protocol, &sock); + if (retval < 0) + return ERR_PTR(retval); + + return sock; +} + +struct file *__sys_socket_file(int family, int type, int protocol) +{ + struct socket *sock; + int flags; + + sock = __sys_socket_create(family, type, protocol); + if (IS_ERR(sock)) + return ERR_CAST(sock); + + flags = type & ~SOCK_TYPE_MASK; if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; - retval = sock_create(family, type, protocol, &sock); - if (retval < 0) - return retval; + return sock_alloc_file(sock, flags, NULL); +} + +/* A hook for bpf progs to attach to and update socket protocol. + * + * A static noinline declaration here could cause the compiler to + * optimize away the function. A global noinline declaration will + * keep the definition, but may optimize away the callsite. + * Therefore, __weak is needed to ensure that the call is still + * emitted, by telling the compiler that we don't know what the + * function might eventually be. + */ + +__bpf_hook_start(); + +__weak noinline int update_socket_protocol(int family, int type, int protocol) +{ + return protocol; +} + +__bpf_hook_end(); + +int __sys_socket(int family, int type, int protocol) +{ + struct socket *sock; + int flags; + + sock = __sys_socket_create(family, type, + update_socket_protocol(family, type, protocol)); + if (IS_ERR(sock)) + return PTR_ERR(sock); + + flags = type & ~SOCK_TYPE_MASK; + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); } @@ -1593,7 +1824,7 @@ int __sys_socketpair(int family, int type, int protocol, int __user *usockvec) goto out; } - err = sock1->ops->socketpair(sock1, sock2); + err = READ_ONCE(sock1->ops)->socketpair(sock1, sock2); if (unlikely(err < 0)) { sock_release(sock2); sock_release(sock1); @@ -1632,6 +1863,20 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, return __sys_socketpair(family, type, protocol, usockvec); } +int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address, + int addrlen) +{ + int err; + + err = security_socket_bind(sock, (struct sockaddr *)address, + addrlen); + if (!err) + err = READ_ONCE(sock->ops)->bind(sock, + (struct sockaddr_unsized *)address, + addrlen); + return err; +} + /* * Bind a name to a socket. Nothing much to do here since it's * the protocol's responsibility to handle the local address. @@ -1644,23 +1889,20 @@ int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) { struct socket *sock; struct sockaddr_storage address; - int err, fput_needed; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock) { - err = move_addr_to_kernel(umyaddr, addrlen, &address); - if (!err) { - err = security_socket_bind(sock, - (struct sockaddr *)&address, - addrlen); - if (!err) - err = sock->ops->bind(sock, - (struct sockaddr *) - &address, addrlen); - } - fput_light(sock->file, fput_needed); - } - return err; + CLASS(fd, f)(fd); + int err; + + if (fd_empty(f)) + return -EBADF; + sock = sock_from_file(fd_file(f)); + if (unlikely(!sock)) + return -ENOTSOCK; + + err = move_addr_to_kernel(umyaddr, addrlen, &address); + if (unlikely(err)) + return err; + + return __sys_bind_socket(sock, &address, addrlen); } SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) @@ -1673,26 +1915,32 @@ SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) * necessary for a listen, and if that works, we mark the socket as * ready for listening. */ +int __sys_listen_socket(struct socket *sock, int backlog) +{ + int somaxconn, err; + + somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn); + if ((unsigned int)backlog > somaxconn) + backlog = somaxconn; + + err = security_socket_listen(sock, backlog); + if (!err) + err = READ_ONCE(sock->ops)->listen(sock, backlog); + return err; +} int __sys_listen(int fd, int backlog) { + CLASS(fd, f)(fd); struct socket *sock; - int err, fput_needed; - int somaxconn; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock) { - somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; - if ((unsigned int)backlog > somaxconn) - backlog = somaxconn; - err = security_socket_listen(sock, backlog); - if (!err) - err = sock->ops->listen(sock, backlog); + if (fd_empty(f)) + return -EBADF; + sock = sock_from_file(fd_file(f)); + if (unlikely(!sock)) + return -ENOTSOCK; - fput_light(sock->file, fput_needed); - } - return err; + return __sys_listen_socket(sock, backlog); } SYSCALL_DEFINE2(listen, int, fd, int, backlog) @@ -1700,65 +1948,49 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog) return __sys_listen(fd, backlog); } -int __sys_accept4_file(struct file *file, unsigned file_flags, +struct file *do_accept(struct file *file, struct proto_accept_arg *arg, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags, - unsigned long nofile) + int __user *upeer_addrlen, int flags) { struct socket *sock, *newsock; struct file *newfile; - int err, len, newfd; + int err, len; struct sockaddr_storage address; + const struct proto_ops *ops; - if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) - return -EINVAL; - - if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) - flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; - - sock = sock_from_file(file, &err); + sock = sock_from_file(file); if (!sock) - goto out; + return ERR_PTR(-ENOTSOCK); - err = -ENFILE; newsock = sock_alloc(); if (!newsock) - goto out; + return ERR_PTR(-ENFILE); + ops = READ_ONCE(sock->ops); newsock->type = sock->type; - newsock->ops = sock->ops; + newsock->ops = ops; /* * We don't need try_module_get here, as the listening socket (sock) * has the protocol module (sock->ops->owner) held. */ - __module_get(newsock->ops->owner); + __module_get(ops->owner); - newfd = __get_unused_fd_flags(flags, nofile); - if (unlikely(newfd < 0)) { - err = newfd; - sock_release(newsock); - goto out; - } newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name); - if (IS_ERR(newfile)) { - err = PTR_ERR(newfile); - put_unused_fd(newfd); - goto out; - } + if (IS_ERR(newfile)) + return newfile; err = security_socket_accept(sock, newsock); if (err) goto out_fd; - err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags, - false); + arg->flags |= sock->file->f_flags; + err = ops->accept(sock, newsock, arg); if (err < 0) goto out_fd; if (upeer_sockaddr) { - len = newsock->ops->getname(newsock, - (struct sockaddr *)&address, 2); + len = ops->getname(newsock, (struct sockaddr *)&address, 2); if (len < 0) { err = -ECONNABORTED; goto out_fd; @@ -1770,16 +2002,24 @@ int __sys_accept4_file(struct file *file, unsigned file_flags, } /* File flags are not inherited via accept() unlike another OSes. */ - - fd_install(newfd, newfile); - err = newfd; -out: - return err; + return newfile; out_fd: fput(newfile); - put_unused_fd(newfd); - goto out; + return ERR_PTR(err); +} + +static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags) +{ + struct proto_accept_arg arg = { }; + if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return -EINVAL; + + if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) + flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; + + return FD_ADD(flags, do_accept(file, &arg, upeer_sockaddr, upeer_addrlen, flags)); } /* @@ -1797,19 +2037,12 @@ out_fd: int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags) { - int ret = -EBADF; - struct fd f; - - f = fdget(fd); - if (f.file) { - ret = __sys_accept4_file(f.file, 0, upeer_sockaddr, - upeer_addrlen, flags, - rlimit(RLIMIT_NOFILE)); - if (f.flags) - fput(f.file); - } + CLASS(fd, f)(fd); - return ret; + if (fd_empty(f)) + return -EBADF; + return __sys_accept4_file(fd_file(f), upeer_sockaddr, + upeer_addrlen, flags); } SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, @@ -1842,38 +2075,37 @@ int __sys_connect_file(struct file *file, struct sockaddr_storage *address, struct socket *sock; int err; - sock = sock_from_file(file, &err); - if (!sock) + sock = sock_from_file(file); + if (!sock) { + err = -ENOTSOCK; goto out; + } err = security_socket_connect(sock, (struct sockaddr *)address, addrlen); if (err) goto out; - err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, - sock->file->f_flags | file_flags); + err = READ_ONCE(sock->ops)->connect(sock, (struct sockaddr_unsized *)address, + addrlen, sock->file->f_flags | file_flags); out: return err; } int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) { - int ret = -EBADF; - struct fd f; + struct sockaddr_storage address; + CLASS(fd, f)(fd); + int ret; - f = fdget(fd); - if (f.file) { - struct sockaddr_storage address; + if (fd_empty(f)) + return -EBADF; - ret = move_addr_to_kernel(uservaddr, addrlen, &address); - if (!ret) - ret = __sys_connect_file(f.file, &address, addrlen, 0); - if (f.flags) - fput(f.file); - } + ret = move_addr_to_kernel(uservaddr, addrlen, &address); + if (ret) + return ret; - return ret; + return __sys_connect_file(fd_file(f), &address, addrlen, 0); } SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, @@ -1882,78 +2114,53 @@ SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, return __sys_connect(fd, uservaddr, addrlen); } -/* - * Get the local address ('name') of a socket object. Move the obtained - * name to user space. - */ - -int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, - int __user *usockaddr_len) +int do_getsockname(struct socket *sock, int peer, + struct sockaddr __user *usockaddr, int __user *usockaddr_len) { - struct socket *sock; struct sockaddr_storage address; - int err, fput_needed; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; + int err; - err = security_socket_getsockname(sock); + if (peer) + err = security_socket_getpeername(sock); + else + err = security_socket_getsockname(sock); if (err) - goto out_put; - - err = sock->ops->getname(sock, (struct sockaddr *)&address, 0); + return err; + err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, peer); if (err < 0) - goto out_put; - /* "err" is actually length in this case */ - err = move_addr_to_user(&address, err, usockaddr, usockaddr_len); - -out_put: - fput_light(sock->file, fput_needed); -out: - return err; -} - -SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, - int __user *, usockaddr_len) -{ - return __sys_getsockname(fd, usockaddr, usockaddr_len); + return err; + /* "err" is actually length in this case */ + return move_addr_to_user(&address, err, usockaddr, usockaddr_len); } /* - * Get the remote address ('name') of a socket object. Move the obtained - * name to user space. + * Get the remote or local address ('name') of a socket object. Move the + * obtained name to user space. */ - -int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, - int __user *usockaddr_len) +int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len, int peer) { struct socket *sock; - struct sockaddr_storage address; - int err, fput_needed; + CLASS(fd, f)(fd); - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock != NULL) { - err = security_socket_getpeername(sock); - if (err) { - fput_light(sock->file, fput_needed); - return err; - } + if (fd_empty(f)) + return -EBADF; + sock = sock_from_file(fd_file(f)); + if (unlikely(!sock)) + return -ENOTSOCK; + return do_getsockname(sock, peer, usockaddr, usockaddr_len); +} - err = sock->ops->getname(sock, (struct sockaddr *)&address, 1); - if (err >= 0) - /* "err" is actually length in this case */ - err = move_addr_to_user(&address, err, usockaddr, - usockaddr_len); - fput_light(sock->file, fput_needed); - } - return err; +SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, + int __user *, usockaddr_len) +{ + return __sys_getsockname(fd, usockaddr, usockaddr_len, 0); } SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len) { - return __sys_getpeername(fd, usockaddr, usockaddr_len); + return __sys_getsockname(fd, usockaddr, usockaddr_len, 1); } /* @@ -1968,36 +2175,35 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, struct sockaddr_storage address; int err; struct msghdr msg; - struct iovec iov; - int fput_needed; - err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter); + err = import_ubuf(ITER_SOURCE, buff, len, &msg.msg_iter); if (unlikely(err)) return err; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; + + CLASS(fd, f)(fd); + if (fd_empty(f)) + return -EBADF; + sock = sock_from_file(fd_file(f)); + if (unlikely(!sock)) + return -ENOTSOCK; msg.msg_name = NULL; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_namelen = 0; + msg.msg_ubuf = NULL; if (addr) { err = move_addr_to_kernel(addr, addr_len, &address); if (err < 0) - goto out_put; + return err; msg.msg_name = (struct sockaddr *)&address; msg.msg_namelen = addr_len; } + flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; msg.msg_flags = flags; - err = sock_sendmsg(sock, &msg); - -out_put: - fput_light(sock->file, fput_needed); -out: - return err; + return __sock_sendmsg(sock, &msg); } SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, @@ -2025,28 +2231,26 @@ SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags, struct sockaddr __user *addr, int __user *addr_len) { - struct socket *sock; - struct iovec iov; - struct msghdr msg; struct sockaddr_storage address; + struct msghdr msg = { + /* Save some cycles and don't copy the address if not needed */ + .msg_name = addr ? (struct sockaddr *)&address : NULL, + }; + struct socket *sock; int err, err2; - int fput_needed; - err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter); + err = import_ubuf(ITER_DEST, ubuf, size, &msg.msg_iter); if (unlikely(err)) return err; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; - msg.msg_control = NULL; - msg.msg_controllen = 0; - /* Save some cycles and don't copy the address if not needed */ - msg.msg_name = addr ? (struct sockaddr *)&address : NULL; - /* We assume all kernel code knows the size of sockaddr_storage */ - msg.msg_namelen = 0; - msg.msg_iocb = NULL; - msg.msg_flags = 0; + CLASS(fd, f)(fd); + + if (fd_empty(f)) + return -EBADF; + sock = sock_from_file(fd_file(f)); + if (unlikely(!sock)) + return -ENOTSOCK; + if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, flags); @@ -2057,9 +2261,6 @@ int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags, if (err2 < 0) err = err2; } - - fput_light(sock->file, fput_needed); -out: return err; } @@ -2080,62 +2281,71 @@ SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size, return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); } -/* - * Set a socket option. Because we don't know the option lengths we have - * to pass the user mode parameter for the protocols to sort out. - */ +static bool sock_use_custom_sol_socket(const struct socket *sock) +{ + return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags); +} -static int __sys_setsockopt(int fd, int level, int optname, - char __user *optval, int optlen) +int do_sock_setsockopt(struct socket *sock, bool compat, int level, + int optname, sockptr_t optval, int optlen) { - mm_segment_t oldfs = get_fs(); + const struct proto_ops *ops; char *kernel_optval = NULL; - int err, fput_needed; - struct socket *sock; + int err; if (optlen < 0) return -EINVAL; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock != NULL) { - err = security_socket_setsockopt(sock, level, optname); - if (err) - goto out_put; + err = security_socket_setsockopt(sock, level, optname); + if (err) + goto out_put; - err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, - &optname, optval, &optlen, + if (!compat) + err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname, + optval, &optlen, &kernel_optval); + if (err < 0) + goto out_put; + if (err > 0) { + err = 0; + goto out_put; + } - if (err < 0) { - goto out_put; - } else if (err > 0) { - err = 0; - goto out_put; - } - - if (kernel_optval) { - set_fs(KERNEL_DS); - optval = (char __user __force *)kernel_optval; - } - - if (level == SOL_SOCKET) - err = - sock_setsockopt(sock, level, optname, optval, + if (kernel_optval) + optval = KERNEL_SOCKPTR(kernel_optval); + ops = READ_ONCE(sock->ops); + if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock)) + err = sock_setsockopt(sock, level, optname, optval, optlen); + else if (unlikely(!ops->setsockopt)) + err = -EOPNOTSUPP; + else + err = ops->setsockopt(sock, level, optname, optval, optlen); - else - err = - sock->ops->setsockopt(sock, level, optname, optval, - optlen); - - if (kernel_optval) { - set_fs(oldfs); - kfree(kernel_optval); - } + kfree(kernel_optval); out_put: - fput_light(sock->file, fput_needed); - } return err; } +EXPORT_SYMBOL(do_sock_setsockopt); + +/* Set a socket option. Because we don't know the option lengths we have + * to pass the user mode parameter for the protocols to sort out. + */ +int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, + int optlen) +{ + sockptr_t optval = USER_SOCKPTR(user_optval); + bool compat = in_compat_syscall(); + struct socket *sock; + CLASS(fd, f)(fd); + + if (fd_empty(f)) + return -EBADF; + sock = sock_from_file(fd_file(f)); + if (unlikely(!sock)) + return -ENOTSOCK; + + return do_sock_setsockopt(sock, compat, level, optname, optval, optlen); +} SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, char __user *, optval, int, optlen) @@ -2143,43 +2353,65 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, return __sys_setsockopt(fd, level, optname, optval, optlen); } -/* - * Get a socket option. Because we don't know the option lengths we have - * to pass a user mode parameter for the protocols to sort out. - */ +INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, + int optname)); -static int __sys_getsockopt(int fd, int level, int optname, - char __user *optval, int __user *optlen) +int do_sock_getsockopt(struct socket *sock, bool compat, int level, + int optname, sockptr_t optval, sockptr_t optlen) { - int err, fput_needed; - struct socket *sock; - int max_optlen; + int max_optlen __maybe_unused = 0; + const struct proto_ops *ops; + int err; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock != NULL) { - err = security_socket_getsockopt(sock, level, optname); - if (err) - goto out_put; + err = security_socket_getsockopt(sock, level, optname); + if (err) + return err; - max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen); + if (!compat) + copy_from_sockptr(&max_optlen, optlen, sizeof(int)); - if (level == SOL_SOCKET) - err = - sock_getsockopt(sock, level, optname, optval, - optlen); - else - err = - sock->ops->getsockopt(sock, level, optname, optval, - optlen); + ops = READ_ONCE(sock->ops); + if (level == SOL_SOCKET) { + err = sk_getsockopt(sock->sk, level, optname, optval, optlen); + } else if (unlikely(!ops->getsockopt)) { + err = -EOPNOTSUPP; + } else { + if (WARN_ONCE(optval.is_kernel || optlen.is_kernel, + "Invalid argument type")) + return -EOPNOTSUPP; - err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname, - optval, optlen, - max_optlen, err); -out_put: - fput_light(sock->file, fput_needed); + err = ops->getsockopt(sock, level, optname, optval.user, + optlen.user); } + + if (!compat) + err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname, + optval, optlen, max_optlen, + err); + return err; } +EXPORT_SYMBOL(do_sock_getsockopt); + +/* + * Get a socket option. Because we don't know the option lengths we have + * to pass a user mode parameter for the protocols to sort out. + */ +int __sys_getsockopt(int fd, int level, int optname, char __user *optval, + int __user *optlen) +{ + struct socket *sock; + CLASS(fd, f)(fd); + + if (fd_empty(f)) + return -EBADF; + sock = sock_from_file(fd_file(f)); + if (unlikely(!sock)) + return -ENOTSOCK; + + return do_sock_getsockopt(sock, in_compat_syscall(), level, optname, + USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); +} SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, char __user *, optval, int __user *, optlen) @@ -2191,19 +2423,29 @@ SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, * Shutdown a socket. */ +int __sys_shutdown_sock(struct socket *sock, int how) +{ + int err; + + err = security_socket_shutdown(sock, how); + if (!err) + err = READ_ONCE(sock->ops)->shutdown(sock, how); + + return err; +} + int __sys_shutdown(int fd, int how) { - int err, fput_needed; struct socket *sock; + CLASS(fd, f)(fd); - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock != NULL) { - err = security_socket_shutdown(sock, how); - if (!err) - err = sock->ops->shutdown(sock, how); - fput_light(sock->file, fput_needed); - } - return err; + if (fd_empty(f)) + return -EBADF; + sock = sock_from_file(fd_file(f)); + if (unlikely(!sock)) + return -ENOTSOCK; + + return __sys_shutdown_sock(sock, how); } SYSCALL_DEFINE2(shutdown, int, fd, int, how) @@ -2223,24 +2465,20 @@ struct used_address { unsigned int name_len; }; -int __copy_msghdr_from_user(struct msghdr *kmsg, - struct user_msghdr __user *umsg, - struct sockaddr __user **save_addr, - struct iovec __user **uiov, size_t *nsegs) +int __copy_msghdr(struct msghdr *kmsg, + struct user_msghdr *msg, + struct sockaddr __user **save_addr) { - struct user_msghdr msg; ssize_t err; - if (copy_from_user(&msg, umsg, sizeof(*umsg))) - return -EFAULT; - kmsg->msg_control_is_user = true; - kmsg->msg_control_user = msg.msg_control; - kmsg->msg_controllen = msg.msg_controllen; - kmsg->msg_flags = msg.msg_flags; + kmsg->msg_get_inq = 0; + kmsg->msg_control_user = msg->msg_control; + kmsg->msg_controllen = msg->msg_controllen; + kmsg->msg_flags = msg->msg_flags; - kmsg->msg_namelen = msg.msg_namelen; - if (!msg.msg_name) + kmsg->msg_namelen = msg->msg_namelen; + if (!msg->msg_name) kmsg->msg_namelen = 0; if (kmsg->msg_namelen < 0) @@ -2250,11 +2488,11 @@ int __copy_msghdr_from_user(struct msghdr *kmsg, kmsg->msg_namelen = sizeof(struct sockaddr_storage); if (save_addr) - *save_addr = msg.msg_name; + *save_addr = msg->msg_name; - if (msg.msg_name && kmsg->msg_namelen) { + if (msg->msg_name && kmsg->msg_namelen) { if (!save_addr) { - err = move_addr_to_kernel(msg.msg_name, + err = move_addr_to_kernel(msg->msg_name, kmsg->msg_namelen, kmsg->msg_name); if (err < 0) @@ -2265,12 +2503,11 @@ int __copy_msghdr_from_user(struct msghdr *kmsg, kmsg->msg_namelen = 0; } - if (msg.msg_iovlen > UIO_MAXIOV) + if (msg->msg_iovlen > UIO_MAXIOV) return -EMSGSIZE; kmsg->msg_iocb = NULL; - *uiov = msg.msg_iov; - *nsegs = msg.msg_iovlen; + kmsg->msg_ubuf = NULL; return 0; } @@ -2282,12 +2519,14 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, struct user_msghdr msg; ssize_t err; - err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov, - &msg.msg_iovlen); + if (copy_from_user(&msg, umsg, sizeof(*umsg))) + return -EFAULT; + + err = __copy_msghdr(kmsg, &msg, save_addr); if (err) return err; - err = import_iovec(save_addr ? READ : WRITE, + err = import_iovec(save_addr ? ITER_DEST : ITER_SOURCE, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV, iov, &kmsg->msg_iter); return err < 0 ? err : 0; @@ -2332,6 +2571,7 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys, msg_sys->msg_control = ctl_buf; msg_sys->msg_control_is_user = false; } + flags &= ~MSG_INTERNAL_SENDMSG_FLAGS; msg_sys->msg_flags = flags; if (sock->file->f_flags & O_NONBLOCK) @@ -2349,7 +2589,7 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys, err = sock_sendmsg_nosec(sock, msg_sys); goto out_freectl; } - err = sock_sendmsg(sock, msg_sys); + err = __sock_sendmsg(sock, msg_sys); /* * If this is sendmmsg() and sending to current destination address was * successful, remember it. @@ -2368,9 +2608,9 @@ out: return err; } -int sendmsg_copy_msghdr(struct msghdr *msg, - struct user_msghdr __user *umsg, unsigned flags, - struct iovec **iov) +static int sendmsg_copy_msghdr(struct msghdr *msg, + struct user_msghdr __user *umsg, unsigned flags, + struct iovec **iov) { int err; @@ -2415,32 +2655,27 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg, unsigned int flags) { - /* disallow ancillary data requests from this path */ - if (msg->msg_control || msg->msg_controllen) - return -EINVAL; - return ____sys_sendmsg(sock, msg, flags, NULL, 0); } long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, bool forbid_cmsg_compat) { - int fput_needed, err; struct msghdr msg_sys; struct socket *sock; if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT)) return -EINVAL; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; + CLASS(fd, f)(fd); - err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0); + if (fd_empty(f)) + return -EBADF; + sock = sock_from_file(fd_file(f)); + if (unlikely(!sock)) + return -ENOTSOCK; - fput_light(sock->file, fput_needed); -out: - return err; + return ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0); } SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags) @@ -2455,7 +2690,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags, bool forbid_cmsg_compat) { - int fput_needed, err, datagrams; + int err, datagrams; struct socket *sock; struct mmsghdr __user *entry; struct compat_mmsghdr __user *compat_entry; @@ -2471,9 +2706,13 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, datagrams = 0; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - return err; + CLASS(fd, f)(fd); + + if (fd_empty(f)) + return -EBADF; + sock = sock_from_file(fd_file(f)); + if (unlikely(!sock)) + return -ENOTSOCK; used_address.name_len = UINT_MAX; entry = mmsg; @@ -2510,8 +2749,6 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, cond_resched(); } - fput_light(sock->file, fput_needed); - /* We only return an error if no datagrams were able to be sent */ if (datagrams != 0) return datagrams; @@ -2525,10 +2762,10 @@ SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, return __sys_sendmmsg(fd, mmsg, vlen, flags, true); } -int recvmsg_copy_msghdr(struct msghdr *msg, - struct user_msghdr __user *umsg, unsigned flags, - struct sockaddr __user **uaddr, - struct iovec **iov) +static int recvmsg_copy_msghdr(struct msghdr *msg, + struct user_msghdr __user *umsg, unsigned flags, + struct sockaddr __user **uaddr, + struct iovec **iov) { ssize_t err; @@ -2627,32 +2864,27 @@ long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg, struct user_msghdr __user *umsg, struct sockaddr __user *uaddr, unsigned int flags) { - /* disallow ancillary data requests from this path */ - if (msg->msg_control || msg->msg_controllen) - return -EINVAL; - return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0); } long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, bool forbid_cmsg_compat) { - int fput_needed, err; struct msghdr msg_sys; struct socket *sock; if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT)) return -EINVAL; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; + CLASS(fd, f)(fd); - err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0); + if (fd_empty(f)) + return -EBADF; + sock = sock_from_file(fd_file(f)); + if (unlikely(!sock)) + return -ENOTSOCK; - fput_light(sock->file, fput_needed); -out: - return err; + return ___sys_recvmsg(sock, msg, &msg_sys, flags, 0); } SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg, @@ -2669,7 +2901,7 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags, struct timespec64 *timeout) { - int fput_needed, err, datagrams; + int err = 0, datagrams; struct socket *sock; struct mmsghdr __user *entry; struct compat_mmsghdr __user *compat_entry; @@ -2684,16 +2916,18 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg, datagrams = 0; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - return err; + CLASS(fd, f)(fd); + + if (fd_empty(f)) + return -EBADF; + sock = sock_from_file(fd_file(f)); + if (unlikely(!sock)) + return -ENOTSOCK; if (likely(!(flags & MSG_ERRQUEUE))) { err = sock_error(sock->sk); - if (err) { - datagrams = err; - goto out_put; - } + if (err) + return err; } entry = mmsg; @@ -2750,12 +2984,10 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg, } if (err == 0) - goto out_put; + return datagrams; - if (datagrams == 0) { - datagrams = err; - goto out_put; - } + if (datagrams == 0) + return err; /* * We may return less entries than requested (vlen) if the @@ -2768,11 +3000,8 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg, * error to return on the next call or if the * app asks about it using getsockopt(SO_ERROR). */ - sock->sk->sk_err = -err; + WRITE_ONCE(sock->sk->sk_err, -err); } -out_put: - fput_light(sock->file, fput_needed); - return datagrams; } @@ -2895,12 +3124,12 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) case SYS_GETSOCKNAME: err = __sys_getsockname(a0, (struct sockaddr __user *)a1, - (int __user *)a[2]); + (int __user *)a[2], 0); break; case SYS_GETPEERNAME: err = - __sys_getpeername(a0, (struct sockaddr __user *)a1, - (int __user *)a[2]); + __sys_getsockname(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], 1); break; case SYS_SOCKETPAIR: err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]); @@ -2998,7 +3227,7 @@ int sock_register(const struct net_proto_family *ops) } spin_unlock(&net_family_lock); - pr_info("NET: Registered protocol family %d\n", ops->family); + pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]); return err; } EXPORT_SYMBOL(sock_register); @@ -3026,7 +3255,7 @@ void sock_unregister(int family) synchronize_rcu(); - pr_info("NET: Unregistered protocol family %d\n", family); + pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]); } EXPORT_SYMBOL(sock_unregister); @@ -3058,7 +3287,7 @@ static int __init sock_init(void) err = register_filesystem(&sock_fs_type); if (err) - goto out_fs; + goto out; sock_mnt = kern_mount(&sock_fs_type); if (IS_ERR(sock_mnt)) { err = PTR_ERR(sock_mnt); @@ -3081,7 +3310,6 @@ out: out_mount: unregister_filesystem(&sock_fs_type); -out_fs: goto out; } @@ -3095,154 +3323,55 @@ void socket_seq_show(struct seq_file *seq) } #endif /* CONFIG_PROC_FS */ -#ifdef CONFIG_COMPAT -static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) +/* Handle the fact that while struct ifreq has the same *layout* on + * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, + * which are handled elsewhere, it still has different *size* due to + * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, + * resulting in struct ifreq being 32 and 40 bytes respectively). + * As a result, if the struct happens to be at the end of a page and + * the next page isn't readable/writable, we get a fault. To prevent + * that, copy back and forth to the full size. + */ +int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg) { - struct compat_ifconf ifc32; - struct ifconf ifc; - int err; + if (in_compat_syscall()) { + struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr; - if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) - return -EFAULT; + memset(ifr, 0, sizeof(*ifr)); + if (copy_from_user(ifr32, arg, sizeof(*ifr32))) + return -EFAULT; - ifc.ifc_len = ifc32.ifc_len; - ifc.ifc_req = compat_ptr(ifc32.ifcbuf); + if (ifrdata) + *ifrdata = compat_ptr(ifr32->ifr_data); - rtnl_lock(); - err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq)); - rtnl_unlock(); - if (err) - return err; + return 0; + } - ifc32.ifc_len = ifc.ifc_len; - if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) + if (copy_from_user(ifr, arg, sizeof(*ifr))) return -EFAULT; + if (ifrdata) + *ifrdata = ifr->ifr_data; + return 0; } +EXPORT_SYMBOL(get_user_ifreq); -static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) +int put_user_ifreq(struct ifreq *ifr, void __user *arg) { - struct compat_ethtool_rxnfc __user *compat_rxnfc; - bool convert_in = false, convert_out = false; - size_t buf_size = 0; - struct ethtool_rxnfc __user *rxnfc = NULL; - struct ifreq ifr; - u32 rule_cnt = 0, actual_rule_cnt; - u32 ethcmd; - u32 data; - int ret; + size_t size = sizeof(*ifr); - if (get_user(data, &ifr32->ifr_ifru.ifru_data)) - return -EFAULT; - - compat_rxnfc = compat_ptr(data); + if (in_compat_syscall()) + size = sizeof(struct compat_ifreq); - if (get_user(ethcmd, &compat_rxnfc->cmd)) + if (copy_to_user(arg, ifr, size)) return -EFAULT; - /* Most ethtool structures are defined without padding. - * Unfortunately struct ethtool_rxnfc is an exception. - */ - switch (ethcmd) { - default: - break; - case ETHTOOL_GRXCLSRLALL: - /* Buffer size is variable */ - if (get_user(rule_cnt, &compat_rxnfc->rule_cnt)) - return -EFAULT; - if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) - return -ENOMEM; - buf_size += rule_cnt * sizeof(u32); - /* fall through */ - case ETHTOOL_GRXRINGS: - case ETHTOOL_GRXCLSRLCNT: - case ETHTOOL_GRXCLSRULE: - case ETHTOOL_SRXCLSRLINS: - convert_out = true; - /* fall through */ - case ETHTOOL_SRXCLSRLDEL: - buf_size += sizeof(struct ethtool_rxnfc); - convert_in = true; - rxnfc = compat_alloc_user_space(buf_size); - break; - } - - if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ)) - return -EFAULT; - - ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc; - - if (convert_in) { - /* We expect there to be holes between fs.m_ext and - * fs.ring_cookie and at the end of fs, but nowhere else. - */ - BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) + - sizeof(compat_rxnfc->fs.m_ext) != - offsetof(struct ethtool_rxnfc, fs.m_ext) + - sizeof(rxnfc->fs.m_ext)); - BUILD_BUG_ON( - offsetof(struct compat_ethtool_rxnfc, fs.location) - - offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != - offsetof(struct ethtool_rxnfc, fs.location) - - offsetof(struct ethtool_rxnfc, fs.ring_cookie)); - - if (copy_in_user(rxnfc, compat_rxnfc, - (void __user *)(&rxnfc->fs.m_ext + 1) - - (void __user *)rxnfc) || - copy_in_user(&rxnfc->fs.ring_cookie, - &compat_rxnfc->fs.ring_cookie, - (void __user *)(&rxnfc->fs.location + 1) - - (void __user *)&rxnfc->fs.ring_cookie)) - return -EFAULT; - if (ethcmd == ETHTOOL_GRXCLSRLALL) { - if (put_user(rule_cnt, &rxnfc->rule_cnt)) - return -EFAULT; - } else if (copy_in_user(&rxnfc->rule_cnt, - &compat_rxnfc->rule_cnt, - sizeof(rxnfc->rule_cnt))) - return -EFAULT; - } - - ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL); - if (ret) - return ret; - - if (convert_out) { - if (copy_in_user(compat_rxnfc, rxnfc, - (const void __user *)(&rxnfc->fs.m_ext + 1) - - (const void __user *)rxnfc) || - copy_in_user(&compat_rxnfc->fs.ring_cookie, - &rxnfc->fs.ring_cookie, - (const void __user *)(&rxnfc->fs.location + 1) - - (const void __user *)&rxnfc->fs.ring_cookie) || - copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, - sizeof(rxnfc->rule_cnt))) - return -EFAULT; - - if (ethcmd == ETHTOOL_GRXCLSRLALL) { - /* As an optimisation, we only copy the actual - * number of rules that the underlying - * function returned. Since Mallory might - * change the rule count in user memory, we - * check that it is less than the rule count - * originally given (as the user buffer size), - * which has been range-checked. - */ - if (get_user(actual_rule_cnt, &rxnfc->rule_cnt)) - return -EFAULT; - if (actual_rule_cnt < rule_cnt) - rule_cnt = actual_rule_cnt; - if (copy_in_user(&compat_rxnfc->rule_locs[0], - &rxnfc->rule_locs[0], - rule_cnt * sizeof(u32))) - return -EFAULT; - } - } - return 0; } +EXPORT_SYMBOL(put_user_ifreq); +#ifdef CONFIG_COMPAT static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) { compat_uptr_t uptr32; @@ -3250,7 +3379,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32 void __user *saved; int err; - if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq))) + if (get_user_ifreq(&ifr, NULL, uifr32)) return -EFAULT; if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) @@ -3259,10 +3388,10 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc; ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32); - err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL); + err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL); if (!err) { ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved; - if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq))) + if (put_user_ifreq(&ifr, uifr32)) err = -EFAULT; } return err; @@ -3273,112 +3402,15 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, struct compat_ifreq __user *u_ifreq32) { struct ifreq ifreq; - u32 data32; - - if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ)) - return -EFAULT; - if (get_user(data32, &u_ifreq32->ifr_data)) - return -EFAULT; - ifreq.ifr_data = compat_ptr(data32); - - return dev_ioctl(net, cmd, &ifreq, NULL); -} - -static int compat_ifreq_ioctl(struct net *net, struct socket *sock, - unsigned int cmd, - struct compat_ifreq __user *uifr32) -{ - struct ifreq __user *uifr; - int err; - - /* Handle the fact that while struct ifreq has the same *layout* on - * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, - * which are handled elsewhere, it still has different *size* due to - * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, - * resulting in struct ifreq being 32 and 40 bytes respectively). - * As a result, if the struct happens to be at the end of a page and - * the next page isn't readable/writable, we get a fault. To prevent - * that, copy back and forth to the full size. - */ + void __user *data; - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) + if (!is_socket_ioctl_cmd(cmd)) + return -ENOTTY; + if (get_user_ifreq(&ifreq, &data, u_ifreq32)) return -EFAULT; + ifreq.ifr_data = data; - err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); - - if (!err) { - switch (cmd) { - case SIOCGIFFLAGS: - case SIOCGIFMETRIC: - case SIOCGIFMTU: - case SIOCGIFMEM: - case SIOCGIFHWADDR: - case SIOCGIFINDEX: - case SIOCGIFADDR: - case SIOCGIFBRDADDR: - case SIOCGIFDSTADDR: - case SIOCGIFNETMASK: - case SIOCGIFPFLAGS: - case SIOCGIFTXQLEN: - case SIOCGMIIPHY: - case SIOCGMIIREG: - case SIOCGIFNAME: - if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) - err = -EFAULT; - break; - } - } - return err; -} - -static int compat_sioc_ifmap(struct net *net, unsigned int cmd, - struct compat_ifreq __user *uifr32) -{ - struct ifreq ifr; - struct compat_ifmap __user *uifmap32; - int err; - - uifmap32 = &uifr32->ifr_ifru.ifru_map; - err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); - err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); - err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); - err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); - err |= get_user(ifr.ifr_map.irq, &uifmap32->irq); - err |= get_user(ifr.ifr_map.dma, &uifmap32->dma); - err |= get_user(ifr.ifr_map.port, &uifmap32->port); - if (err) - return -EFAULT; - - err = dev_ioctl(net, cmd, &ifr, NULL); - - if (cmd == SIOCGIFMAP && !err) { - err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); - err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); - err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); - err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); - err |= put_user(ifr.ifr_map.irq, &uifmap32->irq); - err |= put_user(ifr.ifr_map.dma, &uifmap32->dma); - err |= put_user(ifr.ifr_map.port, &uifmap32->port); - if (err) - err = -EFAULT; - } - return err; -} - -/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE - * for some operations; this forces use of the newer bridge-utils that - * use compatible ioctls - */ -static int old_bridge_ioctl(compat_ulong_t __user *argp) -{ - compat_ulong_t tmp; - - if (get_user(tmp, argp)) - return -EFAULT; - if (tmp == BRCTL_GET_VERSION) - return BRCTL_VERSION + 1; - return -EINVAL; + return dev_ioctl(net, cmd, &ifreq, data, NULL); } static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, @@ -3387,30 +3419,23 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, void __user *argp = compat_ptr(arg); struct sock *sk = sock->sk; struct net *net = sock_net(sk); + const struct proto_ops *ops; if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) - return compat_ifr_data_ioctl(net, cmd, argp); + return sock_ioctl(file, cmd, (unsigned long)argp); switch (cmd) { - case SIOCSIFBR: - case SIOCGIFBR: - return old_bridge_ioctl(argp); - case SIOCGIFCONF: - return compat_dev_ifconf(net, argp); - case SIOCETHTOOL: - return ethtool_ioctl(net, argp); case SIOCWANDEV: return compat_siocwandev(net, argp); - case SIOCGIFMAP: - case SIOCSIFMAP: - return compat_sioc_ifmap(net, cmd, argp); case SIOCGSTAMP_OLD: case SIOCGSTAMPNS_OLD: - if (!sock->ops->gettstamp) + ops = READ_ONCE(sock->ops); + if (!ops->gettstamp) return -ENOIOCTLCMD; - return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD, - !COMPAT_USE_64BIT_TIME); + return ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD, + !COMPAT_USE_64BIT_TIME); + case SIOCETHTOOL: case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: case SIOCSHWTSTAMP: @@ -3423,17 +3448,22 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGPGRP: case SIOCBRADDBR: case SIOCBRDELBR: + case SIOCBRADDIF: + case SIOCBRDELIF: case SIOCGIFVLAN: case SIOCSIFVLAN: - case SIOCADDDLCI: - case SIOCDELDLCI: case SIOCGSKNS: case SIOCGSTAMP_NEW: case SIOCGSTAMPNS_NEW: + case SIOCGIFCONF: + case SIOCSIFBR: + case SIOCGIFBR: return sock_ioctl(file, cmd, arg); case SIOCGIFFLAGS: case SIOCSIFFLAGS: + case SIOCGIFMAP: + case SIOCSIFMAP: case SIOCGIFMETRIC: case SIOCSIFMETRIC: case SIOCGIFMTU: @@ -3459,8 +3489,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGIFPFLAGS: case SIOCGIFTXQLEN: case SIOCSIFTXQLEN: - case SIOCBRADDIF: - case SIOCBRDELIF: case SIOCGIFNAME: case SIOCSIFNAME: case SIOCGMIIPHY: @@ -3470,8 +3498,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: - return compat_ifreq_ioctl(net, sock, cmd, argp); - case SIOCSARP: case SIOCGARP: case SIOCDARP: @@ -3488,6 +3514,7 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct socket *sock = file->private_data; + const struct proto_ops *ops = READ_ONCE(sock->ops); int ret = -ENOIOCTLCMD; struct sock *sk; struct net *net; @@ -3495,8 +3522,8 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd, sk = sock->sk; net = sock_net(sk); - if (sock->ops->compat_ioctl) - ret = sock->ops->compat_ioctl(sock, cmd, arg); + if (ops->compat_ioctl) + ret = ops->compat_ioctl(sock, cmd, arg); if (ret == -ENOIOCTLCMD && (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)) @@ -3518,9 +3545,14 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd, * Returns 0 or an error. */ -int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) +int kernel_bind(struct socket *sock, struct sockaddr_unsized *addr, int addrlen) { - return sock->ops->bind(sock, addr, addrlen); + struct sockaddr_storage address; + + memcpy(&address, addr, addrlen); + + return READ_ONCE(sock->ops)->bind(sock, (struct sockaddr_unsized *)&address, + addrlen); } EXPORT_SYMBOL(kernel_bind); @@ -3534,7 +3566,7 @@ EXPORT_SYMBOL(kernel_bind); int kernel_listen(struct socket *sock, int backlog) { - return sock->ops->listen(sock, backlog); + return READ_ONCE(sock->ops)->listen(sock, backlog); } EXPORT_SYMBOL(kernel_listen); @@ -3552,6 +3584,11 @@ EXPORT_SYMBOL(kernel_listen); int kernel_accept(struct socket *sock, struct socket **newsock, int flags) { struct sock *sk = sock->sk; + const struct proto_ops *ops = READ_ONCE(sock->ops); + struct proto_accept_arg arg = { + .flags = flags, + .kern = true, + }; int err; err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, @@ -3559,15 +3596,15 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) if (err < 0) goto done; - err = sock->ops->accept(sock, *newsock, flags, true); + err = ops->accept(sock, *newsock, &arg); if (err < 0) { sock_release(*newsock); *newsock = NULL; goto done; } - (*newsock)->ops = sock->ops; - __module_get((*newsock)->ops->owner); + (*newsock)->ops = ops; + __module_get(ops->owner); done: return err; @@ -3581,16 +3618,21 @@ EXPORT_SYMBOL(kernel_accept); * @addrlen: address length * @flags: flags (O_NONBLOCK, ...) * - * For datagram sockets, @addr is the addres to which datagrams are sent + * For datagram sockets, @addr is the address to which datagrams are sent * by default, and the only address from which datagrams are received. * For stream sockets, attempts to connect to @addr. * Returns 0 or an error code. */ -int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, +int kernel_connect(struct socket *sock, struct sockaddr_unsized *addr, int addrlen, int flags) { - return sock->ops->connect(sock, addr, addrlen, flags); + struct sockaddr_storage address; + + memcpy(&address, addr, addrlen); + + return READ_ONCE(sock->ops)->connect(sock, (struct sockaddr_unsized *)&address, + addrlen, flags); } EXPORT_SYMBOL(kernel_connect); @@ -3600,78 +3642,32 @@ EXPORT_SYMBOL(kernel_connect); * @addr: address holder * * Fills the @addr pointer with the address which the socket is bound. - * Returns 0 or an error code. + * Returns the length of the address in bytes or an error code. */ int kernel_getsockname(struct socket *sock, struct sockaddr *addr) { - return sock->ops->getname(sock, addr, 0); + return READ_ONCE(sock->ops)->getname(sock, addr, 0); } EXPORT_SYMBOL(kernel_getsockname); /** - * kernel_peername - get the address which the socket is connected (kernel space) + * kernel_getpeername - get the address which the socket is connected (kernel space) * @sock: socket * @addr: address holder * * Fills the @addr pointer with the address which the socket is connected. - * Returns 0 or an error code. + * Returns the length of the address in bytes or an error code. */ int kernel_getpeername(struct socket *sock, struct sockaddr *addr) { - return sock->ops->getname(sock, addr, 1); + return READ_ONCE(sock->ops)->getname(sock, addr, 1); } EXPORT_SYMBOL(kernel_getpeername); /** - * kernel_sendpage - send a &page through a socket (kernel space) - * @sock: socket - * @page: page - * @offset: page offset - * @size: total size in bytes - * @flags: flags (MSG_DONTWAIT, ...) - * - * Returns the total amount sent in bytes or an error. - */ - -int kernel_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags) -{ - if (sock->ops->sendpage) - return sock->ops->sendpage(sock, page, offset, size, flags); - - return sock_no_sendpage(sock, page, offset, size, flags); -} -EXPORT_SYMBOL(kernel_sendpage); - -/** - * kernel_sendpage_locked - send a &page through the locked sock (kernel space) - * @sk: sock - * @page: page - * @offset: page offset - * @size: total size in bytes - * @flags: flags (MSG_DONTWAIT, ...) - * - * Returns the total amount sent in bytes or an error. - * Caller must hold @sk. - */ - -int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, - size_t size, int flags) -{ - struct socket *sock = sk->sk_socket; - - if (sock->ops->sendpage_locked) - return sock->ops->sendpage_locked(sk, page, offset, size, - flags); - - return sock_no_sendpage_locked(sk, page, offset, size, flags); -} -EXPORT_SYMBOL(kernel_sendpage_locked); - -/** - * kernel_shutdown - shut down part of a full-duplex connection (kernel space) + * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space) * @sock: socket * @how: connection part * @@ -3680,7 +3676,7 @@ EXPORT_SYMBOL(kernel_sendpage_locked); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { - return sock->ops->shutdown(sock, how); + return READ_ONCE(sock->ops)->shutdown(sock, how); } EXPORT_SYMBOL(kernel_sock_shutdown); |
