diff options
Diffstat (limited to 'drivers/net/tap.c')
| -rw-r--r-- | drivers/net/tap.c | 341 |
1 files changed, 122 insertions, 219 deletions
diff --git a/drivers/net/tap.c b/drivers/net/tap.c index c0b52e48f0e6..1197f245e873 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only #include <linux/etherdevice.h> #include <linux/if_tap.h> #include <linux/if_vlan.h> @@ -17,80 +18,17 @@ #include <linux/fs.h> #include <linux/uio.h> +#include <net/gso.h> #include <net/net_namespace.h> #include <net/rtnetlink.h> #include <net/sock.h> +#include <net/xdp.h> #include <linux/virtio_net.h> #include <linux/skb_array.h> -#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) - -#define TAP_VNET_LE 0x80000000 -#define TAP_VNET_BE 0x40000000 - -#ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool tap_legacy_is_little_endian(struct tap_queue *q) -{ - return q->flags & TAP_VNET_BE ? false : - virtio_legacy_is_little_endian(); -} - -static long tap_get_vnet_be(struct tap_queue *q, int __user *sp) -{ - int s = !!(q->flags & TAP_VNET_BE); - - if (put_user(s, sp)) - return -EFAULT; - - return 0; -} - -static long tap_set_vnet_be(struct tap_queue *q, int __user *sp) -{ - int s; - - if (get_user(s, sp)) - return -EFAULT; - - if (s) - q->flags |= TAP_VNET_BE; - else - q->flags &= ~TAP_VNET_BE; - - return 0; -} -#else -static inline bool tap_legacy_is_little_endian(struct tap_queue *q) -{ - return virtio_legacy_is_little_endian(); -} - -static long tap_get_vnet_be(struct tap_queue *q, int __user *argp) -{ - return -EINVAL; -} - -static long tap_set_vnet_be(struct tap_queue *q, int __user *argp) -{ - return -EINVAL; -} -#endif /* CONFIG_TUN_VNET_CROSS_LE */ - -static inline bool tap_is_little_endian(struct tap_queue *q) -{ - return q->flags & TAP_VNET_LE || - tap_legacy_is_little_endian(q); -} - -static inline u16 tap16_to_cpu(struct tap_queue *q, __virtio16 val) -{ - return __virtio16_to_cpu(tap_is_little_endian(q), val); -} +#include "tun_vnet.h" -static inline __virtio16 cpu_to_tap16(struct tap_queue *q, u16 val) -{ - return __cpu_to_virtio16(tap_is_little_endian(q), val); -} +#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) static struct proto tap_proto = { .name = "tap", @@ -321,6 +259,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) struct tap_dev *tap; struct tap_queue *q; netdev_features_t features = TAP_FEATURES; + enum skb_drop_reason drop_reason; tap = tap_dev_get_rcu(dev); if (!tap) @@ -340,27 +279,30 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) features |= tap->tap_features; if (netif_needs_gso(skb, features)) { struct sk_buff *segs = __skb_gso_segment(skb, features, false); + struct sk_buff *next; - if (IS_ERR(segs)) + if (IS_ERR(segs)) { + drop_reason = SKB_DROP_REASON_SKB_GSO_SEG; goto drop; + } if (!segs) { - if (ptr_ring_produce(&q->ring, skb)) + if (ptr_ring_produce(&q->ring, skb)) { + drop_reason = SKB_DROP_REASON_FULL_RING; goto drop; + } goto wake_up; } consume_skb(skb); - while (segs) { - struct sk_buff *nskb = segs->next; - - segs->next = NULL; - if (ptr_ring_produce(&q->ring, segs)) { - kfree_skb(segs); - kfree_skb_list(nskb); + skb_list_walk_safe(segs, skb, next) { + skb_mark_not_on_list(skb); + if (ptr_ring_produce(&q->ring, skb)) { + drop_reason = SKB_DROP_REASON_FULL_RING; + kfree_skb_reason(skb, drop_reason); + kfree_skb_list_reason(next, drop_reason); break; } - segs = nskb; } } else { /* If we receive a partial checksum and the tap side @@ -370,10 +312,14 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) */ if (skb->ip_summed == CHECKSUM_PARTIAL && !(features & NETIF_F_CSUM_MASK) && - skb_checksum_help(skb)) + skb_checksum_help(skb)) { + drop_reason = SKB_DROP_REASON_SKB_CSUM; goto drop; - if (ptr_ring_produce(&q->ring, skb)) + } + if (ptr_ring_produce(&q->ring, skb)) { + drop_reason = SKB_DROP_REASON_FULL_RING; goto drop; + } } wake_up: @@ -384,7 +330,7 @@ drop: /* Count errors/drops only here, thus don't care about args. */ if (tap->count_rx_dropped) tap->count_rx_dropped(tap); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return RX_HANDLER_CONSUMED; } EXPORT_SYMBOL_GPL(tap_handle_frame); @@ -519,13 +465,12 @@ static int tap_open(struct inode *inode, struct file *file) goto err; } - RCU_INIT_POINTER(q->sock.wq, &q->wq); - init_waitqueue_head(&q->wq.wait); + init_waitqueue_head(&q->sock.wq.wait); q->sock.type = SOCK_RAW; q->sock.state = SS_CONNECTED; q->sock.file = file; q->sock.ops = &tap_socket_ops; - sock_init_data(&q->sock, &q->sk); + sock_init_data_uid(&q->sock, &q->sk, current_fsuid()); q->sk.sk_write_space = tap_sock_write_space; q->sk.sk_destruct = tap_sock_destruct; q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; @@ -547,6 +492,9 @@ static int tap_open(struct inode *inode, struct file *file) goto err_put; } + /* tap groks IOCB_NOWAIT just fine, mark it as such */ + file->f_mode |= FMODE_NOWAIT; + dev_put(tap->dev); rtnl_unlock(); @@ -578,7 +526,7 @@ static __poll_t tap_poll(struct file *file, poll_table *wait) goto out; mask = 0; - poll_wait(file, &q->wq.wait, wait); + poll_wait(file, &q->sock.wq.wait, wait); if (!ptr_ring_empty(&q->ring)) mask |= EPOLLIN | EPOLLRDNORM; @@ -602,8 +550,10 @@ static inline struct sk_buff *tap_alloc_skb(struct sock *sk, size_t prepad, if (prepad + len < PAGE_SIZE || !linear) linear = len; + if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER); skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, - err, 0); + err, PAGE_ALLOC_COSTLY_ORDER); if (!skb) return NULL; @@ -630,33 +580,23 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, int err; struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0; + int hdr_len = 0; int copylen = 0; int depth; bool zerocopy = false; size_t linear; + enum skb_drop_reason drop_reason; if (q->flags & IFF_VNET_HDR) { vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); - err = -EINVAL; - if (len < vnet_hdr_len) + hdr_len = tun_vnet_hdr_get(vnet_hdr_len, q->flags, from, &vnet_hdr); + if (hdr_len < 0) { + err = hdr_len; goto err; - len -= vnet_hdr_len; + } - err = -EFAULT; - if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr), from)) - goto err; - iov_iter_advance(from, vnet_hdr_len - sizeof(vnet_hdr)); - if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - tap16_to_cpu(q, vnet_hdr.csum_start) + - tap16_to_cpu(q, vnet_hdr.csum_offset) + 2 > - tap16_to_cpu(q, vnet_hdr.hdr_len)) - vnet_hdr.hdr_len = cpu_to_tap16(q, - tap16_to_cpu(q, vnet_hdr.csum_start) + - tap16_to_cpu(q, vnet_hdr.csum_offset) + 2); - err = -EINVAL; - if (tap16_to_cpu(q, vnet_hdr.hdr_len) > len) - goto err; + len -= vnet_hdr_len; } err = -EINVAL; @@ -666,12 +606,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { struct iov_iter i; - copylen = vnet_hdr.hdr_len ? - tap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN; - if (copylen > good_linear) - copylen = good_linear; - else if (copylen < ETH_HLEN) - copylen = ETH_HLEN; + copylen = clamp(hdr_len ?: GOODCOPY_LEN, ETH_HLEN, good_linear); linear = copylen; i = *from; iov_iter_advance(&i, copylen); @@ -681,11 +616,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (!zerocopy) { copylen = len; - linear = tap16_to_cpu(q, vnet_hdr.hdr_len); - if (linear > good_linear) - linear = good_linear; - else if (linear < ETH_HLEN) - linear = ETH_HLEN; + linear = clamp(hdr_len, ETH_HLEN, good_linear); } skb = tap_alloc_skb(&q->sk, TAP_RESERVE, copylen, @@ -698,52 +629,54 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, else err = skb_copy_datagram_from_iter(skb, 0, from, len); - if (err) + if (err) { + drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; goto err_kfree; + } skb_set_network_header(skb, ETH_HLEN); skb_reset_mac_header(skb); skb->protocol = eth_hdr(skb)->h_proto; + rcu_read_lock(); + tap = rcu_dereference(q->tap); + if (!tap) { + kfree_skb(skb); + rcu_read_unlock(); + return total_len; + } + skb->dev = tap->dev; + if (vnet_hdr_len) { - err = virtio_net_hdr_to_skb(skb, &vnet_hdr, - tap_is_little_endian(q)); - if (err) + err = tun_vnet_hdr_to_skb(q->flags, skb, &vnet_hdr); + if (err) { + rcu_read_unlock(); + drop_reason = SKB_DROP_REASON_DEV_HDR; goto err_kfree; + } } - skb_probe_transport_header(skb, ETH_HLEN); + skb_probe_transport_header(skb); /* Move network header to the right position for VLAN tagged packets */ - if ((skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) && - __vlan_get_protocol(skb, skb->protocol, &depth) != 0) + if (eth_type_vlan(skb->protocol) && + vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) skb_set_network_header(skb, depth); - rcu_read_lock(); - tap = rcu_dereference(q->tap); /* copy skb_ubuf_info for callback when skb has no error */ if (zerocopy) { - skb_shinfo(skb)->destructor_arg = msg_control; - skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; - skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; + skb_zcopy_init(skb, msg_control); } else if (msg_control) { struct ubuf_info *uarg = msg_control; - uarg->callback(uarg, false); + uarg->ops->complete(NULL, uarg, false); } - if (tap) { - skb->dev = tap->dev; - dev_queue_xmit(skb); - } else { - kfree_skb(skb); - } + dev_queue_xmit(skb); rcu_read_unlock(); - return total_len; err_kfree: - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); err: rcu_read_lock(); @@ -759,8 +692,12 @@ static ssize_t tap_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct tap_queue *q = file->private_data; + int noblock = 0; + + if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) + noblock = 1; - return tap_get_user(q, NULL, from, file->f_flags & O_NONBLOCK); + return tap_get_user(q, NULL, from, noblock); } /* Put packet to the user space buffer */ @@ -774,23 +711,17 @@ static ssize_t tap_put_user(struct tap_queue *q, int total; if (q->flags & IFF_VNET_HDR) { - int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; struct virtio_net_hdr vnet_hdr; vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); - if (iov_iter_count(iter) < vnet_hdr_len) - return -EINVAL; - - if (virtio_net_hdr_from_skb(skb, &vnet_hdr, - tap_is_little_endian(q), true, - vlan_hlen)) - BUG(); - if (copy_to_iter(&vnet_hdr, sizeof(vnet_hdr), iter) != - sizeof(vnet_hdr)) - return -EFAULT; + ret = tun_vnet_hdr_from_skb(q->flags, NULL, skb, &vnet_hdr); + if (ret) + return ret; - iov_iter_advance(iter, vnet_hdr_len - sizeof(vnet_hdr)); + ret = tun_vnet_hdr_put(vnet_hdr_len, iter, &vnet_hdr); + if (ret) + return ret; } total = vnet_hdr_len; total += skb->len; @@ -876,8 +807,12 @@ static ssize_t tap_read_iter(struct kiocb *iocb, struct iov_iter *to) struct file *file = iocb->ki_filp; struct tap_queue *q = file->private_data; ssize_t len = iov_iter_count(to), ret; + int noblock = 0; - ret = tap_do_read(q, to, file->f_flags & O_NONBLOCK, NULL); + if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) + noblock = 1; + + ret = tap_do_read(q, to, noblock, NULL); ret = min_t(ssize_t, ret, len); if (ret > 0) iocb->ki_pos = ret; @@ -945,6 +880,10 @@ static int set_offload(struct tap_queue *q, unsigned long arg) if (arg & TUN_F_TSO6) feature_mask |= NETIF_F_TSO6; } + + /* TODO: for now USO4 and USO6 should work simultaneously */ + if ((arg & (TUN_F_USO4 | TUN_F_USO6)) == (TUN_F_USO4 | TUN_F_USO6)) + features |= NETIF_F_GSO_UDP_L4; } /* tun/tap driver inverts the usage for TSO offloads, where @@ -955,7 +894,8 @@ static int set_offload(struct tap_queue *q, unsigned long arg) * When user space turns off TSO, we turn off GSO/LRO so that * user-space will not receive TSO frames. */ - if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6)) + if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6) || + (feature_mask & (TUN_F_USO4 | TUN_F_USO6)) == (TUN_F_USO4 | TUN_F_USO6)) features |= RX_OFFLOADS; else features &= ~RX_OFFLOADS; @@ -983,7 +923,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd, unsigned int __user *up = argp; unsigned short u; int __user *sp = argp; - struct sockaddr sa; + struct sockaddr_storage ss; int s; int ret; @@ -1040,46 +980,11 @@ static long tap_ioctl(struct file *file, unsigned int cmd, q->sk.sk_sndbuf = s; return 0; - case TUNGETVNETHDRSZ: - s = q->vnet_hdr_sz; - if (put_user(s, sp)) - return -EFAULT; - return 0; - - case TUNSETVNETHDRSZ: - if (get_user(s, sp)) - return -EFAULT; - if (s < (int)sizeof(struct virtio_net_hdr)) - return -EINVAL; - - q->vnet_hdr_sz = s; - return 0; - - case TUNGETVNETLE: - s = !!(q->flags & TAP_VNET_LE); - if (put_user(s, sp)) - return -EFAULT; - return 0; - - case TUNSETVNETLE: - if (get_user(s, sp)) - return -EFAULT; - if (s) - q->flags |= TAP_VNET_LE; - else - q->flags &= ~TAP_VNET_LE; - return 0; - - case TUNGETVNETBE: - return tap_get_vnet_be(q, sp); - - case TUNSETVNETBE: - return tap_set_vnet_be(q, sp); - case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | - TUN_F_TSO_ECN | TUN_F_UFO)) + TUN_F_TSO_ECN | TUN_F_UFO | + TUN_F_USO4 | TUN_F_USO6)) return -EINVAL; rtnl_lock(); @@ -1095,17 +1000,17 @@ static long tap_ioctl(struct file *file, unsigned int cmd, return -ENOLINK; } ret = 0; - u = tap->dev->type; + netif_get_mac_address((struct sockaddr *)&ss, dev_net(tap->dev), + tap->dev->name); if (copy_to_user(&ifr->ifr_name, tap->dev->name, IFNAMSIZ) || - copy_to_user(&ifr->ifr_hwaddr.sa_data, tap->dev->dev_addr, ETH_ALEN) || - put_user(u, &ifr->ifr_hwaddr.sa_family)) + copy_to_user(&ifr->ifr_hwaddr, &ss, sizeof(ifr->ifr_hwaddr))) ret = -EFAULT; tap_put_tap_dev(tap); rtnl_unlock(); return ret; case SIOCSIFHWADDR: - if (copy_from_user(&sa, &ifr->ifr_hwaddr, sizeof(sa))) + if (copy_from_user(&ss, &ifr->ifr_hwaddr, sizeof(ifr->ifr_hwaddr))) return -EFAULT; rtnl_lock(); tap = tap_get_tap_dev(q); @@ -1113,24 +1018,19 @@ static long tap_ioctl(struct file *file, unsigned int cmd, rtnl_unlock(); return -ENOLINK; } - ret = dev_set_mac_address(tap->dev, &sa, NULL); + if (tap->dev->addr_len > sizeof(ifr->ifr_hwaddr)) + ret = -EINVAL; + else + ret = dev_set_mac_address_user(tap->dev, &ss, NULL); tap_put_tap_dev(tap); rtnl_unlock(); return ret; default: - return -EINVAL; + return tun_vnet_ioctl(&q->vnet_hdr_sz, &q->flags, cmd, sp); } } -#ifdef CONFIG_COMPAT -static long tap_compat_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - return tap_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); -} -#endif - static const struct file_operations tap_fops = { .owner = THIS_MODULE, .open = tap_open, @@ -1138,23 +1038,24 @@ static const struct file_operations tap_fops = { .read_iter = tap_read_iter, .write_iter = tap_write_iter, .poll = tap_poll, - .llseek = no_llseek, .unlocked_ioctl = tap_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = tap_compat_ioctl, -#endif + .compat_ioctl = compat_ptr_ioctl, }; static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) { - struct tun_xdp_hdr *hdr = xdp->data_hard_start; - struct virtio_net_hdr *gso = &hdr->gso; - int buflen = hdr->buflen; + struct virtio_net_hdr *gso = xdp->data_hard_start; + int buflen = xdp->frame_sz; int vnet_hdr_len = 0; struct tap_dev *tap; struct sk_buff *skb; int err, depth; + if (unlikely(xdp->data_end - xdp->data < ETH_HLEN)) { + err = -EINVAL; + goto err; + } + if (q->flags & IFF_VNET_HDR) vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); @@ -1172,22 +1073,21 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) skb->protocol = eth_hdr(skb)->h_proto; if (vnet_hdr_len) { - err = virtio_net_hdr_to_skb(skb, gso, tap_is_little_endian(q)); + err = tun_vnet_hdr_to_skb(q->flags, skb, gso); if (err) goto err_kfree; } /* Move network header to the right position for VLAN tagged packets */ - if ((skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) && - __vlan_get_protocol(skb, skb->protocol, &depth) != 0) + if (eth_type_vlan(skb->protocol) && + vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) skb_set_network_header(skb, depth); rcu_read_lock(); tap = rcu_dereference(q->tap); if (tap) { skb->dev = tap->dev; - skb_probe_transport_header(skb, ETH_HLEN); + skb_probe_transport_header(skb); dev_queue_xmit(skb); } else { kfree_skb(skb); @@ -1200,7 +1100,7 @@ err_kfree: kfree_skb(skb); err: rcu_read_lock(); - tap = rcu_dereference(q->tap); + tap = rcu_dereference(q->tap); if (tap && tap->count_tx_dropped) tap->count_tx_dropped(tap); rcu_read_unlock(); @@ -1215,7 +1115,8 @@ static int tap_sendmsg(struct socket *sock, struct msghdr *m, struct xdp_buff *xdp; int i; - if (ctl && (ctl->type == TUN_MSG_PTR)) { + if (m->msg_controllen == sizeof(struct tun_msg_ctl) && + ctl && ctl->type == TUN_MSG_PTR) { for (i = 0; i < ctl->num; i++) { xdp = &((struct xdp_buff *)ctl->ptr)[i]; tap_get_user_xdp(q, xdp); @@ -1303,9 +1204,9 @@ int tap_queue_resize(struct tap_dev *tap) list_for_each_entry(q, &tap->queue_list, next) rings[i++] = &q->ring; - ret = ptr_ring_resize_multiple(rings, n, - dev->tx_queue_len, GFP_KERNEL, - __skb_array_destroy_skb); + ret = ptr_ring_resize_multiple_bh(rings, n, + dev->tx_queue_len, GFP_KERNEL, + __skb_array_destroy_skb); kfree(rings); return ret; @@ -1377,6 +1278,8 @@ void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) } EXPORT_SYMBOL_GPL(tap_destroy_cdev); +MODULE_DESCRIPTION("Common library for drivers implementing the TAP interface"); MODULE_AUTHOR("Arnd Bergmann <arnd@arndb.de>"); MODULE_AUTHOR("Sainath Grandhi <sainath.grandhi@intel.com>"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS("NETDEV_INTERNAL"); |
