Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r--  drivers/net/tun.c  267
1 file changed, 81 insertions(+), 186 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 92da8c03d960..1207196cbbed 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -71,7 +71,7 @@
#include <linux/bpf_trace.h>
#include <linux/mutex.h>
#include <linux/ieee802154.h>
-#include <linux/if_ltalk.h>
+#include <uapi/linux/if_ltalk.h>
#include <uapi/linux/if_fddi.h>
#include <uapi/linux/if_hippi.h>
#include <uapi/linux/if_fc.h>
@@ -83,6 +83,8 @@
#include <linux/uaccess.h>
#include <linux/proc_fs.h>
+#include "tun_vnet.h"
+
static void tun_default_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *cmd);
@@ -94,9 +96,6 @@ static void tun_default_link_ksettings(struct net_device *dev,
* overload it to mean fasync when stored there.
*/
#define TUN_FASYNC IFF_ATTACH_QUEUE
-/* High bits in flags field are unused. */
-#define TUN_VNET_LE 0x80000000
-#define TUN_VNET_BE 0x40000000
#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS)
@@ -298,70 +297,6 @@ static bool tun_napi_frags_enabled(const struct tun_file *tfile)
return tfile->napi_frags_enabled;
}
-#ifdef CONFIG_TUN_VNET_CROSS_LE
-static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
-{
- return tun->flags & TUN_VNET_BE ? false :
- virtio_legacy_is_little_endian();
-}
-
-static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
-{
- int be = !!(tun->flags & TUN_VNET_BE);
-
- if (put_user(be, argp))
- return -EFAULT;
-
- return 0;
-}
-
-static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
-{
- int be;
-
- if (get_user(be, argp))
- return -EFAULT;
-
- if (be)
- tun->flags |= TUN_VNET_BE;
- else
- tun->flags &= ~TUN_VNET_BE;
-
- return 0;
-}
-#else
-static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
-{
- return virtio_legacy_is_little_endian();
-}
-
-static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
-{
- return -EINVAL;
-}
-
-static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
-{
- return -EINVAL;
-}
-#endif /* CONFIG_TUN_VNET_CROSS_LE */
-
-static inline bool tun_is_little_endian(struct tun_struct *tun)
-{
- return tun->flags & TUN_VNET_LE ||
- tun_legacy_is_little_endian(tun);
-}
-
-static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
-{
- return __virtio16_to_cpu(tun_is_little_endian(tun), val);
-}
-
-static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val)
-{
- return __cpu_to_virtio16(tun_is_little_endian(tun), val);
-}
-
static inline u32 tun_hashfn(u32 rxhash)
{
return rxhash & TUN_MASK_FLOW_ENTRIES;
@@ -580,7 +515,7 @@ static inline bool tun_not_capable(struct tun_struct *tun)
struct net *net = dev_net(tun->dev);
return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
- (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
+ (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
!ns_capable(net->user_ns, CAP_NET_ADMIN);
}
@@ -990,10 +925,11 @@ static int tun_net_init(struct net_device *dev)
dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
- dev->features = dev->hw_features | NETIF_F_LLTX;
+ dev->features = dev->hw_features;
dev->vlan_features = dev->features &
~(NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX);
+ dev->lltx = true;
tun->flags = (tun->flags & ~TUN_FEATURES) |
(ifr->ifr_flags & TUN_FEATURES);
@@ -1129,7 +1065,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
goto drop;
}
- /* NETIF_F_LLTX requires to do our own update of trans_start */
+ /* dev->lltx requires to do our own update of trans_start */
queue = netdev_get_tx_queue(dev, txq);
txq_trans_cond_update(queue);
@@ -1359,7 +1295,7 @@ static void tun_flow_init(struct tun_struct *tun)
static void tun_flow_uninit(struct tun_struct *tun)
{
- del_timer_sync(&tun->flow_gc_timer);
+ timer_delete_sync(&tun->flow_gc_timer);
tun_flow_flush(tun);
}
@@ -1480,7 +1416,7 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
skb->truesize += skb->data_len;
for (i = 1; i < it->nr_segs; i++) {
- const struct iovec *iov = iter_iov(it);
+ const struct iovec *iov = iter_iov(it) + i;
size_t fragsz = iov->iov_len;
struct page *page;
void *frag;
@@ -1599,7 +1535,8 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
struct page_frag *alloc_frag, char *buf,
- int buflen, int len, int pad)
+ int buflen, int len, int pad,
+ int metasize)
{
struct sk_buff *skb = build_skb(buf, buflen);
@@ -1608,6 +1545,8 @@ static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
skb_reserve(skb, pad);
skb_put(skb, len);
+ if (metasize)
+ skb_metadata_set(skb, metasize);
skb_set_owner_w(skb, tfile->socket.sk);
get_page(alloc_frag->page);
@@ -1661,11 +1600,13 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
int len, int *skb_xdp)
{
struct page_frag *alloc_frag = &current->task_frag;
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct bpf_prog *xdp_prog;
int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
char *buf;
size_t copied;
int pad = TUN_RX_PAD;
+ int metasize = 0;
int err = 0;
rcu_read_lock();
@@ -1693,20 +1634,21 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
if (hdr->gso_type || !xdp_prog) {
*skb_xdp = 1;
return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
- pad);
+ pad, metasize);
}
*skb_xdp = 0;
local_bh_disable();
rcu_read_lock();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog) {
struct xdp_buff xdp;
u32 act;
xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq);
- xdp_prepare_buff(&xdp, buf, pad, len, false);
+ xdp_prepare_buff(&xdp, buf, pad, len, true);
act = bpf_prog_run_xdp(xdp_prog, &xdp);
if (act == XDP_REDIRECT || act == XDP_TX) {
@@ -1727,13 +1669,21 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
pad = xdp.data - xdp.data_hard_start;
len = xdp.data_end - xdp.data;
+
+ /* It is known that the xdp_buff was prepared with metadata
+ * support, so the metasize will never be negative.
+ */
+ metasize = xdp.data - xdp.data_meta;
}
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
- return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad);
+ return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad,
+ metasize);
out:
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
return NULL;
@@ -1751,6 +1701,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
struct virtio_net_hdr gso = { 0 };
int good_linear;
int copylen;
+ int hdr_len = 0;
bool zerocopy = false;
int err;
u32 rxhash = 0;
@@ -1770,26 +1721,16 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (tun->flags & IFF_VNET_HDR) {
int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
- if (len < vnet_hdr_sz)
- return -EINVAL;
- len -= vnet_hdr_sz;
-
- if (!copy_from_iter_full(&gso, sizeof(gso), from))
- return -EFAULT;
-
- if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
- tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len))
- gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2);
+ hdr_len = tun_vnet_hdr_get(vnet_hdr_sz, tun->flags, from, &gso);
+ if (hdr_len < 0)
+ return hdr_len;
- if (tun16_to_cpu(tun, gso.hdr_len) > len)
- return -EINVAL;
- iov_iter_advance(from, vnet_hdr_sz - sizeof(gso));
+ len -= vnet_hdr_sz;
}
if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) {
align += NET_IP_ALIGN;
- if (unlikely(len < ETH_HLEN ||
- (gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN)))
+ if (unlikely(len < ETH_HLEN || (hdr_len && hdr_len < ETH_HLEN)))
return -EINVAL;
}
@@ -1802,9 +1743,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
* enough room for skb expand head in case it is used.
* The rest of the buffer is mapped from userspace.
*/
- copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN;
- if (copylen > good_linear)
- copylen = good_linear;
+ copylen = min(hdr_len ? hdr_len : GOODCOPY_LEN, good_linear);
linear = copylen;
iov_iter_advance(&i, copylen);
if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS)
@@ -1825,10 +1764,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
} else {
if (!zerocopy) {
copylen = len;
- if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
- linear = good_linear;
- else
- linear = tun16_to_cpu(tun, gso.hdr_len);
+ linear = min(hdr_len, good_linear);
}
if (frags) {
@@ -1863,7 +1799,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
}
}
- if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
+ if (tun_vnet_hdr_to_skb(tun->flags, skb, &gso)) {
atomic_long_inc(&tun->rx_frame_errors);
err = -EINVAL;
goto free_skb;
@@ -1906,7 +1842,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
skb_zcopy_init(skb, msg_control);
} else if (msg_control) {
struct ubuf_info *uarg = msg_control;
- uarg->callback(NULL, uarg, false);
+ uarg->ops->complete(NULL, uarg, false);
}
skb_reset_network_header(skb);
@@ -2058,18 +1994,15 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun,
{
int vnet_hdr_sz = 0;
size_t size = xdp_frame->len;
- size_t ret;
+ ssize_t ret;
if (tun->flags & IFF_VNET_HDR) {
struct virtio_net_hdr gso = { 0 };
vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
- if (unlikely(iov_iter_count(iter) < vnet_hdr_sz))
- return -EINVAL;
- if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) !=
- sizeof(gso)))
- return -EFAULT;
- iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
+ ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso);
+ if (ret)
+ return ret;
}
ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz;
@@ -2092,6 +2025,7 @@ static ssize_t tun_put_user(struct tun_struct *tun,
int vlan_offset = 0;
int vlan_hlen = 0;
int vnet_hdr_sz = 0;
+ int ret;
if (skb_vlan_tag_present(skb))
vlan_hlen = VLAN_HLEN;
@@ -2118,31 +2052,13 @@ static ssize_t tun_put_user(struct tun_struct *tun,
if (vnet_hdr_sz) {
struct virtio_net_hdr gso;
- if (iov_iter_count(iter) < vnet_hdr_sz)
- return -EINVAL;
-
- if (virtio_net_hdr_from_skb(skb, &gso,
- tun_is_little_endian(tun), true,
- vlan_hlen)) {
- struct skb_shared_info *sinfo = skb_shinfo(skb);
-
- if (net_ratelimit()) {
- netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
- sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
- tun16_to_cpu(tun, gso.hdr_len));
- print_hex_dump(KERN_ERR, "tun: ",
- DUMP_PREFIX_NONE,
- 16, 1, skb->head,
- min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
- }
- WARN_ON_ONCE(1);
- return -EINVAL;
- }
-
- if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso))
- return -EFAULT;
+ ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso);
+ if (ret)
+ return ret;
- iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
+ ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso);
+ if (ret)
+ return ret;
}
if (vlan_hlen) {
@@ -2447,10 +2363,14 @@ static int tun_xdp_one(struct tun_struct *tun,
struct sk_buff_head *queue;
u32 rxhash = 0, act;
int buflen = hdr->buflen;
+ int metasize = 0;
int ret = 0;
bool skb_xdp = false;
struct page *page;
+ if (unlikely(datasize < ETH_HLEN))
+ return -EINVAL;
+
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog) {
if (gso->gso_type) {
@@ -2459,7 +2379,6 @@ static int tun_xdp_one(struct tun_struct *tun,
}
xdp_init_buff(xdp, buflen, &tfile->xdp_rxq);
- xdp_set_data_meta_invalid(xdp);
act = bpf_prog_run_xdp(xdp_prog, xdp);
ret = tun_xdp_act(tun, xdp_prog, xdp, act);
@@ -2499,7 +2418,15 @@ build:
skb_reserve(skb, xdp->data - xdp->data_hard_start);
skb_put(skb, xdp->data_end - xdp->data);
- if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
+ /* The externally provided xdp_buff may have no metadata support, which
+ * is marked by xdp->data_meta being xdp->data + 1. This will lead to a
+ * metasize of -1 and is the reason why the condition checks for > 0.
+ */
+ metasize = xdp->data - xdp->data_meta;
+ if (metasize > 0)
+ skb_metadata_set(skb, metasize);
+
+ if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) {
atomic_long_inc(&tun->rx_frame_errors);
kfree_skb(skb);
ret = -EINVAL;
@@ -2566,6 +2493,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
ctl && ctl->type == TUN_MSG_PTR) {
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct tun_page tpage;
int n = ctl->num;
int flush = 0, queued = 0;
@@ -2574,6 +2502,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
local_bh_disable();
rcu_read_lock();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
for (i = 0; i < n; i++) {
xdp = &((struct xdp_buff *)ctl->ptr)[i];
@@ -2588,6 +2517,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (tfile->napi_enabled && queued > 0)
napi_schedule(&tfile->napi);
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
@@ -3080,8 +3010,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
kgid_t group;
int ifindex;
int sndbuf;
- int vnet_hdr_sz;
- int le;
int ret;
bool do_notify = false;
@@ -3265,7 +3193,13 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
case SIOCSIFHWADDR:
/* Set hw address */
- ret = dev_set_mac_address_user(tun->dev, &ifr.ifr_hwaddr, NULL);
+ if (tun->dev->addr_len > sizeof(ifr.ifr_hwaddr)) {
+ ret = -EINVAL;
+ break;
+ }
+ ret = dev_set_mac_address_user(tun->dev,
+ (struct sockaddr_storage *)&ifr.ifr_hwaddr,
+ NULL);
break;
case TUNGETSNDBUF:
@@ -3288,50 +3222,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
tun_set_sndbuf(tun);
break;
- case TUNGETVNETHDRSZ:
- vnet_hdr_sz = tun->vnet_hdr_sz;
- if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz)))
- ret = -EFAULT;
- break;
-
- case TUNSETVNETHDRSZ:
- if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) {
- ret = -EFAULT;
- break;
- }
- if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) {
- ret = -EINVAL;
- break;
- }
-
- tun->vnet_hdr_sz = vnet_hdr_sz;
- break;
-
- case TUNGETVNETLE:
- le = !!(tun->flags & TUN_VNET_LE);
- if (put_user(le, (int __user *)argp))
- ret = -EFAULT;
- break;
-
- case TUNSETVNETLE:
- if (get_user(le, (int __user *)argp)) {
- ret = -EFAULT;
- break;
- }
- if (le)
- tun->flags |= TUN_VNET_LE;
- else
- tun->flags &= ~TUN_VNET_LE;
- break;
-
- case TUNGETVNETBE:
- ret = tun_get_vnet_be(tun, argp);
- break;
-
- case TUNSETVNETBE:
- ret = tun_set_vnet_be(tun, argp);
- break;
-
case TUNATTACHFILTER:
/* Can be set only for TAPs */
ret = -EINVAL;
@@ -3387,7 +3277,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
break;
default:
- ret = -EINVAL;
+ ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp);
break;
}
@@ -3441,6 +3331,12 @@ static int tun_chr_fasync(int fd, struct file *file, int on)
struct tun_file *tfile = file->private_data;
int ret;
+ if (on) {
+ ret = file_f_owner_allocate(file);
+ if (ret)
+ goto out;
+ }
+
if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0)
goto out;
@@ -3526,7 +3422,6 @@ static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file)
static const struct file_operations tun_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read_iter = tun_chr_read_iter,
.write_iter = tun_chr_write_iter,
.poll = tun_chr_poll,
@@ -3681,9 +3576,9 @@ static int tun_queue_resize(struct tun_struct *tun)
list_for_each_entry(tfile, &tun->disabled, next)
rings[i++] = &tfile->tx_ring;
- ret = ptr_ring_resize_multiple(rings, n,
- dev->tx_queue_len, GFP_KERNEL,
- tun_ptr_free);
+ ret = ptr_ring_resize_multiple_bh(rings, n,
+ dev->tx_queue_len, GFP_KERNEL,
+ tun_ptr_free);
kfree(rings);
return ret;
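
The vnet header size and endianness ioctls removed from __tun_chr_ioctl() above are not dropped; they fall through the new default case into tun_vnet_ioctl(), provided by the tun_vnet.h header this diff starts including. tun_vnet.h itself is not part of this diff, so the following is only a rough sketch of what that fallback presumably covers, reconstructed from the deleted cases; the prototype, flag values and config guard are assumptions inferred from the call site and the removed code, not the actual header contents.

/*
 * Illustrative sketch only: the real helpers live in the new tun_vnet.h,
 * which this diff merely includes.  The prototype is inferred from the call
 * site above (tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp));
 * the flag values mirror the TUN_VNET_LE/TUN_VNET_BE definitions removed
 * from tun.c and are assumptions here, not taken from tun_vnet.h itself.
 */
#define TUN_VNET_LE 0x80000000
#define TUN_VNET_BE 0x40000000

static long tun_vnet_ioctl(int *vnet_hdr_sz, unsigned int *flags,
			   unsigned int cmd, int __user *argp)
{
	int s;

	switch (cmd) {
	case TUNGETVNETHDRSZ:
		s = *vnet_hdr_sz;
		return put_user(s, argp) ? -EFAULT : 0;
	case TUNSETVNETHDRSZ:
		if (get_user(s, argp))
			return -EFAULT;
		if (s < (int)sizeof(struct virtio_net_hdr))
			return -EINVAL;
		*vnet_hdr_sz = s;
		return 0;
	case TUNGETVNETLE:
		s = !!(*flags & TUN_VNET_LE);
		return put_user(s, argp) ? -EFAULT : 0;
	case TUNSETVNETLE:
		if (get_user(s, argp))
			return -EFAULT;
		if (s)
			*flags |= TUN_VNET_LE;
		else
			*flags &= ~TUN_VNET_LE;
		return 0;
#ifdef CONFIG_TUN_VNET_CROSS_LE
	case TUNGETVNETBE:
		s = !!(*flags & TUN_VNET_BE);
		return put_user(s, argp) ? -EFAULT : 0;
	case TUNSETVNETBE:
		if (get_user(s, argp))
			return -EFAULT;
		if (s)
			*flags |= TUN_VNET_BE;
		else
			*flags &= ~TUN_VNET_BE;
		return 0;
#else
	case TUNGETVNETBE:
	case TUNSETVNETBE:
		return -EINVAL;
#endif
	default:
		return -EINVAL;
	}
}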