summaryrefslogtreecommitdiff
path: root/drivers/net/tun.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r--drivers/net/tun.c655
1 files changed, 348 insertions, 307 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index fed85447701a..8192740357a0 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -54,6 +54,7 @@
#include <linux/if_tun.h>
#include <linux/if_vlan.h>
#include <linux/crc32.h>
+#include <linux/math.h>
#include <linux/nsproxy.h>
#include <linux/virtio_net.h>
#include <linux/rcupdate.h>
@@ -70,17 +71,20 @@
#include <linux/bpf_trace.h>
#include <linux/mutex.h>
#include <linux/ieee802154.h>
-#include <linux/if_ltalk.h>
+#include <uapi/linux/if_ltalk.h>
#include <uapi/linux/if_fddi.h>
#include <uapi/linux/if_hippi.h>
#include <uapi/linux/if_fc.h>
#include <net/ax25.h>
#include <net/rose.h>
#include <net/6lowpan.h>
+#include <net/rps.h>
#include <linux/uaccess.h>
#include <linux/proc_fs.h>
+#include "tun_vnet.h"
+
static void tun_default_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *cmd);
@@ -92,9 +96,6 @@ static void tun_default_link_ksettings(struct net_device *dev,
* overload it to mean fasync when stored there.
*/
#define TUN_FASYNC IFF_ATTACH_QUEUE
-/* High bits in flags field are unused. */
-#define TUN_VNET_LE 0x80000000
-#define TUN_VNET_BE 0x40000000
#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS)
@@ -185,7 +186,8 @@ struct tun_struct {
struct net_device *dev;
netdev_features_t set_features;
#define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
- NETIF_F_TSO6)
+ NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4 | \
+ NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM)
int align;
int vnet_hdr_sz;
@@ -268,12 +270,17 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
tfile->napi_enabled = napi_en;
tfile->napi_frags_enabled = napi_en && napi_frags;
if (napi_en) {
- netif_tx_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
- NAPI_POLL_WEIGHT);
+ netif_napi_add_tx(tun->dev, &tfile->napi, tun_napi_poll);
napi_enable(&tfile->napi);
}
}
+static void tun_napi_enable(struct tun_file *tfile)
+{
+ if (tfile->napi_enabled)
+ napi_enable(&tfile->napi);
+}
+
static void tun_napi_disable(struct tun_file *tfile)
{
if (tfile->napi_enabled)
@@ -291,70 +298,6 @@ static bool tun_napi_frags_enabled(const struct tun_file *tfile)
return tfile->napi_frags_enabled;
}
-#ifdef CONFIG_TUN_VNET_CROSS_LE
-static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
-{
- return tun->flags & TUN_VNET_BE ? false :
- virtio_legacy_is_little_endian();
-}
-
-static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
-{
- int be = !!(tun->flags & TUN_VNET_BE);
-
- if (put_user(be, argp))
- return -EFAULT;
-
- return 0;
-}
-
-static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
-{
- int be;
-
- if (get_user(be, argp))
- return -EFAULT;
-
- if (be)
- tun->flags |= TUN_VNET_BE;
- else
- tun->flags &= ~TUN_VNET_BE;
-
- return 0;
-}
-#else
-static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
-{
- return virtio_legacy_is_little_endian();
-}
-
-static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
-{
- return -EINVAL;
-}
-
-static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
-{
- return -EINVAL;
-}
-#endif /* CONFIG_TUN_VNET_CROSS_LE */
-
-static inline bool tun_is_little_endian(struct tun_struct *tun)
-{
- return tun->flags & TUN_VNET_LE ||
- tun_legacy_is_little_endian(tun);
-}
-
-static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
-{
- return __virtio16_to_cpu(tun_is_little_endian(tun), val);
-}
-
-static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val)
-{
- return __cpu_to_virtio16(tun_is_little_endian(tun), val);
-}
-
static inline u32 tun_hashfn(u32 rxhash)
{
return rxhash & TUN_MASK_FLOW_ENTRIES;
@@ -435,7 +378,7 @@ static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index)
static void tun_flow_cleanup(struct timer_list *t)
{
- struct tun_struct *tun = from_timer(tun, t, flow_gc_timer);
+ struct tun_struct *tun = timer_container_of(tun, t, flow_gc_timer);
unsigned long delay = tun->ageing_time;
unsigned long next_timer = jiffies + delay;
unsigned long count = 0;
@@ -518,8 +461,7 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
{
struct tun_flow_entry *e;
- u32 txq = 0;
- u32 numqueues = 0;
+ u32 txq, numqueues;
numqueues = READ_ONCE(tun->numqueues);
@@ -529,8 +471,7 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
tun_flow_save_rps_rxhash(e, txq);
txq = e->queue_index;
} else {
- /* use multiply and shift instead of expensive divide */
- txq = ((u64)txq * numqueues) >> 32;
+ txq = reciprocal_scale(txq, numqueues);
}
return txq;
@@ -575,7 +516,7 @@ static inline bool tun_not_capable(struct tun_struct *tun)
struct net *net = dev_net(tun->dev);
return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
- (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
+ (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
!ns_capable(net->user_ns, CAP_NET_ADMIN);
}
@@ -635,7 +576,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
tun = rtnl_dereference(tfile->tun);
if (tun && clean) {
- tun_napi_disable(tfile);
+ if (!tfile->detached)
+ tun_napi_disable(tfile);
tun_napi_del(tfile);
}
@@ -647,6 +589,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
tun->tfiles[tun->numqueues - 1]);
ntfile = rtnl_dereference(tun->tfiles[index]);
ntfile->queue_index = index;
+ ntfile->xdp_rxq.queue_index = index;
rcu_assign_pointer(tun->tfiles[tun->numqueues - 1],
NULL);
@@ -654,8 +597,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
if (clean) {
RCU_INIT_POINTER(tfile->tun, NULL);
sock_put(&tfile->sk);
- } else
+ } else {
tun_disable_queue(tun, tfile);
+ tun_napi_disable(tfile);
+ }
synchronize_net();
tun_flow_delete_by_queue(tun, tun->numqueues + 1);
@@ -678,7 +623,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
if (tun)
xdp_rxq_info_unreg(&tfile->xdp_rxq);
ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
- sock_put(&tfile->sk);
}
}
@@ -694,6 +638,9 @@ static void tun_detach(struct tun_file *tfile, bool clean)
if (dev)
netdev_state_change(dev);
rtnl_unlock();
+
+ if (clean)
+ sock_put(&tfile->sk);
}
static void tun_detach_all(struct net_device *dev)
@@ -728,6 +675,7 @@ static void tun_detach_all(struct net_device *dev)
sock_put(&tfile->sk);
}
list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
+ tun_napi_del(tfile);
tun_enable_queue(tfile);
tun_queue_purge(tfile);
xdp_rxq_info_unreg(&tfile->xdp_rxq);
@@ -808,6 +756,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
if (tfile->detached) {
tun_enable_queue(tfile);
+ tun_napi_enable(tfile);
} else {
sock_hold(&tfile->sk);
tun_napi_init(tun, tfile, napi, napi_frags);
@@ -965,27 +914,24 @@ static int tun_net_init(struct net_device *dev)
struct ifreq *ifr = tun->ifr;
int err;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
spin_lock_init(&tun->lock);
err = security_tun_dev_alloc_security(&tun->security);
- if (err < 0) {
- free_percpu(dev->tstats);
+ if (err < 0)
return err;
- }
tun_flow_init(tun);
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
- dev->features = dev->hw_features | NETIF_F_LLTX;
+ dev->hw_enc_features = dev->hw_features;
+ dev->features = dev->hw_features;
dev->vlan_features = dev->features &
~(NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX);
+ dev->lltx = true;
tun->flags = (tun->flags & ~TUN_FEATURES) |
(ifr->ifr_flags & TUN_FEATURES);
@@ -996,7 +942,6 @@ static int tun_net_init(struct net_device *dev)
if (err < 0) {
tun_flow_uninit(tun);
security_tun_dev_free_security(tun->security);
- free_percpu(dev->tstats);
return err;
}
return 0;
@@ -1057,6 +1002,7 @@ static unsigned int run_ebpf_filter(struct tun_struct *tun,
/* Net device start xmit */
static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct tun_struct *tun = netdev_priv(dev);
int txq = skb->queue_mapping;
struct netdev_queue *queue;
@@ -1067,8 +1013,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
tfile = rcu_dereference(tun->tfiles[txq]);
/* Drop packet if interface is not attached */
- if (!tfile)
+ if (!tfile) {
+ drop_reason = SKB_DROP_REASON_DEV_READY;
goto drop;
+ }
if (!rcu_dereference(tun->steering_prog))
tun_automq_xmit(tun, skb);
@@ -1078,19 +1026,30 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
/* Drop if the filter does not like it.
* This is a noop if the filter is disabled.
* Filter can be enabled only for the TAP devices. */
- if (!check_filter(&tun->txflt, skb))
+ if (!check_filter(&tun->txflt, skb)) {
+ drop_reason = SKB_DROP_REASON_TAP_TXFILTER;
goto drop;
+ }
if (tfile->socket.sk->sk_filter &&
- sk_filter(tfile->socket.sk, skb))
+ sk_filter_reason(tfile->socket.sk, skb, &drop_reason))
goto drop;
len = run_ebpf_filter(tun, skb, len);
- if (len == 0 || pskb_trim(skb, len))
+ if (len == 0) {
+ drop_reason = SKB_DROP_REASON_TAP_FILTER;
goto drop;
+ }
- if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
+ if (pskb_trim(skb, len)) {
+ drop_reason = SKB_DROP_REASON_NOMEM;
goto drop;
+ }
+
+ if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) {
+ drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT;
+ goto drop;
+ }
skb_tx_timestamp(skb);
@@ -1101,12 +1060,14 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
nf_reset_ct(skb);
- if (ptr_ring_produce(&tfile->tx_ring, skb))
+ if (ptr_ring_produce(&tfile->tx_ring, skb)) {
+ drop_reason = SKB_DROP_REASON_FULL_RING;
goto drop;
+ }
- /* NETIF_F_LLTX requires to do our own update of trans_start */
+ /* dev->lltx requires to do our own update of trans_start */
queue = netdev_get_tx_queue(dev, txq);
- queue->trans_start = jiffies;
+ txq_trans_cond_update(queue);
/* Notify and wake up reader process */
if (tfile->flags & TUN_FASYNC)
@@ -1117,9 +1078,9 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
drop:
- atomic_long_inc(&dev->tx_dropped);
+ dev_core_stats_tx_dropped_inc(dev);
skb_tx_error(skb);
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
rcu_read_unlock();
return NET_XMIT_DROP;
}
@@ -1273,7 +1234,7 @@ resample:
void *frame = tun_xdp_to_ptr(xdp);
if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
- atomic_long_inc(&dev->tx_dropped);
+ dev_core_stats_tx_dropped_inc(dev);
break;
}
nxmit++;
@@ -1314,7 +1275,6 @@ static const struct net_device_ops tap_netdev_ops = {
.ndo_select_queue = tun_select_queue,
.ndo_features_check = passthru_features_check,
.ndo_set_rx_headroom = tun_set_headroom,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_bpf = tun_xdp,
.ndo_xdp_xmit = tun_xdp_xmit,
.ndo_change_carrier = tun_net_change_carrier,
@@ -1335,7 +1295,7 @@ static void tun_flow_init(struct tun_struct *tun)
static void tun_flow_uninit(struct tun_struct *tun)
{
- del_timer_sync(&tun->flow_gc_timer);
+ timer_delete_sync(&tun->flow_gc_timer);
tun_flow_flush(tun);
}
@@ -1371,6 +1331,11 @@ static void tun_net_initialize(struct net_device *dev)
eth_hw_addr_random(dev);
+ /* Currently tun does not support XDP, only tap does. */
+ dev->xdp_features = NETDEV_XDP_ACT_BASIC |
+ NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_NDO_XMIT;
+
break;
}
@@ -1431,7 +1396,8 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
int err;
int i;
- if (it->nr_segs > MAX_SKB_FRAGS + 1)
+ if (it->nr_segs > MAX_SKB_FRAGS + 1 ||
+ len > (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN))
return ERR_PTR(-EMSGSIZE);
local_bh_disable();
@@ -1450,7 +1416,8 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
skb->truesize += skb->data_len;
for (i = 1; i < it->nr_segs; i++) {
- size_t fragsz = it->iov[i].iov_len;
+ const struct iovec *iov = iter_iov(it) + i;
+ size_t fragsz = iov->iov_len;
struct page *page;
void *frag;
@@ -1486,11 +1453,13 @@ static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
int err;
/* Under a page? Don't bother with paged skb. */
- if (prepad + len < PAGE_SIZE || !linear)
+ if (prepad + len < PAGE_SIZE)
linear = len;
+ if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+ linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER);
skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
- &err, 0);
+ &err, PAGE_ALLOC_COSTLY_ORDER);
if (!skb)
return ERR_PTR(err);
@@ -1557,7 +1526,7 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
if (zerocopy)
return false;
- if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
+ if (SKB_DATA_ALIGN(len + TUN_RX_PAD + XDP_PACKET_HEADROOM) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
return false;
@@ -1566,7 +1535,8 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
struct page_frag *alloc_frag, char *buf,
- int buflen, int len, int pad)
+ int buflen, int len, int pad,
+ int metasize)
{
struct sk_buff *skb = build_skb(buf, buflen);
@@ -1575,6 +1545,8 @@ static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
skb_reserve(skb, pad);
skb_put(skb, len);
+ if (metasize)
+ skb_metadata_set(skb, metasize);
skb_set_owner_w(skb, tfile->socket.sk);
get_page(alloc_frag->page);
@@ -1591,13 +1563,19 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog,
switch (act) {
case XDP_REDIRECT:
err = xdp_do_redirect(tun->dev, xdp, xdp_prog);
- if (err)
+ if (err) {
+ dev_core_stats_rx_dropped_inc(tun->dev);
return err;
+ }
+ dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data);
break;
case XDP_TX:
err = tun_xdp_tx(tun->dev, xdp);
- if (err < 0)
+ if (err < 0) {
+ dev_core_stats_rx_dropped_inc(tun->dev);
return err;
+ }
+ dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data);
break;
case XDP_PASS:
break;
@@ -1608,7 +1586,7 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog,
trace_xdp_exception(tun->dev, xdp_prog, act);
fallthrough;
case XDP_DROP:
- atomic_long_inc(&tun->dev->rx_dropped);
+ dev_core_stats_rx_dropped_inc(tun->dev);
break;
}
@@ -1622,11 +1600,13 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
int len, int *skb_xdp)
{
struct page_frag *alloc_frag = &current->task_frag;
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct bpf_prog *xdp_prog;
int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
char *buf;
size_t copied;
int pad = TUN_RX_PAD;
+ int metasize = 0;
int err = 0;
rcu_read_lock();
@@ -1654,20 +1634,21 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
if (hdr->gso_type || !xdp_prog) {
*skb_xdp = 1;
return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
- pad);
+ pad, metasize);
}
*skb_xdp = 0;
local_bh_disable();
rcu_read_lock();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog) {
struct xdp_buff xdp;
u32 act;
xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq);
- xdp_prepare_buff(&xdp, buf, pad, len, false);
+ xdp_prepare_buff(&xdp, buf, pad, len, true);
act = bpf_prog_run_xdp(xdp_prog, &xdp);
if (act == XDP_REDIRECT || act == XDP_TX) {
@@ -1688,13 +1669,21 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
pad = xdp.data - xdp.data_hard_start;
len = xdp.data_end - xdp.data;
+
+ /* It is known that the xdp_buff was prepared with metadata
+ * support, so the metasize will never be negative.
+ */
+ metasize = xdp.data - xdp.data_meta;
}
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
- return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad);
+ return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad,
+ metasize);
out:
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
return NULL;
@@ -1709,14 +1698,26 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
struct sk_buff *skb;
size_t total_len = iov_iter_count(from);
size_t len = total_len, align = tun->align, linear;
- struct virtio_net_hdr gso = { 0 };
+ struct virtio_net_hdr_v1_hash_tunnel hdr;
+ struct virtio_net_hdr *gso;
int good_linear;
int copylen;
+ int hdr_len = 0;
bool zerocopy = false;
int err;
u32 rxhash = 0;
int skb_xdp = 1;
bool frags = tun_napi_frags_enabled(tfile);
+ enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ netdev_features_t features = 0;
+
+ /*
+ * Keep it easy and always zero the whole buffer, even if the
+ * tunnel-related field will be touched only when the feature
+ * is enabled and the hdr size id compatible.
+ */
+ memset(&hdr, 0, sizeof(hdr));
+ gso = (struct virtio_net_hdr *)&hdr;
if (!(tun->flags & IFF_NO_PI)) {
if (len < sizeof(pi))
@@ -1730,26 +1731,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (tun->flags & IFF_VNET_HDR) {
int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
- if (len < vnet_hdr_sz)
- return -EINVAL;
- len -= vnet_hdr_sz;
-
- if (!copy_from_iter_full(&gso, sizeof(gso), from))
- return -EFAULT;
+ features = tun_vnet_hdr_guest_features(vnet_hdr_sz);
+ hdr_len = __tun_vnet_hdr_get(vnet_hdr_sz, tun->flags,
+ features, from, gso);
+ if (hdr_len < 0)
+ return hdr_len;
- if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
- tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len))
- gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2);
-
- if (tun16_to_cpu(tun, gso.hdr_len) > len)
- return -EINVAL;
- iov_iter_advance(from, vnet_hdr_sz - sizeof(gso));
+ len -= vnet_hdr_sz;
}
if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) {
align += NET_IP_ALIGN;
- if (unlikely(len < ETH_HLEN ||
- (gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN)))
+ if (unlikely(len < ETH_HLEN || (hdr_len && hdr_len < ETH_HLEN)))
return -EINVAL;
}
@@ -1762,9 +1755,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
* enough room for skb expand head in case it is used.
* The rest of the buffer is mapped from userspace.
*/
- copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN;
- if (copylen > good_linear)
- copylen = good_linear;
+ copylen = min(hdr_len ? hdr_len : GOODCOPY_LEN, good_linear);
linear = copylen;
iov_iter_advance(&i, copylen);
if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS)
@@ -1776,20 +1767,16 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
* (e.g gso or jumbo packet), we will do it at after
* skb was created with generic XDP routine.
*/
- skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp);
- if (IS_ERR(skb)) {
- atomic_long_inc(&tun->dev->rx_dropped);
- return PTR_ERR(skb);
- }
+ skb = tun_build_skb(tun, tfile, from, gso, len, &skb_xdp);
+ err = PTR_ERR_OR_ZERO(skb);
+ if (err)
+ goto drop;
if (!skb)
return total_len;
} else {
if (!zerocopy) {
copylen = len;
- if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
- linear = good_linear;
- else
- linear = tun16_to_cpu(tun, gso.hdr_len);
+ linear = min(hdr_len, good_linear);
}
if (frags) {
@@ -1801,17 +1788,16 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
*/
zerocopy = false;
} else {
+ if (!linear)
+ linear = min_t(size_t, good_linear, copylen);
+
skb = tun_alloc_skb(tfile, align, copylen, linear,
noblock);
}
- if (IS_ERR(skb)) {
- if (PTR_ERR(skb) != -EAGAIN)
- atomic_long_inc(&tun->dev->rx_dropped);
- if (frags)
- mutex_unlock(&tfile->napi_mutex);
- return PTR_ERR(skb);
- }
+ err = PTR_ERR_OR_ZERO(skb);
+ if (err)
+ goto drop;
if (zerocopy)
err = zerocopy_sg_from_iter(skb, from);
@@ -1820,27 +1806,15 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (err) {
err = -EFAULT;
-drop:
- atomic_long_inc(&tun->dev->rx_dropped);
- kfree_skb(skb);
- if (frags) {
- tfile->napi.skb = NULL;
- mutex_unlock(&tfile->napi_mutex);
- }
-
- return err;
+ drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT;
+ goto drop;
}
}
- if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
+ if (tun_vnet_hdr_tnl_to_skb(tun->flags, features, skb, &hdr)) {
atomic_long_inc(&tun->rx_frame_errors);
- kfree_skb(skb);
- if (frags) {
- tfile->napi.skb = NULL;
- mutex_unlock(&tfile->napi_mutex);
- }
-
- return -EINVAL;
+ err = -EINVAL;
+ goto free_skb;
}
switch (tun->flags & TUN_TYPE_MASK) {
@@ -1856,9 +1830,8 @@ drop:
pi.proto = htons(ETH_P_IPV6);
break;
default:
- atomic_long_inc(&tun->dev->rx_dropped);
- kfree_skb(skb);
- return -EINVAL;
+ err = -EINVAL;
+ goto drop;
}
}
@@ -1869,6 +1842,7 @@ drop:
case IFF_TAP:
if (frags && !pskb_may_pull(skb, ETH_HLEN)) {
err = -ENOMEM;
+ drop_reason = SKB_DROP_REASON_HDR_TRUNC;
goto drop;
}
skb->protocol = eth_type_trans(skb, tun->dev);
@@ -1880,7 +1854,7 @@ drop:
skb_zcopy_init(skb, msg_control);
} else if (msg_control) {
struct ubuf_info *uarg = msg_control;
- uarg->callback(NULL, uarg, false);
+ uarg->ops->complete(NULL, uarg, false);
}
skb_reset_network_header(skb);
@@ -1895,16 +1869,15 @@ drop:
rcu_read_lock();
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog) {
- ret = do_xdp_generic(xdp_prog, skb);
+ ret = do_xdp_generic(xdp_prog, &skb);
if (ret != XDP_PASS) {
rcu_read_unlock();
local_bh_enable();
- if (frags) {
- tfile->napi.skb = NULL;
- mutex_unlock(&tfile->napi_mutex);
- }
- return total_len;
+ goto unlock_frags;
}
+
+ if (frags && skb != tfile->napi.skb)
+ tfile->napi.skb = skb;
}
rcu_read_unlock();
local_bh_enable();
@@ -1922,6 +1895,7 @@ drop:
if (unlikely(!(tun->dev->flags & IFF_UP))) {
err = -EIO;
rcu_read_unlock();
+ drop_reason = SKB_DROP_REASON_DEV_READY;
goto drop;
}
@@ -1934,23 +1908,39 @@ drop:
skb_headlen(skb));
if (unlikely(headlen > skb_headlen(skb))) {
- atomic_long_inc(&tun->dev->rx_dropped);
+ WARN_ON_ONCE(1);
+ err = -ENOMEM;
+ dev_core_stats_rx_dropped_inc(tun->dev);
+napi_busy:
napi_free_frags(&tfile->napi);
rcu_read_unlock();
mutex_unlock(&tfile->napi_mutex);
- WARN_ON(1);
- return -ENOMEM;
+ return err;
}
- local_bh_disable();
- napi_gro_frags(&tfile->napi);
- local_bh_enable();
+ if (likely(napi_schedule_prep(&tfile->napi))) {
+ local_bh_disable();
+ napi_gro_frags(&tfile->napi);
+ napi_complete(&tfile->napi);
+ local_bh_enable();
+ } else {
+ err = -EBUSY;
+ goto napi_busy;
+ }
mutex_unlock(&tfile->napi_mutex);
} else if (tfile->napi_enabled) {
struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
int queue_len;
spin_lock_bh(&queue->lock);
+
+ if (unlikely(tfile->detached)) {
+ spin_unlock_bh(&queue->lock);
+ rcu_read_unlock();
+ err = -EBUSY;
+ goto free_skb;
+ }
+
__skb_queue_tail(queue, skb);
queue_len = skb_queue_len(queue);
spin_unlock(&queue->lock);
@@ -1962,7 +1952,7 @@ drop:
} else if (!IS_ENABLED(CONFIG_4KSTACKS)) {
tun_rx_batched(tun, tfile, skb, more);
} else {
- netif_rx_ni(skb);
+ netif_rx(skb);
}
rcu_read_unlock();
@@ -1974,6 +1964,22 @@ drop:
tun_flow_update(tun, rxhash, tfile);
return total_len;
+
+drop:
+ if (err != -EAGAIN)
+ dev_core_stats_rx_dropped_inc(tun->dev);
+
+free_skb:
+ if (!IS_ERR_OR_NULL(skb))
+ kfree_skb_reason(skb, drop_reason);
+
+unlock_frags:
+ if (frags) {
+ tfile->napi.skb = NULL;
+ mutex_unlock(&tfile->napi_mutex);
+ }
+
+ return err ?: total_len;
}
static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
@@ -2003,18 +2009,15 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun,
{
int vnet_hdr_sz = 0;
size_t size = xdp_frame->len;
- size_t ret;
+ ssize_t ret;
if (tun->flags & IFF_VNET_HDR) {
struct virtio_net_hdr gso = { 0 };
vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
- if (unlikely(iov_iter_count(iter) < vnet_hdr_sz))
- return -EINVAL;
- if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) !=
- sizeof(gso)))
- return -EFAULT;
- iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
+ ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso);
+ if (ret)
+ return ret;
}
ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz;
@@ -2037,6 +2040,7 @@ static ssize_t tun_put_user(struct tun_struct *tun,
int vlan_offset = 0;
int vlan_hlen = 0;
int vnet_hdr_sz = 0;
+ int ret;
if (skb_vlan_tag_present(skb))
vlan_hlen = VLAN_HLEN;
@@ -2061,31 +2065,23 @@ static ssize_t tun_put_user(struct tun_struct *tun,
}
if (vnet_hdr_sz) {
- struct virtio_net_hdr gso;
-
- if (iov_iter_count(iter) < vnet_hdr_sz)
- return -EINVAL;
+ struct virtio_net_hdr_v1_hash_tunnel hdr;
+ struct virtio_net_hdr *gso;
- if (virtio_net_hdr_from_skb(skb, &gso,
- tun_is_little_endian(tun), true,
- vlan_hlen)) {
- struct skb_shared_info *sinfo = skb_shinfo(skb);
- pr_err("unexpected GSO type: "
- "0x%x, gso_size %d, hdr_len %d\n",
- sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
- tun16_to_cpu(tun, gso.hdr_len));
- print_hex_dump(KERN_ERR, "tun: ",
- DUMP_PREFIX_NONE,
- 16, 1, skb->head,
- min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
- WARN_ON_ONCE(1);
- return -EINVAL;
- }
-
- if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso))
- return -EFAULT;
+ ret = tun_vnet_hdr_tnl_from_skb(tun->flags, tun->dev, skb,
+ &hdr);
+ if (ret)
+ return ret;
- iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
+ /*
+ * Drop the packet if the configured header size is too small
+ * WRT the enabled offloads.
+ */
+ gso = (struct virtio_net_hdr *)&hdr;
+ ret = __tun_vnet_hdr_put(vnet_hdr_sz, tun->dev->features,
+ iter, gso);
+ if (ret)
+ return ret;
}
if (vlan_hlen) {
@@ -2256,7 +2252,6 @@ static void tun_free_netdev(struct net_device *dev)
BUG_ON(!(list_empty(&tun->disabled)));
- free_percpu(dev->tstats);
tun_flow_uninit(tun);
security_tun_dev_free_security(tun->security);
__tun_set_ebpf(tun, &tun->steering_prog, NULL);
@@ -2384,16 +2379,22 @@ static int tun_xdp_one(struct tun_struct *tun,
struct tun_page *tpage)
{
unsigned int datasize = xdp->data_end - xdp->data;
- struct tun_xdp_hdr *hdr = xdp->data_hard_start;
- struct virtio_net_hdr *gso = &hdr->gso;
+ struct virtio_net_hdr *gso = xdp->data_hard_start;
+ struct virtio_net_hdr_v1_hash_tunnel *tnl_hdr;
struct bpf_prog *xdp_prog;
struct sk_buff *skb = NULL;
+ struct sk_buff_head *queue;
+ netdev_features_t features;
u32 rxhash = 0, act;
- int buflen = hdr->buflen;
- int err = 0;
+ int buflen = xdp->frame_sz;
+ int metasize = 0;
+ int ret = 0;
bool skb_xdp = false;
struct page *page;
+ if (unlikely(datasize < ETH_HLEN))
+ return -EINVAL;
+
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog) {
if (gso->gso_type) {
@@ -2402,16 +2403,15 @@ static int tun_xdp_one(struct tun_struct *tun,
}
xdp_init_buff(xdp, buflen, &tfile->xdp_rxq);
- xdp_set_data_meta_invalid(xdp);
act = bpf_prog_run_xdp(xdp_prog, xdp);
- err = tun_xdp_act(tun, xdp_prog, xdp, act);
- if (err < 0) {
+ ret = tun_xdp_act(tun, xdp_prog, xdp, act);
+ if (ret < 0) {
put_page(virt_to_head_page(xdp->data));
- return err;
+ return ret;
}
- switch (err) {
+ switch (ret) {
case XDP_REDIRECT:
*flush = true;
fallthrough;
@@ -2435,17 +2435,27 @@ static int tun_xdp_one(struct tun_struct *tun,
build:
skb = build_skb(xdp->data_hard_start, buflen);
if (!skb) {
- err = -ENOMEM;
+ ret = -ENOMEM;
goto out;
}
skb_reserve(skb, xdp->data - xdp->data_hard_start);
skb_put(skb, xdp->data_end - xdp->data);
- if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
+ /* The externally provided xdp_buff may have no metadata support, which
+ * is marked by xdp->data_meta being xdp->data + 1. This will lead to a
+ * metasize of -1 and is the reason why the condition checks for > 0.
+ */
+ metasize = xdp->data - xdp->data_meta;
+ if (metasize > 0)
+ skb_metadata_set(skb, metasize);
+
+ features = tun_vnet_hdr_guest_features(READ_ONCE(tun->vnet_hdr_sz));
+ tnl_hdr = (struct virtio_net_hdr_v1_hash_tunnel *)gso;
+ if (tun_vnet_hdr_tnl_to_skb(tun->flags, features, skb, tnl_hdr)) {
atomic_long_inc(&tun->rx_frame_errors);
kfree_skb(skb);
- err = -EINVAL;
+ ret = -EINVAL;
goto out;
}
@@ -2455,16 +2465,34 @@ build:
skb_record_rx_queue(skb, tfile->queue_index);
if (skb_xdp) {
- err = do_xdp_generic(xdp_prog, skb);
- if (err != XDP_PASS)
+ ret = do_xdp_generic(xdp_prog, &skb);
+ if (ret != XDP_PASS) {
+ ret = 0;
goto out;
+ }
}
if (!rcu_dereference(tun->steering_prog) && tun->numqueues > 1 &&
!tfile->detached)
rxhash = __skb_get_hash_symmetric(skb);
- netif_receive_skb(skb);
+ if (tfile->napi_enabled) {
+ queue = &tfile->sk.sk_write_queue;
+ spin_lock(&queue->lock);
+
+ if (unlikely(tfile->detached)) {
+ spin_unlock(&queue->lock);
+ kfree_skb(skb);
+ return -EBUSY;
+ }
+
+ __skb_queue_tail(queue, skb);
+ spin_unlock(&queue->lock);
+ ret = 1;
+ } else {
+ netif_receive_skb(skb);
+ ret = 0;
+ }
/* No need to disable preemption here since this function is
* always called with bh disabled
@@ -2475,7 +2503,7 @@ build:
tun_flow_update(tun, rxhash, tfile);
out:
- return err;
+ return ret;
}
static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
@@ -2489,24 +2517,33 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (!tun)
return -EBADFD;
- if (ctl && (ctl->type == TUN_MSG_PTR)) {
+ if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
+ ctl && ctl->type == TUN_MSG_PTR) {
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct tun_page tpage;
int n = ctl->num;
- int flush = 0;
+ int flush = 0, queued = 0;
memset(&tpage, 0, sizeof(tpage));
local_bh_disable();
rcu_read_lock();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
for (i = 0; i < n; i++) {
xdp = &((struct xdp_buff *)ctl->ptr)[i];
- tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
+ ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
+ if (ret > 0)
+ queued += ret;
}
if (flush)
xdp_do_flush();
+ if (tfile->napi_enabled && queued > 0)
+ napi_schedule(&tfile->napi);
+
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
@@ -2614,7 +2651,7 @@ static ssize_t tun_flags_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct tun_struct *tun = netdev_priv(to_net_dev(dev));
- return sprintf(buf, "0x%x\n", tun_flags(tun));
+ return sysfs_emit(buf, "0x%x\n", tun_flags(tun));
}
static ssize_t owner_show(struct device *dev, struct device_attribute *attr,
@@ -2622,9 +2659,9 @@ static ssize_t owner_show(struct device *dev, struct device_attribute *attr,
{
struct tun_struct *tun = netdev_priv(to_net_dev(dev));
return uid_valid(tun->owner)?
- sprintf(buf, "%u\n",
- from_kuid_munged(current_user_ns(), tun->owner)):
- sprintf(buf, "-1\n");
+ sysfs_emit(buf, "%u\n",
+ from_kuid_munged(current_user_ns(), tun->owner)) :
+ sysfs_emit(buf, "-1\n");
}
static ssize_t group_show(struct device *dev, struct device_attribute *attr,
@@ -2632,9 +2669,9 @@ static ssize_t group_show(struct device *dev, struct device_attribute *attr,
{
struct tun_struct *tun = netdev_priv(to_net_dev(dev));
return gid_valid(tun->group) ?
- sprintf(buf, "%u\n",
- from_kgid_munged(current_user_ns(), tun->group)):
- sprintf(buf, "-1\n");
+ sysfs_emit(buf, "%u\n",
+ from_kgid_munged(current_user_ns(), tun->group)) :
+ sysfs_emit(buf, "-1\n");
}
static DEVICE_ATTR_RO(tun_flags);
@@ -2778,7 +2815,10 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
rcu_assign_pointer(tfile->tun, tun);
}
- netif_carrier_on(tun->dev);
+ if (ifr->ifr_flags & IFF_NO_CARRIER)
+ netif_carrier_off(tun->dev);
+ else
+ netif_carrier_on(tun->dev);
/* Make sure persistent devices do not get stuck in
* xoff state.
@@ -2786,18 +2826,20 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
if (netif_running(tun->dev))
netif_tx_wake_all_queues(tun->dev);
- strcpy(ifr->ifr_name, tun->dev->name);
+ strscpy(ifr->ifr_name, tun->dev->name);
return 0;
}
static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr)
{
- strcpy(ifr->ifr_name, tun->dev->name);
+ strscpy(ifr->ifr_name, tun->dev->name);
ifr->ifr_flags = tun_flags(tun);
}
+#define PLAIN_GSO (NETIF_F_GSO_UDP_L4 | NETIF_F_TSO | NETIF_F_TSO6)
+
/* This is like a cut-down ethtool ops, except done via tun fd so no
* privs required. */
static int set_offload(struct tun_struct *tun, unsigned long arg)
@@ -2821,6 +2863,24 @@ static int set_offload(struct tun_struct *tun, unsigned long arg)
}
arg &= ~TUN_F_UFO;
+
+ /* TODO: for now USO4 and USO6 should work simultaneously */
+ if (arg & TUN_F_USO4 && arg & TUN_F_USO6) {
+ features |= NETIF_F_GSO_UDP_L4;
+ arg &= ~(TUN_F_USO4 | TUN_F_USO6);
+ }
+
+ /*
+ * Tunnel offload is allowed only if some plain offload is
+ * available, too.
+ */
+ if (features & PLAIN_GSO && arg & TUN_F_UDP_TUNNEL_GSO) {
+ features |= NETIF_F_GSO_UDP_TUNNEL;
+ if (arg & TUN_F_UDP_TUNNEL_GSO_CSUM)
+ features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ arg &= ~(TUN_F_UDP_TUNNEL_GSO |
+ TUN_F_UDP_TUNNEL_GSO_CSUM);
+ }
}
/* This gives the user a way to test for new features in future by
@@ -2984,13 +3044,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
struct net *net = sock_net(&tfile->sk);
struct tun_struct *tun;
void __user* argp = (void __user*)arg;
- unsigned int ifindex, carrier;
+ unsigned int carrier;
struct ifreq ifr;
kuid_t owner;
kgid_t group;
+ int ifindex;
int sndbuf;
- int vnet_hdr_sz;
- int le;
int ret;
bool do_notify = false;
@@ -3006,8 +3065,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
* This is needed because we never checked for invalid flags on
* TUNSETIFF.
*/
- return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES,
- (unsigned int __user*)argp);
+ return put_user(IFF_TUN | IFF_TAP | IFF_NO_CARRIER |
+ TUN_FEATURES, (unsigned int __user*)argp);
} else if (cmd == TUNSETQUEUE) {
return tun_set_queue(file, &ifr);
} else if (cmd == SIOCGSKNS) {
@@ -3043,7 +3102,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
ret = -EFAULT;
if (copy_from_user(&ifindex, argp, sizeof(ifindex)))
goto unlock;
-
+ ret = -EINVAL;
+ if (ifindex < 0)
+ goto unlock;
ret = 0;
tfile->ifindex = ifindex;
goto unlock;
@@ -3165,14 +3226,20 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
case SIOCGIFHWADDR:
/* Get hw address */
- dev_get_mac_address(&ifr.ifr_hwaddr, net, tun->dev->name);
+ netif_get_mac_address(&ifr.ifr_hwaddr, net, tun->dev->name);
if (copy_to_user(argp, &ifr, ifreq_len))
ret = -EFAULT;
break;
case SIOCSIFHWADDR:
/* Set hw address */
- ret = dev_set_mac_address_user(tun->dev, &ifr.ifr_hwaddr, NULL);
+ if (tun->dev->addr_len > sizeof(ifr.ifr_hwaddr)) {
+ ret = -EINVAL;
+ break;
+ }
+ ret = dev_set_mac_address_user(tun->dev,
+ (struct sockaddr_storage *)&ifr.ifr_hwaddr,
+ NULL);
break;
case TUNGETSNDBUF:
@@ -3195,50 +3262,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
tun_set_sndbuf(tun);
break;
- case TUNGETVNETHDRSZ:
- vnet_hdr_sz = tun->vnet_hdr_sz;
- if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz)))
- ret = -EFAULT;
- break;
-
- case TUNSETVNETHDRSZ:
- if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) {
- ret = -EFAULT;
- break;
- }
- if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) {
- ret = -EINVAL;
- break;
- }
-
- tun->vnet_hdr_sz = vnet_hdr_sz;
- break;
-
- case TUNGETVNETLE:
- le = !!(tun->flags & TUN_VNET_LE);
- if (put_user(le, (int __user *)argp))
- ret = -EFAULT;
- break;
-
- case TUNSETVNETLE:
- if (get_user(le, (int __user *)argp)) {
- ret = -EFAULT;
- break;
- }
- if (le)
- tun->flags |= TUN_VNET_LE;
- else
- tun->flags &= ~TUN_VNET_LE;
- break;
-
- case TUNGETVNETBE:
- ret = tun_get_vnet_be(tun, argp);
- break;
-
- case TUNSETVNETBE:
- ret = tun_set_vnet_be(tun, argp);
- break;
-
case TUNATTACHFILTER:
/* Can be set only for TAPs */
ret = -EINVAL;
@@ -3294,7 +3317,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
break;
default:
- ret = -EINVAL;
+ ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp);
break;
}
@@ -3348,6 +3371,12 @@ static int tun_chr_fasync(int fd, struct file *file, int on)
struct tun_file *tfile = file->private_data;
int ret;
+ if (on) {
+ ret = file_f_owner_allocate(file);
+ if (ret)
+ goto out;
+ }
+
if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0)
goto out;
@@ -3385,7 +3414,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
tfile->socket.file = file;
tfile->socket.ops = &tun_socket_ops;
- sock_init_data(&tfile->socket, &tfile->sk);
+ sock_init_data_uid(&tfile->socket, &tfile->sk, current_fsuid());
tfile->sk.sk_write_space = tun_sock_write_space;
tfile->sk.sk_sndbuf = INT_MAX;
@@ -3395,6 +3424,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
+ /* tun groks IOCB_NOWAIT just fine, mark it as such */
+ file->f_mode |= FMODE_NOWAIT;
return 0;
}
@@ -3431,7 +3462,6 @@ static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file)
static const struct file_operations tun_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read_iter = tun_chr_read_iter,
.write_iter = tun_chr_write_iter,
.poll = tun_chr_poll,
@@ -3461,7 +3491,7 @@ static void tun_default_link_ksettings(struct net_device *dev,
{
ethtool_link_ksettings_zero_link_mode(cmd, supported);
ethtool_link_ksettings_zero_link_mode(cmd, advertising);
- cmd->base.speed = SPEED_10;
+ cmd->base.speed = SPEED_10000;
cmd->base.duplex = DUPLEX_FULL;
cmd->base.port = PORT_TP;
cmd->base.phy_address = 0;
@@ -3490,15 +3520,15 @@ static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info
{
struct tun_struct *tun = netdev_priv(dev);
- strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
- strlcpy(info->version, DRV_VERSION, sizeof(info->version));
+ strscpy(info->driver, DRV_NAME, sizeof(info->driver));
+ strscpy(info->version, DRV_VERSION, sizeof(info->version));
switch (tun->flags & TUN_TYPE_MASK) {
case IFF_TUN:
- strlcpy(info->bus_info, "tun", sizeof(info->bus_info));
+ strscpy(info->bus_info, "tun", sizeof(info->bus_info));
break;
case IFF_TAP:
- strlcpy(info->bus_info, "tap", sizeof(info->bus_info));
+ strscpy(info->bus_info, "tap", sizeof(info->bus_info));
break;
}
}
@@ -3544,12 +3574,22 @@ static int tun_set_coalesce(struct net_device *dev,
return 0;
}
+static void tun_get_channels(struct net_device *dev,
+ struct ethtool_channels *channels)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+
+ channels->combined_count = tun->numqueues;
+ channels->max_combined = tun->flags & IFF_MULTI_QUEUE ? MAX_TAP_QUEUES : 1;
+}
+
static const struct ethtool_ops tun_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_RX_MAX_FRAMES,
.get_drvinfo = tun_get_drvinfo,
.get_msglevel = tun_get_msglevel,
.set_msglevel = tun_set_msglevel,
.get_link = ethtool_op_get_link,
+ .get_channels = tun_get_channels,
.get_ts_info = ethtool_op_get_ts_info,
.get_coalesce = tun_get_coalesce,
.set_coalesce = tun_set_coalesce,
@@ -3576,9 +3616,9 @@ static int tun_queue_resize(struct tun_struct *tun)
list_for_each_entry(tfile, &tun->disabled, next)
rings[i++] = &tfile->tx_ring;
- ret = ptr_ring_resize_multiple(rings, n,
- dev->tx_queue_len, GFP_KERNEL,
- tun_ptr_free);
+ ret = ptr_ring_resize_multiple_bh(rings, n,
+ dev->tx_queue_len, GFP_KERNEL,
+ tun_ptr_free);
kfree(rings);
return ret;
@@ -3652,7 +3692,7 @@ err_linkops:
return ret;
}
-static void tun_cleanup(void)
+static void __exit tun_cleanup(void)
{
misc_deregister(&tun_miscdev);
rtnl_link_unregister(&tun_link_ops);
@@ -3695,3 +3735,4 @@ MODULE_AUTHOR(DRV_COPYRIGHT);
MODULE_LICENSE("GPL");
MODULE_ALIAS_MISCDEV(TUN_MINOR);
MODULE_ALIAS("devname:net/tun");
+MODULE_IMPORT_NS("NETDEV_INTERNAL");