diff options
Diffstat (limited to 'drivers/net/vrf.c')
| -rw-r--r-- | drivers/net/vrf.c | 184 |
1 files changed, 65 insertions, 119 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index e0b1ab99a359..571847a7f86d 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -26,6 +26,7 @@ #include <linux/inetdevice.h> #include <net/arp.h> +#include <net/flow.h> #include <net/ip.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> @@ -34,6 +35,7 @@ #include <net/addrconf.h> #include <net/l3mdev.h> #include <net/fib_rules.h> +#include <net/netdev_lock.h> #include <net/sch_generic.h> #include <net/netns/generic.h> #include <net/netfilter/nf_conntrack.h> @@ -121,59 +123,12 @@ struct net_vrf { int ifindex; }; -struct pcpu_dstats { - u64 tx_pkts; - u64 tx_bytes; - u64 tx_drps; - u64 rx_pkts; - u64 rx_bytes; - u64 rx_drps; - struct u64_stats_sync syncp; -}; - -static void vrf_rx_stats(struct net_device *dev, int len) -{ - struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); - - u64_stats_update_begin(&dstats->syncp); - dstats->rx_pkts++; - dstats->rx_bytes += len; - u64_stats_update_end(&dstats->syncp); -} - static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb) { vrf_dev->stats.tx_errors++; kfree_skb(skb); } -static void vrf_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_dstats *dstats; - u64 tbytes, tpkts, tdrops, rbytes, rpkts; - unsigned int start; - - dstats = per_cpu_ptr(dev->dstats, i); - do { - start = u64_stats_fetch_begin_irq(&dstats->syncp); - tbytes = dstats->tx_bytes; - tpkts = dstats->tx_pkts; - tdrops = dstats->tx_drps; - rbytes = dstats->rx_bytes; - rpkts = dstats->rx_pkts; - } while (u64_stats_fetch_retry_irq(&dstats->syncp, start)); - stats->tx_bytes += tbytes; - stats->tx_packets += tpkts; - stats->tx_dropped += tdrops; - stats->rx_bytes += rbytes; - stats->rx_packets += rpkts; - } -} - static struct vrf_map *netns_vrf_map(struct net *net) { struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id); @@ -388,15 +343,13 @@ unlock: static bool qdisc_tx_is_default(const struct net_device *dev) { struct netdev_queue *txq; - struct Qdisc *qdisc; if (dev->num_tx_queues > 1) return false; txq = netdev_get_tx_queue(dev, 0); - qdisc = rcu_access_pointer(txq->qdisc); - return !qdisc->enqueue; + return qdisc_txq_has_no_queue(txq); } /* Local traffic destined to local address. Reinsert the packet to rx @@ -405,7 +358,7 @@ static bool qdisc_tx_is_default(const struct net_device *dev) static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, struct dst_entry *dst) { - int len = skb->len; + unsigned int len = skb->len; skb_orphan(skb); @@ -418,10 +371,10 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, skb->protocol = eth_type_trans(skb, dev); - if (likely(netif_rx(skb) == NET_RX_SUCCESS)) - vrf_rx_stats(dev, len); + if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) + dev_dstats_rx_add(dev, len); else - this_cpu_inc(dev->dstats->rx_drps); + dev_dstats_rx_dropped(dev); return NETDEV_TX_OK; } @@ -472,14 +425,13 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, memset(&fl6, 0, sizeof(fl6)); /* needed to match OIF rule */ - fl6.flowi6_oif = dev->ifindex; + fl6.flowi6_l3mdev = dev->ifindex; fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = iph->nexthdr; - fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL); if (IS_ERR(dst) || dst == dst_null) @@ -551,10 +503,10 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, memset(&fl4, 0, sizeof(fl4)); /* needed to match OIF rule */ - fl4.flowi4_oif = vrf_dev->ifindex; + fl4.flowi4_l3mdev = vrf_dev->ifindex; fl4.flowi4_iif = LOOPBACK_IFINDEX; - fl4.flowi4_tos = RT_TOS(ip4h->tos); - fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF; + fl4.flowi4_dscp = ip4h_dscp(ip4h); + fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; fl4.flowi4_proto = ip4h->protocol; fl4.daddr = ip4h->daddr; fl4.saddr = ip4h->saddr; @@ -610,19 +562,14 @@ static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) { - int len = skb->len; - netdev_tx_t ret = is_ip_tx_frame(skb, dev); - - if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { - struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + unsigned int len = skb->len; + netdev_tx_t ret; - u64_stats_update_begin(&dstats->syncp); - dstats->tx_pkts++; - dstats->tx_bytes += len; - u64_stats_update_end(&dstats->syncp); - } else { - this_cpu_inc(dev->dstats->tx_drps); - } + ret = is_ip_tx_frame(skb, dev); + if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) + dev_dstats_tx_add(dev, len); + else + dev_dstats_tx_dropped(dev); return ret; } @@ -665,18 +612,18 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; - rcu_read_lock_bh(); - nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); + rcu_read_lock(); + nexthop = rt6_nexthop(dst_rt6_info(dst), &ipv6_hdr(skb)->daddr); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); - rcu_read_unlock_bh(); + rcu_read_unlock(); return ret; } - rcu_read_unlock_bh(); + rcu_read_unlock(); IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -815,8 +762,8 @@ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) */ if (rt6) { dst = &rt6->dst; - dev_replace_track(dst->dev, net->loopback_dev, - &dst->dev_tracker, GFP_KERNEL); + netdev_ref_replace(dst->dev, net->loopback_dev, + &dst->dev_tracker, GFP_KERNEL); dst->dev = net->loopback_dev; dst_release(dst); } @@ -873,7 +820,7 @@ static int vrf_rt6_create(struct net_device *dev) static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct rtable *rt = (struct rtable *)dst; + struct rtable *rt = dst_rtable(dst); struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; @@ -890,7 +837,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s } } - rcu_read_lock_bh(); + rcu_read_lock(); neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { @@ -899,11 +846,11 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s sock_confirm_neigh(skb, neigh); /* if crossing protocols, can not use the cached header */ ret = neigh_output(neigh, skb, is_v6gw); - rcu_read_unlock_bh(); + rcu_read_unlock(); return ret; } - rcu_read_unlock_bh(); + rcu_read_unlock(); vrf_tx_error(skb->dev, skb); return -EINVAL; } @@ -1062,8 +1009,8 @@ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) */ if (rth) { dst = &rth->dst; - dev_replace_track(dst->dev, net->loopback_dev, - &dst->dev_tracker, GFP_KERNEL); + netdev_ref_replace(dst->dev, net->loopback_dev, + &dst->dev_tracker, GFP_KERNEL); dst->dev = net->loopback_dev; dst_release(dst); } @@ -1078,7 +1025,7 @@ static int vrf_rtable_create(struct net_device *dev) return -ENOMEM; /* create a dst for routing packets out through a VRF device */ - rth = rt_dst_alloc(dev, 0, RTN_UNICAST, 1, 1); + rth = rt_dst_alloc(dev, 0, RTN_UNICAST, 1); if (!rth) return -ENOMEM; @@ -1177,22 +1124,15 @@ static void vrf_dev_uninit(struct net_device *dev) vrf_rtable_release(dev, vrf); vrf_rt6_release(dev, vrf); - - free_percpu(dev->dstats); - dev->dstats = NULL; } static int vrf_dev_init(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); - dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats); - if (!dev->dstats) - goto out_nomem; - /* create the default dst which points back to us */ if (vrf_rtable_create(dev) != 0) - goto out_stats; + goto out_nomem; if (vrf_rt6_create(dev) != 0) goto out_rth; @@ -1206,9 +1146,6 @@ static int vrf_dev_init(struct net_device *dev) out_rth: vrf_rtable_release(dev, vrf); -out_stats: - free_percpu(dev->dstats); - dev->dstats = NULL; out_nomem: return -ENOMEM; } @@ -1218,7 +1155,6 @@ static const struct net_device_ops vrf_netdev_ops = { .ndo_uninit = vrf_dev_uninit, .ndo_start_xmit = vrf_xmit, .ndo_set_mac_address = eth_mac_addr, - .ndo_get_stats64 = vrf_get_stats64, .ndo_add_slave = vrf_add_slave, .ndo_del_slave = vrf_del_slave, }; @@ -1266,6 +1202,7 @@ static int vrf_prepare_mac_header(struct sk_buff *skb, eth = (struct ethhdr *)skb->data; skb_reset_mac_header(skb); + skb_reset_mac_len(skb); /* we set the ethernet destination and the source addresses to the * address of the VRF device. @@ -1295,9 +1232,9 @@ static int vrf_prepare_mac_header(struct sk_buff *skb, */ static int vrf_add_mac_header_if_unset(struct sk_buff *skb, struct net_device *vrf_dev, - u16 proto) + u16 proto, struct net_device *orig_dev) { - if (skb_mac_header_was_set(skb)) + if (skb_mac_header_was_set(skb) && dev_has_header(orig_dev)) return 0; return vrf_prepare_mac_header(skb, vrf_dev, proto); @@ -1365,6 +1302,8 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, struct net *net = dev_net(vrf_dev); struct rt6_info *rt6; + skb_dst_drop(skb); + rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb, RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE); if (unlikely(!rt6)) @@ -1385,8 +1324,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, /* loopback, multicast & non-ND link-local traffic; do not push through * packet taps again. Reset pkt_type for upper layers to process skb. - * For strict packets with a source LLA, determine the dst using the - * original ifindex. + * For non-loopback strict packets, determine the dst using the original + * ifindex. */ if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) { skb->dev = vrf_dev; @@ -1395,7 +1334,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, if (skb->pkt_type == PACKET_LOOPBACK) skb->pkt_type = PACKET_HOST; - else if (ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) + else vrf_ip6_input_dst(skb, vrf_dev, orig_iif); goto out; @@ -1403,7 +1342,9 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, /* if packet is NDISC then keep the ingress interface */ if (!is_ndisc) { - vrf_rx_stats(vrf_dev, skb->len); + struct net_device *orig_dev = skb->dev; + + dev_dstats_rx_add(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; @@ -1411,7 +1352,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, int err; err = vrf_add_mac_header_if_unset(skb, vrf_dev, - ETH_P_IPV6); + ETH_P_IPV6, + orig_dev); if (likely(!err)) { skb_push(skb, skb->mac_len); dev_queue_xmit_nit(skb, vrf_dev); @@ -1441,6 +1383,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, struct sk_buff *skb) { + struct net_device *orig_dev = skb->dev; + skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; IPCB(skb)->flags |= IPSKB_L3SLAVE; @@ -1456,12 +1400,13 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, goto out; } - vrf_rx_stats(vrf_dev, skb->len); + dev_dstats_rx_add(vrf_dev, skb->len); if (!list_empty(&vrf_dev->ptype_all)) { int err; - err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP); + err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP, + orig_dev); if (likely(!err)) { skb_push(skb, skb->mac_len); dev_queue_xmit_nit(skb, vrf_dev); @@ -1535,8 +1480,8 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { static void vrf_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); - strlcpy(info->version, DRV_VERSION, sizeof(info->version)); + strscpy(info->driver, DRV_NAME, sizeof(info->driver)); + strscpy(info->version, DRV_VERSION, sizeof(info->version)); } static const struct ethtool_ops vrf_ethtool_ops = { @@ -1593,14 +1538,12 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) nlmsg_end(skb, nlh); - /* fib_nl_{new,del}rule handling looks for net from skb->sk */ - skb->sk = dev_net(dev)->rtnl; if (add_it) { - err = fib_nl_newrule(skb, nlh, NULL); + err = fib_newrule(dev_net(dev), skb, nlh, NULL, true); if (err == -EEXIST) err = 0; } else { - err = fib_nl_delrule(skb, nlh, NULL); + err = fib_delrule(dev_net(dev), skb, nlh, NULL, true); if (err == -ENOENT) err = 0; } @@ -1672,10 +1615,10 @@ static void vrf_setup(struct net_device *dev) eth_hw_addr_random(dev); /* don't acquire vrf device's netif_tx_lock when transmitting */ - dev->features |= NETIF_F_LLTX; + dev->lltx = true; /* don't allow vrf devices to change network namespaces. */ - dev->features |= NETIF_F_NETNS_LOCAL; + dev->netns_immutable = true; /* does not make sense for a VLAN to be added to a vrf device */ dev->features |= NETIF_F_VLAN_CHALLENGED; @@ -1700,6 +1643,8 @@ static void vrf_setup(struct net_device *dev) dev->min_mtu = IPV6_MIN_MTU; dev->max_mtu = IP6_MAX_MTU; dev->mtu = dev->max_mtu; + + dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; } static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], @@ -1731,11 +1676,12 @@ static void vrf_dellink(struct net_device *dev, struct list_head *head) unregister_netdevice_queue(dev, head); } -static int vrf_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +static int vrf_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct net_vrf *vrf = netdev_priv(dev); + struct nlattr **data = params->data; struct netns_vrf *nn_vrf; bool *add_fib_rules; struct net *net; @@ -1922,7 +1868,7 @@ unlock: return res; } -static int vrf_shared_table_handler(struct ctl_table *table, int write, +static int vrf_shared_table_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = (struct net *)table->extra1; @@ -1959,7 +1905,6 @@ static const struct ctl_table vrf_table[] = { /* set by the vrf_netns_init */ .extra1 = NULL, }, - { }, }; static int vrf_netns_init_sysctl(struct net *net, struct netns_vrf *nn_vrf) @@ -1973,7 +1918,8 @@ static int vrf_netns_init_sysctl(struct net *net, struct netns_vrf *nn_vrf) /* init the extra1 parameter with the reference to current netns */ table[0].extra1 = net; - nn_vrf->ctl_hdr = register_net_sysctl(net, "net/vrf", table); + nn_vrf->ctl_hdr = register_net_sysctl_sz(net, "net/vrf", table, + ARRAY_SIZE(vrf_table)); if (!nn_vrf->ctl_hdr) { kfree(table); return -ENOMEM; @@ -1985,7 +1931,7 @@ static int vrf_netns_init_sysctl(struct net *net, struct netns_vrf *nn_vrf) static void vrf_netns_exit_sysctl(struct net *net) { struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id); - struct ctl_table *table; + const struct ctl_table *table; table = nn_vrf->ctl_hdr->ctl_table_arg; unregister_net_sysctl_table(nn_vrf->ctl_hdr); |
