summaryrefslogtreecommitdiff
path: root/drivers/net/hyperv
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-15 11:56:19 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-15 11:56:19 -0800
commit5bbcc0f595fadb4cac0eddc4401035ec0bd95b09 (patch)
tree3b65e490cc36a6c6fecac1fa24d9e0ac9ced4455 /drivers/net/hyperv
parent892204e06cb9e89fbc4b299a678f9ca358e97cac (diff)
parent50895b9de1d3e0258e015e8e55128d835d9a9f19 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Highlights: 1) Maintain the TCP retransmit queue using an rbtree, with 1GB windows at 100Gb this really has become necessary. From Eric Dumazet. 2) Multi-program support for cgroup+bpf, from Alexei Starovoitov. 3) Perform broadcast flooding in hardware in mv88e6xxx, from Andrew Lunn. 4) Add meter action support to openvswitch, from Andy Zhou. 5) Add a data meta pointer for BPF accessible packets, from Daniel Borkmann. 6) Namespace-ify almost all TCP sysctl knobs, from Eric Dumazet. 7) Turn on Broadcom Tags in b53 driver, from Florian Fainelli. 8) More work to move the RTNL mutex down, from Florian Westphal. 9) Add 'bpftool' utility, to help with bpf program introspection. From Jakub Kicinski. 10) Add new 'cpumap' type for XDP_REDIRECT action, from Jesper Dangaard Brouer. 11) Support 'blocks' of transformations in the packet scheduler which can span multiple network devices, from Jiri Pirko. 12) TC flower offload support in cxgb4, from Kumar Sanghvi. 13) Priority based stream scheduler for SCTP, from Marcelo Ricardo Leitner. 14) Thunderbolt networking driver, from Amir Levy and Mika Westerberg. 15) Add RED qdisc offloadability, and use it in mlxsw driver. From Nogah Frankel. 16) eBPF based device controller for cgroup v2, from Roman Gushchin. 17) Add some fundamental tracepoints for TCP, from Song Liu. 18) Remove garbage collection from ipv6 route layer, this is a significant accomplishment. From Wei Wang. 19) Add multicast route offload support to mlxsw, from Yotam Gigi" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2177 commits) tcp: highest_sack fix geneve: fix fill_info when link down bpf: fix lockdep splat net: cdc_ncm: GetNtbFormat endian fix openvswitch: meter: fix NULL pointer dereference in ovs_meter_cmd_reply_start netem: remove unnecessary 64 bit modulus netem: use 64 bit divide by rate tcp: Namespace-ify sysctl_tcp_default_congestion_control net: Protect iterations over net::fib_notifier_ops in fib_seq_sum() ipv6: set all.accept_dad to 0 by default uapi: fix linux/tls.h userspace compilation error usbnet: ipheth: prevent TX queue timeouts when device not ready vhost_net: conditionally enable tx polling uapi: fix linux/rxrpc.h userspace compilation errors net: stmmac: fix LPI transitioning for dwmac4 atm: horizon: Fix irq release error net-sysfs: trigger netlink notification on ifalias change via sysfs openvswitch: Using kfree_rcu() to simplify the code openvswitch: Make local function ovs_nsh_key_attr_size() static openvswitch: Fix return value check in ovs_meter_cmd_features() ...
Diffstat (limited to 'drivers/net/hyperv')
-rw-r--r--drivers/net/hyperv/hyperv_net.h17
-rw-r--r--drivers/net/hyperv/netvsc.c88
-rw-r--r--drivers/net/hyperv/netvsc_drv.c114
-rw-r--r--drivers/net/hyperv/rndis_filter.c13
4 files changed, 158 insertions, 74 deletions
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 5176be76ca7d..4958bb6b7376 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -179,7 +179,7 @@ struct rndis_device {
u8 hw_mac_adr[ETH_ALEN];
u8 rss_key[NETVSC_HASH_KEYLEN];
- u16 ind_table[ITAB_NUM];
+ u16 rx_table[ITAB_NUM];
};
@@ -686,6 +686,8 @@ struct netvsc_ethtool_stats {
unsigned long tx_busy;
unsigned long tx_send_full;
unsigned long rx_comp_busy;
+ unsigned long stop_queue;
+ unsigned long wake_queue;
};
struct netvsc_vf_pcpu_stats {
@@ -702,6 +704,14 @@ struct netvsc_reconfig {
u32 event;
};
+/* L4 hash bits for different protocols */
+#define HV_TCP4_L4HASH 1
+#define HV_TCP6_L4HASH 2
+#define HV_UDP4_L4HASH 4
+#define HV_UDP6_L4HASH 8
+#define HV_DEFAULT_L4HASH (HV_TCP4_L4HASH | HV_TCP6_L4HASH | HV_UDP4_L4HASH | \
+ HV_UDP6_L4HASH)
+
/* The context of the netvsc device */
struct net_device_context {
/* point back to our device context */
@@ -721,13 +731,12 @@ struct net_device_context {
u32 tx_checksum_mask;
- u32 tx_send_table[VRSS_SEND_TAB_SIZE];
+ u32 tx_table[VRSS_SEND_TAB_SIZE];
/* Ethtool settings */
- bool udp4_l4_hash;
- bool udp6_l4_hash;
u8 duplex;
u32 speed;
+ u32 l4_hash; /* L4 hash settings */
struct netvsc_ethtool_stats eth_stats;
/* State to manage the associated VF interface. */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 8d5077fb0492..bfc79698b8f4 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -100,12 +100,11 @@ static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
call_rcu(&nvdev->rcu, free_netvsc_device);
}
-static void netvsc_destroy_buf(struct hv_device *device)
+static void netvsc_revoke_buf(struct hv_device *device,
+ struct netvsc_device *net_device)
{
struct nvsp_message *revoke_packet;
struct net_device *ndev = hv_get_drvdata(device);
- struct net_device_context *ndc = netdev_priv(ndev);
- struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev);
int ret;
/*
@@ -148,28 +147,6 @@ static void netvsc_destroy_buf(struct hv_device *device)
net_device->recv_section_cnt = 0;
}
- /* Teardown the gpadl on the vsp end */
- if (net_device->recv_buf_gpadl_handle) {
- ret = vmbus_teardown_gpadl(device->channel,
- net_device->recv_buf_gpadl_handle);
-
- /* If we failed here, we might as well return and have a leak
- * rather than continue and a bugchk
- */
- if (ret != 0) {
- netdev_err(ndev,
- "unable to teardown receive buffer's gpadl\n");
- return;
- }
- net_device->recv_buf_gpadl_handle = 0;
- }
-
- if (net_device->recv_buf) {
- /* Free up the receive buffer */
- vfree(net_device->recv_buf);
- net_device->recv_buf = NULL;
- }
-
/* Deal with the send buffer we may have setup.
* If we got a send section size, it means we received a
* NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
@@ -210,7 +187,35 @@ static void netvsc_destroy_buf(struct hv_device *device)
}
net_device->send_section_cnt = 0;
}
- /* Teardown the gpadl on the vsp end */
+}
+
+static void netvsc_teardown_gpadl(struct hv_device *device,
+ struct netvsc_device *net_device)
+{
+ struct net_device *ndev = hv_get_drvdata(device);
+ int ret;
+
+ if (net_device->recv_buf_gpadl_handle) {
+ ret = vmbus_teardown_gpadl(device->channel,
+ net_device->recv_buf_gpadl_handle);
+
+ /* If we failed here, we might as well return and have a leak
+ * rather than continue and a bugchk
+ */
+ if (ret != 0) {
+ netdev_err(ndev,
+ "unable to teardown receive buffer's gpadl\n");
+ return;
+ }
+ net_device->recv_buf_gpadl_handle = 0;
+ }
+
+ if (net_device->recv_buf) {
+ /* Free up the receive buffer */
+ vfree(net_device->recv_buf);
+ net_device->recv_buf = NULL;
+ }
+
if (net_device->send_buf_gpadl_handle) {
ret = vmbus_teardown_gpadl(device->channel,
net_device->send_buf_gpadl_handle);
@@ -420,7 +425,8 @@ static int netvsc_init_buf(struct hv_device *device,
goto exit;
cleanup:
- netvsc_destroy_buf(device);
+ netvsc_revoke_buf(device, net_device);
+ netvsc_teardown_gpadl(device, net_device);
exit:
return ret;
@@ -484,7 +490,7 @@ static int netvsc_connect_vsp(struct hv_device *device,
struct netvsc_device *net_device,
const struct netvsc_device_info *device_info)
{
- const u32 ver_list[] = {
+ static const u32 ver_list[] = {
NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5
};
@@ -539,11 +545,6 @@ cleanup:
return ret;
}
-static void netvsc_disconnect_vsp(struct hv_device *device)
-{
- netvsc_destroy_buf(device);
-}
-
/*
* netvsc_device_remove - Callback when the root bus device is removed
*/
@@ -557,7 +558,7 @@ void netvsc_device_remove(struct hv_device *device)
cancel_work_sync(&net_device->subchan_work);
- netvsc_disconnect_vsp(device);
+ netvsc_revoke_buf(device, net_device);
RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
@@ -570,6 +571,8 @@ void netvsc_device_remove(struct hv_device *device)
/* Now, we can close the channel safely */
vmbus_close(device->channel);
+ netvsc_teardown_gpadl(device, net_device);
+
/* And dissassociate NAPI context from device */
for (i = 0; i < net_device->num_chn; i++)
netif_napi_del(&net_device->chan_table[i].napi);
@@ -609,6 +612,7 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
{
struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
struct net_device *ndev = hv_get_drvdata(device);
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
struct vmbus_channel *channel = device->channel;
u16 q_idx = 0;
int queue_sends;
@@ -643,8 +647,10 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
(hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
- queue_sends < 1))
+ queue_sends < 1)) {
netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
+ ndev_ctx->eth_stats.wake_queue++;
+ }
}
static void netvsc_send_completion(struct netvsc_device *net_device,
@@ -749,6 +755,7 @@ static inline int netvsc_send_pkt(
&net_device->chan_table[packet->q_idx];
struct vmbus_channel *out_channel = nvchan->channel;
struct net_device *ndev = hv_get_drvdata(device);
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
u64 req_id;
int ret;
@@ -789,12 +796,16 @@ static inline int netvsc_send_pkt(
if (ret == 0) {
atomic_inc_return(&nvchan->queue_sends);
- if (ring_avail < RING_AVAIL_PERCENT_LOWATER)
+ if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
netif_tx_stop_queue(txq);
+ ndev_ctx->eth_stats.stop_queue++;
+ }
} else if (ret == -EAGAIN) {
netif_tx_stop_queue(txq);
+ ndev_ctx->eth_stats.stop_queue++;
if (atomic_read(&nvchan->queue_sends) < 1) {
netif_tx_wake_queue(txq);
+ ndev_ctx->eth_stats.wake_queue++;
ret = -ENOSPC;
}
} else {
@@ -1102,7 +1113,7 @@ static void netvsc_send_table(struct hv_device *hdev,
nvmsg->msg.v5_msg.send_table.offset);
for (i = 0; i < count; i++)
- net_device_ctx->tx_send_table[i] = tab[i];
+ net_device_ctx->tx_table[i] = tab[i];
}
static void netvsc_send_vf(struct net_device_context *net_device_ctx,
@@ -1247,6 +1258,9 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
if (!net_device)
return ERR_PTR(-ENOMEM);
+ for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
+ net_device_ctx->tx_table[i] = 0;
+
net_device->ring_size = ring_size;
/* Because the device uses NAPI, all the interrupt batching and
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index a32ae02e1b6c..da216ca4f2b2 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -203,7 +203,7 @@ static inline u32 netvsc_get_hash(
const struct net_device_context *ndc)
{
struct flow_keys flow;
- u32 hash;
+ u32 hash, pkt_proto = 0;
static u32 hashrnd __read_mostly;
net_get_random_once(&hashrnd, sizeof(hashrnd));
@@ -211,11 +211,25 @@ static inline u32 netvsc_get_hash(
if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
return 0;
- if (flow.basic.ip_proto == IPPROTO_TCP ||
- (flow.basic.ip_proto == IPPROTO_UDP &&
- ((flow.basic.n_proto == htons(ETH_P_IP) && ndc->udp4_l4_hash) ||
- (flow.basic.n_proto == htons(ETH_P_IPV6) &&
- ndc->udp6_l4_hash)))) {
+ switch (flow.basic.ip_proto) {
+ case IPPROTO_TCP:
+ if (flow.basic.n_proto == htons(ETH_P_IP))
+ pkt_proto = HV_TCP4_L4HASH;
+ else if (flow.basic.n_proto == htons(ETH_P_IPV6))
+ pkt_proto = HV_TCP6_L4HASH;
+
+ break;
+
+ case IPPROTO_UDP:
+ if (flow.basic.n_proto == htons(ETH_P_IP))
+ pkt_proto = HV_UDP4_L4HASH;
+ else if (flow.basic.n_proto == htons(ETH_P_IPV6))
+ pkt_proto = HV_UDP6_L4HASH;
+
+ break;
+ }
+
+ if (pkt_proto & ndc->l4_hash) {
return skb_get_hash(skb);
} else {
if (flow.basic.n_proto == htons(ETH_P_IP))
@@ -238,8 +252,8 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev,
struct sock *sk = skb->sk;
int q_idx;
- q_idx = ndc->tx_send_table[netvsc_get_hash(skb, ndc) &
- (VRSS_SEND_TAB_SIZE - 1)];
+ q_idx = ndc->tx_table[netvsc_get_hash(skb, ndc) &
+ (VRSS_SEND_TAB_SIZE - 1)];
/* If queue index changed record the new value */
if (q_idx != old_idx &&
@@ -898,8 +912,7 @@ static void netvsc_init_settings(struct net_device *dev)
{
struct net_device_context *ndc = netdev_priv(dev);
- ndc->udp4_l4_hash = true;
- ndc->udp6_l4_hash = true;
+ ndc->l4_hash = HV_DEFAULT_L4HASH;
ndc->speed = SPEED_UNKNOWN;
ndc->duplex = DUPLEX_FULL;
@@ -1126,6 +1139,8 @@ static const struct {
{ "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) },
{ "tx_send_full", offsetof(struct netvsc_ethtool_stats, tx_send_full) },
{ "rx_comp_busy", offsetof(struct netvsc_ethtool_stats, rx_comp_busy) },
+ { "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
+ { "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
}, vf_stats[] = {
{ "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
{ "vf_rx_bytes", offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
@@ -1243,23 +1258,32 @@ static int
netvsc_get_rss_hash_opts(struct net_device_context *ndc,
struct ethtool_rxnfc *info)
{
+ const u32 l4_flag = RXH_L4_B_0_1 | RXH_L4_B_2_3;
+
info->data = RXH_IP_SRC | RXH_IP_DST;
switch (info->flow_type) {
case TCP_V4_FLOW:
+ if (ndc->l4_hash & HV_TCP4_L4HASH)
+ info->data |= l4_flag;
+
+ break;
+
case TCP_V6_FLOW:
- info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ if (ndc->l4_hash & HV_TCP6_L4HASH)
+ info->data |= l4_flag;
+
break;
case UDP_V4_FLOW:
- if (ndc->udp4_l4_hash)
- info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ if (ndc->l4_hash & HV_UDP4_L4HASH)
+ info->data |= l4_flag;
break;
case UDP_V6_FLOW:
- if (ndc->udp6_l4_hash)
- info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ if (ndc->l4_hash & HV_UDP6_L4HASH)
+ info->data |= l4_flag;
break;
@@ -1300,23 +1324,51 @@ static int netvsc_set_rss_hash_opts(struct net_device_context *ndc,
{
if (info->data == (RXH_IP_SRC | RXH_IP_DST |
RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
- if (info->flow_type == UDP_V4_FLOW)
- ndc->udp4_l4_hash = true;
- else if (info->flow_type == UDP_V6_FLOW)
- ndc->udp6_l4_hash = true;
- else
+ switch (info->flow_type) {
+ case TCP_V4_FLOW:
+ ndc->l4_hash |= HV_TCP4_L4HASH;
+ break;
+
+ case TCP_V6_FLOW:
+ ndc->l4_hash |= HV_TCP6_L4HASH;
+ break;
+
+ case UDP_V4_FLOW:
+ ndc->l4_hash |= HV_UDP4_L4HASH;
+ break;
+
+ case UDP_V6_FLOW:
+ ndc->l4_hash |= HV_UDP6_L4HASH;
+ break;
+
+ default:
return -EOPNOTSUPP;
+ }
return 0;
}
if (info->data == (RXH_IP_SRC | RXH_IP_DST)) {
- if (info->flow_type == UDP_V4_FLOW)
- ndc->udp4_l4_hash = false;
- else if (info->flow_type == UDP_V6_FLOW)
- ndc->udp6_l4_hash = false;
- else
+ switch (info->flow_type) {
+ case TCP_V4_FLOW:
+ ndc->l4_hash &= ~HV_TCP4_L4HASH;
+ break;
+
+ case TCP_V6_FLOW:
+ ndc->l4_hash &= ~HV_TCP6_L4HASH;
+ break;
+
+ case UDP_V4_FLOW:
+ ndc->l4_hash &= ~HV_UDP4_L4HASH;
+ break;
+
+ case UDP_V6_FLOW:
+ ndc->l4_hash &= ~HV_UDP6_L4HASH;
+ break;
+
+ default:
return -EOPNOTSUPP;
+ }
return 0;
}
@@ -1382,7 +1434,7 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
rndis_dev = ndev->extension;
if (indir) {
for (i = 0; i < ITAB_NUM; i++)
- indir[i] = rndis_dev->ind_table[i];
+ indir[i] = rndis_dev->rx_table[i];
}
if (key)
@@ -1412,7 +1464,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
return -EINVAL;
for (i = 0; i < ITAB_NUM; i++)
- rndis_dev->ind_table[i] = indir[i];
+ rndis_dev->rx_table[i] = indir[i];
}
if (!key) {
@@ -1746,7 +1798,7 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
goto rx_handler_failed;
}
- ret = netdev_upper_dev_link(vf_netdev, ndev);
+ ret = netdev_upper_dev_link(vf_netdev, ndev, NULL);
if (ret != 0) {
netdev_err(vf_netdev,
"can not set master device %s (err = %d)\n",
@@ -1935,6 +1987,12 @@ static int netvsc_probe(struct hv_device *dev,
/* We always need headroom for rndis header */
net->needed_headroom = RNDIS_AND_PPI_SIZE;
+ /* Initialize the number of queues to be 1, we may change it if more
+ * channels are offered later.
+ */
+ netif_set_real_num_tx_queues(net, 1);
+ netif_set_real_num_rx_queues(net, 1);
+
/* Notify the netvsc driver of the new device */
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 065b204d8e17..8b1242b8d8ef 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -407,13 +407,13 @@ int rndis_filter_receive(struct net_device *ndev,
/* Make sure the rndis device state is initialized */
if (unlikely(!rndis_dev)) {
- netif_err(net_device_ctx, rx_err, ndev,
+ netif_dbg(net_device_ctx, rx_err, ndev,
"got rndis message but no rndis device!\n");
return NVSP_STAT_FAIL;
}
if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) {
- netif_err(net_device_ctx, rx_err, ndev,
+ netif_dbg(net_device_ctx, rx_err, ndev,
"got rndis message uninitialized\n");
return NVSP_STAT_FAIL;
}
@@ -759,7 +759,7 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev,
/* Set indirection table entries */
itab = (u32 *)(rssp + 1);
for (i = 0; i < ITAB_NUM; i++)
- itab[i] = rdev->ind_table[i];
+ itab[i] = rdev->rx_table[i];
/* Set hask key values */
keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset);
@@ -1114,6 +1114,9 @@ void rndis_set_subchannel(struct work_struct *w)
netif_set_real_num_tx_queues(ndev, nvdev->num_chn);
netif_set_real_num_rx_queues(ndev, nvdev->num_chn);
+ for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
+ ndev_ctx->tx_table[i] = i % nvdev->num_chn;
+
rtnl_unlock();
return;
@@ -1284,8 +1287,8 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
net_device->num_chn = min(net_device->max_chn, device_info->num_chn);
for (i = 0; i < ITAB_NUM; i++)
- rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i,
- net_device->num_chn);
+ rndis_device->rx_table[i] = ethtool_rxfh_indir_default(
+ i, net_device->num_chn);
atomic_set(&net_device->open_chn, 1);
vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);