summaryrefslogtreecommitdiff
path: root/drivers/net/hyperv
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/hyperv')
-rw-r--r--drivers/net/hyperv/Kconfig3
-rw-r--r--drivers/net/hyperv/hyperv_net.h30
-rw-r--r--drivers/net/hyperv/netvsc.c173
-rw-r--r--drivers/net/hyperv/netvsc_bpf.c2
-rw-r--r--drivers/net/hyperv/netvsc_drv.c402
-rw-r--r--drivers/net/hyperv/netvsc_trace.h8
-rw-r--r--drivers/net/hyperv/rndis_filter.c85
7 files changed, 435 insertions, 268 deletions
diff --git a/drivers/net/hyperv/Kconfig b/drivers/net/hyperv/Kconfig
index ca7bf7f897d3..982964c1a9fb 100644
--- a/drivers/net/hyperv/Kconfig
+++ b/drivers/net/hyperv/Kconfig
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
config HYPERV_NET
tristate "Microsoft Hyper-V virtual network driver"
- depends on HYPERV
+ depends on HYPERV_VMBUS
select UCS2_STRING
+ select NLS
help
Select this option to enable the Hyper-V virtual network driver.
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index dd5919ec408b..7397c693f984 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -16,6 +16,7 @@
#include <linux/hyperv.h>
#include <linux/rndis.h>
#include <linux/jhash.h>
+#include <net/xdp.h>
/* RSS related */
#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */
@@ -74,6 +75,7 @@ struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */
#define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40
#define ITAB_NUM 128
+#define ITAB_NUM_MAX 256
struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
struct ndis_obj_header hdr;
@@ -156,7 +158,6 @@ struct hv_netvsc_packet {
u8 cp_partial; /* partial copy into send buffer */
u8 rmsg_size; /* RNDIS header and PPI size */
- u8 rmsg_pgcnt; /* page count of RNDIS header and PPI */
u8 page_buf_cnt;
u16 q_idx;
@@ -462,7 +463,7 @@ struct nvsp_1_message_send_receive_buffer_complete {
* LargeOffset SmallOffset
*/
- struct nvsp_1_receive_buffer_section sections[1];
+ struct nvsp_1_receive_buffer_section sections[];
} __packed;
/*
@@ -880,7 +881,7 @@ struct nvsp_message {
#define VRSS_SEND_TAB_SIZE 16 /* must be power of 2 */
#define VRSS_CHANNEL_MAX 64
-#define VRSS_CHANNEL_DEFAULT 8
+#define VRSS_CHANNEL_DEFAULT 16
#define RNDIS_MAX_PKT_DEFAULT 8
#define RNDIS_PKT_ALIGN_DEFAULT 8
@@ -891,6 +892,18 @@ struct nvsp_message {
sizeof(struct nvsp_message))
#define NETVSC_MIN_IN_MSG_SIZE sizeof(struct vmpacket_descriptor)
+/* Maximum # of contiguous data ranges that can make up a trasmitted packet.
+ * Typically it's the max SKB fragments plus 2 for the rndis packet and the
+ * linear portion of the SKB. But if MAX_SKB_FRAGS is large, the value may
+ * need to be limited to MAX_PAGE_BUFFER_COUNT, which is the max # of entries
+ * in a GPA direct packet sent to netvsp over VMBus.
+ */
+#if MAX_SKB_FRAGS + 2 < MAX_PAGE_BUFFER_COUNT
+#define MAX_DATA_RANGES (MAX_SKB_FRAGS + 2)
+#else
+#define MAX_DATA_RANGES MAX_PAGE_BUFFER_COUNT
+#endif
+
/* Estimated requestor size:
* out_ring_size/min_out_msg_size + in_ring_size/min_in_msg_size
*/
@@ -1034,7 +1047,9 @@ struct net_device_context {
u32 tx_table[VRSS_SEND_TAB_SIZE];
- u16 rx_table[ITAB_NUM];
+ u16 *rx_table;
+
+ u32 rx_table_sz;
/* Ethtool settings */
u8 duplex;
@@ -1046,6 +1061,7 @@ struct net_device_context {
struct net_device __rcu *vf_netdev;
struct netvsc_vf_pcpu_stats __percpu *vf_stats;
struct delayed_work vf_takeover;
+ struct delayed_work vfns_work;
/* 1: allocated, serial number is valid. 0: not allocated */
u32 vf_alloc;
@@ -1060,6 +1076,8 @@ struct net_device_context {
struct netvsc_device_info *saved_netvsc_dev_info;
};
+void netvsc_vfns_work(struct work_struct *w);
+
/* Azure hosts don't support non-TCP port numbers in hashing for fragmented
* packets. We can use ethtool to change UDP hash level when necessary.
*/
@@ -1139,7 +1157,6 @@ struct netvsc_device {
/* Receive buffer allocated by us but manages by NetVSP */
void *recv_buf;
- void *recv_original_buf;
u32 recv_buf_size; /* allocated bytes */
struct vmbus_gpadl recv_buf_gpadl_handle;
u32 recv_section_cnt;
@@ -1148,7 +1165,6 @@ struct netvsc_device {
/* Send buffer allocated by us */
void *send_buf;
- void *send_original_buf;
u32 send_buf_size;
struct vmbus_gpadl send_buf_gpadl_handle;
u32 send_section_cnt;
@@ -1164,6 +1180,8 @@ struct netvsc_device {
u32 max_chn;
u32 num_chn;
+ u32 netvsc_gso_max_size;
+
atomic_t open_chn;
struct work_struct subchan_work;
wait_queue_head_t subchan_open;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 9352dad58996..60a4629fe6ba 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -155,16 +155,10 @@ static void free_netvsc_device(struct rcu_head *head)
kfree(nvdev->extension);
- if (nvdev->recv_original_buf)
- vfree(nvdev->recv_original_buf);
- else
+ if (!nvdev->recv_buf_gpadl_handle.decrypted)
vfree(nvdev->recv_buf);
-
- if (nvdev->send_original_buf)
- vfree(nvdev->send_original_buf);
- else
+ if (!nvdev->send_buf_gpadl_handle.decrypted)
vfree(nvdev->send_buf);
-
bitmap_free(nvdev->send_section_map);
for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
@@ -347,7 +341,6 @@ static int netvsc_init_buf(struct hv_device *device,
struct nvsp_message *init_packet;
unsigned int buf_size;
int i, ret = 0;
- void *vaddr;
/* Get receive buffer area. */
buf_size = device_info->recv_sections * device_info->recv_section_size;
@@ -383,17 +376,6 @@ static int netvsc_init_buf(struct hv_device *device,
goto cleanup;
}
- if (hv_isolation_type_snp()) {
- vaddr = hv_map_memory(net_device->recv_buf, buf_size);
- if (!vaddr) {
- ret = -ENOMEM;
- goto cleanup;
- }
-
- net_device->recv_original_buf = net_device->recv_buf;
- net_device->recv_buf = vaddr;
- }
-
/* Notify the NetVsp of the gpadl handle */
init_packet = &net_device->channel_init_pkt;
memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -497,17 +479,6 @@ static int netvsc_init_buf(struct hv_device *device,
goto cleanup;
}
- if (hv_isolation_type_snp()) {
- vaddr = hv_map_memory(net_device->send_buf, buf_size);
- if (!vaddr) {
- ret = -ENOMEM;
- goto cleanup;
- }
-
- net_device->send_original_buf = net_device->send_buf;
- net_device->send_buf = vaddr;
- }
-
/* Notify the NetVsp of the gpadl handle */
init_packet = &net_device->channel_init_pkt;
memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -740,7 +711,15 @@ void netvsc_device_remove(struct hv_device *device)
/* Disable NAPI and disassociate its context from the device. */
for (i = 0; i < net_device->num_chn; i++) {
/* See also vmbus_reset_channel_cb(). */
- napi_disable(&net_device->chan_table[i].napi);
+ /* only disable enabled NAPI channel */
+ if (i < ndev->real_num_rx_queues) {
+ netif_queue_set_napi(ndev, i, NETDEV_QUEUE_TYPE_TX,
+ NULL);
+ netif_queue_set_napi(ndev, i, NETDEV_QUEUE_TYPE_RX,
+ NULL);
+ napi_disable(&net_device->chan_table[i].napi);
+ }
+
netif_napi_del(&net_device->chan_table[i].napi);
}
@@ -762,12 +741,6 @@ void netvsc_device_remove(struct hv_device *device)
netvsc_teardown_send_gpadl(device, net_device, ndev);
}
- if (net_device->recv_original_buf)
- hv_unmap_memory(net_device->recv_buf);
-
- if (net_device->send_original_buf)
- hv_unmap_memory(net_device->send_buf);
-
/* Release all resources */
free_netvsc_device_rcu(net_device);
}
@@ -851,6 +824,7 @@ static void netvsc_send_completion(struct net_device *ndev,
u32 msglen = hv_pkt_datalen(desc);
struct nvsp_message *pkt_rqst;
u64 cmd_rqst;
+ u32 status;
/* First check if this is a VMBUS completion without data payload */
if (!msglen) {
@@ -888,16 +862,17 @@ static void netvsc_send_completion(struct net_device *ndev,
msglen);
return;
}
- fallthrough;
+ break;
case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
if (msglen < sizeof(struct nvsp_message_header) +
- sizeof(struct nvsp_1_message_send_receive_buffer_complete)) {
+ struct_size_t(struct nvsp_1_message_send_receive_buffer_complete,
+ sections, 1)) {
netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
msglen);
return;
}
- fallthrough;
+ break;
case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
if (msglen < sizeof(struct nvsp_message_header) +
@@ -906,7 +881,7 @@ static void netvsc_send_completion(struct net_device *ndev,
msglen);
return;
}
- fallthrough;
+ break;
case NVSP_MSG5_TYPE_SUBCHANNEL:
if (msglen < sizeof(struct nvsp_message_header) +
@@ -915,22 +890,41 @@ static void netvsc_send_completion(struct net_device *ndev,
msglen);
return;
}
- /* Copy the response back */
- memcpy(&net_device->channel_init_pkt, nvsp_packet,
- sizeof(struct nvsp_message));
- complete(&net_device->channel_init_wait);
break;
case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
+ if (msglen < sizeof(struct nvsp_message_header) +
+ sizeof(struct nvsp_1_message_send_rndis_packet_complete)) {
+ if (net_ratelimit())
+ netdev_err(ndev, "nvsp_rndis_pkt_complete length too small: %u\n",
+ msglen);
+ return;
+ }
+
+ /* If status indicates an error, output a message so we know
+ * there's a problem. But process the completion anyway so the
+ * resources are released.
+ */
+ status = nvsp_packet->msg.v1_msg.send_rndis_pkt_complete.status;
+ if (status != NVSP_STAT_SUCCESS && net_ratelimit())
+ netdev_err(ndev, "nvsp_rndis_pkt_complete error status: %x\n",
+ status);
+
netvsc_send_tx_complete(ndev, net_device, incoming_channel,
desc, budget);
- break;
+ return;
default:
netdev_err(ndev,
"Unknown send completion type %d received!!\n",
nvsp_packet->hdr.msg_type);
+ return;
}
+
+ /* Copy the response back */
+ memcpy(&net_device->channel_init_pkt, nvsp_packet,
+ sizeof(struct nvsp_message));
+ complete(&net_device->channel_init_wait);
}
static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
@@ -959,8 +953,7 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
+ pend_size;
int i;
u32 padding = 0;
- u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
- packet->page_buf_cnt;
+ u32 page_count = packet->cp_partial ? 1 : packet->page_buf_cnt;
u32 remain;
/* Add padding */
@@ -987,9 +980,6 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
void netvsc_dma_unmap(struct hv_device *hv_dev,
struct hv_netvsc_packet *packet)
{
- u32 page_count = packet->cp_partial ?
- packet->page_buf_cnt - packet->rmsg_pgcnt :
- packet->page_buf_cnt;
int i;
if (!hv_is_isolation_supported())
@@ -998,7 +988,7 @@ void netvsc_dma_unmap(struct hv_device *hv_dev,
if (!packet->dma_range)
return;
- for (i = 0; i < page_count; i++)
+ for (i = 0; i < packet->page_buf_cnt; i++)
dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
packet->dma_range[i].mapping_size,
DMA_TO_DEVICE);
@@ -1028,9 +1018,7 @@ static int netvsc_dma_map(struct hv_device *hv_dev,
struct hv_netvsc_packet *packet,
struct hv_page_buffer *pb)
{
- u32 page_count = packet->cp_partial ?
- packet->page_buf_cnt - packet->rmsg_pgcnt :
- packet->page_buf_cnt;
+ u32 page_count = packet->page_buf_cnt;
dma_addr_t dma;
int i;
@@ -1039,7 +1027,7 @@ static int netvsc_dma_map(struct hv_device *hv_dev,
packet->dma_range = kcalloc(page_count,
sizeof(*packet->dma_range),
- GFP_KERNEL);
+ GFP_ATOMIC);
if (!packet->dma_range)
return -ENOMEM;
@@ -1066,6 +1054,42 @@ static int netvsc_dma_map(struct hv_device *hv_dev,
return 0;
}
+/* Build an "array" of mpb entries describing the data to be transferred
+ * over VMBus. After the desc header fields, each "array" entry is variable
+ * size, and each entry starts after the end of the previous entry. The
+ * "offset" and "len" fields for each entry imply the size of the entry.
+ *
+ * The pfns are in HV_HYP_PAGE_SIZE, because all communication with Hyper-V
+ * uses that granularity, even if the system page size of the guest is larger.
+ * Each entry in the input "pb" array must describe a contiguous range of
+ * guest physical memory so that the pfns are sequential if the range crosses
+ * a page boundary. The offset field must be < HV_HYP_PAGE_SIZE.
+ */
+static inline void netvsc_build_mpb_array(struct hv_page_buffer *pb,
+ u32 page_buffer_count,
+ struct vmbus_packet_mpb_array *desc,
+ u32 *desc_size)
+{
+ struct hv_mpb_array *mpb_entry = &desc->range;
+ int i, j;
+
+ for (i = 0; i < page_buffer_count; i++) {
+ u32 offset = pb[i].offset;
+ u32 len = pb[i].len;
+
+ mpb_entry->offset = offset;
+ mpb_entry->len = len;
+
+ for (j = 0; j < HVPFN_UP(offset + len); j++)
+ mpb_entry->pfn_array[j] = pb[i].pfn + j;
+
+ mpb_entry = (struct hv_mpb_array *)&mpb_entry->pfn_array[j];
+ }
+
+ desc->rangecount = page_buffer_count;
+ *desc_size = (char *)mpb_entry - (char *)desc;
+}
+
static inline int netvsc_send_pkt(
struct hv_device *device,
struct hv_netvsc_packet *packet,
@@ -1108,8 +1132,11 @@ static inline int netvsc_send_pkt(
packet->dma_range = NULL;
if (packet->page_buf_cnt) {
+ struct vmbus_channel_packet_page_buffer desc;
+ u32 desc_size;
+
if (packet->cp_partial)
- pb += packet->rmsg_pgcnt;
+ pb++;
ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
if (ret) {
@@ -1117,11 +1144,12 @@ static inline int netvsc_send_pkt(
goto exit;
}
- ret = vmbus_sendpacket_pagebuffer(out_channel,
- pb, packet->page_buf_cnt,
- &nvmsg, sizeof(nvmsg),
- req_id);
-
+ netvsc_build_mpb_array(pb, packet->page_buf_cnt,
+ (struct vmbus_packet_mpb_array *)&desc,
+ &desc_size);
+ ret = vmbus_sendpacket_mpb_desc(out_channel,
+ (struct vmbus_packet_mpb_array *)&desc,
+ desc_size, &nvmsg, sizeof(nvmsg), req_id);
if (ret)
netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
} else {
@@ -1270,7 +1298,7 @@ int netvsc_send(struct net_device *ndev,
packet->send_buf_index = section_index;
if (packet->cp_partial) {
- packet->page_buf_cnt -= packet->rmsg_pgcnt;
+ packet->page_buf_cnt--;
packet->total_data_buflen = msd_len + packet->rmsg_size;
} else {
packet->page_buf_cnt = 0;
@@ -1784,6 +1812,11 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
/* Enable NAPI handler before init callbacks */
netif_napi_add(ndev, &net_device->chan_table[0].napi, netvsc_poll);
+ napi_enable(&net_device->chan_table[0].napi);
+ netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX,
+ &net_device->chan_table[0].napi);
+ netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX,
+ &net_device->chan_table[0].napi);
/* Open the channel */
device->channel->next_request_id_callback = vmbus_next_request_id;
@@ -1803,8 +1836,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
/* Channel is opened */
netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");
- napi_enable(&net_device->chan_table[0].napi);
-
/* Connect with the NetVsp */
ret = netvsc_connect_vsp(device, net_device, device_info);
if (ret != 0) {
@@ -1822,21 +1853,17 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
close:
RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
- napi_disable(&net_device->chan_table[0].napi);
/* Now, we can close the channel safely */
vmbus_close(device->channel);
cleanup:
+ netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, NULL);
+ netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, NULL);
+ napi_disable(&net_device->chan_table[0].napi);
netif_napi_del(&net_device->chan_table[0].napi);
cleanup2:
- if (net_device->recv_original_buf)
- hv_unmap_memory(net_device->recv_buf);
-
- if (net_device->send_original_buf)
- hv_unmap_memory(net_device->send_buf);
-
free_netvsc_device(&net_device->rcu);
return ERR_PTR(ret);
diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c
index 4a9522689fa4..1dd3755d9e6d 100644
--- a/drivers/net/hyperv/netvsc_bpf.c
+++ b/drivers/net/hyperv/netvsc_bpf.c
@@ -183,7 +183,7 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
xdp.command = XDP_SETUP_PROG;
xdp.prog = prog;
- ret = vf_netdev->netdev_ops->ndo_bpf(vf_netdev, &xdp);
+ ret = netif_xdp_propagate(vf_netdev, &xdp);
if (ret && prog)
bpf_prog_put(prog);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index f9b219e6cd58..3d47d749ef9f 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -29,6 +29,7 @@
#include <linux/bpf.h>
#include <net/arp.h>
+#include <net/netdev_lock.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
@@ -42,9 +43,13 @@
#define LINKCHANGE_INT (2 * HZ)
#define VF_TAKEOVER_INT (HZ / 10)
+/* Macros to define the context of vf registration */
+#define VF_REG_IN_PROBE 1
+#define VF_REG_IN_NOTIFIER 2
+
static unsigned int ring_size __ro_after_init = 128;
module_param(ring_size, uint, 0444);
-MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
+MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)");
unsigned int netvsc_ring_bytes __ro_after_init;
static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
@@ -321,43 +326,10 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
return txq;
}
-static u32 fill_pg_buf(unsigned long hvpfn, u32 offset, u32 len,
- struct hv_page_buffer *pb)
-{
- int j = 0;
-
- hvpfn += offset >> HV_HYP_PAGE_SHIFT;
- offset = offset & ~HV_HYP_PAGE_MASK;
-
- while (len > 0) {
- unsigned long bytes;
-
- bytes = HV_HYP_PAGE_SIZE - offset;
- if (bytes > len)
- bytes = len;
- pb[j].pfn = hvpfn;
- pb[j].offset = offset;
- pb[j].len = bytes;
-
- offset += bytes;
- len -= bytes;
-
- if (offset == HV_HYP_PAGE_SIZE && len) {
- hvpfn++;
- offset = 0;
- j++;
- }
- }
-
- return j + 1;
-}
-
static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
struct hv_netvsc_packet *packet,
struct hv_page_buffer *pb)
{
- u32 slots_used = 0;
- char *data = skb->data;
int frags = skb_shinfo(skb)->nr_frags;
int i;
@@ -366,28 +338,27 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
* 2. skb linear data
* 3. skb fragment data
*/
- slots_used += fill_pg_buf(virt_to_hvpfn(hdr),
- offset_in_hvpage(hdr),
- len,
- &pb[slots_used]);
+ pb[0].offset = offset_in_hvpage(hdr);
+ pb[0].len = len;
+ pb[0].pfn = virt_to_hvpfn(hdr);
packet->rmsg_size = len;
- packet->rmsg_pgcnt = slots_used;
- slots_used += fill_pg_buf(virt_to_hvpfn(data),
- offset_in_hvpage(data),
- skb_headlen(skb),
- &pb[slots_used]);
+ pb[1].offset = offset_in_hvpage(skb->data);
+ pb[1].len = skb_headlen(skb);
+ pb[1].pfn = virt_to_hvpfn(skb->data);
for (i = 0; i < frags; i++) {
skb_frag_t *frag = skb_shinfo(skb)->frags + i;
+ struct hv_page_buffer *cur_pb = &pb[i + 2];
+ u64 pfn = page_to_hvpfn(skb_frag_page(frag));
+ u32 offset = skb_frag_off(frag);
- slots_used += fill_pg_buf(page_to_hvpfn(skb_frag_page(frag)),
- skb_frag_off(frag),
- skb_frag_size(frag),
- &pb[slots_used]);
+ cur_pb->offset = offset_in_hvpage(offset);
+ cur_pb->len = skb_frag_size(frag);
+ cur_pb->pfn = pfn + (offset >> HV_HYP_PAGE_SHIFT);
}
- return slots_used;
+ return frags + 2;
}
static int count_skb_frag_slots(struct sk_buff *skb)
@@ -478,7 +449,7 @@ static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx)
struct net_device *vf_netdev;
u32 rndis_msg_size;
u32 hash;
- struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT];
+ struct hv_page_buffer pb[MAX_DATA_RANGES];
/* If VF is present and up then redirect packets to it.
* Skip the VF if it is marked down or has no carrier.
@@ -983,7 +954,8 @@ struct netvsc_device_info *netvsc_devinfo_get(struct netvsc_device *nvdev)
dev_info->bprog = prog;
}
} else {
- dev_info->num_chn = VRSS_CHANNEL_DEFAULT;
+ dev_info->num_chn = max(VRSS_CHANNEL_DEFAULT,
+ netif_get_num_default_rss_queues());
dev_info->send_sections = NETVSC_DEFAULT_TX;
dev_info->send_section_size = NETVSC_SEND_SECTION_SIZE;
dev_info->recv_sections = NETVSC_DEFAULT_RX;
@@ -1229,14 +1201,14 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
if (ret)
goto rollback_vf;
- ndev->mtu = mtu;
+ WRITE_ONCE(ndev->mtu, mtu);
ret = netvsc_attach(ndev, device_info);
if (!ret)
goto out;
/* Attempt rollback to original MTU */
- ndev->mtu = orig_mtu;
+ WRITE_ONCE(ndev->mtu, orig_mtu);
if (netvsc_attach(ndev, device_info))
netdev_err(ndev, "restoring mtu failed\n");
@@ -1399,7 +1371,7 @@ static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
struct net_device_context *ndc = netdev_priv(ndev);
struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev);
struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
- struct sockaddr *addr = p;
+ struct sockaddr_storage *addr = p;
int err;
err = eth_prepare_mac_addr_change(ndev, p);
@@ -1415,12 +1387,12 @@ static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
return err;
}
- err = rndis_filter_set_device_mac(nvdev, addr->sa_data);
+ err = rndis_filter_set_device_mac(nvdev, addr->__data);
if (!err) {
eth_commit_mac_addr_change(ndev, p);
} else if (vf_netdev) {
/* rollback change on VF */
- memcpy(addr->sa_data, ndev->dev_addr, ETH_ALEN);
+ memcpy(addr->__data, ndev->dev_addr, ETH_ALEN);
dev_set_mac_address(vf_netdev, addr, NULL);
}
@@ -1552,7 +1524,7 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
data[i++] = xdp_tx;
}
- pcpu_sum = kvmalloc_array(num_possible_cpus(),
+ pcpu_sum = kvmalloc_array(nr_cpu_ids,
sizeof(struct netvsc_ethtool_pcpu_stats),
GFP_KERNEL);
if (!pcpu_sum)
@@ -1582,10 +1554,10 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
switch (stringset) {
case ETH_SS_STATS:
for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
- ethtool_sprintf(&p, netvsc_stats[i].name);
+ ethtool_puts(&p, netvsc_stats[i].name);
for (i = 0; i < ARRAY_SIZE(vf_stats); i++)
- ethtool_sprintf(&p, vf_stats[i].name);
+ ethtool_puts(&p, vf_stats[i].name);
for (i = 0; i < nvdev->num_chn; i++) {
ethtool_sprintf(&p, "tx_queue_%u_packets", i);
@@ -1608,9 +1580,10 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
}
static int
-netvsc_get_rss_hash_opts(struct net_device_context *ndc,
- struct ethtool_rxnfc *info)
+netvsc_get_rxfh_fields(struct net_device *ndev,
+ struct ethtool_rxfh_fields *info)
{
+ struct net_device_context *ndc = netdev_priv(ndev);
const u32 l4_flag = RXH_L4_B_0_1 | RXH_L4_B_2_3;
info->data = RXH_IP_SRC | RXH_IP_DST;
@@ -1651,30 +1624,24 @@ netvsc_get_rss_hash_opts(struct net_device_context *ndc,
return 0;
}
-static int
-netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
- u32 *rules)
+static u32 netvsc_get_rx_ring_count(struct net_device *dev)
{
struct net_device_context *ndc = netdev_priv(dev);
struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
if (!nvdev)
- return -ENODEV;
-
- switch (info->cmd) {
- case ETHTOOL_GRXRINGS:
- info->data = nvdev->num_chn;
return 0;
- case ETHTOOL_GRXFH:
- return netvsc_get_rss_hash_opts(ndc, info);
- }
- return -EOPNOTSUPP;
+ return nvdev->num_chn;
}
-static int netvsc_set_rss_hash_opts(struct net_device_context *ndc,
- struct ethtool_rxnfc *info)
+static int
+netvsc_set_rxfh_fields(struct net_device *dev,
+ const struct ethtool_rxfh_fields *info,
+ struct netlink_ext_ack *extack)
{
+ struct net_device_context *ndc = netdev_priv(dev);
+
if (info->data == (RXH_IP_SRC | RXH_IP_DST |
RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
switch (info->flow_type) {
@@ -1729,17 +1696,6 @@ static int netvsc_set_rss_hash_opts(struct net_device_context *ndc,
return -EOPNOTSUPP;
}
-static int
-netvsc_set_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *info)
-{
- struct net_device_context *ndc = netdev_priv(ndev);
-
- if (info->cmd == ETHTOOL_SRXFH)
- return netvsc_set_rss_hash_opts(ndc, info);
-
- return -EOPNOTSUPP;
-}
-
static u32 netvsc_get_rxfh_key_size(struct net_device *dev)
{
return NETVSC_HASH_KEYLEN;
@@ -1747,11 +1703,13 @@ static u32 netvsc_get_rxfh_key_size(struct net_device *dev)
static u32 netvsc_rss_indir_size(struct net_device *dev)
{
- return ITAB_NUM;
+ struct net_device_context *ndc = netdev_priv(dev);
+
+ return ndc->rx_table_sz;
}
-static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
- u8 *hfunc)
+static int netvsc_get_rxfh(struct net_device *dev,
+ struct ethtool_rxfh_param *rxfh)
{
struct net_device_context *ndc = netdev_priv(dev);
struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
@@ -1761,47 +1719,49 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
if (!ndev)
return -ENODEV;
- if (hfunc)
- *hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */
+ rxfh->hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */
rndis_dev = ndev->extension;
- if (indir) {
- for (i = 0; i < ITAB_NUM; i++)
- indir[i] = ndc->rx_table[i];
+ if (rxfh->indir) {
+ for (i = 0; i < ndc->rx_table_sz; i++)
+ rxfh->indir[i] = ndc->rx_table[i];
}
- if (key)
- memcpy(key, rndis_dev->rss_key, NETVSC_HASH_KEYLEN);
+ if (rxfh->key)
+ memcpy(rxfh->key, rndis_dev->rss_key, NETVSC_HASH_KEYLEN);
return 0;
}
-static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
- const u8 *key, const u8 hfunc)
+static int netvsc_set_rxfh(struct net_device *dev,
+ struct ethtool_rxfh_param *rxfh,
+ struct netlink_ext_ack *extack)
{
struct net_device_context *ndc = netdev_priv(dev);
struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
struct rndis_device *rndis_dev;
+ u8 *key = rxfh->key;
int i;
if (!ndev)
return -ENODEV;
- if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+ if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
+ rxfh->hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;
rndis_dev = ndev->extension;
- if (indir) {
- for (i = 0; i < ITAB_NUM; i++)
- if (indir[i] >= ndev->num_chn)
+ if (rxfh->indir) {
+ for (i = 0; i < ndc->rx_table_sz; i++)
+ if (rxfh->indir[i] >= ndev->num_chn)
return -EINVAL;
- for (i = 0; i < ITAB_NUM; i++)
- ndc->rx_table[i] = indir[i];
+ for (i = 0; i < ndc->rx_table_sz; i++)
+ ndc->rx_table[i] = rxfh->indir[i];
}
if (!key) {
- if (!indir)
+ if (!rxfh->indir)
return 0;
key = rndis_dev->rss_key;
@@ -2002,12 +1962,13 @@ static const struct ethtool_ops ethtool_ops = {
.get_channels = netvsc_get_channels,
.set_channels = netvsc_set_channels,
.get_ts_info = ethtool_op_get_ts_info,
- .get_rxnfc = netvsc_get_rxnfc,
- .set_rxnfc = netvsc_set_rxnfc,
+ .get_rx_ring_count = netvsc_get_rx_ring_count,
.get_rxfh_key_size = netvsc_get_rxfh_key_size,
.get_rxfh_indir_size = netvsc_rss_indir_size,
.get_rxfh = netvsc_get_rxfh,
.set_rxfh = netvsc_set_rxfh,
+ .get_rxfh_fields = netvsc_get_rxfh_fields,
+ .set_rxfh_fields = netvsc_set_rxfh_fields,
.get_link_ksettings = netvsc_get_link_ksettings,
.set_link_ksettings = netvsc_set_link_ksettings,
.get_ringparam = netvsc_get_ringparam,
@@ -2181,7 +2142,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
}
static int netvsc_vf_join(struct net_device *vf_netdev,
- struct net_device *ndev)
+ struct net_device *ndev, int context)
{
struct net_device_context *ndev_ctx = netdev_priv(ndev);
int ret;
@@ -2204,10 +2165,11 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
goto upper_link_failed;
}
- /* set slave flag before open to prevent IPv6 addrconf */
- vf_netdev->flags |= IFF_SLAVE;
-
- schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
+ /* If this registration is called from probe context vf_takeover
+ * is taken care of later in probe itself.
+ */
+ if (context == VF_REG_IN_NOTIFIER)
+ schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
@@ -2313,16 +2275,18 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
}
- /* Fallback path to check synthetic vf with
- * help of mac addr
+ /* Fallback path to check synthetic vf with help of mac addr.
+ * Because this function can be called before vf_netdev is
+ * initialized (NETDEV_POST_INIT) when its perm_addr has not been copied
+ * from dev_addr, also try to match to its dev_addr.
+ * Note: On Hyper-V and Azure, it's not possible to set a MAC address
+ * on a VF that matches to the MAC of a unrelated NETVSC device.
*/
list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
ndev = hv_get_drvdata(ndev_ctx->device_ctx);
- if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr)) {
- netdev_notice(vf_netdev,
- "falling back to mac addr based matching\n");
+ if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr) ||
+ ether_addr_equal(vf_netdev->dev_addr, ndev->perm_addr))
return ndev;
- }
}
netdev_notice(vf_netdev,
@@ -2330,7 +2294,23 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
return NULL;
}
-static int netvsc_register_vf(struct net_device *vf_netdev)
+static int netvsc_prepare_bonding(struct net_device *vf_netdev)
+{
+ struct net_device *ndev;
+
+ ndev = get_netvsc_byslot(vf_netdev);
+ if (!ndev)
+ return NOTIFY_DONE;
+
+ /* Set slave flag and no addrconf flag before open
+ * to prevent IPv6 addrconf.
+ */
+ vf_netdev->flags |= IFF_SLAVE;
+ vf_netdev->priv_flags |= IFF_NO_ADDRCONF;
+ return NOTIFY_DONE;
+}
+
+static int netvsc_register_vf(struct net_device *vf_netdev, int context)
{
struct net_device_context *net_device_ctx;
struct netvsc_device *netvsc_dev;
@@ -2370,7 +2350,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
- if (netvsc_vf_join(vf_netdev, ndev) != 0)
+ if (netvsc_vf_join(vf_netdev, ndev, context) != 0)
return NOTIFY_DONE;
dev_hold(vf_netdev);
@@ -2436,6 +2416,21 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event)
} else {
netdev_info(ndev, "Data path switched %s VF: %s\n",
vf_is_up ? "to" : "from", vf_netdev->name);
+
+ /* In Azure, when accelerated networking in enabled, other NICs
+ * like MANA, MLX, are configured as a bonded nic with
+ * Netvsc(failover) NIC. For bonded NICs, the min of the max
+ * pkt aggregate size of the members is propagated in the stack.
+ * In order to allow these NICs (MANA/MLX) to use up to
+ * GSO_MAX_SIZE gso packet size, we need to allow Netvsc NIC to
+ * also support this in the guest.
+ * This value is only increased for netvsc NIC when datapath is
+ * switched over to the VF
+ */
+ if (vf_is_up)
+ netif_set_tso_max_size(ndev, vf_netdev->tso_max_size);
+ else
+ netif_set_tso_max_size(ndev, netvsc_dev->netvsc_gso_max_size);
}
return NOTIFY_OK;
@@ -2455,8 +2450,6 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
- netvsc_vf_setxdp(vf_netdev, NULL);
-
reinit_completion(&net_device_ctx->vf_add);
netdev_rx_handler_unregister(vf_netdev);
netdev_upper_dev_unlink(vf_netdev, ndev);
@@ -2468,10 +2461,31 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
return NOTIFY_OK;
}
+static int check_dev_is_matching_vf(struct net_device *event_ndev)
+{
+ /* Skip NetVSC interfaces */
+ if (event_ndev->netdev_ops == &device_ops)
+ return -ENODEV;
+
+ /* Avoid non-Ethernet type devices */
+ if (event_ndev->type != ARPHRD_ETHER)
+ return -ENODEV;
+
+ /* Avoid Vlan dev with same MAC registering as VF */
+ if (is_vlan_dev(event_ndev))
+ return -ENODEV;
+
+ /* Avoid Bonding master dev with same MAC registering as VF */
+ if (netif_is_bond_master(event_ndev))
+ return -ENODEV;
+
+ return 0;
+}
+
static int netvsc_probe(struct hv_device *dev,
const struct hv_vmbus_device_id *dev_id)
{
- struct net_device *net = NULL;
+ struct net_device *net = NULL, *vf_netdev;
struct net_device_context *net_device_ctx;
struct netvsc_device_info *device_info = NULL;
struct netvsc_device *nvdev;
@@ -2501,6 +2515,7 @@ static int netvsc_probe(struct hv_device *dev,
spin_lock_init(&net_device_ctx->lock);
INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
+ INIT_DELAYED_WORK(&net_device_ctx->vfns_work, netvsc_vfns_work);
net_device_ctx->vf_stats
= netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats);
@@ -2529,15 +2544,6 @@ static int netvsc_probe(struct hv_device *dev,
goto devinfo_failed;
}
- nvdev = rndis_filter_device_add(dev, device_info);
- if (IS_ERR(nvdev)) {
- ret = PTR_ERR(nvdev);
- netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
- goto rndis_failed;
- }
-
- eth_hw_addr_set(net, device_info->mac_adr);
-
/* We must get rtnl lock before scheduling nvdev->subchan_work,
* otherwise netvsc_subchan_work() can get rtnl lock first and wait
* all subchannels to show up, but that may not happen because
@@ -2545,9 +2551,23 @@ static int netvsc_probe(struct hv_device *dev,
* -> ... -> device_add() -> ... -> __device_attach() can't get
* the device lock, so all the subchannels can't be processed --
* finally netvsc_subchan_work() hangs forever.
+ *
+ * The rtnl lock also needs to be held before rndis_filter_device_add()
+ * which advertises nvsp_2_vsc_capability / sriov bit, and triggers
+ * VF NIC offering and registering. If VF NIC finished register_netdev()
+ * earlier it may cause name based config failure.
*/
rtnl_lock();
+ nvdev = rndis_filter_device_add(dev, device_info);
+ if (IS_ERR(nvdev)) {
+ ret = PTR_ERR(nvdev);
+ netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
+ goto rndis_failed;
+ }
+
+ eth_hw_addr_set(net, device_info->mac_adr);
+
if (nvdev->num_chn > 1)
schedule_work(&nvdev->subchan_work);
@@ -2559,6 +2579,9 @@ static int netvsc_probe(struct hv_device *dev,
netdev_lockdep_set_classes(net);
+ net->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_NDO_XMIT;
+
/* MTU range: 68 - 1500 or 65521 */
net->min_mtu = NETVSC_MTU_MIN;
if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
@@ -2575,15 +2598,41 @@ static int netvsc_probe(struct hv_device *dev,
}
list_add(&net_device_ctx->list, &netvsc_dev_list);
+
+ /* When the hv_netvsc driver is unloaded and reloaded, the
+ * NET_DEVICE_REGISTER for the vf device is replayed before probe
+ * is complete. This is because register_netdevice_notifier() gets
+ * registered before vmbus_driver_register() so that callback func
+ * is set before probe and we don't miss events like NETDEV_POST_INIT
+ * So, in this section we try to register the matching vf device that
+ * is present as a netdevice, knowing that its register call is not
+ * processed in the netvsc_netdev_notifier(as probing is progress and
+ * get_netvsc_byslot fails).
+ */
+ for_each_netdev(dev_net(net), vf_netdev) {
+ ret = check_dev_is_matching_vf(vf_netdev);
+ if (ret != 0)
+ continue;
+
+ if (net != get_netvsc_byslot(vf_netdev))
+ continue;
+
+ netvsc_prepare_bonding(vf_netdev);
+ netdev_lock_ops(vf_netdev);
+ netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
+ netdev_unlock_ops(vf_netdev);
+ __netvsc_vf_setup(net, vf_netdev);
+ break;
+ }
rtnl_unlock();
netvsc_devinfo_put(device_info);
return 0;
register_failed:
- rtnl_unlock();
rndis_filter_device_remove(dev, nvdev);
rndis_failed:
+ rtnl_unlock();
netvsc_devinfo_put(device_info);
devinfo_failed:
free_percpu(net_device_ctx->vf_stats);
@@ -2594,7 +2643,7 @@ no_net:
return ret;
}
-static int netvsc_remove(struct hv_device *dev)
+static void netvsc_remove(struct hv_device *dev)
{
struct net_device_context *ndev_ctx;
struct net_device *vf_netdev, *net;
@@ -2603,7 +2652,7 @@ static int netvsc_remove(struct hv_device *dev)
net = hv_get_drvdata(dev);
if (net == NULL) {
dev_err(&dev->device, "No net device to remove\n");
- return 0;
+ return;
}
ndev_ctx = netdev_priv(net);
@@ -2611,6 +2660,8 @@ static int netvsc_remove(struct hv_device *dev)
cancel_delayed_work_sync(&ndev_ctx->dwork);
rtnl_lock();
+ cancel_delayed_work_sync(&ndev_ctx->vfns_work);
+
nvdev = rtnl_dereference(ndev_ctx->nvdev);
if (nvdev) {
cancel_work_sync(&nvdev->subchan_work);
@@ -2637,7 +2688,6 @@ static int netvsc_remove(struct hv_device *dev)
free_percpu(ndev_ctx->vf_stats);
free_netdev(net);
- return 0;
}
static int netvsc_suspend(struct hv_device *dev)
@@ -2653,6 +2703,7 @@ static int netvsc_suspend(struct hv_device *dev)
cancel_delayed_work_sync(&ndev_ctx->dwork);
rtnl_lock();
+ cancel_delayed_work_sync(&ndev_ctx->vfns_work);
nvdev = rtnl_dereference(ndev_ctx->nvdev);
if (nvdev == NULL) {
@@ -2721,6 +2772,52 @@ static struct hv_driver netvsc_drv = {
},
};
+/* Set VF's namespace same as the synthetic NIC */
+static void netvsc_event_set_vf_ns(struct net_device *ndev)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ struct net_device *vf_netdev;
+ int ret;
+
+ vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+ if (!vf_netdev)
+ return;
+
+ if (!net_eq(dev_net(ndev), dev_net(vf_netdev))) {
+ ret = dev_change_net_namespace(vf_netdev, dev_net(ndev),
+ "eth%d");
+ if (ret)
+ netdev_err(vf_netdev,
+ "Cannot move to same namespace as %s: %d\n",
+ ndev->name, ret);
+ else
+ netdev_info(vf_netdev,
+ "Moved VF to namespace with: %s\n",
+ ndev->name);
+ }
+}
+
+void netvsc_vfns_work(struct work_struct *w)
+{
+ struct net_device_context *ndev_ctx =
+ container_of(w, struct net_device_context, vfns_work.work);
+ struct net_device *ndev;
+
+ if (!rtnl_trylock()) {
+ schedule_delayed_work(&ndev_ctx->vfns_work, 1);
+ return;
+ }
+
+ ndev = hv_get_drvdata(ndev_ctx->device_ctx);
+ if (!ndev)
+ goto out;
+
+ netvsc_event_set_vf_ns(ndev);
+
+out:
+ rtnl_unlock();
+}
+
/*
* On Hyper-V, every VF interface is matched with a corresponding
* synthetic interface. The synthetic interface is presented first
@@ -2731,26 +2828,24 @@ static int netvsc_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+ struct net_device_context *ndev_ctx;
+ int ret = 0;
- /* Skip our own events */
- if (event_dev->netdev_ops == &device_ops)
- return NOTIFY_DONE;
-
- /* Avoid non-Ethernet type devices */
- if (event_dev->type != ARPHRD_ETHER)
- return NOTIFY_DONE;
-
- /* Avoid Vlan dev with same MAC registering as VF */
- if (is_vlan_dev(event_dev))
+ if (event_dev->netdev_ops == &device_ops && event == NETDEV_REGISTER) {
+ ndev_ctx = netdev_priv(event_dev);
+ schedule_delayed_work(&ndev_ctx->vfns_work, 0);
return NOTIFY_DONE;
+ }
- /* Avoid Bonding master dev with same MAC registering as VF */
- if (netif_is_bond_master(event_dev))
+ ret = check_dev_is_matching_vf(event_dev);
+ if (ret != 0)
return NOTIFY_DONE;
switch (event) {
+ case NETDEV_POST_INIT:
+ return netvsc_prepare_bonding(event_dev);
case NETDEV_REGISTER:
- return netvsc_register_vf(event_dev);
+ return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER);
case NETDEV_UNREGISTER:
return netvsc_unregister_vf(event_dev);
case NETDEV_UP:
@@ -2782,14 +2877,19 @@ static int __init netvsc_drv_init(void)
pr_info("Increased ring_size to %u (min allowed)\n",
ring_size);
}
- netvsc_ring_bytes = ring_size * PAGE_SIZE;
+ netvsc_ring_bytes = VMBUS_RING_SIZE(ring_size * 4096);
+
+ register_netdevice_notifier(&netvsc_netdev_notifier);
ret = vmbus_driver_register(&netvsc_drv);
if (ret)
- return ret;
+ goto err_vmbus_reg;
- register_netdevice_notifier(&netvsc_netdev_notifier);
return 0;
+
+err_vmbus_reg:
+ unregister_netdevice_notifier(&netvsc_netdev_notifier);
+ return ret;
}
MODULE_LICENSE("GPL");
diff --git a/drivers/net/hyperv/netvsc_trace.h b/drivers/net/hyperv/netvsc_trace.h
index f7585563dea5..05e620cbdd29 100644
--- a/drivers/net/hyperv/netvsc_trace.h
+++ b/drivers/net/hyperv/netvsc_trace.h
@@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(rndis_msg_class,
__field( u32, msg_len )
),
TP_fast_assign(
- __assign_str(name, ndev->name);
+ __assign_str(name);
__entry->queue = q;
__entry->req_id = msg->msg.init_req.req_id;
__entry->msg_type = msg->ndis_msg_type;
@@ -121,7 +121,7 @@ TRACE_EVENT(nvsp_send,
__field( u32, msg_type )
),
TP_fast_assign(
- __assign_str(name, ndev->name);
+ __assign_str(name);
__entry->msg_type = msg->hdr.msg_type;
),
TP_printk("dev=%s type=%s",
@@ -142,7 +142,7 @@ TRACE_EVENT(nvsp_send_pkt,
__field( u32, section_size )
),
TP_fast_assign(
- __assign_str(name, ndev->name);
+ __assign_str(name);
__entry->qid = chan->offermsg.offer.sub_channel_index;
__entry->channel_type = rpkt->channel_type;
__entry->section_index = rpkt->send_buf_section_index;
@@ -165,7 +165,7 @@ TRACE_EVENT(nvsp_recv,
__field( u32, msg_type )
),
TP_fast_assign(
- __assign_str(name, ndev->name);
+ __assign_str(name);
__entry->qid = chan->offermsg.offer.sub_channel_index;
__entry->msg_type = msg->hdr.msg_type;
),
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index eea777ec2541..c35f9685b6bf 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -225,8 +225,7 @@ static int rndis_filter_send_request(struct rndis_device *dev,
struct rndis_request *req)
{
struct hv_netvsc_packet *packet;
- struct hv_page_buffer page_buf[2];
- struct hv_page_buffer *pb = page_buf;
+ struct hv_page_buffer pb;
int ret;
/* Setup the packet to send it */
@@ -235,27 +234,14 @@ static int rndis_filter_send_request(struct rndis_device *dev,
packet->total_data_buflen = req->request_msg.msg_len;
packet->page_buf_cnt = 1;
- pb[0].pfn = virt_to_phys(&req->request_msg) >>
- HV_HYP_PAGE_SHIFT;
- pb[0].len = req->request_msg.msg_len;
- pb[0].offset = offset_in_hvpage(&req->request_msg);
-
- /* Add one page_buf when request_msg crossing page boundary */
- if (pb[0].offset + pb[0].len > HV_HYP_PAGE_SIZE) {
- packet->page_buf_cnt++;
- pb[0].len = HV_HYP_PAGE_SIZE -
- pb[0].offset;
- pb[1].pfn = virt_to_phys((void *)&req->request_msg
- + pb[0].len) >> HV_HYP_PAGE_SHIFT;
- pb[1].offset = 0;
- pb[1].len = req->request_msg.msg_len -
- pb[0].len;
- }
+ pb.pfn = virt_to_phys(&req->request_msg) >> HV_HYP_PAGE_SHIFT;
+ pb.len = req->request_msg.msg_len;
+ pb.offset = offset_in_hvpage(&req->request_msg);
trace_rndis_send(dev->ndev, 0, &req->request_msg);
rcu_read_lock_bh();
- ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL, false);
+ ret = netvsc_send(dev->ndev, packet, NULL, &pb, NULL, false);
rcu_read_unlock_bh();
return ret;
@@ -927,7 +913,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev,
struct rndis_set_request *set;
struct rndis_set_complete *set_complete;
u32 extlen = sizeof(struct ndis_recv_scale_param) +
- 4 * ITAB_NUM + NETVSC_HASH_KEYLEN;
+ 4 * ndc->rx_table_sz + NETVSC_HASH_KEYLEN;
struct ndis_recv_scale_param *rssp;
u32 *itab;
u8 *keyp;
@@ -953,7 +939,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev,
rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 |
NDIS_HASH_TCP_IPV4 | NDIS_HASH_IPV6 |
NDIS_HASH_TCP_IPV6;
- rssp->indirect_tabsize = 4*ITAB_NUM;
+ rssp->indirect_tabsize = 4 * ndc->rx_table_sz;
rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param);
rssp->hashkey_size = NETVSC_HASH_KEYLEN;
rssp->hashkey_offset = rssp->indirect_taboffset +
@@ -961,7 +947,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev,
/* Set indirection table entries */
itab = (u32 *)(rssp + 1);
- for (i = 0; i < ITAB_NUM; i++)
+ for (i = 0; i < ndc->rx_table_sz; i++)
itab[i] = ndc->rx_table[i];
/* Set hask key values */
@@ -1266,13 +1252,27 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
new_sc->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
new_sc->max_pkt_size = NETVSC_MAX_PKT_SIZE;
+ /* Enable napi before opening the vmbus channel to avoid races
+ * as the host placing data on the host->guest ring may be left
+ * out if napi was not enabled.
+ */
+ napi_enable(&nvchan->napi);
+ netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_RX,
+ &nvchan->napi);
+ netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_TX,
+ &nvchan->napi);
+
ret = vmbus_open(new_sc, netvsc_ring_bytes,
netvsc_ring_bytes, NULL, 0,
netvsc_channel_cb, nvchan);
- if (ret == 0)
- napi_enable(&nvchan->napi);
- else
+ if (ret != 0) {
netdev_notice(ndev, "sub channel open failed: %d\n", ret);
+ netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_TX,
+ NULL);
+ netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_RX,
+ NULL);
+ napi_disable(&nvchan->napi);
+ }
if (atomic_inc_return(&nvscdev->open_chn) == nvscdev->num_chn)
wake_up(&nvscdev->subchan_open);
@@ -1351,9 +1351,10 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
struct net_device_context *net_device_ctx = netdev_priv(net);
struct ndis_offload hwcaps;
struct ndis_offload_params offloads;
- unsigned int gso_max_size = GSO_LEGACY_MAX_SIZE;
int ret;
+ nvdev->netvsc_gso_max_size = GSO_LEGACY_MAX_SIZE;
+
/* Find HW offload capabilities */
ret = rndis_query_hwcaps(rndis_device, nvdev, &hwcaps);
if (ret != 0)
@@ -1385,8 +1386,8 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED;
net->hw_features |= NETIF_F_TSO;
- if (hwcaps.lsov2.ip4_maxsz < gso_max_size)
- gso_max_size = hwcaps.lsov2.ip4_maxsz;
+ if (hwcaps.lsov2.ip4_maxsz < nvdev->netvsc_gso_max_size)
+ nvdev->netvsc_gso_max_size = hwcaps.lsov2.ip4_maxsz;
}
if (hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) {
@@ -1406,8 +1407,8 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
offloads.lso_v2_ipv6 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED;
net->hw_features |= NETIF_F_TSO6;
- if (hwcaps.lsov2.ip6_maxsz < gso_max_size)
- gso_max_size = hwcaps.lsov2.ip6_maxsz;
+ if (hwcaps.lsov2.ip6_maxsz < nvdev->netvsc_gso_max_size)
+ nvdev->netvsc_gso_max_size = hwcaps.lsov2.ip6_maxsz;
}
if (hwcaps.csum.ip6_txcsum & NDIS_TXCSUM_CAP_UDP6) {
@@ -1433,7 +1434,7 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
*/
net->features &= ~NETVSC_SUPPORTED_HW_FEATURES | net->hw_features;
- netif_set_tso_max_size(net, gso_max_size);
+ netif_set_tso_max_size(net, nvdev->netvsc_gso_max_size);
ret = rndis_filter_set_offload_params(net, nvdev, &offloads);
@@ -1548,6 +1549,18 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
if (ret || rsscap.num_recv_que < 2)
goto out;
+ if (rsscap.num_indirect_tabent &&
+ rsscap.num_indirect_tabent <= ITAB_NUM_MAX)
+ ndc->rx_table_sz = rsscap.num_indirect_tabent;
+ else
+ ndc->rx_table_sz = ITAB_NUM;
+
+ ndc->rx_table = kcalloc(ndc->rx_table_sz, sizeof(u16), GFP_KERNEL);
+ if (!ndc->rx_table) {
+ ret = -ENOMEM;
+ goto err_dev_remv;
+ }
+
/* This guarantees that num_possible_rss_qs <= num_online_cpus */
num_possible_rss_qs = min_t(u32, num_online_cpus(),
rsscap.num_recv_que);
@@ -1558,7 +1571,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
net_device->num_chn = min(net_device->max_chn, device_info->num_chn);
if (!netif_is_rxfh_configured(net)) {
- for (i = 0; i < ITAB_NUM; i++)
+ for (i = 0; i < ndc->rx_table_sz; i++)
ndc->rx_table[i] = ethtool_rxfh_indir_default(
i, net_device->num_chn);
}
@@ -1596,11 +1609,19 @@ void rndis_filter_device_remove(struct hv_device *dev,
struct netvsc_device *net_dev)
{
struct rndis_device *rndis_dev = net_dev->extension;
+ struct net_device *net = hv_get_drvdata(dev);
+ struct net_device_context *ndc;
+
+ ndc = netdev_priv(net);
/* Halt and release the rndis device */
rndis_filter_halt_device(net_dev, rndis_dev);
netvsc_device_remove(dev);
+
+ ndc->rx_table_sz = 0;
+ kfree(ndc->rx_table);
+ ndc->rx_table = NULL;
}
int rndis_filter_open(struct netvsc_device *nvdev)