Diffstat (limited to 'drivers/net/hyperv')
-rw-r--r--   drivers/net/hyperv/Kconfig         |   3
-rw-r--r--   drivers/net/hyperv/hyperv_net.h    |  30
-rw-r--r--   drivers/net/hyperv/netvsc.c        | 173
-rw-r--r--   drivers/net/hyperv/netvsc_bpf.c    |   2
-rw-r--r--   drivers/net/hyperv/netvsc_drv.c    | 402
-rw-r--r--   drivers/net/hyperv/netvsc_trace.h  |   8
-rw-r--r--   drivers/net/hyperv/rndis_filter.c  |  85
7 files changed, 435 insertions(+), 268 deletions(-)
diff --git a/drivers/net/hyperv/Kconfig b/drivers/net/hyperv/Kconfig
index ca7bf7f897d3..982964c1a9fb 100644
--- a/drivers/net/hyperv/Kconfig
+++ b/drivers/net/hyperv/Kconfig
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config HYPERV_NET
 	tristate "Microsoft Hyper-V virtual network driver"
-	depends on HYPERV
+	depends on HYPERV_VMBUS
 	select UCS2_STRING
+	select NLS
 	help
 	  Select this option to enable the Hyper-V virtual network driver.
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index dd5919ec408b..7397c693f984 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -16,6 +16,7 @@
 #include <linux/hyperv.h>
 #include <linux/rndis.h>
 #include <linux/jhash.h>
+#include <net/xdp.h>
 
 /* RSS related */
 #define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */
@@ -74,6 +75,7 @@ struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */
 #define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40
 
 #define ITAB_NUM 128
+#define ITAB_NUM_MAX 256
 
 struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
 	struct ndis_obj_header hdr;
@@ -156,7 +158,6 @@ struct hv_netvsc_packet {
 	u8 cp_partial; /* partial copy into send buffer */
 
 	u8 rmsg_size; /* RNDIS header and PPI size */
-	u8 rmsg_pgcnt; /* page count of RNDIS header and PPI */
 	u8 page_buf_cnt;
 
 	u16 q_idx;
@@ -462,7 +463,7 @@ struct nvsp_1_message_send_receive_buffer_complete {
 	 *     LargeOffset                      SmallOffset
 	 */
 
-	struct nvsp_1_receive_buffer_section sections[1];
+	struct nvsp_1_receive_buffer_section sections[];
 } __packed;
 
 /*
@@ -880,7 +881,7 @@ struct nvsp_message {
 
 #define VRSS_SEND_TAB_SIZE 16  /* must be power of 2 */
 #define VRSS_CHANNEL_MAX 64
-#define VRSS_CHANNEL_DEFAULT 8
+#define VRSS_CHANNEL_DEFAULT 16
 
 #define RNDIS_MAX_PKT_DEFAULT 8
 #define RNDIS_PKT_ALIGN_DEFAULT 8
@@ -891,6 +892,18 @@ struct nvsp_message {
 			      sizeof(struct nvsp_message))
 #define NETVSC_MIN_IN_MSG_SIZE sizeof(struct vmpacket_descriptor)
 
+/* Maximum # of contiguous data ranges that can make up a transmitted packet.
+ * Typically it's the max SKB fragments plus 2 for the rndis packet and the
+ * linear portion of the SKB. But if MAX_SKB_FRAGS is large, the value may
+ * need to be limited to MAX_PAGE_BUFFER_COUNT, which is the max # of entries
+ * in a GPA direct packet sent to netvsp over VMBus.
+ */
+#if MAX_SKB_FRAGS + 2 < MAX_PAGE_BUFFER_COUNT
+#define MAX_DATA_RANGES (MAX_SKB_FRAGS + 2)
+#else
+#define MAX_DATA_RANGES MAX_PAGE_BUFFER_COUNT
+#endif
+
 /* Estimated requestor size:
  * out_ring_size/min_out_msg_size + in_ring_size/min_in_msg_size
  */
@@ -1034,7 +1047,9 @@ struct net_device_context {
 
 	u32 tx_table[VRSS_SEND_TAB_SIZE];
 
-	u16 rx_table[ITAB_NUM];
+	u16 *rx_table;
+
+	u32 rx_table_sz;
 
 	/* Ethtool settings */
 	u8 duplex;
@@ -1046,6 +1061,7 @@ struct net_device_context {
 	struct net_device __rcu *vf_netdev;
 	struct netvsc_vf_pcpu_stats __percpu *vf_stats;
 	struct delayed_work vf_takeover;
+	struct delayed_work vfns_work;
 
 	/* 1: allocated, serial number is valid. 0: not allocated */
 	u32 vf_alloc;
@@ -1060,6 +1076,8 @@ struct net_device_context {
 	struct netvsc_device_info *saved_netvsc_dev_info;
 };
 
+void netvsc_vfns_work(struct work_struct *w);
+
 /* Azure hosts don't support non-TCP port numbers in hashing for fragmented
  * packets. We can use ethtool to change UDP hash level when necessary.
  */
@@ -1139,7 +1157,6 @@ struct netvsc_device {
 
 	/* Receive buffer allocated by us but managed by NetVSP */
 	void *recv_buf;
-	void *recv_original_buf;
 	u32 recv_buf_size; /* allocated bytes */
 	struct vmbus_gpadl recv_buf_gpadl_handle;
 	u32 recv_section_cnt;
@@ -1148,7 +1165,6 @@ struct netvsc_device {
 
 	/* Send buffer allocated by us */
 	void *send_buf;
-	void *send_original_buf;
 	u32 send_buf_size;
 	struct vmbus_gpadl send_buf_gpadl_handle;
 	u32 send_section_cnt;
@@ -1164,6 +1180,8 @@ struct netvsc_device {
 	u32 max_chn;
 	u32 num_chn;
 
+	u32 netvsc_gso_max_size;
+
 	atomic_t open_chn;
 	struct work_struct subchan_work;
 	wait_queue_head_t subchan_open;
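The MAX_DATA_RANGES definition added above is a compile-time clamp: a packet needs at most MAX_SKB_FRAGS + 2 ranges (one for the RNDIS message, one for the skb linear data, one per fragment), but never more than the GPA direct packet limit. A minimal standalone sketch of the same arithmetic, with placeholder values standing in for the kernel's MAX_SKB_FRAGS and MAX_PAGE_BUFFER_COUNT:

/* Standalone illustration of the MAX_DATA_RANGES clamp above. The two
 * input values are placeholders; the real ones come from kernel headers.
 */
#include <stdio.h>

#define MAX_SKB_FRAGS		17	/* placeholder */
#define MAX_PAGE_BUFFER_COUNT	32	/* placeholder */

#if MAX_SKB_FRAGS + 2 < MAX_PAGE_BUFFER_COUNT
#define MAX_DATA_RANGES (MAX_SKB_FRAGS + 2)
#else
#define MAX_DATA_RANGES MAX_PAGE_BUFFER_COUNT
#endif

int main(void)
{
	/* 17 frags + RNDIS header + linear data = 19 ranges, which is
	 * under the 32-entry GPA direct packet limit, so 19 is chosen.
	 */
	printf("MAX_DATA_RANGES = %d\n", MAX_DATA_RANGES);
	return 0;
}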
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 9352dad58996..60a4629fe6ba 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -155,16 +155,10 @@ static void free_netvsc_device(struct rcu_head *head)
 
 	kfree(nvdev->extension);
 
-	if (nvdev->recv_original_buf)
-		vfree(nvdev->recv_original_buf);
-	else
+	if (!nvdev->recv_buf_gpadl_handle.decrypted)
 		vfree(nvdev->recv_buf);
-
-	if (nvdev->send_original_buf)
-		vfree(nvdev->send_original_buf);
-	else
+	if (!nvdev->send_buf_gpadl_handle.decrypted)
 		vfree(nvdev->send_buf);
-
 	bitmap_free(nvdev->send_section_map);
 
 	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
@@ -347,7 +341,6 @@ static int netvsc_init_buf(struct hv_device *device,
 	struct nvsp_message *init_packet;
 	unsigned int buf_size;
 	int i, ret = 0;
-	void *vaddr;
 
 	/* Get receive buffer area. */
 	buf_size = device_info->recv_sections * device_info->recv_section_size;
@@ -383,17 +376,6 @@ static int netvsc_init_buf(struct hv_device *device,
 		goto cleanup;
 	}
 
-	if (hv_isolation_type_snp()) {
-		vaddr = hv_map_memory(net_device->recv_buf, buf_size);
-		if (!vaddr) {
-			ret = -ENOMEM;
-			goto cleanup;
-		}
-
-		net_device->recv_original_buf = net_device->recv_buf;
-		net_device->recv_buf = vaddr;
-	}
-
 	/* Notify the NetVsp of the gpadl handle */
 	init_packet = &net_device->channel_init_pkt;
 	memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -497,17 +479,6 @@ static int netvsc_init_buf(struct hv_device *device,
 		goto cleanup;
 	}
 
-	if (hv_isolation_type_snp()) {
-		vaddr = hv_map_memory(net_device->send_buf, buf_size);
-		if (!vaddr) {
-			ret = -ENOMEM;
-			goto cleanup;
-		}
-
-		net_device->send_original_buf = net_device->send_buf;
-		net_device->send_buf = vaddr;
-	}
-
 	/* Notify the NetVsp of the gpadl handle */
 	init_packet = &net_device->channel_init_pkt;
 	memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -740,7 +711,15 @@ void netvsc_device_remove(struct hv_device *device)
 	/* Disable NAPI and disassociate its context from the device. */
 	for (i = 0; i < net_device->num_chn; i++) {
 		/* See also vmbus_reset_channel_cb(). */
-		napi_disable(&net_device->chan_table[i].napi);
+		/* only disable enabled NAPI channel */
+		if (i < ndev->real_num_rx_queues) {
+			netif_queue_set_napi(ndev, i, NETDEV_QUEUE_TYPE_TX,
+					     NULL);
+			netif_queue_set_napi(ndev, i, NETDEV_QUEUE_TYPE_RX,
+					     NULL);
+			napi_disable(&net_device->chan_table[i].napi);
+		}
+
 		netif_napi_del(&net_device->chan_table[i].napi);
 	}
 
@@ -762,12 +741,6 @@ void netvsc_device_remove(struct hv_device *device)
 		netvsc_teardown_send_gpadl(device, net_device, ndev);
 	}
 
-	if (net_device->recv_original_buf)
-		hv_unmap_memory(net_device->recv_buf);
-
-	if (net_device->send_original_buf)
-		hv_unmap_memory(net_device->send_buf);
-
 	/* Release all resources */
 	free_netvsc_device_rcu(net_device);
 }
@@ -851,6 +824,7 @@ static void netvsc_send_completion(struct net_device *ndev,
 	u32 msglen = hv_pkt_datalen(desc);
 	struct nvsp_message *pkt_rqst;
 	u64 cmd_rqst;
+	u32 status;
 
 	/* First check if this is a VMBUS completion without data payload */
 	if (!msglen) {
@@ -888,16 +862,17 @@ static void netvsc_send_completion(struct net_device *ndev,
 				    msglen);
 			return;
 		}
-		fallthrough;
+		break;
 
 	case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
 		if (msglen < sizeof(struct nvsp_message_header) +
-		    sizeof(struct nvsp_1_message_send_receive_buffer_complete)) {
+		    struct_size_t(struct nvsp_1_message_send_receive_buffer_complete,
+				  sections, 1)) {
 			netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
 				   msglen);
 			return;
 		}
-		fallthrough;
+		break;
 
 	case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
 		if (msglen < sizeof(struct nvsp_message_header) +
@@ -906,7 +881,7 @@ static void netvsc_send_completion(struct net_device *ndev,
 				   msglen);
 			return;
 		}
-		fallthrough;
+		break;
 
 	case NVSP_MSG5_TYPE_SUBCHANNEL:
 		if (msglen < sizeof(struct nvsp_message_header) +
@@ -915,22 +890,41 @@ static void netvsc_send_completion(struct net_device *ndev,
 				   msglen);
 			return;
 		}
-		/* Copy the response back */
-		memcpy(&net_device->channel_init_pkt, nvsp_packet,
-		       sizeof(struct nvsp_message));
-		complete(&net_device->channel_init_wait);
 		break;
 
 	case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
+		if (msglen < sizeof(struct nvsp_message_header) +
+		    sizeof(struct nvsp_1_message_send_rndis_packet_complete)) {
+			if (net_ratelimit())
+				netdev_err(ndev, "nvsp_rndis_pkt_complete length too small: %u\n",
+					   msglen);
+			return;
+		}
+
+		/* If status indicates an error, output a message so we know
+		 * there's a problem. But process the completion anyway so the
+		 * resources are released.
+		 */
+		status = nvsp_packet->msg.v1_msg.send_rndis_pkt_complete.status;
+		if (status != NVSP_STAT_SUCCESS && net_ratelimit())
+			netdev_err(ndev, "nvsp_rndis_pkt_complete error status: %x\n",
+				   status);
+
 		netvsc_send_tx_complete(ndev, net_device, incoming_channel,
 					desc, budget);
-		break;
+		return;
 
 	default:
 		netdev_err(ndev,
 			   "Unknown send completion type %d received!!\n",
 			   nvsp_packet->hdr.msg_type);
+		return;
 	}
+
+	/* Copy the response back */
+	memcpy(&net_device->channel_init_pkt, nvsp_packet,
+	       sizeof(struct nvsp_message));
+	complete(&net_device->channel_init_wait);
 }
 
 static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
@@ -959,8 +953,7 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
 		+ pend_size;
 	int i;
 	u32 padding = 0;
-	u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
-		packet->page_buf_cnt;
+	u32 page_count = packet->cp_partial ? 1 : packet->page_buf_cnt;
 	u32 remain;
 
 	/* Add padding */
@@ -987,9 +980,6 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
 void netvsc_dma_unmap(struct hv_device *hv_dev,
 		      struct hv_netvsc_packet *packet)
 {
-	u32 page_count = packet->cp_partial ?
-		packet->page_buf_cnt - packet->rmsg_pgcnt :
-		packet->page_buf_cnt;
 	int i;
 
 	if (!hv_is_isolation_supported())
@@ -998,7 +988,7 @@ void netvsc_dma_unmap(struct hv_device *hv_dev,
 	if (!packet->dma_range)
 		return;
 
-	for (i = 0; i < page_count; i++)
+	for (i = 0; i < packet->page_buf_cnt; i++)
 		dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
 				 packet->dma_range[i].mapping_size,
 				 DMA_TO_DEVICE);
@@ -1028,9 +1018,7 @@ static int netvsc_dma_map(struct hv_device *hv_dev,
 			  struct hv_netvsc_packet *packet,
 			  struct hv_page_buffer *pb)
 {
-	u32 page_count = packet->cp_partial ?
-		packet->page_buf_cnt - packet->rmsg_pgcnt :
-		packet->page_buf_cnt;
+	u32 page_count = packet->page_buf_cnt;
 	dma_addr_t dma;
 	int i;
 
@@ -1039,7 +1027,7 @@ static int netvsc_dma_map(struct hv_device *hv_dev,
 
 	packet->dma_range = kcalloc(page_count,
 				    sizeof(*packet->dma_range),
-				    GFP_KERNEL);
+				    GFP_ATOMIC);
 	if (!packet->dma_range)
 		return -ENOMEM;
 
@@ -1066,6 +1054,42 @@ static int netvsc_dma_map(struct hv_device *hv_dev,
 	return 0;
 }
 
+/* Build an "array" of mpb entries describing the data to be transferred
+ * over VMBus. After the desc header fields, each "array" entry is variable
+ * size, and each entry starts after the end of the previous entry. The
+ * "offset" and "len" fields for each entry imply the size of the entry.
+ *
+ * The pfns are in HV_HYP_PAGE_SIZE, because all communication with Hyper-V
+ * uses that granularity, even if the system page size of the guest is larger.
+ * Each entry in the input "pb" array must describe a contiguous range of
+ * guest physical memory so that the pfns are sequential if the range crosses
+ * a page boundary. The offset field must be < HV_HYP_PAGE_SIZE.
+ */
+static inline void netvsc_build_mpb_array(struct hv_page_buffer *pb,
+					  u32 page_buffer_count,
+					  struct vmbus_packet_mpb_array *desc,
+					  u32 *desc_size)
+{
+	struct hv_mpb_array *mpb_entry = &desc->range;
+	int i, j;
+
+	for (i = 0; i < page_buffer_count; i++) {
+		u32 offset = pb[i].offset;
+		u32 len = pb[i].len;
+
+		mpb_entry->offset = offset;
+		mpb_entry->len = len;
+
+		for (j = 0; j < HVPFN_UP(offset + len); j++)
+			mpb_entry->pfn_array[j] = pb[i].pfn + j;
+
+		mpb_entry = (struct hv_mpb_array *)&mpb_entry->pfn_array[j];
+	}
+
+	desc->rangecount = page_buffer_count;
+	*desc_size = (char *)mpb_entry - (char *)desc;
+}
+
 static inline int netvsc_send_pkt(
 	struct hv_device *device,
 	struct hv_netvsc_packet *packet,
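netvsc_build_mpb_array() above emits one variable-size entry per data range: each entry header is followed by HVPFN_UP(offset + len) pfns, so the entry's size is implied by its offset and len fields. A standalone sketch of that size computation, assuming 4 KiB Hyper-V pages and an entry shaped like struct hv_mpb_array (the names here are illustrative, and the real desc_size additionally counts the descriptor header that precedes the entries):

/* Userspace sketch of the variable-size mpb entry layout built above. */
#include <stdint.h>
#include <stdio.h>

#define HV_HYP_PAGE_SIZE 4096u
#define HVPFN_UP(x) (((x) + HV_HYP_PAGE_SIZE - 1) / HV_HYP_PAGE_SIZE)

struct mpb_entry {		/* mirrors the shape of struct hv_mpb_array */
	uint32_t offset;
	uint32_t len;
	uint64_t pfn_array[];	/* HVPFN_UP(offset + len) entries follow */
};

int main(void)
{
	/* Two ranges: 100 bytes at offset 4000 (crosses a page boundary,
	 * so 2 pfns) and 1500 bytes at offset 0 (1 pfn).
	 */
	uint32_t offsets[] = { 4000, 0 };
	uint32_t lens[] = { 100, 1500 };
	size_t size = 0;

	for (int i = 0; i < 2; i++)
		size += sizeof(struct mpb_entry) +
			HVPFN_UP(offsets[i] + lens[i]) * sizeof(uint64_t);

	printf("entries total %zu bytes\n", size);	/* 24 + 16 = 40 */
	return 0;
}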
+ */ +static inline void netvsc_build_mpb_array(struct hv_page_buffer *pb, + u32 page_buffer_count, + struct vmbus_packet_mpb_array *desc, + u32 *desc_size) +{ + struct hv_mpb_array *mpb_entry = &desc->range; + int i, j; + + for (i = 0; i < page_buffer_count; i++) { + u32 offset = pb[i].offset; + u32 len = pb[i].len; + + mpb_entry->offset = offset; + mpb_entry->len = len; + + for (j = 0; j < HVPFN_UP(offset + len); j++) + mpb_entry->pfn_array[j] = pb[i].pfn + j; + + mpb_entry = (struct hv_mpb_array *)&mpb_entry->pfn_array[j]; + } + + desc->rangecount = page_buffer_count; + *desc_size = (char *)mpb_entry - (char *)desc; +} + static inline int netvsc_send_pkt( struct hv_device *device, struct hv_netvsc_packet *packet, @@ -1108,8 +1132,11 @@ static inline int netvsc_send_pkt( packet->dma_range = NULL; if (packet->page_buf_cnt) { + struct vmbus_channel_packet_page_buffer desc; + u32 desc_size; + if (packet->cp_partial) - pb += packet->rmsg_pgcnt; + pb++; ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb); if (ret) { @@ -1117,11 +1144,12 @@ static inline int netvsc_send_pkt( goto exit; } - ret = vmbus_sendpacket_pagebuffer(out_channel, - pb, packet->page_buf_cnt, - &nvmsg, sizeof(nvmsg), - req_id); - + netvsc_build_mpb_array(pb, packet->page_buf_cnt, + (struct vmbus_packet_mpb_array *)&desc, + &desc_size); + ret = vmbus_sendpacket_mpb_desc(out_channel, + (struct vmbus_packet_mpb_array *)&desc, + desc_size, &nvmsg, sizeof(nvmsg), req_id); if (ret) netvsc_dma_unmap(ndev_ctx->device_ctx, packet); } else { @@ -1270,7 +1298,7 @@ int netvsc_send(struct net_device *ndev, packet->send_buf_index = section_index; if (packet->cp_partial) { - packet->page_buf_cnt -= packet->rmsg_pgcnt; + packet->page_buf_cnt--; packet->total_data_buflen = msd_len + packet->rmsg_size; } else { packet->page_buf_cnt = 0; @@ -1784,6 +1812,11 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, /* Enable NAPI handler before init callbacks */ netif_napi_add(ndev, &net_device->chan_table[0].napi, netvsc_poll); + napi_enable(&net_device->chan_table[0].napi); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, + &net_device->chan_table[0].napi); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, + &net_device->chan_table[0].napi); /* Open the channel */ device->channel->next_request_id_callback = vmbus_next_request_id; @@ -1803,8 +1836,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, /* Channel is opened */ netdev_dbg(ndev, "hv_netvsc channel opened successfully\n"); - napi_enable(&net_device->chan_table[0].napi); - /* Connect with the NetVsp */ ret = netvsc_connect_vsp(device, net_device, device_info); if (ret != 0) { @@ -1822,21 +1853,17 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, close: RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); - napi_disable(&net_device->chan_table[0].napi); /* Now, we can close the channel safely */ vmbus_close(device->channel); cleanup: + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, NULL); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, NULL); + napi_disable(&net_device->chan_table[0].napi); netif_napi_del(&net_device->chan_table[0].napi); cleanup2: - if (net_device->recv_original_buf) - hv_unmap_memory(net_device->recv_buf); - - if (net_device->send_original_buf) - hv_unmap_memory(net_device->send_buf); - free_netvsc_device(&net_device->rcu); return ERR_PTR(ret); diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c index 4a9522689fa4..1dd3755d9e6d 100644 --- a/drivers/net/hyperv/netvsc_bpf.c 
+++ b/drivers/net/hyperv/netvsc_bpf.c @@ -183,7 +183,7 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog) xdp.command = XDP_SETUP_PROG; xdp.prog = prog; - ret = vf_netdev->netdev_ops->ndo_bpf(vf_netdev, &xdp); + ret = netif_xdp_propagate(vf_netdev, &xdp); if (ret && prog) bpf_prog_put(prog); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f9b219e6cd58..3d47d749ef9f 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -29,6 +29,7 @@ #include <linux/bpf.h> #include <net/arp.h> +#include <net/netdev_lock.h> #include <net/route.h> #include <net/sock.h> #include <net/pkt_sched.h> @@ -42,9 +43,13 @@ #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) +/* Macros to define the context of vf registration */ +#define VF_REG_IN_PROBE 1 +#define VF_REG_IN_NOTIFIER 2 + static unsigned int ring_size __ro_after_init = 128; module_param(ring_size, uint, 0444); -MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); +MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)"); unsigned int netvsc_ring_bytes __ro_after_init; static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | @@ -321,43 +326,10 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, return txq; } -static u32 fill_pg_buf(unsigned long hvpfn, u32 offset, u32 len, - struct hv_page_buffer *pb) -{ - int j = 0; - - hvpfn += offset >> HV_HYP_PAGE_SHIFT; - offset = offset & ~HV_HYP_PAGE_MASK; - - while (len > 0) { - unsigned long bytes; - - bytes = HV_HYP_PAGE_SIZE - offset; - if (bytes > len) - bytes = len; - pb[j].pfn = hvpfn; - pb[j].offset = offset; - pb[j].len = bytes; - - offset += bytes; - len -= bytes; - - if (offset == HV_HYP_PAGE_SIZE && len) { - hvpfn++; - offset = 0; - j++; - } - } - - return j + 1; -} - static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, struct hv_netvsc_packet *packet, struct hv_page_buffer *pb) { - u32 slots_used = 0; - char *data = skb->data; int frags = skb_shinfo(skb)->nr_frags; int i; @@ -366,28 +338,27 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, * 2. skb linear data * 3. 
skb fragment data */ - slots_used += fill_pg_buf(virt_to_hvpfn(hdr), - offset_in_hvpage(hdr), - len, - &pb[slots_used]); + pb[0].offset = offset_in_hvpage(hdr); + pb[0].len = len; + pb[0].pfn = virt_to_hvpfn(hdr); packet->rmsg_size = len; - packet->rmsg_pgcnt = slots_used; - slots_used += fill_pg_buf(virt_to_hvpfn(data), - offset_in_hvpage(data), - skb_headlen(skb), - &pb[slots_used]); + pb[1].offset = offset_in_hvpage(skb->data); + pb[1].len = skb_headlen(skb); + pb[1].pfn = virt_to_hvpfn(skb->data); for (i = 0; i < frags; i++) { skb_frag_t *frag = skb_shinfo(skb)->frags + i; + struct hv_page_buffer *cur_pb = &pb[i + 2]; + u64 pfn = page_to_hvpfn(skb_frag_page(frag)); + u32 offset = skb_frag_off(frag); - slots_used += fill_pg_buf(page_to_hvpfn(skb_frag_page(frag)), - skb_frag_off(frag), - skb_frag_size(frag), - &pb[slots_used]); + cur_pb->offset = offset_in_hvpage(offset); + cur_pb->len = skb_frag_size(frag); + cur_pb->pfn = pfn + (offset >> HV_HYP_PAGE_SHIFT); } - return slots_used; + return frags + 2; } static int count_skb_frag_slots(struct sk_buff *skb) @@ -478,7 +449,7 @@ static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx) struct net_device *vf_netdev; u32 rndis_msg_size; u32 hash; - struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT]; + struct hv_page_buffer pb[MAX_DATA_RANGES]; /* If VF is present and up then redirect packets to it. * Skip the VF if it is marked down or has no carrier. @@ -983,7 +954,8 @@ struct netvsc_device_info *netvsc_devinfo_get(struct netvsc_device *nvdev) dev_info->bprog = prog; } } else { - dev_info->num_chn = VRSS_CHANNEL_DEFAULT; + dev_info->num_chn = max(VRSS_CHANNEL_DEFAULT, + netif_get_num_default_rss_queues()); dev_info->send_sections = NETVSC_DEFAULT_TX; dev_info->send_section_size = NETVSC_SEND_SECTION_SIZE; dev_info->recv_sections = NETVSC_DEFAULT_RX; @@ -1229,14 +1201,14 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) if (ret) goto rollback_vf; - ndev->mtu = mtu; + WRITE_ONCE(ndev->mtu, mtu); ret = netvsc_attach(ndev, device_info); if (!ret) goto out; /* Attempt rollback to original MTU */ - ndev->mtu = orig_mtu; + WRITE_ONCE(ndev->mtu, orig_mtu); if (netvsc_attach(ndev, device_info)) netdev_err(ndev, "restoring mtu failed\n"); @@ -1399,7 +1371,7 @@ static int netvsc_set_mac_addr(struct net_device *ndev, void *p) struct net_device_context *ndc = netdev_priv(ndev); struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev); struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); - struct sockaddr *addr = p; + struct sockaddr_storage *addr = p; int err; err = eth_prepare_mac_addr_change(ndev, p); @@ -1415,12 +1387,12 @@ static int netvsc_set_mac_addr(struct net_device *ndev, void *p) return err; } - err = rndis_filter_set_device_mac(nvdev, addr->sa_data); + err = rndis_filter_set_device_mac(nvdev, addr->__data); if (!err) { eth_commit_mac_addr_change(ndev, p); } else if (vf_netdev) { /* rollback change on VF */ - memcpy(addr->sa_data, ndev->dev_addr, ETH_ALEN); + memcpy(addr->__data, ndev->dev_addr, ETH_ALEN); dev_set_mac_address(vf_netdev, addr, NULL); } @@ -1552,7 +1524,7 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, data[i++] = xdp_tx; } - pcpu_sum = kvmalloc_array(num_possible_cpus(), + pcpu_sum = kvmalloc_array(nr_cpu_ids, sizeof(struct netvsc_ethtool_pcpu_stats), GFP_KERNEL); if (!pcpu_sum) @@ -1582,10 +1554,10 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data) switch (stringset) { case ETH_SS_STATS: for (i = 0; i < ARRAY_SIZE(netvsc_stats); 
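With fill_pg_buf() gone, init_page_array() above writes exactly one hv_page_buffer per data range instead of splitting ranges at Hyper-V page boundaries (netvsc_build_mpb_array() now expands the pfns at send time), so the slot count is the fixed frags + 2. A standalone sketch of that indexing, with made-up types and lengths:

/* Sketch of the one-entry-per-range fill done by init_page_array()
 * above; types, names and values here are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

struct page_buffer {	/* like struct hv_page_buffer */
	uint64_t pfn;
	uint32_t offset;
	uint32_t len;
};

static uint32_t fill_ranges(struct page_buffer *pb, uint32_t rndis_len,
			    uint32_t linear_len, const uint32_t *frag_len,
			    int frags)
{
	pb[0].len = rndis_len;			/* RNDIS header + PPIs */
	pb[1].len = linear_len;			/* skb linear data */
	for (int i = 0; i < frags; i++)
		pb[i + 2].len = frag_len[i];	/* one entry per fragment */
	return frags + 2;			/* no page-boundary splitting */
}

int main(void)
{
	struct page_buffer pb[8];
	uint32_t fl[2] = { 4096, 1024 };

	printf("ranges used: %u\n", fill_ranges(pb, 154, 66, fl, 2)); /* 4 */
	return 0;
}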
@@ -478,7 +449,7 @@ static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx)
 	struct net_device *vf_netdev;
 	u32 rndis_msg_size;
 	u32 hash;
-	struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT];
+	struct hv_page_buffer pb[MAX_DATA_RANGES];
 
 	/* If VF is present and up then redirect packets to it.
 	 * Skip the VF if it is marked down or has no carrier.
@@ -983,7 +954,8 @@ struct netvsc_device_info *netvsc_devinfo_get(struct netvsc_device *nvdev)
 			dev_info->bprog = prog;
 		}
 	} else {
-		dev_info->num_chn = VRSS_CHANNEL_DEFAULT;
+		dev_info->num_chn = max(VRSS_CHANNEL_DEFAULT,
+					netif_get_num_default_rss_queues());
 		dev_info->send_sections = NETVSC_DEFAULT_TX;
 		dev_info->send_section_size = NETVSC_SEND_SECTION_SIZE;
 		dev_info->recv_sections = NETVSC_DEFAULT_RX;
@@ -1229,14 +1201,14 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 	if (ret)
 		goto rollback_vf;
 
-	ndev->mtu = mtu;
+	WRITE_ONCE(ndev->mtu, mtu);
 
 	ret = netvsc_attach(ndev, device_info);
 	if (!ret)
 		goto out;
 
 	/* Attempt rollback to original MTU */
-	ndev->mtu = orig_mtu;
+	WRITE_ONCE(ndev->mtu, orig_mtu);
 
 	if (netvsc_attach(ndev, device_info))
 		netdev_err(ndev, "restoring mtu failed\n");
@@ -1399,7 +1371,7 @@ static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
 	struct net_device_context *ndc = netdev_priv(ndev);
 	struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev);
 	struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
-	struct sockaddr *addr = p;
+	struct sockaddr_storage *addr = p;
 	int err;
 
 	err = eth_prepare_mac_addr_change(ndev, p);
@@ -1415,12 +1387,12 @@ static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
 		return err;
 	}
 
-	err = rndis_filter_set_device_mac(nvdev, addr->sa_data);
+	err = rndis_filter_set_device_mac(nvdev, addr->__data);
 	if (!err) {
 		eth_commit_mac_addr_change(ndev, p);
 	} else if (vf_netdev) {
 		/* rollback change on VF */
-		memcpy(addr->sa_data, ndev->dev_addr, ETH_ALEN);
+		memcpy(addr->__data, ndev->dev_addr, ETH_ALEN);
 		dev_set_mac_address(vf_netdev, addr, NULL);
 	}
 
@@ -1552,7 +1524,7 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
 		data[i++] = xdp_tx;
 	}
 
-	pcpu_sum = kvmalloc_array(num_possible_cpus(),
+	pcpu_sum = kvmalloc_array(nr_cpu_ids,
 				  sizeof(struct netvsc_ethtool_pcpu_stats),
 				  GFP_KERNEL);
 	if (!pcpu_sum)
@@ -1582,10 +1554,10 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 	switch (stringset) {
 	case ETH_SS_STATS:
 		for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
-			ethtool_sprintf(&p, netvsc_stats[i].name);
+			ethtool_puts(&p, netvsc_stats[i].name);
 
 		for (i = 0; i < ARRAY_SIZE(vf_stats); i++)
-			ethtool_sprintf(&p, vf_stats[i].name);
+			ethtool_puts(&p, vf_stats[i].name);
 
 		for (i = 0; i < nvdev->num_chn; i++) {
 			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
@@ -1608,9 +1580,10 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 }
 
 static int
-netvsc_get_rss_hash_opts(struct net_device_context *ndc,
-			 struct ethtool_rxnfc *info)
+netvsc_get_rxfh_fields(struct net_device *ndev,
+		       struct ethtool_rxfh_fields *info)
 {
+	struct net_device_context *ndc = netdev_priv(ndev);
 	const u32 l4_flag = RXH_L4_B_0_1 | RXH_L4_B_2_3;
 
 	info->data = RXH_IP_SRC | RXH_IP_DST;
@@ -1651,30 +1624,24 @@ netvsc_get_rss_hash_opts(struct net_device_context *ndc,
 	return 0;
 }
 
-static int
-netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
-		 u32 *rules)
+static u32 netvsc_get_rx_ring_count(struct net_device *dev)
 {
 	struct net_device_context *ndc = netdev_priv(dev);
 	struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
 
 	if (!nvdev)
-		return -ENODEV;
-
-	switch (info->cmd) {
-	case ETHTOOL_GRXRINGS:
-		info->data = nvdev->num_chn;
 		return 0;
 
-	case ETHTOOL_GRXFH:
-		return netvsc_get_rss_hash_opts(ndc, info);
-	}
-	return -EOPNOTSUPP;
+	return nvdev->num_chn;
 }
 
-static int netvsc_set_rss_hash_opts(struct net_device_context *ndc,
-				    struct ethtool_rxnfc *info)
+static int
+netvsc_set_rxfh_fields(struct net_device *dev,
+		       const struct ethtool_rxfh_fields *info,
+		       struct netlink_ext_ack *extack)
 {
+	struct net_device_context *ndc = netdev_priv(dev);
+
 	if (info->data == (RXH_IP_SRC | RXH_IP_DST |
 			   RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
 		switch (info->flow_type) {
@@ -1729,17 +1696,6 @@ static int netvsc_set_rss_hash_opts(struct net_device_context *ndc,
 	return -EOPNOTSUPP;
 }
 
-static int
-netvsc_set_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *info)
-{
-	struct net_device_context *ndc = netdev_priv(ndev);
-
-	if (info->cmd == ETHTOOL_SRXFH)
-		return netvsc_set_rss_hash_opts(ndc, info);
-
-	return -EOPNOTSUPP;
-}
-
 static u32 netvsc_get_rxfh_key_size(struct net_device *dev)
 {
 	return NETVSC_HASH_KEYLEN;
@@ -1747,11 +1703,13 @@ static u32 netvsc_get_rxfh_key_size(struct net_device *dev)
 
 static u32 netvsc_rss_indir_size(struct net_device *dev)
 {
-	return ITAB_NUM;
+	struct net_device_context *ndc = netdev_priv(dev);
+
+	return ndc->rx_table_sz;
 }
 
-static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
-			   u8 *hfunc)
+static int netvsc_get_rxfh(struct net_device *dev,
+			   struct ethtool_rxfh_param *rxfh)
 {
 	struct net_device_context *ndc = netdev_priv(dev);
 	struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
@@ -1761,47 +1719,49 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
 	if (!ndev)
 		return -ENODEV;
 
-	if (hfunc)
-		*hfunc = ETH_RSS_HASH_TOP;	/* Toeplitz */
+	rxfh->hfunc = ETH_RSS_HASH_TOP;	/* Toeplitz */
 
 	rndis_dev = ndev->extension;
-	if (indir) {
-		for (i = 0; i < ITAB_NUM; i++)
-			indir[i] = ndc->rx_table[i];
+	if (rxfh->indir) {
+		for (i = 0; i < ndc->rx_table_sz; i++)
+			rxfh->indir[i] = ndc->rx_table[i];
 	}
 
-	if (key)
-		memcpy(key, rndis_dev->rss_key, NETVSC_HASH_KEYLEN);
+	if (rxfh->key)
+		memcpy(rxfh->key, rndis_dev->rss_key, NETVSC_HASH_KEYLEN);
 
 	return 0;
 }
 
-static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
-			   const u8 *key, const u8 hfunc)
+static int netvsc_set_rxfh(struct net_device *dev,
+			   struct ethtool_rxfh_param *rxfh,
+			   struct netlink_ext_ack *extack)
 {
 	struct net_device_context *ndc = netdev_priv(dev);
 	struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
 	struct rndis_device *rndis_dev;
+	u8 *key = rxfh->key;
 	int i;
 
 	if (!ndev)
 		return -ENODEV;
 
-	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+	if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
+	    rxfh->hfunc != ETH_RSS_HASH_TOP)
 		return -EOPNOTSUPP;
 
 	rndis_dev = ndev->extension;
-	if (indir) {
-		for (i = 0; i < ITAB_NUM; i++)
-			if (indir[i] >= ndev->num_chn)
+	if (rxfh->indir) {
+		for (i = 0; i < ndc->rx_table_sz; i++)
+			if (rxfh->indir[i] >= ndev->num_chn)
 				return -EINVAL;
 
-		for (i = 0; i < ITAB_NUM; i++)
-			ndc->rx_table[i] = indir[i];
+		for (i = 0; i < ndc->rx_table_sz; i++)
+			ndc->rx_table[i] = rxfh->indir[i];
 	}
 
 	if (!key) {
-		if (!indir)
+		if (!rxfh->indir)
 			return 0;
 
 		key = rndis_dev->rss_key;
@@ -2002,12 +1962,13 @@ static const struct ethtool_ops ethtool_ops = {
 	.get_channels	= netvsc_get_channels,
 	.set_channels	= netvsc_set_channels,
 	.get_ts_info	= ethtool_op_get_ts_info,
-	.get_rxnfc	= netvsc_get_rxnfc,
-	.set_rxnfc	= netvsc_set_rxnfc,
+	.get_rx_ring_count = netvsc_get_rx_ring_count,
 	.get_rxfh_key_size = netvsc_get_rxfh_key_size,
 	.get_rxfh_indir_size = netvsc_rss_indir_size,
 	.get_rxfh	= netvsc_get_rxfh,
 	.set_rxfh	= netvsc_set_rxfh,
+	.get_rxfh_fields = netvsc_get_rxfh_fields,
+	.set_rxfh_fields = netvsc_set_rxfh_fields,
 	.get_link_ksettings = netvsc_get_link_ksettings,
 	.set_link_ksettings = netvsc_set_link_ksettings,
 	.get_ringparam	= netvsc_get_ringparam,
@@ -2181,7 +2142,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
 }
 
 static int netvsc_vf_join(struct net_device *vf_netdev,
-			  struct net_device *ndev)
+			  struct net_device *ndev, int context)
 {
 	struct net_device_context *ndev_ctx = netdev_priv(ndev);
 	int ret;
@@ -2204,10 +2165,11 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
 		goto upper_link_failed;
 	}
 
-	/* set slave flag before open to prevent IPv6 addrconf */
-	vf_netdev->flags |= IFF_SLAVE;
-
-	schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
+	/* If this registration is called from probe context vf_takeover
+	 * is taken care of later in probe itself.
+	 */
+	if (context == VF_REG_IN_NOTIFIER)
+		schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
 
 	call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
 
@@ -2313,16 +2275,18 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
 
 	}
 
-	/* Fallback path to check synthetic vf with
-	 * help of mac addr
+	/* Fallback path to check synthetic vf with help of mac addr.
+	 * Because this function can be called before vf_netdev is
+	 * initialized (NETDEV_POST_INIT) when its perm_addr has not been copied
+	 * from dev_addr, also try to match to its dev_addr.
+	 * Note: On Hyper-V and Azure, it's not possible to set a MAC address
+	 * on a VF that matches to the MAC of an unrelated NETVSC device.
 	 */
 	list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
 		ndev = hv_get_drvdata(ndev_ctx->device_ctx);
-		if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr)) {
-			netdev_notice(vf_netdev,
-				      "falling back to mac addr based matching\n");
+		if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr) ||
+		    ether_addr_equal(vf_netdev->dev_addr, ndev->perm_addr))
 			return ndev;
-		}
 	}
 
 	netdev_notice(vf_netdev,
@@ -2330,7 +2294,23 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
 	return NULL;
 }
 
-static int netvsc_register_vf(struct net_device *vf_netdev)
+static int netvsc_prepare_bonding(struct net_device *vf_netdev)
+{
+	struct net_device *ndev;
+
+	ndev = get_netvsc_byslot(vf_netdev);
+	if (!ndev)
+		return NOTIFY_DONE;
+
+	/* Set slave flag and no addrconf flag before open
+	 * to prevent IPv6 addrconf.
+	 */
+	vf_netdev->flags |= IFF_SLAVE;
+	vf_netdev->priv_flags |= IFF_NO_ADDRCONF;
+	return NOTIFY_DONE;
+}
+
+static int netvsc_register_vf(struct net_device *vf_netdev, int context)
 {
 	struct net_device_context *net_device_ctx;
 	struct netvsc_device *netvsc_dev;
@@ -2370,7 +2350,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 
 	netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
 
-	if (netvsc_vf_join(vf_netdev, ndev) != 0)
+	if (netvsc_vf_join(vf_netdev, ndev, context) != 0)
 		return NOTIFY_DONE;
 
 	dev_hold(vf_netdev);
@@ -2436,6 +2416,21 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event)
 	} else {
 		netdev_info(ndev, "Data path switched %s VF: %s\n",
 			    vf_is_up ? "to" : "from", vf_netdev->name);
+
+		/* In Azure, when accelerated networking is enabled, other NICs
+		 * like MANA, MLX, are configured as a bonded nic with
+		 * Netvsc(failover) NIC. For bonded NICs, the min of the max
+		 * pkt aggregate size of the members is propagated in the stack.
+		 * In order to allow these NICs (MANA/MLX) to use up to
+		 * GSO_MAX_SIZE gso packet size, we need to allow Netvsc NIC to
+		 * also support this in the guest.
+		 * This value is only increased for netvsc NIC when datapath is
+		 * switched over to the VF.
+		 */
+		if (vf_is_up)
+			netif_set_tso_max_size(ndev, vf_netdev->tso_max_size);
+		else
+			netif_set_tso_max_size(ndev, netvsc_dev->netvsc_gso_max_size);
 	}
 
 	return NOTIFY_OK;
@@ -2455,8 +2450,6 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 
-	netvsc_vf_setxdp(vf_netdev, NULL);
-
 	reinit_completion(&net_device_ctx->vf_add);
 	netdev_rx_handler_unregister(vf_netdev);
 	netdev_upper_dev_unlink(vf_netdev, ndev);
@@ -2468,10 +2461,31 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 	return NOTIFY_OK;
 }
 
+static int check_dev_is_matching_vf(struct net_device *event_ndev)
+{
+	/* Skip NetVSC interfaces */
+	if (event_ndev->netdev_ops == &device_ops)
+		return -ENODEV;
+
+	/* Avoid non-Ethernet type devices */
+	if (event_ndev->type != ARPHRD_ETHER)
+		return -ENODEV;
+
+	/* Avoid Vlan dev with same MAC registering as VF */
+	if (is_vlan_dev(event_ndev))
+		return -ENODEV;
+
+	/* Avoid Bonding master dev with same MAC registering as VF */
+	if (netif_is_bond_master(event_ndev))
+		return -ENODEV;
+
+	return 0;
+}
+
 static int netvsc_probe(struct hv_device *dev,
 			const struct hv_vmbus_device_id *dev_id)
 {
-	struct net_device *net = NULL;
+	struct net_device *net = NULL, *vf_netdev;
 	struct net_device_context *net_device_ctx;
 	struct netvsc_device_info *device_info = NULL;
 	struct netvsc_device *nvdev;
@@ -2501,6 +2515,7 @@ static int netvsc_probe(struct hv_device *dev,
 	spin_lock_init(&net_device_ctx->lock);
 	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
 	INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
+	INIT_DELAYED_WORK(&net_device_ctx->vfns_work, netvsc_vfns_work);
 
 	net_device_ctx->vf_stats
 		= netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats);
@@ -2529,15 +2544,6 @@ static int netvsc_probe(struct hv_device *dev,
 		goto devinfo_failed;
 	}
 
-	nvdev = rndis_filter_device_add(dev, device_info);
-	if (IS_ERR(nvdev)) {
-		ret = PTR_ERR(nvdev);
-		netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
-		goto rndis_failed;
-	}
-
-	eth_hw_addr_set(net, device_info->mac_adr);
-
 	/* We must get rtnl lock before scheduling nvdev->subchan_work,
 	 * otherwise netvsc_subchan_work() can get rtnl lock first and wait
 	 * all subchannels to show up, but that may not happen because
 	 * netvsc_probe() can't get rtnl lock and as a result vmbus_onoffer()
 	 * -> ... -> device_add() -> ... -> __device_attach() can't get
 	 * the device lock, so all the subchannels can't be processed --
 	 * finally netvsc_subchan_work() hangs forever.
+	 *
+	 * The rtnl lock also needs to be held before rndis_filter_device_add()
+	 * which advertises nvsp_2_vsc_capability / sriov bit, and triggers
+	 * VF NIC offering and registering. If VF NIC finished register_netdev()
+	 * earlier it may cause name based config failure.
 	 */
 	rtnl_lock();
 
+	nvdev = rndis_filter_device_add(dev, device_info);
+	if (IS_ERR(nvdev)) {
+		ret = PTR_ERR(nvdev);
+		netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
+		goto rndis_failed;
+	}
+
+	eth_hw_addr_set(net, device_info->mac_adr);
+
 	if (nvdev->num_chn > 1)
 		schedule_work(&nvdev->subchan_work);
@@ -2559,6 +2579,9 @@ static int netvsc_probe(struct hv_device *dev,
 
 	netdev_lockdep_set_classes(net);
 
+	net->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+			    NETDEV_XDP_ACT_NDO_XMIT;
+
 	/* MTU range: 68 - 1500 or 65521 */
 	net->min_mtu = NETVSC_MTU_MIN;
 	if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
@@ -2575,15 +2598,41 @@ static int netvsc_probe(struct hv_device *dev,
 	}
 
 	list_add(&net_device_ctx->list, &netvsc_dev_list);
+
+	/* When the hv_netvsc driver is unloaded and reloaded, the
+	 * NET_DEVICE_REGISTER for the vf device is replayed before probe
+	 * is complete. This is because register_netdevice_notifier() gets
+	 * registered before vmbus_driver_register() so that callback func
+	 * is set before probe and we don't miss events like NETDEV_POST_INIT.
+	 * So, in this section we try to register the matching vf device that
+	 * is present as a netdevice, knowing that its register call is not
+	 * processed in the netvsc_netdev_notifier (as probing is in progress
+	 * and get_netvsc_byslot fails).
+	 */
+	for_each_netdev(dev_net(net), vf_netdev) {
+		ret = check_dev_is_matching_vf(vf_netdev);
+		if (ret != 0)
+			continue;
+
+		if (net != get_netvsc_byslot(vf_netdev))
+			continue;
+
+		netvsc_prepare_bonding(vf_netdev);
+		netdev_lock_ops(vf_netdev);
+		netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE);
+		netdev_unlock_ops(vf_netdev);
+		__netvsc_vf_setup(net, vf_netdev);
+		break;
+	}
 	rtnl_unlock();
 
 	netvsc_devinfo_put(device_info);
 	return 0;
 
 register_failed:
-	rtnl_unlock();
 	rndis_filter_device_remove(dev, nvdev);
 rndis_failed:
+	rtnl_unlock();
 	netvsc_devinfo_put(device_info);
 devinfo_failed:
 	free_percpu(net_device_ctx->vf_stats);
@@ -2594,7 +2643,7 @@ no_net:
 	return ret;
 }
 
-static int netvsc_remove(struct hv_device *dev)
+static void netvsc_remove(struct hv_device *dev)
 {
 	struct net_device_context *ndev_ctx;
 	struct net_device *vf_netdev, *net;
@@ -2603,7 +2652,7 @@ static int netvsc_remove(struct hv_device *dev)
 	net = hv_get_drvdata(dev);
 	if (net == NULL) {
 		dev_err(&dev->device, "No net device to remove\n");
-		return 0;
+		return;
 	}
 
 	ndev_ctx = netdev_priv(net);
@@ -2611,6 +2660,8 @@ static int netvsc_remove(struct hv_device *dev)
 	cancel_delayed_work_sync(&ndev_ctx->dwork);
 
 	rtnl_lock();
+	cancel_delayed_work_sync(&ndev_ctx->vfns_work);
+
 	nvdev = rtnl_dereference(ndev_ctx->nvdev);
 	if (nvdev) {
 		cancel_work_sync(&nvdev->subchan_work);
@@ -2637,7 +2688,6 @@ static int netvsc_remove(struct hv_device *dev)
 
 	free_percpu(ndev_ctx->vf_stats);
 	free_netdev(net);
-	return 0;
 }
 
 static int netvsc_suspend(struct hv_device *dev)
@@ -2653,6 +2703,7 @@ static int netvsc_suspend(struct hv_device *dev)
 	cancel_delayed_work_sync(&ndev_ctx->dwork);
 
 	rtnl_lock();
+	cancel_delayed_work_sync(&ndev_ctx->vfns_work);
 
 	nvdev = rtnl_dereference(ndev_ctx->nvdev);
 	if (nvdev == NULL) {
@@ -2721,6 +2772,52 @@ static struct hv_driver netvsc_drv = {
 	},
 };
 
+/* Set VF's namespace same as the synthetic NIC */
+static void netvsc_event_set_vf_ns(struct net_device *ndev)
+{
+	struct net_device_context *ndev_ctx = netdev_priv(ndev);
+	struct net_device *vf_netdev;
+	int ret;
+
+	vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+	if (!vf_netdev)
+		return;
+
+	if (!net_eq(dev_net(ndev), dev_net(vf_netdev))) {
+		ret = dev_change_net_namespace(vf_netdev, dev_net(ndev),
+					       "eth%d");
+		if (ret)
+			netdev_err(vf_netdev,
+				   "Cannot move to same namespace as %s: %d\n",
+				   ndev->name, ret);
+		else
+			netdev_info(vf_netdev,
+				    "Moved VF to namespace with: %s\n",
+				    ndev->name);
+	}
+}
+
+void netvsc_vfns_work(struct work_struct *w)
+{
+	struct net_device_context *ndev_ctx =
+		container_of(w, struct net_device_context, vfns_work.work);
+	struct net_device *ndev;
+
+	if (!rtnl_trylock()) {
+		schedule_delayed_work(&ndev_ctx->vfns_work, 1);
+		return;
+	}
+
+	ndev = hv_get_drvdata(ndev_ctx->device_ctx);
+	if (!ndev)
+		goto out;
+
+	netvsc_event_set_vf_ns(ndev);
+
+out:
+	rtnl_unlock();
+}
+
 /*
  * On Hyper-V, every VF interface is matched with a corresponding
  * synthetic interface. The synthetic interface is presented first
@@ -2731,26 +2828,24 @@ static int netvsc_netdev_event(struct notifier_block *this,
 			       unsigned long event, void *ptr)
 {
 	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+	struct net_device_context *ndev_ctx;
+	int ret = 0;
 
-	/* Skip our own events */
-	if (event_dev->netdev_ops == &device_ops)
-		return NOTIFY_DONE;
-
-	/* Avoid non-Ethernet type devices */
-	if (event_dev->type != ARPHRD_ETHER)
-		return NOTIFY_DONE;
-
-	/* Avoid Vlan dev with same MAC registering as VF */
-	if (is_vlan_dev(event_dev))
+	if (event_dev->netdev_ops == &device_ops && event == NETDEV_REGISTER) {
+		ndev_ctx = netdev_priv(event_dev);
+		schedule_delayed_work(&ndev_ctx->vfns_work, 0);
 		return NOTIFY_DONE;
+	}
 
-	/* Avoid Bonding master dev with same MAC registering as VF */
-	if (netif_is_bond_master(event_dev))
+	ret = check_dev_is_matching_vf(event_dev);
+	if (ret != 0)
 		return NOTIFY_DONE;
 
 	switch (event) {
+	case NETDEV_POST_INIT:
+		return netvsc_prepare_bonding(event_dev);
 	case NETDEV_REGISTER:
-		return netvsc_register_vf(event_dev);
+		return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER);
 	case NETDEV_UNREGISTER:
 		return netvsc_unregister_vf(event_dev);
 	case NETDEV_UP:
@@ -2782,14 +2877,19 @@ static int __init netvsc_drv_init(void)
 		pr_info("Increased ring_size to %u (min allowed)\n",
 			ring_size);
 	}
-	netvsc_ring_bytes = ring_size * PAGE_SIZE;
+	netvsc_ring_bytes = VMBUS_RING_SIZE(ring_size * 4096);
+
+	register_netdevice_notifier(&netvsc_netdev_notifier);
 
 	ret = vmbus_driver_register(&netvsc_drv);
 	if (ret)
-		return ret;
+		goto err_vmbus_reg;
 
-	register_netdevice_notifier(&netvsc_netdev_notifier);
 	return 0;
+
+err_vmbus_reg:
+	unregister_netdevice_notifier(&netvsc_netdev_notifier);
+	return ret;
 }
 
 MODULE_LICENSE("GPL");
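netvsc_drv_init() now sizes the ring in fixed 4 KiB units, independent of the guest PAGE_SIZE, and rounds the result with VMBUS_RING_SIZE(). A sketch of the default's arithmetic, where the expansion of VMBUS_RING_SIZE() is an assumption restated from the VMBus headers (payload plus ring-buffer header, page-aligned), with a 4 KiB page and one-page header assumed:

/* Illustrative computation of netvsc_ring_bytes; macro expansion and
 * header size here are assumptions, not authoritative definitions.
 */
#include <stdio.h>

#define PAGE_SIZE		4096u
#define RING_HDR_SIZE		PAGE_SIZE	/* assumed one-page header */
#define PAGE_ALIGN(x)		(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define VMBUS_RING_SIZE(sz)	PAGE_ALIGN(RING_HDR_SIZE + (sz))

int main(void)
{
	unsigned int ring_size = 128;	/* default # of 4K pages */

	/* 128 * 4096 = 512 KiB payload + 4 KiB header = 528384 bytes */
	printf("netvsc_ring_bytes = %u\n", VMBUS_RING_SIZE(ring_size * 4096));
	return 0;
}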
diff --git a/drivers/net/hyperv/netvsc_trace.h b/drivers/net/hyperv/netvsc_trace.h
index f7585563dea5..05e620cbdd29 100644
--- a/drivers/net/hyperv/netvsc_trace.h
+++ b/drivers/net/hyperv/netvsc_trace.h
@@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(rndis_msg_class,
 		__field( u32, msg_len )
 	),
 	TP_fast_assign(
-		__assign_str(name, ndev->name);
+		__assign_str(name);
 		__entry->queue = q;
 		__entry->req_id = msg->msg.init_req.req_id;
 		__entry->msg_type = msg->ndis_msg_type;
@@ -121,7 +121,7 @@ TRACE_EVENT(nvsp_send,
 		__field( u32, msg_type )
 	),
 	TP_fast_assign(
-		__assign_str(name, ndev->name);
+		__assign_str(name);
 		__entry->msg_type = msg->hdr.msg_type;
 	),
 	TP_printk("dev=%s type=%s",
@@ -142,7 +142,7 @@ TRACE_EVENT(nvsp_send_pkt,
 		__field( u32, section_size )
 	),
 	TP_fast_assign(
-		__assign_str(name, ndev->name);
+		__assign_str(name);
 		__entry->qid = chan->offermsg.offer.sub_channel_index;
 		__entry->channel_type = rpkt->channel_type;
 		__entry->section_index = rpkt->send_buf_section_index;
@@ -165,7 +165,7 @@ TRACE_EVENT(nvsp_recv,
 		__field( u32, msg_type )
 	),
 	TP_fast_assign(
-		__assign_str(name, ndev->name);
+		__assign_str(name);
 		__entry->qid = chan->offermsg.offer.sub_channel_index;
 		__entry->msg_type = msg->hdr.msg_type;
 	),
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index eea777ec2541..c35f9685b6bf 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -225,8 +225,7 @@ static int rndis_filter_send_request(struct rndis_device *dev,
 				     struct rndis_request *req)
 {
 	struct hv_netvsc_packet *packet;
-	struct hv_page_buffer page_buf[2];
-	struct hv_page_buffer *pb = page_buf;
+	struct hv_page_buffer pb;
 	int ret;
 
 	/* Setup the packet to send it */
@@ -235,27 +234,14 @@ static int rndis_filter_send_request(struct rndis_device *dev,
 	packet->total_data_buflen = req->request_msg.msg_len;
 	packet->page_buf_cnt = 1;
 
-	pb[0].pfn = virt_to_phys(&req->request_msg) >>
-		HV_HYP_PAGE_SHIFT;
-	pb[0].len = req->request_msg.msg_len;
-	pb[0].offset = offset_in_hvpage(&req->request_msg);
-
-	/* Add one page_buf when request_msg crossing page boundary */
-	if (pb[0].offset + pb[0].len > HV_HYP_PAGE_SIZE) {
-		packet->page_buf_cnt++;
-		pb[0].len = HV_HYP_PAGE_SIZE -
-			pb[0].offset;
-		pb[1].pfn = virt_to_phys((void *)&req->request_msg
-			+ pb[0].len) >> HV_HYP_PAGE_SHIFT;
-		pb[1].offset = 0;
-		pb[1].len = req->request_msg.msg_len -
-			pb[0].len;
-	}
+	pb.pfn = virt_to_phys(&req->request_msg) >> HV_HYP_PAGE_SHIFT;
+	pb.len = req->request_msg.msg_len;
+	pb.offset = offset_in_hvpage(&req->request_msg);
 
 	trace_rndis_send(dev->ndev, 0, &req->request_msg);
 
 	rcu_read_lock_bh();
-	ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL, false);
+	ret = netvsc_send(dev->ndev, packet, NULL, &pb, NULL, false);
 	rcu_read_unlock_bh();
 
 	return ret;
@@ -927,7 +913,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev,
 	struct rndis_set_request *set;
 	struct rndis_set_complete *set_complete;
 	u32 extlen = sizeof(struct ndis_recv_scale_param) +
-		     4 * ITAB_NUM + NETVSC_HASH_KEYLEN;
+		     4 * ndc->rx_table_sz + NETVSC_HASH_KEYLEN;
 	struct ndis_recv_scale_param *rssp;
 	u32 *itab;
 	u8 *keyp;
@@ -953,7 +939,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev,
 	rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 |
 			 NDIS_HASH_TCP_IPV4 | NDIS_HASH_IPV6 |
 			 NDIS_HASH_TCP_IPV6;
-	rssp->indirect_tabsize = 4*ITAB_NUM;
+	rssp->indirect_tabsize = 4 * ndc->rx_table_sz;
 	rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param);
 	rssp->hashkey_size = NETVSC_HASH_KEYLEN;
 	rssp->hashkey_offset = rssp->indirect_taboffset +
@@ -961,7 +947,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev,
 
 	/* Set indirection table entries */
 	itab = (u32 *)(rssp + 1);
-	for (i = 0; i < ITAB_NUM; i++)
+	for (i = 0; i < ndc->rx_table_sz; i++)
 		itab[i] = ndc->rx_table[i];
 
 	/* Set hash key values */
@@ -1266,13 +1252,27 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
 	new_sc->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
 	new_sc->max_pkt_size = NETVSC_MAX_PKT_SIZE;
 
+	/* Enable napi before opening the vmbus channel to avoid races
+	 * as the host placing data on the host->guest ring may be left
+	 * out if napi was not enabled.
+	 */
+	napi_enable(&nvchan->napi);
+	netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_RX,
+			     &nvchan->napi);
+	netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_TX,
+			     &nvchan->napi);
+
 	ret = vmbus_open(new_sc, netvsc_ring_bytes,
 			 netvsc_ring_bytes, NULL, 0,
 			 netvsc_channel_cb, nvchan);
-	if (ret == 0)
-		napi_enable(&nvchan->napi);
-	else
+	if (ret != 0) {
 		netdev_notice(ndev, "sub channel open failed: %d\n", ret);
+		netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_TX,
+				     NULL);
+		netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_RX,
+				     NULL);
+		napi_disable(&nvchan->napi);
+	}
 
 	if (atomic_inc_return(&nvscdev->open_chn) == nvscdev->num_chn)
 		wake_up(&nvscdev->subchan_open);
@@ -1351,9 +1351,10 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
 	struct net_device_context *net_device_ctx = netdev_priv(net);
 	struct ndis_offload hwcaps;
 	struct ndis_offload_params offloads;
-	unsigned int gso_max_size = GSO_LEGACY_MAX_SIZE;
 	int ret;
 
+	nvdev->netvsc_gso_max_size = GSO_LEGACY_MAX_SIZE;
+
 	/* Find HW offload capabilities */
 	ret = rndis_query_hwcaps(rndis_device, nvdev, &hwcaps);
 	if (ret != 0)
@@ -1385,8 +1386,8 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
 			offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED;
 			net->hw_features |= NETIF_F_TSO;
 
-			if (hwcaps.lsov2.ip4_maxsz < gso_max_size)
-				gso_max_size = hwcaps.lsov2.ip4_maxsz;
+			if (hwcaps.lsov2.ip4_maxsz < nvdev->netvsc_gso_max_size)
+				nvdev->netvsc_gso_max_size = hwcaps.lsov2.ip4_maxsz;
 		}
 
 		if (hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) {
@@ -1406,8 +1407,8 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
 			offloads.lso_v2_ipv6 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED;
 			net->hw_features |= NETIF_F_TSO6;
 
-			if (hwcaps.lsov2.ip6_maxsz < gso_max_size)
-				gso_max_size = hwcaps.lsov2.ip6_maxsz;
+			if (hwcaps.lsov2.ip6_maxsz < nvdev->netvsc_gso_max_size)
+				nvdev->netvsc_gso_max_size = hwcaps.lsov2.ip6_maxsz;
 		}
 
 		if (hwcaps.csum.ip6_txcsum & NDIS_TXCSUM_CAP_UDP6) {
@@ -1433,7 +1434,7 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
 	 */
 	net->features &= ~NETVSC_SUPPORTED_HW_FEATURES | net->hw_features;
 
-	netif_set_tso_max_size(net, gso_max_size);
+	netif_set_tso_max_size(net, nvdev->netvsc_gso_max_size);
 
 	ret = rndis_filter_set_offload_params(net, nvdev, &offloads);
 
@@ -1548,6 +1549,18 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
 	if (ret || rsscap.num_recv_que < 2)
 		goto out;
 
+	if (rsscap.num_indirect_tabent &&
+	    rsscap.num_indirect_tabent <= ITAB_NUM_MAX)
+		ndc->rx_table_sz = rsscap.num_indirect_tabent;
+	else
+		ndc->rx_table_sz = ITAB_NUM;
+
+	ndc->rx_table = kcalloc(ndc->rx_table_sz, sizeof(u16), GFP_KERNEL);
+	if (!ndc->rx_table) {
+		ret = -ENOMEM;
+		goto err_dev_remv;
+	}
+
 	/* This guarantees that num_possible_rss_qs <= num_online_cpus */
 	num_possible_rss_qs = min_t(u32, num_online_cpus(),
 				    rsscap.num_recv_que);
@@ -1558,7 +1571,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
 	net_device->num_chn = min(net_device->max_chn, device_info->num_chn);
 
 	if (!netif_is_rxfh_configured(net)) {
-		for (i = 0; i < ITAB_NUM; i++)
+		for (i = 0; i < ndc->rx_table_sz; i++)
 			ndc->rx_table[i] = ethtool_rxfh_indir_default(
 						i, net_device->num_chn);
 	}
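rndis_filter_device_add() above now sizes rx_table from the host-reported num_indirect_tabent, capped at ITAB_NUM_MAX and falling back to ITAB_NUM, then seeds it with ethtool_rxfh_indir_default(). A standalone sketch of that seeding, assuming the helper is the usual round-robin index-modulo-ring-count mapping:

/* Sketch of the default rx_table seeding above; the round-robin body
 * of rxfh_indir_default() is an assumption restated for illustration.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t rxfh_indir_default(uint32_t index, uint32_t n_rx_rings)
{
	return index % n_rx_rings;	/* spread entries across channels */
}

int main(void)
{
	uint32_t rx_table_sz = 128;	/* ITAB_NUM fallback */
	uint32_t num_chn = 16;		/* VRSS_CHANNEL_DEFAULT */
	uint16_t rx_table[128];

	for (uint32_t i = 0; i < rx_table_sz; i++)
		rx_table[i] = rxfh_indir_default(i, num_chn);

	/* entries cycle 0..15 repeatedly across the 128 slots */
	printf("rx_table[17] = %u\n", rx_table[17]);	/* prints 1 */
	return 0;
}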
@@ -1596,11 +1609,19 @@ void rndis_filter_device_remove(struct hv_device *dev,
 				struct netvsc_device *net_dev)
 {
 	struct rndis_device *rndis_dev = net_dev->extension;
+	struct net_device *net = hv_get_drvdata(dev);
+	struct net_device_context *ndc;
+
+	ndc = netdev_priv(net);
 
 	/* Halt and release the rndis device */
 	rndis_filter_halt_device(net_dev, rndis_dev);
 
 	netvsc_device_remove(dev);
+
+	ndc->rx_table_sz = 0;
+	kfree(ndc->rx_table);
+	ndc->rx_table = NULL;
 }
 
 int rndis_filter_open(struct netvsc_device *nvdev)
