diff options
Diffstat (limited to 'drivers/hv/connection.c')
| -rw-r--r-- | drivers/hv/connection.c | 298 |
1 files changed, 175 insertions, 123 deletions
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 4fe117b761ce..5d9cb5bf2d62 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -1,24 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (c) 2009, Microsoft Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * * Authors: * Haiyang Zhang <haiyangz@microsoft.com> * Hank Janssen <hjanssen@microsoft.com> - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -27,10 +14,13 @@ #include <linux/wait.h> #include <linux/delay.h> #include <linux/mm.h> +#include <linux/module.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/hyperv.h> #include <linux/export.h> +#include <linux/io.h> +#include <linux/set_memory.h> #include <asm/mshyperv.h> #include "hyperv_vmbus.h" @@ -38,7 +28,14 @@ struct vmbus_connection vmbus_connection = { .conn_state = DISCONNECTED, + .unload_event = COMPLETION_INITIALIZER( + vmbus_connection.unload_event), .next_gpadl_handle = ATOMIC_INIT(0xE1E10), + + .ready_for_suspend_event = COMPLETION_INITIALIZER( + vmbus_connection.ready_for_suspend_event), + .all_offers_delivered_event = COMPLETION_INITIALIZER( + vmbus_connection.all_offers_delivered_event), }; EXPORT_SYMBOL_GPL(vmbus_connection); @@ -48,35 +45,36 @@ EXPORT_SYMBOL_GPL(vmbus_connection); __u32 vmbus_proto_version; EXPORT_SYMBOL_GPL(vmbus_proto_version); -static __u32 vmbus_get_next_version(__u32 current_version) -{ - switch (current_version) { - case (VERSION_WIN7): - return VERSION_WS2008; - - case (VERSION_WIN8): - return VERSION_WIN7; - - case (VERSION_WIN8_1): - return VERSION_WIN8; - - case (VERSION_WIN10): - return VERSION_WIN8_1; +/* + * Table of VMBus versions listed from newest to oldest. + * VERSION_WIN7 and VERSION_WS2008 are no longer supported in + * Linux guests and are not listed. + */ +static __u32 vmbus_versions[] = { + VERSION_WIN10_V6_0, + VERSION_WIN10_V5_3, + VERSION_WIN10_V5_2, + VERSION_WIN10_V5_1, + VERSION_WIN10_V5, + VERSION_WIN10_V4_1, + VERSION_WIN10, + VERSION_WIN8_1, + VERSION_WIN8 +}; - case (VERSION_WIN10_V5): - return VERSION_WIN10; +/* + * Maximal VMBus protocol version guests can negotiate. Useful to cap the + * VMBus version for testing and debugging purpose. + */ +static uint max_version = VERSION_WIN10_V6_0; - case (VERSION_WS2008): - default: - return VERSION_INVAL; - } -} +module_param(max_version, uint, S_IRUGO); +MODULE_PARM_DESC(max_version, + "Maximal VMBus protocol version which can be negotiated"); -static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, - __u32 version) +int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version) { int ret = 0; - unsigned int cur_cpu; struct vmbus_channel_initiate_contact *msg; unsigned long flags; @@ -89,44 +87,38 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, msg->vmbus_version_requested = version; /* - * VMBus protocol 5.0 (VERSION_WIN10_V5) requires that we must use - * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message, + * VMBus protocol 5.0 (VERSION_WIN10_V5) and higher require that we must + * use VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message, * and for subsequent messages, we must use the Message Connection ID * field in the host-returned Version Response Message. And, with - * VERSION_WIN10_V5, we don't use msg->interrupt_page, but we tell - * the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for + * VERSION_WIN10_V5 and higher, we don't use msg->interrupt_page, but we + * tell the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for * compatibility. * * On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1). */ if (version >= VERSION_WIN10_V5) { msg->msg_sint = VMBUS_MESSAGE_SINT; + msg->msg_vtl = ms_hyperv.vtl; vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4; } else { msg->interrupt_page = virt_to_phys(vmbus_connection.int_page); vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID; } - msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]); - msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]); + if (vmbus_is_confidential() && version >= VERSION_WIN10_V6_0) + msg->feature_flags = VMBUS_FEATURE_FLAG_CONFIDENTIAL_CHANNELS; + /* - * We want all channel messages to be delivered on CPU 0. - * This has been the behavior pre-win8. This is not - * perf issue and having all channel messages delivered on CPU 0 - * would be ok. - * For post win8 hosts, we support receiving channel messagges on - * all the CPUs. This is needed for kexec to work correctly where - * the CPU attempting to connect may not be CPU 0. + * shared_gpa_boundary is zero in non-SNP VMs, so it's safe to always + * bitwise OR it */ - if (version >= VERSION_WIN8_1) { - cur_cpu = get_cpu(); - msg->target_vcpu = hv_cpu_number_to_vp_number(cur_cpu); - vmbus_connection.connect_cpu = cur_cpu; - put_cpu(); - } else { - msg->target_vcpu = 0; - vmbus_connection.connect_cpu = 0; - } + msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]) | + ms_hyperv.shared_gpa_boundary; + msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]) | + ms_hyperv.shared_gpa_boundary; + + msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU); /* * Add to list before we send the request since we may @@ -178,8 +170,8 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, */ int vmbus_connect(void) { - int ret = 0; struct vmbus_channel_msginfo *msginfo = NULL; + int i, ret = 0; __u32 version; /* Initialize the vmbus connection */ @@ -190,6 +182,14 @@ int vmbus_connect(void) goto cleanup; } + vmbus_connection.rescind_work_queue = + create_workqueue("hv_vmbus_rescind"); + if (!vmbus_connection.rescind_work_queue) { + ret = -ENOMEM; + goto cleanup; + } + vmbus_connection.ignore_any_offer_msg = false; + vmbus_connection.handle_primary_chan_wq = create_workqueue("hv_pri_chan"); if (!vmbus_connection.handle_primary_chan_wq) { @@ -211,11 +211,19 @@ int vmbus_connect(void) mutex_init(&vmbus_connection.channel_mutex); /* + * The following Hyper-V interrupt and monitor pages can be used by + * UIO for mapping to user-space, so they should always be allocated on + * system page boundaries. The system page size must be >= the Hyper-V + * page size. + */ + BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE); + + /* * Setup the vmbus event connection for channel interrupt * abstraction stuff */ vmbus_connection.int_page = - (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 0); + (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); if (vmbus_connection.int_page == NULL) { ret = -ENOMEM; goto cleanup; @@ -224,20 +232,43 @@ int vmbus_connect(void) vmbus_connection.recv_int_page = vmbus_connection.int_page; vmbus_connection.send_int_page = (void *)((unsigned long)vmbus_connection.int_page + - (PAGE_SIZE >> 1)); + (HV_HYP_PAGE_SIZE >> 1)); /* * Setup the monitor notification facility. The 1st page for * parent->child and the 2nd page for child->parent */ - vmbus_connection.monitor_pages[0] = (void *)__get_free_pages((GFP_KERNEL|__GFP_ZERO), 0); - vmbus_connection.monitor_pages[1] = (void *)__get_free_pages((GFP_KERNEL|__GFP_ZERO), 0); + vmbus_connection.monitor_pages[0] = (void *)__get_free_page(GFP_KERNEL); + vmbus_connection.monitor_pages[1] = (void *)__get_free_page(GFP_KERNEL); if ((vmbus_connection.monitor_pages[0] == NULL) || (vmbus_connection.monitor_pages[1] == NULL)) { ret = -ENOMEM; goto cleanup; } + ret = set_memory_decrypted((unsigned long) + vmbus_connection.monitor_pages[0], 1); + ret |= set_memory_decrypted((unsigned long) + vmbus_connection.monitor_pages[1], 1); + if (ret) { + /* + * If set_memory_decrypted() fails, the encryption state + * of the memory is unknown. So leak the memory instead + * of risking returning decrypted memory to the free list. + * For simplicity, always handle both pages the same. + */ + vmbus_connection.monitor_pages[0] = NULL; + vmbus_connection.monitor_pages[1] = NULL; + goto cleanup; + } + + /* + * Set_memory_decrypted() will change the memory contents if + * decryption occurs, so zero monitor pages here. + */ + memset(vmbus_connection.monitor_pages[0], 0x00, HV_HYP_PAGE_SIZE); + memset(vmbus_connection.monitor_pages[1], 0x00, HV_HYP_PAGE_SIZE); + msginfo = kzalloc(sizeof(*msginfo) + sizeof(struct vmbus_channel_initiate_contact), GFP_KERNEL); @@ -253,26 +284,43 @@ int vmbus_connect(void) * version. */ - version = VERSION_CURRENT; + for (i = 0; ; i++) { + if (i == ARRAY_SIZE(vmbus_versions)) { + ret = -EDOM; + goto cleanup; + } + + version = vmbus_versions[i]; + if (version > max_version) + continue; - do { ret = vmbus_negotiate_version(msginfo, version); if (ret == -ETIMEDOUT) goto cleanup; if (vmbus_connection.conn_state == CONNECTED) break; + } - version = vmbus_get_next_version(version); - } while (version != VERSION_INVAL); - - if (version == VERSION_INVAL) + if (hv_is_isolation_supported() && version < VERSION_WIN10_V5_2) { + pr_err("Invalid VMBus version %d.%d (expected >= %d.%d) from the host supporting isolation\n", + version >> 16, version & 0xFFFF, VERSION_WIN10_V5_2 >> 16, VERSION_WIN10_V5_2 & 0xFFFF); + ret = -EINVAL; goto cleanup; + } vmbus_proto_version = version; pr_info("Vmbus version:%d.%d\n", version >> 16, version & 0xFFFF); + vmbus_connection.channels = kcalloc(MAX_CHANNEL_RELIDS, + sizeof(struct vmbus_channel *), + GFP_KERNEL); + if (vmbus_connection.channels == NULL) { + ret = -ENOMEM; + goto cleanup; + } + kfree(msginfo); return 0; @@ -300,18 +348,32 @@ void vmbus_disconnect(void) if (vmbus_connection.handle_primary_chan_wq) destroy_workqueue(vmbus_connection.handle_primary_chan_wq); + if (vmbus_connection.rescind_work_queue) + destroy_workqueue(vmbus_connection.rescind_work_queue); + if (vmbus_connection.work_queue) destroy_workqueue(vmbus_connection.work_queue); if (vmbus_connection.int_page) { - free_pages((unsigned long)vmbus_connection.int_page, 0); + free_page((unsigned long)vmbus_connection.int_page); vmbus_connection.int_page = NULL; } - free_pages((unsigned long)vmbus_connection.monitor_pages[0], 0); - free_pages((unsigned long)vmbus_connection.monitor_pages[1], 0); - vmbus_connection.monitor_pages[0] = NULL; - vmbus_connection.monitor_pages[1] = NULL; + if (vmbus_connection.monitor_pages[0]) { + if (!set_memory_encrypted( + (unsigned long)vmbus_connection.monitor_pages[0], 1)) + free_page((unsigned long) + vmbus_connection.monitor_pages[0]); + vmbus_connection.monitor_pages[0] = NULL; + } + + if (vmbus_connection.monitor_pages[1]) { + if (!set_memory_encrypted( + (unsigned long)vmbus_connection.monitor_pages[1], 1)) + free_page((unsigned long) + vmbus_connection.monitor_pages[1]); + vmbus_connection.monitor_pages[1] = NULL; + } } /* @@ -320,33 +382,13 @@ void vmbus_disconnect(void) */ struct vmbus_channel *relid2channel(u32 relid) { - struct vmbus_channel *channel; - struct vmbus_channel *found_channel = NULL; - struct list_head *cur, *tmp; - struct vmbus_channel *cur_sc; - - BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); - - list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { - if (channel->offermsg.child_relid == relid) { - found_channel = channel; - break; - } else if (!list_empty(&channel->sc_list)) { - /* - * Deal with sub-channels. - */ - list_for_each_safe(cur, tmp, &channel->sc_list) { - cur_sc = list_entry(cur, struct vmbus_channel, - sc_list); - if (cur_sc->offermsg.child_relid == relid) { - found_channel = cur_sc; - break; - } - } - } + if (vmbus_connection.channels == NULL) { + pr_warn_once("relid2channel: relid=%d: No channels mapped!\n", relid); + return NULL; } - - return found_channel; + if (WARN_ON(relid >= MAX_CHANNEL_RELIDS)) + return NULL; + return READ_ONCE(vmbus_connection.channels[relid]); } /* @@ -366,33 +408,29 @@ struct vmbus_channel *relid2channel(u32 relid) void vmbus_on_event(unsigned long data) { struct vmbus_channel *channel = (void *) data; - unsigned long time_limit = jiffies + 2; + void (*callback_fn)(void *context); trace_vmbus_on_event(channel); - do { - void (*callback_fn)(void *); + hv_debug_delay_test(channel, INTERRUPT_DELAY); - /* A channel once created is persistent even when - * there is no driver handling the device. An - * unloading driver sets the onchannel_callback to NULL. - */ - callback_fn = READ_ONCE(channel->onchannel_callback); - if (unlikely(callback_fn == NULL)) - return; - - (*callback_fn)(channel->channel_callback_context); + /* A channel once created is persistent even when + * there is no driver handling the device. An + * unloading driver sets the onchannel_callback to NULL. + */ + callback_fn = READ_ONCE(channel->onchannel_callback); + if (unlikely(!callback_fn)) + return; - if (channel->callback_mode != HV_CALL_BATCHED) - return; + (*callback_fn)(channel->channel_callback_context); - if (likely(hv_end_read(&channel->inbound) == 0)) - return; + if (channel->callback_mode != HV_CALL_BATCHED) + return; - hv_begin_read(&channel->inbound); - } while (likely(time_before(jiffies, time_limit))); + if (likely(hv_end_read(&channel->inbound) == 0)) + return; - /* The time limit (2 jiffies) has been reached */ + hv_begin_read(&channel->inbound); tasklet_schedule(&channel->callback_event); } @@ -422,7 +460,7 @@ int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep) case HV_STATUS_INVALID_CONNECTION_ID: /* * See vmbus_negotiate_version(): VMBus protocol 5.0 - * requires that we must use + * and higher require that we must use * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate * Contact message, but on old hosts that only * support VMBus protocol 4.0 or lower, here we get @@ -475,6 +513,20 @@ void vmbus_set_event(struct vmbus_channel *channel) ++channel->sig_events; - hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event); + if (ms_hyperv.paravisor_present) { + if (hv_isolation_type_snp()) + hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event, + NULL, sizeof(channel->sig_event)); + else if (hv_isolation_type_tdx()) + hv_tdx_hypercall(HVCALL_SIGNAL_EVENT | HV_HYPERCALL_FAST_BIT, + channel->sig_event, 0); + else + WARN_ON_ONCE(1); + } else { + u64 control = HVCALL_SIGNAL_EVENT; + + control |= hv_nested ? HV_HYPERCALL_NESTED : 0; + hv_do_fast_hypercall8(control, channel->sig_event); + } } EXPORT_SYMBOL_GPL(vmbus_set_event); |
