mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-12-27 13:30:45 -05:00
MSHV_VTL driver is going to be introduced, which is supposed to provide interface for Virtual Machine Monitors (VMMs) to control Virtual Trust Level (VTL). Export the symbols needed to make it work (vmbus_isr, hv_context and hv_post_message). Co-developed-by: Roman Kisel <romank@linux.microsoft.com> Signed-off-by: Roman Kisel <romank@linux.microsoft.com> Co-developed-by: Saurabh Sengar <ssengar@linux.microsoft.com> Signed-off-by: Saurabh Sengar <ssengar@linux.microsoft.com> Reported-by: kernel test robot <lkp@intel.com> Closes: https://lore.kernel.org/oe-kbuild-all/202506110544.q0NDMQVc-lkp@intel.com/ Reviewed-by: Michael Kelley <mhklinux@outlook.com> Signed-off-by: Naman Jain <namjain@linux.microsoft.com> Signed-off-by: Wei Liu <wei.liu@kernel.org>
657 lines
18 KiB
C
657 lines
18 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Copyright (c) 2009, Microsoft Corporation.
|
|
*
|
|
* Authors:
|
|
* Haiyang Zhang <haiyangz@microsoft.com>
|
|
* Hank Janssen <hjanssen@microsoft.com>
|
|
*/
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/io.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/hyperv.h>
|
|
#include <linux/random.h>
|
|
#include <linux/clockchips.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/export.h>
|
|
#include <clocksource/hyperv_timer.h>
|
|
#include <asm/mshyperv.h>
|
|
#include <linux/set_memory.h>
|
|
#include "hyperv_vmbus.h"
|
|
|
|
/*
 * The one and only instance of the driver-wide context. Its per-CPU part
 * (cpu_context) is allocated by hv_init() and its NUMA map (hv_numa_map)
 * by hv_synic_alloc(). The symbol is exported to the "mshv_vtl" module
 * only.
 */
|
|
struct hv_context hv_context;
|
|
EXPORT_SYMBOL_FOR_MODULES(hv_context, "mshv_vtl");
|
|
|
|
/*
|
|
* hv_init - Main initialization routine.
|
|
*
|
|
* This routine must be called before any other routines in here are called
|
|
*/
|
|
int hv_init(void)
|
|
{
|
|
hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
|
|
if (!hv_context.cpu_context)
|
|
return -ENOMEM;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* hv_post_message - Post a message using the hypervisor message IPC.
|
|
*
|
|
* This involves a hypercall.
|
|
*/
|
|
int hv_post_message(union hv_connection_id connection_id,
|
|
enum hv_message_type message_type,
|
|
void *payload, size_t payload_size)
|
|
{
|
|
struct hv_input_post_message *aligned_msg;
|
|
unsigned long flags;
|
|
u64 status;
|
|
|
|
if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
|
|
return -EMSGSIZE;
|
|
|
|
local_irq_save(flags);
|
|
|
|
/*
|
|
* A TDX VM with the paravisor must use the decrypted post_msg_page: see
|
|
* the comment in struct hv_per_cpu_context. A SNP VM with the paravisor
|
|
* can use the encrypted hyperv_pcpu_input_arg because it copies the
|
|
* input into the GHCB page, which has been decrypted by the paravisor.
|
|
*/
|
|
if (hv_isolation_type_tdx() && ms_hyperv.paravisor_present)
|
|
aligned_msg = this_cpu_ptr(hv_context.cpu_context)->post_msg_page;
|
|
else
|
|
aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg);
|
|
|
|
aligned_msg->connectionid = connection_id;
|
|
aligned_msg->reserved = 0;
|
|
aligned_msg->message_type = message_type;
|
|
aligned_msg->payload_size = payload_size;
|
|
memcpy((void *)aligned_msg->payload, payload, payload_size);
|
|
|
|
if (ms_hyperv.paravisor_present && !vmbus_is_confidential()) {
|
|
/*
|
|
* If the VMBus isn't confidential, use the CoCo-specific
|
|
* mechanism to communicate with the hypervisor.
|
|
*/
|
|
if (hv_isolation_type_tdx())
|
|
status = hv_tdx_hypercall(HVCALL_POST_MESSAGE,
|
|
virt_to_phys(aligned_msg), 0);
|
|
else if (hv_isolation_type_snp())
|
|
status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
|
|
aligned_msg, NULL,
|
|
sizeof(*aligned_msg));
|
|
else
|
|
status = HV_STATUS_INVALID_PARAMETER;
|
|
} else {
|
|
u64 control = HVCALL_POST_MESSAGE;
|
|
|
|
control |= hv_nested ? HV_HYPERCALL_NESTED : 0;
|
|
/*
|
|
* If there is no paravisor, this will go to the hypervisor.
|
|
* In the Confidential VMBus case, there is the paravisor
|
|
* to which this will trap.
|
|
*/
|
|
status = hv_do_hypercall(control, aligned_msg, NULL);
|
|
}
|
|
|
|
local_irq_restore(flags);
|
|
|
|
return hv_result(status);
|
|
}
|
|
EXPORT_SYMBOL_FOR_MODULES(hv_post_message, "mshv_vtl");
|
|
|
|
/*
 * hv_alloc_page - Allocate one zeroed page, optionally marking it decrypted.
 * @page:    receives the page address; set to NULL on any failure
 * @decrypt: when true, set_memory_decrypted() is applied to the page
 * @note:    short page description used in the error message
 *
 * Return: 0 on success, -ENOMEM if no page could be allocated, or the
 * error returned by set_memory_decrypted().
 */
static int hv_alloc_page(void **page, bool decrypt, const char *note)
{
	void *addr;
	int err;

	/*
	 * After the page changes its encryption status, its contents might
	 * appear scrambled on some hardware. Thus `get_zeroed_page` would
	 * zero the page out in vain, so the page is allocated unzeroed here
	 * and cleared explicitly, exactly once, further down.
	 *
	 * By default, the page is allocated encrypted in a CoCo VM.
	 */
	addr = (void *)__get_free_page(GFP_KERNEL);
	*page = addr;
	if (!addr)
		return -ENOMEM;

	err = decrypt ? set_memory_decrypted((unsigned long)addr, 1) : 0;
	if (err) {
		/*
		 * Report the failure but don't put the page back on the free
		 * list as its encryption status is unknown.
		 */
		pr_err("allocation failed for %s page, error %d, decrypted %d\n",
		       note, err, decrypt);
		*page = NULL;
		return err;
	}

	memset(addr, 0, PAGE_SIZE);

	return 0;
}
|
|
|
|
/*
 * hv_free_page - Counterpart of hv_alloc_page().
 * @page:    location of the page address; always NULL on return
 * @encrypt: when true, set_memory_encrypted() is applied before freeing
 * @note:    short page description used in the error message
 *
 * A NULL *@page is accepted and treated as success.
 *
 * Return: 0 on success, or the error returned by set_memory_encrypted(),
 * in which case the page is not freed.
 */
static int hv_free_page(void **page, bool encrypt, const char *note)
{
	void *addr = *page;
	int err;

	if (!addr)
		return 0;

	err = encrypt ? set_memory_encrypted((unsigned long)addr, 1) : 0;

	/*
	 * In the case of the failure, the page is leaked. Something is wrong,
	 * prefer to lose the page with the unknown encryption status and stay
	 * afloat.
	 */
	if (!err)
		free_page((unsigned long)addr);
	else
		pr_err("deallocation failed for %s page, error %d, encrypt %d\n",
		       note, err, encrypt);

	*page = NULL;

	return err;
}
|
|
|
|
int hv_synic_alloc(void)
|
|
{
|
|
int cpu, ret = -ENOMEM;
|
|
struct hv_per_cpu_context *hv_cpu;
|
|
const bool decrypt = !vmbus_is_confidential();
|
|
|
|
/*
|
|
* First, zero all per-cpu memory areas so hv_synic_free() can
|
|
* detect what memory has been allocated and cleanup properly
|
|
* after any failures.
|
|
*/
|
|
for_each_present_cpu(cpu) {
|
|
hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
|
|
memset(hv_cpu, 0, sizeof(*hv_cpu));
|
|
}
|
|
|
|
hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
|
|
GFP_KERNEL);
|
|
if (!hv_context.hv_numa_map) {
|
|
pr_err("Unable to allocate NUMA map\n");
|
|
goto err;
|
|
}
|
|
|
|
for_each_present_cpu(cpu) {
|
|
hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
|
|
|
|
tasklet_init(&hv_cpu->msg_dpc,
|
|
vmbus_on_msg_dpc, (unsigned long)hv_cpu);
|
|
|
|
if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) {
|
|
ret = hv_alloc_page(&hv_cpu->post_msg_page,
|
|
decrypt, "post msg");
|
|
if (ret)
|
|
goto err;
|
|
}
|
|
|
|
/*
|
|
* If these SynIC pages are not allocated, SIEF and SIM pages
|
|
* are configured using what the root partition or the paravisor
|
|
* provides upon reading the SIEFP and SIMP registers.
|
|
*/
|
|
if (!ms_hyperv.paravisor_present && !hv_root_partition()) {
|
|
ret = hv_alloc_page(&hv_cpu->hyp_synic_message_page,
|
|
decrypt, "hypervisor SynIC msg");
|
|
if (ret)
|
|
goto err;
|
|
ret = hv_alloc_page(&hv_cpu->hyp_synic_event_page,
|
|
decrypt, "hypervisor SynIC event");
|
|
if (ret)
|
|
goto err;
|
|
}
|
|
|
|
if (vmbus_is_confidential()) {
|
|
ret = hv_alloc_page(&hv_cpu->para_synic_message_page,
|
|
false, "paravisor SynIC msg");
|
|
if (ret)
|
|
goto err;
|
|
ret = hv_alloc_page(&hv_cpu->para_synic_event_page,
|
|
false, "paravisor SynIC event");
|
|
if (ret)
|
|
goto err;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
|
|
err:
|
|
/*
|
|
* Any memory allocations that succeeded will be freed when
|
|
* the caller cleans up by calling hv_synic_free()
|
|
*/
|
|
return ret;
|
|
}
|
|
|
|
void hv_synic_free(void)
|
|
{
|
|
int cpu;
|
|
const bool encrypt = !vmbus_is_confidential();
|
|
|
|
for_each_present_cpu(cpu) {
|
|
struct hv_per_cpu_context *hv_cpu =
|
|
per_cpu_ptr(hv_context.cpu_context, cpu);
|
|
|
|
if (ms_hyperv.paravisor_present && hv_isolation_type_tdx())
|
|
hv_free_page(&hv_cpu->post_msg_page,
|
|
encrypt, "post msg");
|
|
if (!ms_hyperv.paravisor_present && !hv_root_partition()) {
|
|
hv_free_page(&hv_cpu->hyp_synic_event_page,
|
|
encrypt, "hypervisor SynIC event");
|
|
hv_free_page(&hv_cpu->hyp_synic_message_page,
|
|
encrypt, "hypervisor SynIC msg");
|
|
}
|
|
if (vmbus_is_confidential()) {
|
|
hv_free_page(&hv_cpu->para_synic_event_page,
|
|
false, "paravisor SynIC event");
|
|
hv_free_page(&hv_cpu->para_synic_message_page,
|
|
false, "paravisor SynIC msg");
|
|
}
|
|
}
|
|
|
|
kfree(hv_context.hv_numa_map);
|
|
}
|
|
|
|
/*
|
|
* hv_hyp_synic_enable_regs - Initialize the Synthetic Interrupt Controller
|
|
* with the hypervisor.
|
|
*/
|
|
void hv_hyp_synic_enable_regs(unsigned int cpu)
|
|
{
|
|
struct hv_per_cpu_context *hv_cpu =
|
|
per_cpu_ptr(hv_context.cpu_context, cpu);
|
|
union hv_synic_simp simp;
|
|
union hv_synic_siefp siefp;
|
|
union hv_synic_sint shared_sint;
|
|
|
|
/* Setup the Synic's message page with the hypervisor. */
|
|
simp.as_uint64 = hv_get_msr(HV_MSR_SIMP);
|
|
simp.simp_enabled = 1;
|
|
|
|
if (ms_hyperv.paravisor_present || hv_root_partition()) {
|
|
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
|
|
u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
|
|
~ms_hyperv.shared_gpa_boundary;
|
|
hv_cpu->hyp_synic_message_page =
|
|
(void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
|
|
if (!hv_cpu->hyp_synic_message_page)
|
|
pr_err("Fail to map synic message page.\n");
|
|
} else {
|
|
simp.base_simp_gpa = virt_to_phys(hv_cpu->hyp_synic_message_page)
|
|
>> HV_HYP_PAGE_SHIFT;
|
|
}
|
|
|
|
hv_set_msr(HV_MSR_SIMP, simp.as_uint64);
|
|
|
|
/* Setup the Synic's event page with the hypervisor. */
|
|
siefp.as_uint64 = hv_get_msr(HV_MSR_SIEFP);
|
|
siefp.siefp_enabled = 1;
|
|
|
|
if (ms_hyperv.paravisor_present || hv_root_partition()) {
|
|
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
|
|
u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
|
|
~ms_hyperv.shared_gpa_boundary;
|
|
hv_cpu->hyp_synic_event_page =
|
|
(void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
|
|
if (!hv_cpu->hyp_synic_event_page)
|
|
pr_err("Fail to map synic event page.\n");
|
|
} else {
|
|
siefp.base_siefp_gpa = virt_to_phys(hv_cpu->hyp_synic_event_page)
|
|
>> HV_HYP_PAGE_SHIFT;
|
|
}
|
|
|
|
hv_set_msr(HV_MSR_SIEFP, siefp.as_uint64);
|
|
hv_enable_coco_interrupt(cpu, vmbus_interrupt, true);
|
|
|
|
/* Setup the shared SINT. */
|
|
if (vmbus_irq != -1)
|
|
enable_percpu_irq(vmbus_irq, 0);
|
|
shared_sint.as_uint64 = hv_get_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT);
|
|
|
|
shared_sint.vector = vmbus_interrupt;
|
|
shared_sint.masked = false;
|
|
shared_sint.auto_eoi = hv_recommend_using_aeoi();
|
|
hv_set_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
|
|
}
|
|
|
|
static void hv_hyp_synic_enable_interrupts(void)
|
|
{
|
|
union hv_synic_scontrol sctrl;
|
|
|
|
/* Enable the global synic bit */
|
|
sctrl.as_uint64 = hv_get_msr(HV_MSR_SCONTROL);
|
|
sctrl.enable = 1;
|
|
|
|
hv_set_msr(HV_MSR_SCONTROL, sctrl.as_uint64);
|
|
}
|
|
|
|
static void hv_para_synic_enable_regs(unsigned int cpu)
|
|
{
|
|
union hv_synic_simp simp;
|
|
union hv_synic_siefp siefp;
|
|
struct hv_per_cpu_context *hv_cpu
|
|
= per_cpu_ptr(hv_context.cpu_context, cpu);
|
|
|
|
/* Setup the Synic's message page with the paravisor. */
|
|
simp.as_uint64 = hv_para_get_synic_register(HV_MSR_SIMP);
|
|
simp.simp_enabled = 1;
|
|
simp.base_simp_gpa = virt_to_phys(hv_cpu->para_synic_message_page)
|
|
>> HV_HYP_PAGE_SHIFT;
|
|
hv_para_set_synic_register(HV_MSR_SIMP, simp.as_uint64);
|
|
|
|
/* Setup the Synic's event page with the paravisor. */
|
|
siefp.as_uint64 = hv_para_get_synic_register(HV_MSR_SIEFP);
|
|
siefp.siefp_enabled = 1;
|
|
siefp.base_siefp_gpa = virt_to_phys(hv_cpu->para_synic_event_page)
|
|
>> HV_HYP_PAGE_SHIFT;
|
|
hv_para_set_synic_register(HV_MSR_SIEFP, siefp.as_uint64);
|
|
}
|
|
|
|
static void hv_para_synic_enable_interrupts(void)
|
|
{
|
|
union hv_synic_scontrol sctrl;
|
|
|
|
/* Enable the global synic bit */
|
|
sctrl.as_uint64 = hv_para_get_synic_register(HV_MSR_SCONTROL);
|
|
sctrl.enable = 1;
|
|
hv_para_set_synic_register(HV_MSR_SCONTROL, sctrl.as_uint64);
|
|
}
|
|
|
|
int hv_synic_init(unsigned int cpu)
|
|
{
|
|
if (vmbus_is_confidential())
|
|
hv_para_synic_enable_regs(cpu);
|
|
|
|
/*
|
|
* The SINT is set in hv_hyp_synic_enable_regs() by calling
|
|
* hv_set_msr(). hv_set_msr() in turn has special case code for the
|
|
* SINT MSRs that write to the hypervisor version of the MSR *and*
|
|
* the paravisor version of the MSR (but *without* the proxy bit when
|
|
* VMBus is confidential).
|
|
*
|
|
* Then enable interrupts via the paravisor if VMBus is confidential,
|
|
* and otherwise via the hypervisor.
|
|
*/
|
|
|
|
hv_hyp_synic_enable_regs(cpu);
|
|
if (vmbus_is_confidential())
|
|
hv_para_synic_enable_interrupts();
|
|
else
|
|
hv_hyp_synic_enable_interrupts();
|
|
|
|
hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void hv_hyp_synic_disable_regs(unsigned int cpu)
|
|
{
|
|
struct hv_per_cpu_context *hv_cpu =
|
|
per_cpu_ptr(hv_context.cpu_context, cpu);
|
|
union hv_synic_sint shared_sint;
|
|
union hv_synic_simp simp;
|
|
union hv_synic_siefp siefp;
|
|
|
|
shared_sint.as_uint64 = hv_get_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT);
|
|
|
|
shared_sint.masked = 1;
|
|
|
|
/* Need to correctly cleanup in the case of SMP!!! */
|
|
/* Disable the interrupt */
|
|
hv_set_msr(HV_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
|
|
hv_enable_coco_interrupt(cpu, vmbus_interrupt, false);
|
|
|
|
simp.as_uint64 = hv_get_msr(HV_MSR_SIMP);
|
|
/*
|
|
* In Isolation VM, simp and sief pages are allocated by
|
|
* paravisor. These pages also will be used by kdump
|
|
* kernel. So just reset enable bit here and keep page
|
|
* addresses.
|
|
*/
|
|
simp.simp_enabled = 0;
|
|
if (ms_hyperv.paravisor_present || hv_root_partition()) {
|
|
if (hv_cpu->hyp_synic_message_page) {
|
|
iounmap(hv_cpu->hyp_synic_message_page);
|
|
hv_cpu->hyp_synic_message_page = NULL;
|
|
}
|
|
} else {
|
|
simp.base_simp_gpa = 0;
|
|
}
|
|
|
|
hv_set_msr(HV_MSR_SIMP, simp.as_uint64);
|
|
|
|
siefp.as_uint64 = hv_get_msr(HV_MSR_SIEFP);
|
|
siefp.siefp_enabled = 0;
|
|
|
|
if (ms_hyperv.paravisor_present || hv_root_partition()) {
|
|
if (hv_cpu->hyp_synic_event_page) {
|
|
iounmap(hv_cpu->hyp_synic_event_page);
|
|
hv_cpu->hyp_synic_event_page = NULL;
|
|
}
|
|
} else {
|
|
siefp.base_siefp_gpa = 0;
|
|
}
|
|
|
|
hv_set_msr(HV_MSR_SIEFP, siefp.as_uint64);
|
|
}
|
|
|
|
static void hv_hyp_synic_disable_interrupts(void)
|
|
{
|
|
union hv_synic_scontrol sctrl;
|
|
|
|
/* Disable the global synic bit */
|
|
sctrl.as_uint64 = hv_get_msr(HV_MSR_SCONTROL);
|
|
sctrl.enable = 0;
|
|
hv_set_msr(HV_MSR_SCONTROL, sctrl.as_uint64);
|
|
}
|
|
|
|
static void hv_para_synic_disable_regs(unsigned int cpu)
|
|
{
|
|
union hv_synic_simp simp;
|
|
union hv_synic_siefp siefp;
|
|
|
|
/* Disable SynIC's message page in the paravisor. */
|
|
simp.as_uint64 = hv_para_get_synic_register(HV_MSR_SIMP);
|
|
simp.simp_enabled = 0;
|
|
hv_para_set_synic_register(HV_MSR_SIMP, simp.as_uint64);
|
|
|
|
/* Disable SynIC's event page in the paravisor. */
|
|
siefp.as_uint64 = hv_para_get_synic_register(HV_MSR_SIEFP);
|
|
siefp.siefp_enabled = 0;
|
|
hv_para_set_synic_register(HV_MSR_SIEFP, siefp.as_uint64);
|
|
}
|
|
|
|
static void hv_para_synic_disable_interrupts(void)
|
|
{
|
|
union hv_synic_scontrol sctrl;
|
|
|
|
/* Disable the global synic bit */
|
|
sctrl.as_uint64 = hv_para_get_synic_register(HV_MSR_SCONTROL);
|
|
sctrl.enable = 0;
|
|
hv_para_set_synic_register(HV_MSR_SCONTROL, sctrl.as_uint64);
|
|
}
|
|
|
|
#define HV_MAX_TRIES 3
|
|
/*
|
|
* Scan the event flags page of 'this' CPU looking for any bit that is set. If we find one
|
|
* bit set, then wait for a few milliseconds. Repeat these steps for a maximum of 3 times.
|
|
* Return 'true', if there is still any set bit after this operation; 'false', otherwise.
|
|
*
|
|
* If a bit is set, that means there is a pending channel interrupt. The expectation is
|
|
* that the normal interrupt handling mechanism will find and process the channel interrupt
|
|
* "very soon", and in the process clear the bit.
|
|
*/
|
|
static bool __hv_synic_event_pending(union hv_synic_event_flags *event, int sint)
|
|
{
|
|
unsigned long *recv_int_page;
|
|
bool pending;
|
|
u32 relid;
|
|
int tries = 0;
|
|
|
|
if (!event)
|
|
return false;
|
|
|
|
event += sint;
|
|
recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
|
|
retry:
|
|
pending = false;
|
|
for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
|
|
/* Special case - VMBus channel protocol messages */
|
|
if (relid == 0)
|
|
continue;
|
|
pending = true;
|
|
break;
|
|
}
|
|
if (pending && tries++ < HV_MAX_TRIES) {
|
|
usleep_range(10000, 20000);
|
|
goto retry;
|
|
}
|
|
return pending;
|
|
}
|
|
|
|
static bool hv_synic_event_pending(void)
|
|
{
|
|
struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
|
|
union hv_synic_event_flags *hyp_synic_event_page = hv_cpu->hyp_synic_event_page;
|
|
union hv_synic_event_flags *para_synic_event_page = hv_cpu->para_synic_event_page;
|
|
|
|
return
|
|
__hv_synic_event_pending(hyp_synic_event_page, VMBUS_MESSAGE_SINT) ||
|
|
__hv_synic_event_pending(para_synic_event_page, VMBUS_MESSAGE_SINT);
|
|
}
|
|
|
|
static int hv_pick_new_cpu(struct vmbus_channel *channel)
|
|
{
|
|
int ret = -EBUSY;
|
|
int start;
|
|
int cpu;
|
|
|
|
lockdep_assert_cpus_held();
|
|
lockdep_assert_held(&vmbus_connection.channel_mutex);
|
|
|
|
/*
|
|
* We can't assume that the relevant interrupts will be sent before
|
|
* the cpu is offlined on older versions of hyperv.
|
|
*/
|
|
if (vmbus_proto_version < VERSION_WIN10_V5_3)
|
|
return -EBUSY;
|
|
|
|
start = get_random_u32_below(nr_cpu_ids);
|
|
|
|
for_each_cpu_wrap(cpu, cpu_online_mask, start) {
|
|
if (channel->target_cpu == cpu ||
|
|
channel->target_cpu == VMBUS_CONNECT_CPU)
|
|
continue;
|
|
|
|
ret = vmbus_channel_set_cpu(channel, cpu);
|
|
if (!ret)
|
|
break;
|
|
}
|
|
|
|
if (ret)
|
|
ret = vmbus_channel_set_cpu(channel, VMBUS_CONNECT_CPU);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* hv_synic_cleanup - Cleanup routine for hv_synic_init().
|
|
*/
|
|
int hv_synic_cleanup(unsigned int cpu)
|
|
{
|
|
struct vmbus_channel *channel, *sc;
|
|
int ret = 0;
|
|
|
|
if (vmbus_connection.conn_state != CONNECTED)
|
|
goto always_cleanup;
|
|
|
|
/*
|
|
* Hyper-V does not provide a way to change the connect CPU once
|
|
* it is set; we must prevent the connect CPU from going offline
|
|
* while the VM is running normally. But in the panic or kexec()
|
|
* path where the vmbus is already disconnected, the CPU must be
|
|
* allowed to shut down.
|
|
*/
|
|
if (cpu == VMBUS_CONNECT_CPU)
|
|
return -EBUSY;
|
|
|
|
/*
|
|
* Search for channels which are bound to the CPU we're about to
|
|
* cleanup.
|
|
*/
|
|
mutex_lock(&vmbus_connection.channel_mutex);
|
|
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
|
|
if (channel->target_cpu == cpu) {
|
|
ret = hv_pick_new_cpu(channel);
|
|
if (ret) {
|
|
mutex_unlock(&vmbus_connection.channel_mutex);
|
|
return ret;
|
|
}
|
|
}
|
|
list_for_each_entry(sc, &channel->sc_list, sc_list) {
|
|
if (sc->target_cpu == cpu) {
|
|
ret = hv_pick_new_cpu(sc);
|
|
if (ret) {
|
|
mutex_unlock(&vmbus_connection.channel_mutex);
|
|
return ret;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
mutex_unlock(&vmbus_connection.channel_mutex);
|
|
|
|
/*
|
|
* Scan the event flags page looking for bits that are set and waiting
|
|
* with a timeout for vmbus_chan_sched() to process such bits. If bits
|
|
* are still set after this operation and VMBus is connected, fail the
|
|
* CPU offlining operation.
|
|
*/
|
|
if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
|
|
return -EBUSY;
|
|
|
|
always_cleanup:
|
|
hv_stimer_legacy_cleanup(cpu);
|
|
|
|
/*
|
|
* First, disable the event and message pages
|
|
* used for communicating with the host, and then
|
|
* disable the host interrupts if VMBus is not
|
|
* confidential.
|
|
*/
|
|
hv_hyp_synic_disable_regs(cpu);
|
|
if (!vmbus_is_confidential())
|
|
hv_hyp_synic_disable_interrupts();
|
|
|
|
/*
|
|
* Perform the same steps for the Confidential VMBus.
|
|
* The sequencing provides the guarantee that no data
|
|
* may be posted for processing before disabling interrupts.
|
|
*/
|
|
if (vmbus_is_confidential()) {
|
|
hv_para_synic_disable_regs(cpu);
|
|
hv_para_synic_disable_interrupts();
|
|
}
|
|
if (vmbus_irq != -1)
|
|
disable_percpu_irq(vmbus_irq);
|
|
|
|
return ret;
|
|
}
|