From 4cd661c248b6671914ad59e16760bb6d908dfc61 Mon Sep 17 00:00:00 2001 From: Nuno Das Neves Date: Wed, 13 Aug 2025 11:20:57 -0700 Subject: [PATCH 01/19] hyperv: Add missing field to hv_output_map_device_interrupt This field is unused, but the correct structure size is needed when computing the amount of space for the output argument to reside, so that it does not cross a page boundary. Signed-off-by: Nuno Das Neves Reviewed-by: Michael Kelley Signed-off-by: Wei Liu --- include/hyperv/hvhdk_mini.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/hyperv/hvhdk_mini.h b/include/hyperv/hvhdk_mini.h index 42e7876455b5..858f6a3925b3 100644 --- a/include/hyperv/hvhdk_mini.h +++ b/include/hyperv/hvhdk_mini.h @@ -301,6 +301,7 @@ struct hv_input_map_device_interrupt { /* HV_OUTPUT_MAP_DEVICE_INTERRUPT */ struct hv_output_map_device_interrupt { struct hv_interrupt_entry interrupt_entry; + u64 ext_status_deprecated[5]; } __packed; /* HV_INPUT_UNMAP_DEVICE_INTERRUPT */ From 47691ced158ab3a7ce2189b857b19c0c99a9aa80 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Tue, 12 Aug 2025 19:48:45 +0000 Subject: [PATCH 02/19] clocksource: hyper-v: Skip unnecessary checks for the root partition The HV_ACCESS_TSC_INVARIANT bit is always zero when Linux runs as the root partition. The root partition will see directly what the hardware provides. The old logic in ms_hyperv_init_platform caused the native TSC clock source to be incorrectly marked as unstable on x86. Fix it. Skip the unnecessary checks in code for the root partition. Add one extra comment in code to clarify the behavior. Reviewed-by: Nuno Das Neves Signed-off-by: Wei Liu --- arch/x86/kernel/cpu/mshyperv.c | 11 ++++++++++- drivers/clocksource/hyperv_timer.c | 10 +++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index c78f860419d6..25773af116bc 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -565,6 +565,11 @@ static void __init ms_hyperv_init_platform(void) machine_ops.crash_shutdown = hv_machine_crash_shutdown; #endif #endif + /* + * HV_ACCESS_TSC_INVARIANT is always zero for the root partition. Root + * partition doesn't need to write to synthetic MSR to enable invariant + * TSC feature. It sees what the hardware provides. + */ if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { /* * Writing to synthetic MSR 0x40000118 updates/changes the @@ -636,8 +641,12 @@ static void __init ms_hyperv_init_platform(void) * TSC should be marked as unstable only after Hyper-V * clocksource has been initialized. This ensures that the * stability of the sched_clock is not altered. + * + * HV_ACCESS_TSC_INVARIANT is always zero for the root partition. No + * need to check for it. */ - if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) + if (!hv_root_partition() && + !(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) mark_tsc_unstable("running on Hyper-V"); hardlockup_detector_disable(); diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index 2edc13ca184e..10356d4ec55c 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -549,14 +549,22 @@ static void __init hv_init_tsc_clocksource(void) union hv_reference_tsc_msr tsc_msr; /* + * When running as a guest partition: + * * If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly * handles frequency and offset changes due to live migration, * pause/resume, and other VM management operations. So lower the * Hyper-V Reference TSC rating, causing the generic TSC to be used. * TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference * TSC will be preferred over the virtualized ARM64 arch counter. + * + * When running as the root partition: + * + * There is no HV_ACCESS_TSC_INVARIANT feature. Always lower the rating + * of the Hyper-V Reference TSC. */ - if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { + if ((ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) || + hv_root_partition()) { hyperv_cs_tsc.rating = 250; hyperv_cs_msr.rating = 245; } From f26c9306dff818bbf4ef545c5a5ee0eca7149922 Mon Sep 17 00:00:00 2001 From: Nuno Das Neves Date: Tue, 2 Sep 2025 16:48:33 -0700 Subject: [PATCH 03/19] mshv: Add support for a new parent partition configuration Detect booting as an "L1VH" partition. This is a new scenario very similar to root partition where the mshv_root driver can be used to create and manage guest partitions. It mostly works the same as root partition, but there are some differences in how various features are handled. hv_l1vh_partition() is introduced to handle these cases. Add hv_parent_partition() which returns true for either case, replacing some hv_root_partition() checks. Signed-off-by: Nuno Das Neves Acked-by: Wei Liu Reviewed-by: Michael Kelley Signed-off-by: Wei Liu --- drivers/hv/hv_common.c | 22 +++++++++++++--------- drivers/hv/mshv_root_main.c | 26 +++++++++++++------------- include/asm-generic/mshyperv.h | 11 +++++++++++ 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 49898d10faff..e109a620c83f 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -257,7 +257,7 @@ static void hv_kmsg_dump_register(void) static inline bool hv_output_page_exists(void) { - return hv_root_partition() || IS_ENABLED(CONFIG_HYPERV_VTL_MODE); + return hv_parent_partition() || IS_ENABLED(CONFIG_HYPERV_VTL_MODE); } void __init hv_get_partition_id(void) @@ -377,7 +377,7 @@ int __init hv_common_init(void) BUG_ON(!hyperv_pcpu_output_arg); } - if (hv_root_partition()) { + if (hv_parent_partition()) { hv_synic_eventring_tail = alloc_percpu(u8 *); BUG_ON(!hv_synic_eventring_tail); } @@ -531,7 +531,7 @@ int hv_common_cpu_init(unsigned int cpu) if (msr_vp_index > hv_max_vp_index) hv_max_vp_index = msr_vp_index; - if (hv_root_partition()) { + if (hv_parent_partition()) { synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail); *synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT, sizeof(u8), flags); @@ -558,7 +558,7 @@ int hv_common_cpu_die(unsigned int cpu) * originally allocated memory is reused in hv_common_cpu_init(). */ - if (hv_root_partition()) { + if (hv_parent_partition()) { synic_eventring_tail = this_cpu_ptr(hv_synic_eventring_tail); kfree(*synic_eventring_tail); *synic_eventring_tail = NULL; @@ -729,13 +729,17 @@ void hv_identify_partition_type(void) * the root partition setting if also a Confidential VM. */ if ((ms_hyperv.priv_high & HV_CREATE_PARTITIONS) && - (ms_hyperv.priv_high & HV_CPU_MANAGEMENT) && !(ms_hyperv.priv_high & HV_ISOLATION)) { - pr_info("Hyper-V: running as root partition\n"); - if (IS_ENABLED(CONFIG_MSHV_ROOT)) - hv_curr_partition_type = HV_PARTITION_TYPE_ROOT; - else + + if (!IS_ENABLED(CONFIG_MSHV_ROOT)) { pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n"); + } else if (ms_hyperv.priv_high & HV_CPU_MANAGEMENT) { + pr_info("Hyper-V: running as root partition\n"); + hv_curr_partition_type = HV_PARTITION_TYPE_ROOT; + } else { + pr_info("Hyper-V: running as L1VH partition\n"); + hv_curr_partition_type = HV_PARTITION_TYPE_L1VH; + } } } diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c index 72df774e410a..aa20a5c96afa 100644 --- a/drivers/hv/mshv_root_main.c +++ b/drivers/hv/mshv_root_main.c @@ -37,12 +37,6 @@ MODULE_AUTHOR("Microsoft"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Microsoft Hyper-V root partition VMM interface /dev/mshv"); -/* TODO move this to mshyperv.h when needed outside driver */ -static inline bool hv_parent_partition(void) -{ - return hv_root_partition(); -} - /* TODO move this to another file when debugfs code is added */ enum hv_stats_vp_counters { /* HV_THREAD_COUNTER */ #if defined(CONFIG_X86) @@ -2074,9 +2068,13 @@ static int __init hv_retrieve_scheduler_type(enum hv_scheduler_type *out) /* Retrieve and stash the supported scheduler type */ static int __init mshv_retrieve_scheduler_type(struct device *dev) { - int ret; + int ret = 0; + + if (hv_l1vh_partition()) + hv_scheduler_type = HV_SCHEDULER_TYPE_CORE_SMT; + else + ret = hv_retrieve_scheduler_type(&hv_scheduler_type); - ret = hv_retrieve_scheduler_type(&hv_scheduler_type); if (ret) return ret; @@ -2203,9 +2201,6 @@ static int __init mshv_root_partition_init(struct device *dev) { int err; - if (mshv_retrieve_scheduler_type(dev)) - return -ENODEV; - err = root_scheduler_init(dev); if (err) return err; @@ -2227,7 +2222,7 @@ static int __init mshv_parent_partition_init(void) struct device *dev; union hv_hypervisor_version_info version_info; - if (!hv_root_partition() || is_kdump_kernel()) + if (!hv_parent_partition() || is_kdump_kernel()) return -ENODEV; if (hv_get_hypervisor_version(&version_info)) @@ -2264,7 +2259,12 @@ static int __init mshv_parent_partition_init(void) mshv_cpuhp_online = ret; - ret = mshv_root_partition_init(dev); + ret = mshv_retrieve_scheduler_type(dev); + if (ret) + goto remove_cpu_state; + + if (hv_root_partition()) + ret = mshv_root_partition_init(dev); if (ret) goto remove_cpu_state; diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index a729b77983fa..dbd4c2f3aee3 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -31,6 +31,7 @@ enum hv_partition_type { HV_PARTITION_TYPE_GUEST, HV_PARTITION_TYPE_ROOT, + HV_PARTITION_TYPE_L1VH, }; struct ms_hyperv_info { @@ -354,12 +355,22 @@ static inline bool hv_root_partition(void) { return hv_curr_partition_type == HV_PARTITION_TYPE_ROOT; } +static inline bool hv_l1vh_partition(void) +{ + return hv_curr_partition_type == HV_PARTITION_TYPE_L1VH; +} +static inline bool hv_parent_partition(void) +{ + return hv_root_partition() || hv_l1vh_partition(); +} int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); #else /* CONFIG_MSHV_ROOT */ static inline bool hv_root_partition(void) { return false; } +static inline bool hv_l1vh_partition(void) { return false; } +static inline bool hv_parent_partition(void) { return false; } static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) { return -EOPNOTSUPP; From ac7b0a5cd331862c3d4d49eae71ed93f8f5082f3 Mon Sep 17 00:00:00 2001 From: Abhishek Tiwari Date: Mon, 1 Sep 2025 05:00:00 -0700 Subject: [PATCH 04/19] Drivers: hv: util: Cosmetic changes for hv_utils_transport.c Fix issues reported by checkpatch.pl script for hv_utils_transport.c file - Update pr_warn() calls to use __func__ for consistent logging context. - else should follow close brace '}' No functional changes intended. Signed-off-by: Abhishek Tiwari Reviewed-by: Naman Jain Signed-off-by: Wei Liu --- drivers/hv/hv_utils_transport.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c index 832885198643..b3de35ff6334 100644 --- a/drivers/hv/hv_utils_transport.c +++ b/drivers/hv/hv_utils_transport.c @@ -129,8 +129,7 @@ static int hvt_op_open(struct inode *inode, struct file *file) * device gets released. */ hvt->mode = HVUTIL_TRANSPORT_CHARDEV; - } - else if (hvt->mode == HVUTIL_TRANSPORT_NETLINK) { + } else if (hvt->mode == HVUTIL_TRANSPORT_NETLINK) { /* * We're switching from netlink communication to using char * device. Issue the reset first. @@ -195,7 +194,7 @@ static void hvt_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) } spin_unlock(&hvt_list_lock); if (!hvt_found) { - pr_warn("hvt_cn_callback: spurious message received!\n"); + pr_warn("%s: spurious message received!\n", __func__); return; } @@ -210,7 +209,7 @@ static void hvt_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) if (hvt->mode == HVUTIL_TRANSPORT_NETLINK) hvt_found->on_msg(msg->data, msg->len); else - pr_warn("hvt_cn_callback: unexpected netlink message!\n"); + pr_warn("%s: unexpected netlink message!\n", __func__); mutex_unlock(&hvt->lock); } @@ -260,8 +259,9 @@ int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len, hvt->outmsg_len = len; hvt->on_read = on_read_cb; wake_up_interruptible(&hvt->outmsg_q); - } else + } else { ret = -ENOMEM; + } out_unlock: mutex_unlock(&hvt->lock); return ret; From 2d0ddbb65cef99aab241378b0f4ff2d6ea8c3a5a Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 31 Aug 2025 09:04:06 -0700 Subject: [PATCH 05/19] Drivers: hv: Simplify data structures for VMBus channel close message struct vmbus_close_msg is used for sending the VMBus channel close message. It contains a struct vmbus_channel_msginfo, which has a flex array member at the end. The latter's presence in the middle of struct vmbus_close_msg causes warnings when built with -Wflex-array-member-not-at-end. But the struct vmbus_channel_msginfo is unused because the Hyper-V host does not send a response to the channel close message. So remove the struct vmbus_channel_msginfo. Then, since the only remaining field is struct vmbus_channel_close_channel, also remove the containing struct vmbus_close_msg and directly use struct vmbus_channel_close_channel. Besides eliminating unnecessary complexity, these changes resolve the -Wflex-array-member-not-at-end warnings. Signed-off-by: Michael Kelley Reviewed-by: Tianyu Lan Signed-off-by: Wei Liu --- drivers/hv/channel.c | 2 +- include/linux/hyperv.h | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 7c7c66e0dc3f..162d6aeece7b 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -925,7 +925,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel) /* Send a closing message */ - msg = &channel->close_msg.msg; + msg = &channel->close_msg; msg->header.msgtype = CHANNELMSG_CLOSECHANNEL; msg->child_relid = channel->offermsg.child_relid; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index a59c5c3e95fb..59826c89171c 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -707,11 +707,6 @@ struct vmbus_channel_msginfo { unsigned char msg[]; }; -struct vmbus_close_msg { - struct vmbus_channel_msginfo info; - struct vmbus_channel_close_channel msg; -}; - enum vmbus_device_type { HV_IDE = 0, HV_SCSI, @@ -800,7 +795,7 @@ struct vmbus_channel { struct hv_ring_buffer_info outbound; /* send to parent */ struct hv_ring_buffer_info inbound; /* receive from parent */ - struct vmbus_close_msg close_msg; + struct vmbus_channel_close_channel close_msg; /* Statistics */ u64 interrupts; /* Host to Guest interrupts */ From 7ad8c34f2435137e2a0dfd0a5dd000e219c642ce Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 28 Aug 2025 12:16:18 +0300 Subject: [PATCH 06/19] x86/hyperv: Add kexec/kdump support on Azure CVMs Azure CVM instance types featuring a paravisor hang upon kdump. The investigation shows that makedumpfile causes a hang when it steps on a page which was previously share with the host (HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY). The new kernel has no knowledge of these 'special' regions (which are Vmbus connection pages, GPADL buffers, ...). There are several ways to approach the issue: - Convey the knowledge about these regions to the new kernel somehow. - Unshare these regions before accessing in the new kernel (it is unclear if there's a way to query the status for a given GPA range). - Unshare these regions before jumping to the new kernel (which this patch implements). To make the procedure as robust as possible, store PFN ranges of shared regions in a linked list instead of storing GVAs and re-using hv_vtom_set_host_visibility(). This also allows to avoid memory allocation on the kdump/kexec path. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Michael Kelley Reviewed-by: Tianyu Lan Signed-off-by: Wei Liu --- arch/x86/hyperv/ivm.c | 211 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 210 insertions(+), 1 deletion(-) diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index ade6c665c97e..a4615b889f3e 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -462,6 +462,195 @@ void hv_ivm_msr_read(u64 msr, u64 *value) hv_ghcb_msr_read(msr, value); } +/* + * Keep track of the PFN regions which were shared with the host. The access + * must be revoked upon kexec/kdump (see hv_ivm_clear_host_access()). + */ +struct hv_enc_pfn_region { + struct list_head list; + u64 pfn; + int count; +}; + +static LIST_HEAD(hv_list_enc); +static DEFINE_RAW_SPINLOCK(hv_list_enc_lock); + +static int hv_list_enc_add(const u64 *pfn_list, int count) +{ + struct hv_enc_pfn_region *ent; + unsigned long flags; + u64 pfn; + int i; + + for (i = 0; i < count; i++) { + pfn = pfn_list[i]; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + /* Check if the PFN already exists in some region first */ + list_for_each_entry(ent, &hv_list_enc, list) { + if ((ent->pfn <= pfn) && (ent->pfn + ent->count - 1 >= pfn)) + /* Nothing to do - pfn is already in the list */ + goto unlock_done; + } + + /* + * Check if the PFN is adjacent to an existing region. Growing + * a region can make it adjacent to another one but merging is + * not (yet) implemented for simplicity. A PFN cannot be added + * to two regions to keep the logic in hv_list_enc_remove() + * correct. + */ + list_for_each_entry(ent, &hv_list_enc, list) { + if (ent->pfn + ent->count == pfn) { + /* Grow existing region up */ + ent->count++; + goto unlock_done; + } else if (pfn + 1 == ent->pfn) { + /* Grow existing region down */ + ent->pfn--; + ent->count++; + goto unlock_done; + } + } + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + + /* No adjacent region found -- create a new one */ + ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL); + if (!ent) + return -ENOMEM; + + ent->pfn = pfn; + ent->count = 1; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + list_add(&ent->list, &hv_list_enc); + +unlock_done: + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + } + + return 0; +} + +static int hv_list_enc_remove(const u64 *pfn_list, int count) +{ + struct hv_enc_pfn_region *ent, *t; + struct hv_enc_pfn_region new_region; + unsigned long flags; + u64 pfn; + int i; + + for (i = 0; i < count; i++) { + pfn = pfn_list[i]; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + list_for_each_entry_safe(ent, t, &hv_list_enc, list) { + if (pfn == ent->pfn + ent->count - 1) { + /* Removing tail pfn */ + ent->count--; + if (!ent->count) { + list_del(&ent->list); + kfree(ent); + } + goto unlock_done; + } else if (pfn == ent->pfn) { + /* Removing head pfn */ + ent->count--; + ent->pfn++; + if (!ent->count) { + list_del(&ent->list); + kfree(ent); + } + goto unlock_done; + } else if (pfn > ent->pfn && pfn < ent->pfn + ent->count - 1) { + /* + * Removing a pfn in the middle. Cut off the tail + * of the existing region and create a template for + * the new one. + */ + new_region.pfn = pfn + 1; + new_region.count = ent->count - (pfn - ent->pfn + 1); + ent->count = pfn - ent->pfn; + goto unlock_split; + } + + } +unlock_done: + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + continue; + +unlock_split: + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + + ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL); + if (!ent) + return -ENOMEM; + + ent->pfn = new_region.pfn; + ent->count = new_region.count; + + raw_spin_lock_irqsave(&hv_list_enc_lock, flags); + list_add(&ent->list, &hv_list_enc); + raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags); + } + + return 0; +} + +/* Stop new private<->shared conversions */ +static void hv_vtom_kexec_begin(void) +{ + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; + + /* + * Crash kernel reaches here with interrupts disabled: can't wait for + * conversions to finish. + * + * If race happened, just report and proceed. + */ + if (!set_memory_enc_stop_conversion()) + pr_warn("Failed to stop shared<->private conversions\n"); +} + +static void hv_vtom_kexec_finish(void) +{ + struct hv_gpa_range_for_visibility *input; + struct hv_enc_pfn_region *ent; + unsigned long flags; + u64 hv_status; + int cur, i; + + local_irq_save(flags); + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + + if (unlikely(!input)) + goto out; + + list_for_each_entry(ent, &hv_list_enc, list) { + for (i = 0, cur = 0; i < ent->count; i++) { + input->gpa_page_list[cur] = ent->pfn + i; + cur++; + + if (cur == HV_MAX_MODIFY_GPA_REP_COUNT || i == ent->count - 1) { + input->partition_id = HV_PARTITION_ID_SELF; + input->host_visibility = VMBUS_PAGE_NOT_VISIBLE; + input->reserved0 = 0; + input->reserved1 = 0; + hv_status = hv_do_rep_hypercall( + HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, + cur, 0, input, NULL); + WARN_ON_ONCE(!hv_result_success(hv_status)); + cur = 0; + } + } + + } + +out: + local_irq_restore(flags); +} + /* * hv_mark_gpa_visibility - Set pages visible to host via hvcall. * @@ -475,6 +664,7 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], struct hv_gpa_range_for_visibility *input; u64 hv_status; unsigned long flags; + int ret; /* no-op if partition isolation is not enabled */ if (!hv_is_isolation_supported()) @@ -486,6 +676,13 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], return -EINVAL; } + if (visibility == VMBUS_PAGE_NOT_VISIBLE) + ret = hv_list_enc_remove(pfn, count); + else + ret = hv_list_enc_add(pfn, count); + if (ret) + return ret; + local_irq_save(flags); input = *this_cpu_ptr(hyperv_pcpu_input_arg); @@ -506,8 +703,18 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], if (hv_result_success(hv_status)) return 0; + + if (visibility == VMBUS_PAGE_NOT_VISIBLE) + ret = hv_list_enc_add(pfn, count); else - return -EFAULT; + ret = hv_list_enc_remove(pfn, count); + /* + * There's no good way to recover from -ENOMEM here, the accounting is + * wrong either way. + */ + WARN_ON_ONCE(ret); + + return -EFAULT; } /* @@ -669,6 +876,8 @@ void __init hv_vtom_init(void) x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required; x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present; x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility; + x86_platform.guest.enc_kexec_begin = hv_vtom_kexec_begin; + x86_platform.guest.enc_kexec_finish = hv_vtom_kexec_finish; /* Set WB as the default cache mode. */ guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK); From 0ebac01a00be972020c002a7fe0bb6b6fca8410f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Aug 2025 17:01:50 -0700 Subject: [PATCH 07/19] mshv: Handle NEED_RESCHED_LAZY before transferring to guest Check for NEED_RESCHED_LAZY, not just NEED_RESCHED, prior to transferring control to a guest. Failure to check for lazy resched can unnecessarily delay rescheduling until the next tick when using a lazy preemption model. Note, ideally both the checking and processing of TIF bits would be handled in common code, to avoid having to keep three separate paths synchronized, but defer such cleanups to the future to keep the fix as standalone as possible. Cc: Nuno Das Neves Cc: Mukesh R Fixes: 621191d709b1 ("Drivers: hv: Introduce mshv_root module to expose /dev/mshv to VMMs") Signed-off-by: Sean Christopherson Tested-by: Nuno Das Neves Reviewed-by: Nuno Das Neves Signed-off-by: Wei Liu --- drivers/hv/mshv_common.c | 2 +- drivers/hv/mshv_root_main.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hv/mshv_common.c b/drivers/hv/mshv_common.c index 6f227a8a5af7..eb3df3e296bb 100644 --- a/drivers/hv/mshv_common.c +++ b/drivers/hv/mshv_common.c @@ -151,7 +151,7 @@ int mshv_do_pre_guest_mode_work(ulong th_flags) if (th_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) return -EINTR; - if (th_flags & _TIF_NEED_RESCHED) + if (th_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) schedule(); if (th_flags & _TIF_NOTIFY_RESUME) diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c index aa20a5c96afa..3d0d5af80d2e 100644 --- a/drivers/hv/mshv_root_main.c +++ b/drivers/hv/mshv_root_main.c @@ -484,7 +484,8 @@ mshv_vp_wait_for_hv_kick(struct mshv_vp *vp) static int mshv_pre_guest_mode_work(struct mshv_vp *vp) { const ulong work_flags = _TIF_NOTIFY_SIGNAL | _TIF_SIGPENDING | - _TIF_NEED_RESCHED | _TIF_NOTIFY_RESUME; + _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | + _TIF_NOTIFY_RESUME; ulong th_flags; th_flags = read_thread_flags(); From 6d0386ea99875313fdfd074eb74013b6e3b48a76 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Aug 2025 17:01:53 -0700 Subject: [PATCH 08/19] entry/kvm: KVM: Move KVM details related to signal/-EINTR into KVM proper Move KVM's morphing of pending signals into userspace exits into KVM proper, and drop the @vcpu param from xfer_to_guest_mode_handle_work(). How KVM responds to -EINTR is a detail that really belongs in KVM itself, and invoking kvm_handle_signal_exit() from kernel code creates an inverted module dependency. E.g. attempting to move kvm_handle_signal_exit() into kvm_main.c would generate an linker error when building kvm.ko as a module. Dropping KVM details will also converting the KVM "entry" code into a more generic virtualization framework so that it can be used when running as a Hyper-V root partition. Lastly, eliminating usage of "struct kvm_vcpu" outside of KVM is also nice to have for KVM x86 developers, as keeping the details of kvm_vcpu purely within KVM allows changing the layout of the structure without having to boot into a new kernel, e.g. allows rebuilding and reloading kvm.ko with a modified kvm_vcpu structure as part of debug/development. Signed-off-by: Sean Christopherson Reviewed-by: Thomas Gleixner Signed-off-by: Wei Liu --- arch/arm64/kvm/arm.c | 3 +-- arch/loongarch/kvm/vcpu.c | 3 +-- arch/riscv/kvm/vcpu.c | 3 +-- arch/x86/kvm/vmx/vmx.c | 1 - arch/x86/kvm/x86.c | 3 +-- include/linux/entry-kvm.h | 11 +++-------- include/linux/kvm_host.h | 13 ++++++++++++- kernel/entry/kvm.c | 13 +++++-------- 8 files changed, 24 insertions(+), 26 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 888f7c7abf54..418fd3043467 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -6,7 +6,6 @@ #include #include -#include #include #include #include @@ -1177,7 +1176,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* * Check conditions before entering the guest */ - ret = xfer_to_guest_mode_handle_work(vcpu); + ret = kvm_xfer_to_guest_mode_handle_work(vcpu); if (!ret) ret = 1; diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index d1b8c50941ca..514256b25ba1 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -4,7 +4,6 @@ */ #include -#include #include #include #include @@ -251,7 +250,7 @@ static int kvm_enter_guest_check(struct kvm_vcpu *vcpu) /* * Check conditions before entering the guest */ - ret = xfer_to_guest_mode_handle_work(vcpu); + ret = kvm_xfer_to_guest_mode_handle_work(vcpu); if (ret < 0) return ret; diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index f001e56403f9..251e787f2ebc 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -7,7 +7,6 @@ */ #include -#include #include #include #include @@ -910,7 +909,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) run->exit_reason = KVM_EXIT_UNKNOWN; while (ret > 0) { /* Check conditions before entering the guest */ - ret = xfer_to_guest_mode_handle_work(vcpu); + ret = kvm_xfer_to_guest_mode_handle_work(vcpu); if (ret) continue; ret = 1; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index aa157fe5b7b3..d7c86613e50a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a1c49bc681c4..0b13b8bf69e5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include @@ -11241,7 +11240,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu) if (__xfer_to_guest_mode_work_pending()) { kvm_vcpu_srcu_read_unlock(vcpu); - r = xfer_to_guest_mode_handle_work(vcpu); + r = kvm_xfer_to_guest_mode_handle_work(vcpu); kvm_vcpu_srcu_read_lock(vcpu); if (r) return r; diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 16149f6625e4..3644de7e6019 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -21,8 +21,6 @@ _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ ARCH_XFER_TO_GUEST_MODE_WORK) -struct kvm_vcpu; - /** * arch_xfer_to_guest_mode_handle_work - Architecture specific xfer to guest * mode work handling function. @@ -32,12 +30,10 @@ struct kvm_vcpu; * Invoked from xfer_to_guest_mode_handle_work(). Defaults to NOOP. Can be * replaced by architecture specific code. */ -static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, - unsigned long ti_work); +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work); #ifndef arch_xfer_to_guest_mode_work -static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, - unsigned long ti_work) +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work) { return 0; } @@ -46,11 +42,10 @@ static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, /** * xfer_to_guest_mode_handle_work - Check and handle pending work which needs * to be handled before going to guest mode - * @vcpu: Pointer to current's VCPU data * * Returns: 0 or an error code */ -int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu); +int xfer_to_guest_mode_handle_work(void); /** * xfer_to_guest_mode_prepare - Perform last minute preparation work that diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15656b7fba6c..598b9473e46d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2,7 +2,7 @@ #ifndef __KVM_HOST_H #define __KVM_HOST_H - +#include #include #include #include @@ -2450,6 +2450,17 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_INTR; vcpu->stat.signal_exits++; } + +static inline int kvm_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) +{ + int r = xfer_to_guest_mode_handle_work(); + + if (r) { + WARN_ON_ONCE(r != -EINTR); + kvm_handle_signal_exit(vcpu); + } + return r; +} #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ /* diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index 8485f63863af..6fc762eaacca 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -1,17 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include -static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) +static int xfer_to_guest_mode_work(unsigned long ti_work) { do { int ret; - if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) { - kvm_handle_signal_exit(vcpu); + if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) return -EINTR; - } if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) schedule(); @@ -19,7 +16,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) if (ti_work & _TIF_NOTIFY_RESUME) resume_user_mode_work(NULL); - ret = arch_xfer_to_guest_mode_handle_work(vcpu, ti_work); + ret = arch_xfer_to_guest_mode_handle_work(ti_work); if (ret) return ret; @@ -28,7 +25,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) return 0; } -int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) +int xfer_to_guest_mode_handle_work(void) { unsigned long ti_work; @@ -44,6 +41,6 @@ int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) if (!(ti_work & XFER_TO_GUEST_MODE_WORK)) return 0; - return xfer_to_guest_mode_work(vcpu, ti_work); + return xfer_to_guest_mode_work(ti_work); } EXPORT_SYMBOL_GPL(xfer_to_guest_mode_handle_work); From 9be7e1e320ff2e7db4b23c8ec5f599bbfac94ede Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Aug 2025 17:01:54 -0700 Subject: [PATCH 09/19] entry: Rename "kvm" entry code assets to "virt" to genericize APIs Rename the "kvm" entry code files and Kconfigs to use generic "virt" nomenclature so that the code can be reused by other hypervisors (or rather, their root/dom0 partition drivers), without incorrectly suggesting the code somehow relies on and/or involves KVM. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Thomas Gleixner Reviewed-by: Joel Fernandes Signed-off-by: Wei Liu --- MAINTAINERS | 2 +- arch/arm64/kvm/Kconfig | 2 +- arch/loongarch/kvm/Kconfig | 2 +- arch/riscv/kvm/Kconfig | 2 +- arch/x86/kvm/Kconfig | 2 +- include/linux/{entry-kvm.h => entry-virt.h} | 8 ++++---- include/linux/kvm_host.h | 6 +++--- include/linux/rcupdate.h | 2 +- kernel/entry/Makefile | 2 +- kernel/entry/{kvm.c => virt.c} | 2 +- kernel/rcu/tree.c | 6 +++--- virt/kvm/Kconfig | 2 +- 12 files changed, 19 insertions(+), 19 deletions(-) rename include/linux/{entry-kvm.h => entry-virt.h} (94%) rename kernel/entry/{kvm.c => virt.c} (97%) diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..c255048333f0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10200,7 +10200,7 @@ L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/entry F: include/linux/entry-common.h -F: include/linux/entry-kvm.h +F: include/linux/entry-virt.h F: include/linux/irq-entry-common.h F: kernel/entry/ diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 713248f240e0..6f4fc3caa31a 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -25,7 +25,7 @@ menuconfig KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_GENERIC_DIRTYLOG_READ_PROTECT - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select KVM_VFIO select HAVE_KVM_DIRTY_RING_ACQ_REL select NEED_KVM_DIRTY_RING_WITH_BITMAP diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig index 40eea6da7c25..ae64bbdf83a7 100644 --- a/arch/loongarch/kvm/Kconfig +++ b/arch/loongarch/kvm/Kconfig @@ -31,7 +31,7 @@ config KVM select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_MMU_NOTIFIER select KVM_MMIO - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS help diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index 5a62091b0809..c50328212917 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -30,7 +30,7 @@ config KVM select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_GENERIC_HARDWARE_ENABLING select KVM_MMIO - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select KVM_GENERIC_MMU_NOTIFIER select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 2c86673155c9..f81074b0c0a8 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -40,7 +40,7 @@ config KVM_X86 select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_NO_POLL - select KVM_XFER_TO_GUEST_WORK + select VIRT_XFER_TO_GUEST_WORK select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_VFIO select HAVE_KVM_PM_NOTIFIER if PM diff --git a/include/linux/entry-kvm.h b/include/linux/entry-virt.h similarity index 94% rename from include/linux/entry-kvm.h rename to include/linux/entry-virt.h index 3644de7e6019..42c89e3e5ca7 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-virt.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_ENTRYKVM_H -#define __LINUX_ENTRYKVM_H +#ifndef __LINUX_ENTRYVIRT_H +#define __LINUX_ENTRYVIRT_H #include #include @@ -10,7 +10,7 @@ #include /* Transfer to guest mode work */ -#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK +#ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK #ifndef ARCH_XFER_TO_GUEST_MODE_WORK # define ARCH_XFER_TO_GUEST_MODE_WORK (0) @@ -90,6 +90,6 @@ static inline bool xfer_to_guest_mode_work_pending(void) lockdep_assert_irqs_disabled(); return __xfer_to_guest_mode_work_pending(); } -#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ +#endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */ #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 598b9473e46d..70ac2267d5d0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2,7 +2,7 @@ #ifndef __KVM_HOST_H #define __KVM_HOST_H -#include +#include #include #include #include @@ -2444,7 +2444,7 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ -#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK +#ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) { vcpu->run->exit_reason = KVM_EXIT_INTR; @@ -2461,7 +2461,7 @@ static inline int kvm_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) } return r; } -#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ +#endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */ /* * If more than one page is being (un)accounted, @virt must be the address of diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 120536f4c6eb..1e1f3aa375d9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -129,7 +129,7 @@ static inline void rcu_sysrq_start(void) { } static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ -#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) +#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_VIRT_XFER_TO_GUEST_WORK)) void rcu_irq_work_resched(void); #else static __always_inline void rcu_irq_work_resched(void) { } diff --git a/kernel/entry/Makefile b/kernel/entry/Makefile index 77fcd83dd663..2333d70802e4 100644 --- a/kernel/entry/Makefile +++ b/kernel/entry/Makefile @@ -14,4 +14,4 @@ CFLAGS_common.o += -fno-stack-protector obj-$(CONFIG_GENERIC_IRQ_ENTRY) += common.o obj-$(CONFIG_GENERIC_SYSCALL) += syscall-common.o syscall_user_dispatch.o -obj-$(CONFIG_KVM_XFER_TO_GUEST_WORK) += kvm.o +obj-$(CONFIG_VIRT_XFER_TO_GUEST_WORK) += virt.o diff --git a/kernel/entry/kvm.c b/kernel/entry/virt.c similarity index 97% rename from kernel/entry/kvm.c rename to kernel/entry/virt.c index 6fc762eaacca..c52f99249763 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/virt.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include static int xfer_to_guest_mode_work(unsigned long ti_work) { diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 174ee243b349..995489b72535 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -573,7 +573,7 @@ void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len) } EXPORT_SYMBOL_GPL(rcutorture_format_gp_seqs); -#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) +#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_VIRT_XFER_TO_GUEST_WORK)) /* * An empty function that will trigger a reschedule on * IRQ tail once IRQs get re-enabled on userspace/guest resume. @@ -602,7 +602,7 @@ noinstr void rcu_irq_work_resched(void) if (IS_ENABLED(CONFIG_GENERIC_ENTRY) && !(current->flags & PF_VCPU)) return; - if (IS_ENABLED(CONFIG_KVM_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU)) + if (IS_ENABLED(CONFIG_VIRT_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU)) return; instrumentation_begin(); @@ -611,7 +611,7 @@ noinstr void rcu_irq_work_resched(void) } instrumentation_end(); } -#endif /* #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) */ +#endif /* #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_VIRT_XFER_TO_GUEST_WORK)) */ #ifdef CONFIG_PROVE_RCU /** diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 727b542074e7..ce843db53831 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -87,7 +87,7 @@ config HAVE_KVM_VCPU_RUN_PID_CHANGE config HAVE_KVM_NO_POLL bool -config KVM_XFER_TO_GUEST_WORK +config VIRT_XFER_TO_GUEST_WORK bool config HAVE_KVM_PM_NOTIFIER From c5eebe075e1129748f74e567da7bd8fbe77e485b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Aug 2025 17:01:55 -0700 Subject: [PATCH 10/19] mshv: Use common "entry virt" APIs to do work in root before running guest Use the kernel's common "entry virt" APIs to handle pending work prior to (re)entering guest mode, now that the virt APIs don't have a superfluous dependency on KVM. No functional change intended. Signed-off-by: Sean Christopherson Tested-by: Nuno Das Neves Reviewed-by: Nuno Das Neves Signed-off-by: Wei Liu --- drivers/hv/Kconfig | 1 + drivers/hv/mshv.h | 2 -- drivers/hv/mshv_common.c | 22 ---------------------- drivers/hv/mshv_root_main.c | 32 ++++++-------------------------- 4 files changed, 7 insertions(+), 50 deletions(-) diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 57623ca7f350..cdb210cd3710 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -66,6 +66,7 @@ config MSHV_ROOT # no particular order, making it impossible to reassemble larger pages depends on PAGE_SIZE_4KB select EVENTFD + select VIRT_XFER_TO_GUEST_WORK default n help Select this option to enable support for booting and running as root diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h index 0340a67acd0a..d4813df92b9c 100644 --- a/drivers/hv/mshv.h +++ b/drivers/hv/mshv.h @@ -25,6 +25,4 @@ int hv_call_set_vp_registers(u32 vp_index, u64 partition_id, u16 count, int hv_call_get_partition_property(u64 partition_id, u64 property_code, u64 *property_value); -int mshv_do_pre_guest_mode_work(ulong th_flags); - #endif /* _MSHV_H */ diff --git a/drivers/hv/mshv_common.c b/drivers/hv/mshv_common.c index eb3df3e296bb..aa2be51979fd 100644 --- a/drivers/hv/mshv_common.c +++ b/drivers/hv/mshv_common.c @@ -138,25 +138,3 @@ int hv_call_get_partition_property(u64 partition_id, return 0; } EXPORT_SYMBOL_GPL(hv_call_get_partition_property); - -/* - * Handle any pre-processing before going into the guest mode on this cpu, most - * notably call schedule(). Must be invoked with both preemption and - * interrupts enabled. - * - * Returns: 0 on success, -errno on error. - */ -int mshv_do_pre_guest_mode_work(ulong th_flags) -{ - if (th_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) - return -EINTR; - - if (th_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) - schedule(); - - if (th_flags & _TIF_NOTIFY_RESUME) - resume_user_mode_work(NULL); - - return 0; -} -EXPORT_SYMBOL_GPL(mshv_do_pre_guest_mode_work); diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c index 3d0d5af80d2e..e3b2bd417c46 100644 --- a/drivers/hv/mshv_root_main.c +++ b/drivers/hv/mshv_root_main.c @@ -8,6 +8,7 @@ * Authors: Microsoft Linux virtualization team */ +#include #include #include #include @@ -481,29 +482,6 @@ mshv_vp_wait_for_hv_kick(struct mshv_vp *vp) return 0; } -static int mshv_pre_guest_mode_work(struct mshv_vp *vp) -{ - const ulong work_flags = _TIF_NOTIFY_SIGNAL | _TIF_SIGPENDING | - _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | - _TIF_NOTIFY_RESUME; - ulong th_flags; - - th_flags = read_thread_flags(); - while (th_flags & work_flags) { - int ret; - - /* nb: following will call schedule */ - ret = mshv_do_pre_guest_mode_work(th_flags); - - if (ret) - return ret; - - th_flags = read_thread_flags(); - } - - return 0; -} - /* Must be called with interrupts enabled */ static long mshv_run_vp_with_root_scheduler(struct mshv_vp *vp) { @@ -524,9 +502,11 @@ static long mshv_run_vp_with_root_scheduler(struct mshv_vp *vp) u32 flags = 0; struct hv_output_dispatch_vp output; - ret = mshv_pre_guest_mode_work(vp); - if (ret) - break; + if (__xfer_to_guest_mode_work_pending()) { + ret = xfer_to_guest_mode_handle_work(); + if (ret) + break; + } if (vp->run.flags.intercept_suspend) flags |= HV_DISPATCH_VP_FLAG_CLEAR_INTERCEPT_SUSPEND; From 4691db0704ac1c266377c99f00288a014fdb7af1 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Fri, 18 Jul 2025 21:57:50 +0200 Subject: [PATCH 11/19] x86/hyperv: Switch to msi_create_parent_irq_domain() Move away from the legacy MSI domain setup, switch to use msi_create_parent_irq_domain(). While doing the conversion, I noticed that hv_irq_compose_msi_msg() is doing more than it is supposed to (composing message content). The interrupt allocation bits should be moved into hv_msi_domain_alloc(). However, I have no hardware to test this change, therefore I leave a TODO note. Signed-off-by: Nam Cao Acked-by: Thomas Gleixner Tested-by: Nuno Das Neves Reviewed-by: Nuno Das Neves Signed-off-by: Wei Liu --- arch/x86/hyperv/irqdomain.c | 111 ++++++++++++++++++++++++------------ drivers/hv/Kconfig | 1 + 2 files changed, 77 insertions(+), 35 deletions(-) diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c index 090f5ac9f492..c3ba12b1bc07 100644 --- a/arch/x86/hyperv/irqdomain.c +++ b/arch/x86/hyperv/irqdomain.c @@ -11,6 +11,7 @@ #include #include #include +#include #include static int hv_map_interrupt(union hv_device_id device_id, bool level, @@ -289,59 +290,99 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd) (void)hv_unmap_msi_interrupt(dev, &old_entry); } -static void hv_msi_free_irq(struct irq_domain *domain, - struct msi_domain_info *info, unsigned int virq) -{ - struct irq_data *irqd = irq_get_irq_data(virq); - struct msi_desc *desc; - - if (!irqd) - return; - - desc = irq_data_get_msi_desc(irqd); - if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev))) - return; - - hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd); -} - /* * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, * which implement the MSI or MSI-X Capability Structure. */ static struct irq_chip hv_pci_msi_controller = { .name = "HV-PCI-MSI", - .irq_unmask = pci_msi_unmask_irq, - .irq_mask = pci_msi_mask_irq, .irq_ack = irq_chip_ack_parent, - .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_compose_msi_msg = hv_irq_compose_msi_msg, - .irq_set_affinity = msi_domain_set_affinity, - .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED, + .irq_set_affinity = irq_chip_set_affinity_parent, }; -static struct msi_domain_ops pci_msi_domain_ops = { - .msi_free = hv_msi_free_irq, - .msi_prepare = pci_msi_prepare, +static bool hv_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *real_parent, struct msi_domain_info *info) +{ + struct irq_chip *chip = info->chip; + + if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) + return false; + + chip->flags |= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED; + + info->ops->msi_prepare = pci_msi_prepare; + + return true; +} + +#define HV_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX) +#define HV_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS) + +static struct msi_parent_ops hv_msi_parent_ops = { + .supported_flags = HV_MSI_FLAGS_SUPPORTED, + .required_flags = HV_MSI_FLAGS_REQUIRED, + .bus_select_token = DOMAIN_BUS_NEXUS, + .bus_select_mask = MATCH_PCI_MSI, + .chip_flags = MSI_CHIP_FLAG_SET_ACK, + .prefix = "HV-", + .init_dev_msi_info = hv_init_dev_msi_info, }; -static struct msi_domain_info hv_pci_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_PCI_MSIX, - .ops = &pci_msi_domain_ops, - .chip = &hv_pci_msi_controller, - .handler = handle_edge_irq, - .handler_name = "edge", +static int hv_msi_domain_alloc(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs, + void *arg) +{ + /* + * TODO: The allocation bits of hv_irq_compose_msi_msg(), i.e. everything except + * entry_to_msi_msg() should be in here. + */ + + int ret; + + ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, arg); + if (ret) + return ret; + + for (int i = 0; i < nr_irqs; ++i) { + irq_domain_set_info(d, virq + i, 0, &hv_pci_msi_controller, NULL, + handle_edge_irq, NULL, "edge"); + } + return 0; +} + +static void hv_msi_domain_free(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs) +{ + for (int i = 0; i < nr_irqs; ++i) { + struct irq_data *irqd = irq_domain_get_irq_data(d, virq); + struct msi_desc *desc; + + desc = irq_data_get_msi_desc(irqd); + if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev))) + continue; + + hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd); + } + irq_domain_free_irqs_top(d, virq, nr_irqs); +} + +static const struct irq_domain_ops hv_msi_domain_ops = { + .select = msi_lib_irq_domain_select, + .alloc = hv_msi_domain_alloc, + .free = hv_msi_domain_free, }; struct irq_domain * __init hv_create_pci_msi_domain(void) { struct irq_domain *d = NULL; - struct fwnode_handle *fn; - fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI"); - if (fn) - d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain); + struct irq_domain_info info = { + .fwnode = irq_domain_alloc_named_fwnode("HV-PCI-MSI"), + .ops = &hv_msi_domain_ops, + .parent = x86_vector_domain, + }; + + if (info.fwnode) + d = msi_create_parent_irq_domain(&info, &hv_msi_parent_ops); /* No point in going further if we can't get an irq domain */ BUG_ON(!d); diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index cdb210cd3710..e24f6299c376 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -10,6 +10,7 @@ config HYPERV select X86_HV_CALLBACK_VECTOR if X86 select OF_EARLY_FLATTREE if OF select SYSFB if EFI && !HYPERV_VTL_MODE + select IRQ_MSI_LIB if X86 help Select this option to run Linux as a Hyper-V client operating system. From fd9be098f7eb4bb6b1768145fd48e74a292e3730 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Wed, 17 Sep 2025 01:59:31 -0700 Subject: [PATCH 12/19] Drivers: hv: vmbus: Clean up sscanf format specifier in target_cpu_store() The target_cpu_store() function parses the target CPU from the sysfs buffer using sscanf(). The format string currently uses "%uu", which is redundant. The compiler ignores the extra "u", so there is no incorrect parsing at runtime. Update the format string to use "%u" for clarity and consistency. Signed-off-by: Alok Tiwari Reviewed-by: Michael Kelley Signed-off-by: Wei Liu --- drivers/hv/vmbus_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 2ed5a1e89d69..478c4a76896c 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1742,7 +1742,7 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel, u32 target_cpu; ssize_t ret; - if (sscanf(buf, "%uu", &target_cpu) != 1) + if (sscanf(buf, "%u", &target_cpu) != 1) return -EIO; cpus_read_lock(); From 332bf98d6c5a198d3078110b9000841dac3fd7b2 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Sat, 13 Sep 2025 12:24:42 -0700 Subject: [PATCH 13/19] Drivers: hv: vmbus: Fix sysfs output format for ring buffer index The sysfs attributes out_read_index and out_write_index in vmbus_drv.c currently use %d to print outbound.current_read_index and outbound.current_write_index. These fields are u32 values, so printing them with %d (signed) is not logically correct. Update the format specifier to %u to correctly match their type. No functional change, only fixes the sysfs output format. Signed-off-by: Alok Tiwari Reviewed-by: Michael Kelley Signed-off-by: Wei Liu --- drivers/hv/vmbus_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 478c4a76896c..fbab9f2d7fa6 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -322,7 +322,7 @@ static ssize_t out_read_index_show(struct device *dev, &outbound); if (ret < 0) return ret; - return sysfs_emit(buf, "%d\n", outbound.current_read_index); + return sysfs_emit(buf, "%u\n", outbound.current_read_index); } static DEVICE_ATTR_RO(out_read_index); @@ -341,7 +341,7 @@ static ssize_t out_write_index_show(struct device *dev, &outbound); if (ret < 0) return ret; - return sysfs_emit(buf, "%d\n", outbound.current_write_index); + return sysfs_emit(buf, "%u\n", outbound.current_write_index); } static DEVICE_ATTR_RO(out_write_index); From a3a4d6cb0b968e5d842e79f5dd9d7e07670e9b8a Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Sat, 13 Sep 2025 12:24:44 -0700 Subject: [PATCH 14/19] Drivers: hv: vmbus: Fix typos in vmbus_drv.c Fix two minor typos in vmbus_drv.c: - Correct "reponsible" -> "responsible" in a comment. - Add missing newline in pr_err() message ("channeln" -> "channel\n"). These are cosmetic changes only and do not affect functionality. Signed-off-by: Alok Tiwari Reviewed-by: Michael Kelley Signed-off-by: Wei Liu --- drivers/hv/vmbus_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index fbab9f2d7fa6..69591dc7bad2 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1947,7 +1947,7 @@ static const struct kobj_type vmbus_chan_ktype = { * is running. * For example, HV_NIC device is used either by uio_hv_generic or hv_netvsc at any given point of * time, and "ring" sysfs is needed only when uio_hv_generic is bound to that device. To avoid - * exposing the ring buffer by default, this function is reponsible to enable visibility of + * exposing the ring buffer by default, this function is responsible to enable visibility of * ring for userspace to use. * Note: Race conditions can happen with userspace and it is not encouraged to create new * use-cases for this. This was added to maintain backward compatibility, while solving @@ -2110,7 +2110,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) ret = vmbus_add_channel_kobj(child_device_obj, child_device_obj->channel); if (ret) { - pr_err("Unable to register primary channeln"); + pr_err("Unable to register primary channel\n"); goto err_kset_unregister; } hv_debug_add_dev_dir(child_device_obj); From 94b04355e6397a0a70b69c2571fa5c7d9990b835 Mon Sep 17 00:00:00 2001 From: Mukesh Rathor Date: Mon, 15 Sep 2025 16:46:03 -0700 Subject: [PATCH 15/19] Drivers: hv: Add CONFIG_HYPERV_VMBUS option At present VMBus driver is hinged off of CONFIG_HYPERV which entails lot of builtin code and encompasses too much. It's not always clear what depends on builtin hv code and what depends on VMBus. Setting CONFIG_HYPERV as a module and fudging the Makefile to switch to builtin adds even more confusion. VMBus is an independent module and should have its own config option. Also, there are scenarios like baremetal dom0/root where support is built in with CONFIG_HYPERV but without VMBus. Lastly, there are more features coming down that use CONFIG_HYPERV and add more dependencies on it. So, create a fine grained HYPERV_VMBUS option and update Kconfigs for dependency on VMBus. Signed-off-by: Mukesh Rathor Acked-by: Bjorn Helgaas # drivers/pci Signed-off-by: Wei Liu --- drivers/gpu/drm/Kconfig | 2 +- drivers/hid/Kconfig | 2 +- drivers/hv/Kconfig | 11 +++++++++-- drivers/hv/Makefile | 2 +- drivers/input/serio/Kconfig | 4 ++-- drivers/net/hyperv/Kconfig | 2 +- drivers/pci/Kconfig | 2 +- drivers/scsi/Kconfig | 2 +- drivers/uio/Kconfig | 2 +- drivers/video/fbdev/Kconfig | 2 +- include/asm-generic/mshyperv.h | 8 +++++--- net/vmw_vsock/Kconfig | 2 +- 12 files changed, 25 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index f7ea8e895c0c..58f34da061c6 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -398,7 +398,7 @@ source "drivers/gpu/drm/imagination/Kconfig" config DRM_HYPERV tristate "DRM Support for Hyper-V synthetic video device" - depends on DRM && PCI && HYPERV + depends on DRM && PCI && HYPERV_VMBUS select DRM_CLIENT_SELECTION select DRM_KMS_HELPER select DRM_GEM_SHMEM_HELPER diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index a57901203aeb..fe3dc8c0db99 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -1162,7 +1162,7 @@ config GREENASIA_FF config HID_HYPERV_MOUSE tristate "Microsoft Hyper-V mouse driver" - depends on HYPERV + depends on HYPERV_VMBUS help Select this option to enable the Hyper-V mouse driver. diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index e24f6299c376..29f8637f441a 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -45,18 +45,25 @@ config HYPERV_TIMER config HYPERV_UTILS tristate "Microsoft Hyper-V Utilities driver" - depends on HYPERV && CONNECTOR && NLS + depends on HYPERV_VMBUS && CONNECTOR && NLS depends on PTP_1588_CLOCK_OPTIONAL help Select this option to enable the Hyper-V Utilities. config HYPERV_BALLOON tristate "Microsoft Hyper-V Balloon driver" - depends on HYPERV + depends on HYPERV_VMBUS select PAGE_REPORTING help Select this option to enable Hyper-V Balloon driver. +config HYPERV_VMBUS + tristate "Microsoft Hyper-V VMBus driver" + depends on HYPERV + default HYPERV + help + Select this option to enable Hyper-V Vmbus driver. + config MSHV_ROOT tristate "Microsoft Hyper-V root partition support" depends on HYPERV && (X86_64 || ARM64) diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile index 976189c725dc..4bb41663767d 100644 --- a/drivers/hv/Makefile +++ b/drivers/hv/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_HYPERV) += hv_vmbus.o +obj-$(CONFIG_HYPERV_VMBUS) += hv_vmbus.o obj-$(CONFIG_HYPERV_UTILS) += hv_utils.o obj-$(CONFIG_HYPERV_BALLOON) += hv_balloon.o obj-$(CONFIG_MSHV_ROOT) += mshv_root.o diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig index 17edc1597446..c7ef347a4dff 100644 --- a/drivers/input/serio/Kconfig +++ b/drivers/input/serio/Kconfig @@ -276,8 +276,8 @@ config SERIO_OLPC_APSP config HYPERV_KEYBOARD tristate "Microsoft Synthetic Keyboard driver" - depends on HYPERV - default HYPERV + depends on HYPERV_VMBUS + default HYPERV_VMBUS help Select this option to enable the Hyper-V Keyboard driver. diff --git a/drivers/net/hyperv/Kconfig b/drivers/net/hyperv/Kconfig index c8cbd85adcf9..982964c1a9fb 100644 --- a/drivers/net/hyperv/Kconfig +++ b/drivers/net/hyperv/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config HYPERV_NET tristate "Microsoft Hyper-V virtual network driver" - depends on HYPERV + depends on HYPERV_VMBUS select UCS2_STRING select NLS help diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 9a249c65aedc..7065a8e5f9b1 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -221,7 +221,7 @@ config PCI_LABEL config PCI_HYPERV tristate "Hyper-V PCI Frontend" - depends on ((X86 && X86_64) || ARM64) && HYPERV && PCI_MSI && SYSFS + depends on ((X86 && X86_64) || ARM64) && HYPERV_VMBUS && PCI_MSI && SYSFS select PCI_HYPERV_INTERFACE select IRQ_MSI_LIB help diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 5522310bab8d..19d0884479a2 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -589,7 +589,7 @@ config XEN_SCSI_FRONTEND config HYPERV_STORAGE tristate "Microsoft Hyper-V virtual storage driver" - depends on SCSI && HYPERV + depends on SCSI && HYPERV_VMBUS depends on m || SCSI_FC_ATTRS != m default HYPERV help diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig index b060dcd7c635..6f86a61231e6 100644 --- a/drivers/uio/Kconfig +++ b/drivers/uio/Kconfig @@ -140,7 +140,7 @@ config UIO_MF624 config UIO_HV_GENERIC tristate "Generic driver for Hyper-V VMBus" - depends on HYPERV + depends on HYPERV_VMBUS help Generic driver that you can bind, dynamically, to any Hyper-V VMBus device. It is useful to provide direct access diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index c21484d15f0c..72c63eaeb983 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -1774,7 +1774,7 @@ config FB_BROADSHEET config FB_HYPERV tristate "Microsoft Hyper-V Synthetic Video support" - depends on FB && HYPERV + depends on FB && HYPERV_VMBUS select DMA_CMA if HAVE_DMA_CONTIGUOUS && CMA select FB_IOMEM_HELPERS_DEFERRED help diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index dbd4c2f3aee3..64ba6bc807d9 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -163,6 +163,7 @@ static inline u64 hv_generate_guest_id(u64 kernel_version) return guest_id; } +#if IS_ENABLED(CONFIG_HYPERV_VMBUS) /* Free the message slot and signal end-of-message if required */ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) { @@ -198,6 +199,10 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) } } +extern int vmbus_interrupt; +extern int vmbus_irq; +#endif /* CONFIG_HYPERV_VMBUS */ + int hv_get_hypervisor_version(union hv_hypervisor_version_info *info); void hv_setup_vmbus_handler(void (*handler)(void)); @@ -211,9 +216,6 @@ void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); void hv_remove_crash_handler(void); void hv_setup_mshv_handler(void (*handler)(void)); -extern int vmbus_interrupt; -extern int vmbus_irq; - #if IS_ENABLED(CONFIG_HYPERV) /* * Hypervisor's notion of virtual processor ID is different from diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig index 56356d2980c8..8e803c4828c4 100644 --- a/net/vmw_vsock/Kconfig +++ b/net/vmw_vsock/Kconfig @@ -72,7 +72,7 @@ config VIRTIO_VSOCKETS_COMMON config HYPERV_VSOCKETS tristate "Hyper-V transport for Virtual Sockets" - depends on VSOCKETS && HYPERV + depends on VSOCKETS && HYPERV_VMBUS help This module implements a Hyper-V transport for Virtual Sockets. From e3ec97c3abaf2fb68cc755cae3229288696b9f3d Mon Sep 17 00:00:00 2001 From: Mukesh Rathor Date: Mon, 15 Sep 2025 16:46:04 -0700 Subject: [PATCH 16/19] Drivers: hv: Make CONFIG_HYPERV bool With CONFIG_HYPERV and CONFIG_HYPERV_VMBUS separated, change CONFIG_HYPERV to bool from tristate. CONFIG_HYPERV now becomes the core Hyper-V hypervisor support, such as hypercalls, clocks/timers, Confidential Computing setup, PCI passthru, etc. that doesn't involve VMBus or VMBus devices. Signed-off-by: Mukesh Rathor Signed-off-by: Wei Liu --- drivers/Makefile | 2 +- drivers/hv/Kconfig | 2 +- drivers/hv/Makefile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/Makefile b/drivers/Makefile index b5749cf67044..7ad5744db0b6 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -161,7 +161,7 @@ obj-$(CONFIG_SOUNDWIRE) += soundwire/ # Virtualization drivers obj-$(CONFIG_VIRT_DRIVERS) += virt/ -obj-$(subst m,y,$(CONFIG_HYPERV)) += hv/ +obj-$(CONFIG_HYPERV) += hv/ obj-$(CONFIG_PM_DEVFREQ) += devfreq/ obj-$(CONFIG_EXTCON) += extcon/ diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 29f8637f441a..0b8c391a0342 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -3,7 +3,7 @@ menu "Microsoft Hyper-V guest support" config HYPERV - tristate "Microsoft Hyper-V client drivers" + bool "Microsoft Hyper-V core hypervisor support" depends on (X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \ || (ARM64 && !CPU_BIG_ENDIAN) select PARAVIRT diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile index 4bb41663767d..1a1677bf4dac 100644 --- a/drivers/hv/Makefile +++ b/drivers/hv/Makefile @@ -16,5 +16,5 @@ mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \ mshv_root_hv_call.o mshv_portid_table.o # Code that must be built-in -obj-$(subst m,y,$(CONFIG_HYPERV)) += hv_common.o +obj-$(CONFIG_HYPERV) += hv_common.o obj-$(subst m,y,$(CONFIG_MSHV_ROOT)) += hv_proc.o mshv_common.o From 34b4620349fe4776babf9c735e1c68e620784763 Mon Sep 17 00:00:00 2001 From: Prasanna Kumar T S M Date: Thu, 2 Oct 2025 21:10:36 +0000 Subject: [PATCH 17/19] fbdev/hyperv_fb: deprecate this in favor of Hyper-V DRM driver The Hyper-V DRM driver is available since kernel version 5.14 and it provides full KMS support and fbdev emulation via the DRM fbdev helpers. Deprecate this driver in favor of Hyper-V DRM driver. Signed-off-by: Prasanna Kumar T S M Acked-by: Thomas Zimmermann Reviewed-by: Michael Kelley Reviewed-by: Saurabh Sengar Signed-off-by: Wei Liu --- drivers/video/fbdev/Kconfig | 5 ++++- drivers/video/fbdev/hyperv_fb.c | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index 72c63eaeb983..c3b9954df804 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -1773,13 +1773,16 @@ config FB_BROADSHEET a bridge adapter. config FB_HYPERV - tristate "Microsoft Hyper-V Synthetic Video support" + tristate "Microsoft Hyper-V Synthetic Video support (DEPRECATED)" depends on FB && HYPERV_VMBUS select DMA_CMA if HAVE_DMA_CONTIGUOUS && CMA select FB_IOMEM_HELPERS_DEFERRED help This framebuffer driver supports Microsoft Hyper-V Synthetic Video. + This driver is deprecated, please use the Hyper-V DRM driver at + drivers/gpu/drm/hyperv (CONFIG_DRM_HYPERV) instead. + config FB_SIMPLE tristate "Simple framebuffer support" depends on FB diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 75338ffc703f..c99e2ea4b3de 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -1357,6 +1357,8 @@ static int __init hvfb_drv_init(void) { int ret; + pr_warn("Deprecated: use Hyper-V DRM driver instead\n"); + if (fb_modesetting_disabled("hyper_fb")) return -ENODEV; From 020b4e86834f7a35e9986cacd10eb0202d8faa08 Mon Sep 17 00:00:00 2001 From: Prasanna Kumar T S M Date: Wed, 17 Sep 2025 07:03:24 -0700 Subject: [PATCH 18/19] MAINTAINERS: Mark hyperv_fb driver Obsolete The hyperv_fb driver is deprecated in favor of Hyper-V DRM driver. Split the hyperv_fb entry from the hyperv drivers list, mark it obsolete. Signed-off-by: Prasanna Kumar T S M Reviewed-by: Saurabh Sengar Reviewed-by: Michael Kelley Acked-by: Thomas Zimmermann Signed-off-by: Wei Liu --- MAINTAINERS | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c255048333f0..900f8de44321 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11404,7 +11404,6 @@ F: drivers/pci/controller/pci-hyperv-intf.c F: drivers/pci/controller/pci-hyperv.c F: drivers/scsi/storvsc_drv.c F: drivers/uio/uio_hv_generic.c -F: drivers/video/fbdev/hyperv_fb.c F: include/asm-generic/mshyperv.h F: include/clocksource/hyperv_timer.h F: include/hyperv/hvgdk.h @@ -11418,6 +11417,16 @@ F: include/uapi/linux/hyperv.h F: net/vmw_vsock/hyperv_transport.c F: tools/hv/ +HYPER-V FRAMEBUFFER DRIVER +M: "K. Y. Srinivasan" +M: Haiyang Zhang +M: Wei Liu +M: Dexuan Cui +L: linux-hyperv@vger.kernel.org +S: Obsolete +T: git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git +F: drivers/video/fbdev/hyperv_fb.c + HYPERBUS SUPPORT M: Vignesh Raghavendra R: Tudor Ambarus From b595edcb24727e7f93e7962c3f6f971cc16dd29e Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Wed, 1 Oct 2025 16:08:46 -0700 Subject: [PATCH 19/19] hyperv: Remove the spurious null directive line The file contains a line that consists of the lone # symbol followed by a newline. While that is a valid syntax as defined by the C99+ grammar (6.10.7 "Null directive"), it serves no apparent purpose in this case. Remove the null preprocessor directive. No functional changes. Fixes: e68bda71a238 ("hyperv: Add new Hyper-V headers in include/hyperv") Signed-off-by: Roman Kisel Reviewed-by: Easwar Hariharan Signed-off-by: Wei Liu --- include/hyperv/hvgdk_mini.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index 1be7f6a02304..77abddfc750e 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -597,8 +597,6 @@ struct ms_hyperv_tsc_page { /* HV_REFERENCE_TSC_PAGE */ #define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) #define HV_SYNIC_SINT_VECTOR_MASK (0xFF) -# - /* Hyper-V defined statically assigned SINTs */ #define HV_SYNIC_INTERCEPTION_SINT_INDEX 0x00000000 #define HV_SYNIC_IOMMU_FAULT_SINT_INDEX 0x00000001