diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 01a3abef8abb..f1f1d2e5dc7c 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7835,8 +7835,10 @@ Will return -EBUSY if a VCPU has already been created. Valid feature flags in args[0] are:: - #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) - #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) + #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + #define KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST (1ULL << 2) + #define KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST (1ULL << 3) Enabling KVM_X2APIC_API_USE_32BIT_IDS changes the behavior of KVM_SET_GSI_ROUTING, KVM_SIGNAL_MSI, KVM_SET_LAPIC, and KVM_GET_LAPIC, @@ -7849,6 +7851,28 @@ as a broadcast even in x2APIC mode in order to support physical x2APIC without interrupt remapping. This is undesirable in logical mode, where 0xff represents CPUs 0-7 in cluster 0. +Setting KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST instructs KVM to enable +Suppress EOI Broadcasts. KVM will advertise support for Suppress EOI +Broadcast to the guest and suppress LAPIC EOI broadcasts when the guest +sets the Suppress EOI Broadcast bit in the SPIV register. This flag is +supported only when using a split IRQCHIP. + +Setting KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST disables support for +Suppress EOI Broadcasts entirely, i.e. instructs KVM to NOT advertise +support to the guest. + +Modern VMMs should either enable KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST +or KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST. If not, legacy quirky +behavior will be used by KVM: in split IRQCHIP mode, KVM will advertise +support for Suppress EOI Broadcasts but not actually suppress EOI +broadcasts; for in-kernel IRQCHIP mode, KVM will not advertise support for +Suppress EOI Broadcasts. + +Setting both KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST and +KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST will fail with an EINVAL error, +as will setting KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST without a split +IRCHIP. + 7.8 KVM_CAP_S390_USER_INSTR0 ---------------------------- diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 80fd7ca69c87..0414859bfa66 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1228,6 +1228,12 @@ enum kvm_irqchip_mode { KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ }; +enum kvm_suppress_eoi_broadcast_mode { + KVM_SUPPRESS_EOI_BROADCAST_QUIRKED, /* Legacy behavior */ + KVM_SUPPRESS_EOI_BROADCAST_ENABLED, /* Enable Suppress EOI broadcast */ + KVM_SUPPRESS_EOI_BROADCAST_DISABLED /* Disable Suppress EOI broadcast */ +}; + struct kvm_x86_msr_filter { u8 count; bool default_allow:1; @@ -1477,6 +1483,7 @@ struct kvm_arch { bool x2apic_format; bool x2apic_broadcast_quirk_disabled; + enum kvm_suppress_eoi_broadcast_mode suppress_eoi_broadcast_mode; bool has_mapped_host_mmio; bool guest_can_read_msr_platform_info; diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 7ceff6583652..1208932e5cc3 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -914,8 +914,10 @@ struct kvm_sev_snp_launch_finish { __u64 pad1[4]; }; -#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) -#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) +#define KVM_X2APIC_API_USE_32BIT_IDS _BITULL(0) +#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK _BITULL(1) +#define KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST _BITULL(2) +#define KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST _BITULL(3) struct kvm_hyperv_eventfd { __u32 conn_id; diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 2c2783296aed..a26fa4222f29 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -561,7 +561,7 @@ static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu, spin_lock(&ioapic->lock); if (trigger_mode != IOAPIC_LEVEL_TRIG || - kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) + kvm_lapic_suppress_eoi_broadcast(apic)) return; ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 01c2557bfa05..738ec3c1b0b5 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -105,6 +105,63 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) apic_test_vector(vector, apic->regs + APIC_IRR); } +static bool kvm_lapic_advertise_suppress_eoi_broadcast(struct kvm *kvm) +{ + switch (kvm->arch.suppress_eoi_broadcast_mode) { + case KVM_SUPPRESS_EOI_BROADCAST_ENABLED: + return true; + case KVM_SUPPRESS_EOI_BROADCAST_DISABLED: + return false; + case KVM_SUPPRESS_EOI_BROADCAST_QUIRKED: + /* + * The default in-kernel I/O APIC emulates the 82093AA and does not + * implement an EOI register. Some guests (e.g. Windows with the + * Hyper-V role enabled) disable LAPIC EOI broadcast without + * checking the I/O APIC version, which can cause level-triggered + * interrupts to never be EOI'd. + * + * To avoid this, KVM doesn't advertise Suppress EOI Broadcast + * support when using the default in-kernel I/O APIC. + * + * Historically, in split IRQCHIP mode, KVM always advertised + * Suppress EOI Broadcast support but did not actually suppress + * EOIs, resulting in quirky behavior. + */ + return !ioapic_in_kernel(kvm); + default: + WARN_ON_ONCE(1); + return false; + } +} + +bool kvm_lapic_suppress_eoi_broadcast(struct kvm_lapic *apic) +{ + struct kvm *kvm = apic->vcpu->kvm; + + if (!(kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) + return false; + + switch (kvm->arch.suppress_eoi_broadcast_mode) { + case KVM_SUPPRESS_EOI_BROADCAST_ENABLED: + return true; + case KVM_SUPPRESS_EOI_BROADCAST_DISABLED: + return false; + case KVM_SUPPRESS_EOI_BROADCAST_QUIRKED: + /* + * Historically, in split IRQCHIP mode, KVM ignored the suppress + * EOI broadcast bit set by the guest and broadcasts EOIs to the + * userspace I/O APIC. For In-kernel I/O APIC, the support itself + * is not advertised, can only be enabled via KVM_SET_APIC_STATE, + * and KVM's I/O APIC doesn't emulate Directed EOIs; but if the + * feature is enabled, it is respected (with odd behavior). + */ + return ioapic_in_kernel(kvm); + default: + WARN_ON_ONCE(1); + return false; + } +} + __read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu); EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_has_noapic_vcpu); @@ -554,15 +611,9 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) v = APIC_VERSION | ((apic->nr_lvt_entries - 1) << 16); - /* - * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation) - * which doesn't have EOI register; Some buggy OSes (e.g. Windows with - * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC - * version first and level-triggered interrupts never get EOIed in - * IOAPIC. - */ + if (guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) && - !ioapic_in_kernel(vcpu->kvm)) + kvm_lapic_advertise_suppress_eoi_broadcast(vcpu->kvm)) v |= APIC_LVR_DIRECTED_EOI; kvm_lapic_set_reg(apic, APIC_LVR, v); } @@ -1517,6 +1568,15 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) /* Request a KVM exit to inform the userspace IOAPIC. */ if (irqchip_split(apic->vcpu->kvm)) { + /* + * Don't exit to userspace if the guest has enabled Directed + * EOI, a.k.a. Suppress EOI Broadcasts, in which case the local + * APIC doesn't broadcast EOIs (the guest must EOI the target + * I/O APIC(s) directly). + */ + if (kvm_lapic_suppress_eoi_broadcast(apic)) + return; + apic->vcpu->arch.pending_ioapic_eoi = vector; kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu); return; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 282b9b7da98c..e5f5a222eced 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -231,6 +231,8 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); +bool kvm_lapic_suppress_eoi_broadcast(struct kvm_lapic *apic); + void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1ea94f4a3dcb..67e666921a12 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -121,8 +121,10 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #define KVM_CAP_PMU_VALID_MASK KVM_PMU_CAP_DISABLE -#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ - KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) +#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ + KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK | \ + KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST | \ + KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST) static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); @@ -4932,6 +4934,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_X2APIC_API: r = KVM_X2APIC_API_VALID_FLAGS; + if (kvm && !irqchip_split(kvm)) + r &= ~KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST; break; case KVM_CAP_NESTED_STATE: r = kvm_x86_ops.nested_ops->get_state ? @@ -6740,11 +6744,24 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS) break; + if ((cap->args[0] & KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST) && + (cap->args[0] & KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST)) + break; + + if ((cap->args[0] & KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST) && + !irqchip_split(kvm)) + break; + if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS) kvm->arch.x2apic_format = true; if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) kvm->arch.x2apic_broadcast_quirk_disabled = true; + if (cap->args[0] & KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST) + kvm->arch.suppress_eoi_broadcast_mode = KVM_SUPPRESS_EOI_BROADCAST_ENABLED; + if (cap->args[0] & KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST) + kvm->arch.suppress_eoi_broadcast_mode = KVM_SUPPRESS_EOI_BROADCAST_DISABLED; + r = 0; break; case KVM_CAP_X86_DISABLE_EXITS: