Merge tag 'kvm-x86-fixes-6.19-rc1' of https://github.com/kvm-x86/linux into HEAD
KVM fixes for 6.19-rc1

 - Add a missing "break" to fix param parsing in the rseq selftest.

 - Apply runtime updates to the _current_ CPUID when userspace is setting
   CPUID, e.g. as part of vCPU hotplug, to fix a false positive and to avoid
   dropping the pending update.

 - Disallow toggling KVM_MEM_GUEST_MEMFD on an existing memslot, as it's not
   supported by KVM and leads to a use-after-free due to KVM failing to unbind
   the memslot from the previously-associated guest_memfd instance.

 - Harden against similar KVM_MEM_GUEST_MEMFD goofs, and prepare for
   supporting flags-only changes on KVM_MEM_GUEST_MEMFD memslots, e.g. for
   dirty logging.

 - Set exit_code[63:32] to -1 (all 0xffs) when synthesizing a nested
   SVM_EXIT_ERR (a.k.a. VMEXIT_INVALID) #VMEXIT, as VMEXIT_INVALID is defined
   as -1ull (a 64-bit value).

 - Update SVI when activating APICv to fix a bug where a post-activation EOI
   for an in-service IRQ would effectively be lost due to SVI being stale.

 - Immediately refresh APICv controls (if necessary) on a nested VM-Exit
   instead of deferring the update via KVM_REQ_APICV_UPDATE, as the request is
   effectively ignored because KVM thinks the vCPU already has the correct
   APICv settings.
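[Editor's aside, not part of the merge: the SVM_EXIT_ERR fix hinges on the VMCB storing the 64-bit exit code as two 32-bit dwords (exit_code and exit_code_hi), so leaving the high dword zero synthesizes 0x00000000ffffffff rather than VMEXIT_INVALID (-1ull). A minimal standalone sketch of the difference; the reassemble() helper is hypothetical, for illustration only.]

/* Illustrative only, not kernel code: why exit_code_hi must be -1u, not 0. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define VMEXIT_INVALID	(~0ULL)		/* -1ull, the architectural value */
#define SVM_EXIT_ERR	((uint32_t)-1)	/* only the low 32 bits */

/* Hypothetical helper: rebuild the 64-bit exit code from the two VMCB dwords. */
static uint64_t reassemble(uint32_t exit_code_hi, uint32_t exit_code)
{
	return ((uint64_t)exit_code_hi << 32) | exit_code;
}

int main(void)
{
	/* Pre-fix: high dword left zero -> 0x00000000ffffffff, wrong. */
	printf("hi = 0:   %#018" PRIx64 "\n", reassemble(0, SVM_EXIT_ERR));

	/* Post-fix: high dword all-ones -> matches VMEXIT_INVALID. */
	printf("hi = -1u: %#018" PRIx64 " (matches: %d)\n",
	       reassemble(-1u, SVM_EXIT_ERR),
	       reassemble(-1u, SVM_EXIT_ERR) == VMEXIT_INVALID);
	return 0;
}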
@@ -509,11 +509,18 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
 	u32 vcpu_caps[NR_KVM_CPU_CAPS];
 	int r;
 
+	/*
+	 * Apply pending runtime CPUID updates to the current CPUID entries to
+	 * avoid false positives due to mismatches on KVM-owned feature flags.
+	 */
+	if (vcpu->arch.cpuid_dynamic_bits_dirty)
+		kvm_update_cpuid_runtime(vcpu);
+
 	/*
 	 * Swap the existing (old) entries with the incoming (new) entries in
 	 * order to massage the new entries, e.g. to account for dynamic bits
-	 * that KVM controls, without clobbering the current guest CPUID, which
-	 * KVM needs to preserve in order to unwind on failure.
+	 * that KVM controls, without losing the current guest CPUID, which KVM
+	 * needs to preserve in order to unwind on failure.
 	 *
 	 * Similarly, save the vCPU's current cpu_caps so that the capabilities
 	 * can be updated alongside the CPUID entries when performing runtime
@@ -985,7 +985,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 	if (!nested_vmcb_check_save(vcpu) ||
 	    !nested_vmcb_check_controls(vcpu)) {
 		vmcb12->control.exit_code = SVM_EXIT_ERR;
-		vmcb12->control.exit_code_hi = 0;
+		vmcb12->control.exit_code_hi = -1u;
 		vmcb12->control.exit_info_1 = 0;
 		vmcb12->control.exit_info_2 = 0;
 		goto out;
@@ -1018,7 +1018,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 	svm->soft_int_injected = false;
 
 	svm->vmcb->control.exit_code = SVM_EXIT_ERR;
-	svm->vmcb->control.exit_code_hi = 0;
+	svm->vmcb->control.exit_code_hi = -1u;
 	svm->vmcb->control.exit_info_1 = 0;
 	svm->vmcb->control.exit_info_2 = 0;
 
@@ -2443,6 +2443,7 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu,
 
 	if (cr0 ^ val) {
 		svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
+		svm->vmcb->control.exit_code_hi = 0;
 		ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
 	}
 
@@ -4617,6 +4618,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
 	if (static_cpu_has(X86_FEATURE_NRIPS))
 		vmcb->control.next_rip = info->next_rip;
 	vmcb->control.exit_code = icpt_info.exit_code;
+	vmcb->control.exit_code_hi = 0;
 	vmexit = nested_svm_exit_handled(svm);
 
 	ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
@@ -761,9 +761,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm);
 
 static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
 {
 	svm->vmcb->control.exit_code = exit_code;
+	svm->vmcb->control.exit_code_hi = 0;
 	svm->vmcb->control.exit_info_1 = 0;
 	svm->vmcb->control.exit_info_2 = 0;
 	return nested_svm_vmexit(svm);
 }
 
@@ -19,6 +19,7 @@
 #include "trace.h"
 #include "vmx.h"
 #include "smm.h"
+#include "x86_ops.h"
 
 static bool __read_mostly enable_shadow_vmcs = 1;
 module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
@@ -5165,7 +5166,7 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 
 	if (vmx->nested.update_vmcs01_apicv_status) {
 		vmx->nested.update_vmcs01_apicv_status = false;
-		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+		vmx_refresh_apicv_exec_ctrl(vcpu);
 	}
 
 	if (vmx->nested.update_vmcs01_hwapic_isr) {
@@ -6937,15 +6937,6 @@ void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
 	 * VM-Exit, otherwise L1 with run with a stale SVI.
 	 */
 	if (is_guest_mode(vcpu)) {
-		/*
-		 * KVM is supposed to forward intercepted L2 EOIs to L1 if VID
-		 * is enabled in vmcs12; as above, the EOIs affect L2's vAPIC.
-		 * Note, userspace can stuff state while L2 is active; assert
-		 * that VID is disabled if and only if the vCPU is in KVM_RUN
-		 * to avoid false positives if userspace is setting APIC state.
-		 */
-		WARN_ON_ONCE(vcpu->wants_to_run &&
-			     nested_cpu_has_vid(get_vmcs12(vcpu)));
 		to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true;
 		return;
 	}
@@ -10886,9 +10886,16 @@ void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
 	 * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
 	 * still active when the interrupt got accepted. Make sure
 	 * kvm_check_and_inject_events() is called to check for that.
+	 *
+	 * Update SVI when APICv gets enabled, otherwise SVI won't reflect the
+	 * highest bit in vISR and the next accelerated EOI in the guest won't
+	 * be virtualized correctly (the CPU uses SVI to determine which vISR
+	 * vector to clear).
 	 */
 	if (!apic->apicv_active)
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
+	else
+		kvm_apic_update_hwapic_isr(vcpu);
 
 out:
 	preempt_enable();
@@ -215,6 +215,7 @@ int main(int argc, char *argv[])
 		switch (opt) {
 		case 'u':
 			skip_sanity_check = true;
+			break;
 		case 'l':
 			latency = atoi_paranoid(optarg);
 			break;
@@ -155,6 +155,7 @@ struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct
 static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *ent;
+	struct kvm_sregs sregs;
 	int rc;
 	u32 eax, ebx, x;
 
@@ -162,6 +163,20 @@ static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
 	rc = __vcpu_set_cpuid(vcpu);
 	TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
 
+	/*
+	 * Toggle CR4 bits that affect dynamic CPUID feature flags to verify
+	 * setting unmodified CPUID succeeds with runtime CPUID updates.
+	 */
+	vcpu_sregs_get(vcpu, &sregs);
+	if (kvm_cpu_has(X86_FEATURE_XSAVE))
+		sregs.cr4 ^= X86_CR4_OSXSAVE;
+	if (kvm_cpu_has(X86_FEATURE_PKU))
+		sregs.cr4 ^= X86_CR4_PKE;
+	vcpu_sregs_set(vcpu, &sregs);
+
+	rc = __vcpu_set_cpuid(vcpu);
+	TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
 	/* Changing CPU features is forbidden */
 	ent = vcpu_get_cpuid_entry(vcpu, 0x7);
 	ebx = ent->ebx;
|||||||
@@ -1749,6 +1749,12 @@ static void kvm_commit_memory_region(struct kvm *kvm,
|
|||||||
kvm_free_memslot(kvm, old);
|
kvm_free_memslot(kvm, old);
|
||||||
break;
|
break;
|
||||||
case KVM_MR_MOVE:
|
case KVM_MR_MOVE:
|
||||||
|
/*
|
||||||
|
* Moving a guest_memfd memslot isn't supported, and will never
|
||||||
|
* be supported.
|
||||||
|
*/
|
||||||
|
WARN_ON_ONCE(old->flags & KVM_MEM_GUEST_MEMFD);
|
||||||
|
fallthrough;
|
||||||
case KVM_MR_FLAGS_ONLY:
|
case KVM_MR_FLAGS_ONLY:
|
||||||
/*
|
/*
|
||||||
* Free the dirty bitmap as needed; the below check encompasses
|
* Free the dirty bitmap as needed; the below check encompasses
|
||||||
@@ -1757,6 +1763,15 @@ static void kvm_commit_memory_region(struct kvm *kvm,
|
|||||||
if (old->dirty_bitmap && !new->dirty_bitmap)
|
if (old->dirty_bitmap && !new->dirty_bitmap)
|
||||||
kvm_destroy_dirty_bitmap(old);
|
kvm_destroy_dirty_bitmap(old);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Unbind the guest_memfd instance as needed; the @new slot has
|
||||||
|
* already created its own binding. TODO: Drop the WARN when
|
||||||
|
* dirty logging guest_memfd memslots is supported. Until then,
|
||||||
|
* flags-only changes on guest_memfd slots should be impossible.
|
||||||
|
*/
|
||||||
|
if (WARN_ON_ONCE(old->flags & KVM_MEM_GUEST_MEMFD))
|
||||||
|
kvm_gmem_unbind(old);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The final quirk. Free the detached, old slot, but only its
|
* The final quirk. Free the detached, old slot, but only its
|
||||||
* memory, not any metadata. Metadata, including arch specific
|
* memory, not any metadata. Metadata, including arch specific
|
||||||
@@ -2086,7 +2101,7 @@ static int kvm_set_memory_region(struct kvm *kvm,
 			return -EINVAL;
 		if ((mem->userspace_addr != old->userspace_addr) ||
 		    (npages != old->npages) ||
-		    ((mem->flags ^ old->flags) & KVM_MEM_READONLY))
+		    ((mem->flags ^ old->flags) & (KVM_MEM_READONLY | KVM_MEM_GUEST_MEMFD)))
 			return -EINVAL;
 
 		if (base_gfn != old->base_gfn)
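[Editor's aside, not part of the merge: the XOR-and-mask idiom in the hunk above rejects any attempt to flip the masked flags between the old and new memslot while permitting other flags-only changes. A minimal sketch; the flag values mirror uapi/linux/kvm.h at the time of writing, and flags_change_allowed() is a hypothetical helper, not KVM's actual code.]

/* Illustrative only: the (old ^ new) & mask check from kvm_set_memory_region. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Values mirror uapi/linux/kvm.h; double-check against your headers. */
#define KVM_MEM_LOG_DIRTY_PAGES	(1u << 0)
#define KVM_MEM_READONLY	(1u << 1)
#define KVM_MEM_GUEST_MEMFD	(1u << 2)

/* XOR exposes the bits that differ; the mask keeps only the immutable ones. */
static bool flags_change_allowed(uint32_t old_flags, uint32_t new_flags)
{
	return !((old_flags ^ new_flags) &
		 (KVM_MEM_READONLY | KVM_MEM_GUEST_MEMFD));
}

int main(void)
{
	/* Toggling KVM_MEM_GUEST_MEMFD on an existing slot is now rejected. */
	printf("toggle guest_memfd:   %s\n",
	       flags_change_allowed(0, KVM_MEM_GUEST_MEMFD) ? "ok" : "-EINVAL");

	/* Toggling dirty logging alone remains a legal flags-only change. */
	printf("toggle dirty logging: %s\n",
	       flags_change_allowed(0, KVM_MEM_LOG_DIRTY_PAGES) ? "ok" : "-EINVAL");
	return 0;
}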