From 522c9a64c7049f50c7b1299741c13fac3f231cd4 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Thu, 8 Sep 2022 21:06:59 +0800 Subject: [PATCH 01/10] KVM: arm64: Use kmemleak_free_part_phys() to unregister hyp_mem_base With commit 0c24e061196c ("mm: kmemleak: add rbtree and store physical address for objects allocated with PA"), kmemleak started to put the objects allocated with physical address onto object_phys_tree_root tree. The kmemleak_free_part() therefore no longer worked as expected on physically allocated objects (hyp_mem_base in this case) as it attempted to search and remove things in object_tree_root tree. Fix it by using kmemleak_free_part_phys() to unregister hyp_mem_base. This fixes an immediate crash when booting a KVM host in protected mode with kmemleak enabled. Signed-off-by: Zenghui Yu Acked-by: Catalin Marinas Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220908130659.2021-1-yuzenghui@huawei.com --- arch/arm64/kvm/arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 2ff0ef62abad..917086be5c6b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2114,7 +2114,7 @@ static int finalize_hyp_mode(void) * at, which would end badly once inaccessible. */ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); - kmemleak_free_part(__va(hyp_mem_base), hyp_mem_size); + kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size); return pkvm_drop_host_privileges(); } From e8c924a4fb6ef9a224423fb5542954acbda234b3 Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Thu, 15 Sep 2022 13:55:14 -0400 Subject: [PATCH 02/10] KVM: s390: pci: fix plain integer as NULL pointer warnings Fix some sparse warnings that a plain integer 0 is being used instead of NULL. Reported-by: kernel test robot Signed-off-by: Matthew Rosato Link: https://lore.kernel.org/r/20220915175514.167899-1-mjrosato@linux.ibm.com Signed-off-by: Christian Borntraeger Signed-off-by: Janosch Frank --- arch/s390/kvm/pci.c | 4 ++-- arch/s390/kvm/pci.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index bb8c335d17b9..3c12637ce08c 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -58,7 +58,7 @@ static int zpci_setup_aipb(u8 nisc) if (!zpci_aipb) return -ENOMEM; - aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, 0); + aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL); if (!aift->sbv) { rc = -ENOMEM; goto free_aipb; @@ -373,7 +373,7 @@ static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force) gaite->gisc = 0; gaite->aisbo = 0; gaite->gisa = 0; - aift->kzdev[zdev->aisb] = 0; + aift->kzdev[zdev->aisb] = NULL; /* Clear zdev info */ airq_iv_free_bit(aift->sbv, zdev->aisb); airq_iv_release(zdev->aibv); diff --git a/arch/s390/kvm/pci.h b/arch/s390/kvm/pci.h index 3a3606c3a0fe..486d06ef563f 100644 --- a/arch/s390/kvm/pci.h +++ b/arch/s390/kvm/pci.h @@ -46,9 +46,9 @@ extern struct zpci_aift *aift; static inline struct kvm *kvm_s390_pci_si_to_kvm(struct zpci_aift *aift, unsigned long si) { - if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) || aift->kzdev == 0 || - aift->kzdev[si] == 0) - return 0; + if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) || !aift->kzdev || + !aift->kzdev[si]) + return NULL; return aift->kzdev[si]->kvm; }; From b3cefd6bf16e7234ffbd4209f6083060f4e35f59 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Thu, 25 Aug 2022 21:25:40 +0200 Subject: [PATCH 03/10] KVM: s390: Pass initialized arg even if unused This silences smatch warnings reported by kbuild bot: arch/s390/kvm/gaccess.c:859 guest_range_to_gpas() error: uninitialized symbol 'prot'. arch/s390/kvm/gaccess.c:1064 access_guest_with_key() error: uninitialized symbol 'prot'. This is because it cannot tell that the value is not used in this case. The trans_exc* only examine prot if code is PGM_PROTECTION. Pass a dummy value for other codes. Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20220825192540.1560559-1-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger Signed-off-by: Janosch Frank --- arch/s390/kvm/gaccess.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 082ec5f2c3a5..0243b6e38d36 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -489,6 +489,8 @@ enum prot_type { PROT_TYPE_ALC = 2, PROT_TYPE_DAT = 3, PROT_TYPE_IEP = 4, + /* Dummy value for passing an initialized value when code != PGM_PROTECTION */ + PROT_NONE, }; static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar, @@ -504,6 +506,10 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, switch (code) { case PGM_PROTECTION: switch (prot) { + case PROT_NONE: + /* We should never get here, acts like termination */ + WARN_ON_ONCE(1); + break; case PROT_TYPE_IEP: tec->b61 = 1; fallthrough; @@ -968,8 +974,10 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, return rc; } else { gpa = kvm_s390_real_to_abs(vcpu, ga); - if (kvm_is_error_gpa(vcpu->kvm, gpa)) + if (kvm_is_error_gpa(vcpu->kvm, gpa)) { rc = PGM_ADDRESSING; + prot = PROT_NONE; + } } if (rc) return trans_exc(vcpu, rc, ga, ar, mode, prot); @@ -1112,8 +1120,6 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, if (rc == PGM_PROTECTION && try_storage_prot_override) rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx], data, fragment_len, PAGE_SPO_ACC); - if (rc == PGM_PROTECTION) - prot = PROT_TYPE_KEYC; if (rc) break; len -= fragment_len; @@ -1123,6 +1129,10 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, if (rc > 0) { bool terminate = (mode == GACC_STORE) && (idx > 0); + if (rc == PGM_PROTECTION) + prot = PROT_TYPE_KEYC; + else + prot = PROT_NONE; rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate); } out_unlock: From 70ba8fae27758cc2679a948b03d8a9ca18f1dcb1 Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Wed, 7 Sep 2022 11:59:52 -0400 Subject: [PATCH 04/10] KVM: s390: pci: fix GAIT physical vs virtual pointers usage The GAIT and all of its entries must be represented by physical addresses as this structure is shared with underlying firmware. We can keep a virtual address of the GAIT origin in order to handle processing in the kernel, but when traversing the entries we must again convert the physical AISB stored in that GAIT entry into a virtual address in order to process it. Note: this currently doesn't fix a real bug, since virtual addresses are indentical to physical ones. Reviewed-by: Pierre Morel Acked-by: Nico Boehr Signed-off-by: Matthew Rosato Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20220907155952.87356-1-mjrosato@linux.ibm.com Message-Id: <20220907155952.87356-1-mjrosato@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/interrupt.c | 2 +- arch/s390/kvm/pci.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index b9c944b262c7..ab569faf0df2 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3324,7 +3324,7 @@ static void aen_host_forward(unsigned long si) if (gaite->count == 0) return; if (gaite->aisb != 0) - set_bit_inv(gaite->aisbo, (unsigned long *)gaite->aisb); + set_bit_inv(gaite->aisbo, phys_to_virt(gaite->aisb)); kvm = kvm_s390_pci_si_to_kvm(aift, si); if (!kvm) diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index 3c12637ce08c..90aaba80696a 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -71,7 +71,7 @@ static int zpci_setup_aipb(u8 nisc) rc = -ENOMEM; goto free_sbv; } - aift->gait = (struct zpci_gaite *)page_to_phys(page); + aift->gait = (struct zpci_gaite *)page_to_virt(page); zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector); zpci_aipb->aipb.gait = virt_to_phys(aift->gait); From 189e7d876e48d7c791fe1c9c01516f70f5621a9f Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Tue, 20 Sep 2022 15:30:25 -0400 Subject: [PATCH 05/10] KVM: s390: pci: register pci hooks without interpretation The kvm registration hooks must be registered even if the facilities necessary for zPCI interpretation are unavailable, as vfio-pci-zdev will expect to use the hooks regardless. This fixes an issue where vfio-pci-zdev will fail its open function because of a missing kvm_register when running on hardware that does not support zPCI interpretation. Fixes: ca922fecda6c ("KVM: s390: pci: Hook to access KVM lowlevel from VFIO") Signed-off-by: Matthew Rosato Reviewed-by: Pierre Morel Link: https://lore.kernel.org/r/20220920193025.135655-1-mjrosato@linux.ibm.com Message-Id: <20220920193025.135655-1-mjrosato@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/kvm-s390.c | 4 ++-- arch/s390/kvm/pci.c | 14 +++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index edfd4bbd0cba..b7ef0b71014d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -505,7 +505,7 @@ int kvm_arch_init(void *opaque) goto out; } - if (kvm_s390_pci_interp_allowed()) { + if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) { rc = kvm_s390_pci_init(); if (rc) { pr_err("Unable to allocate AIFT for PCI\n"); @@ -527,7 +527,7 @@ int kvm_arch_init(void *opaque) void kvm_arch_exit(void) { kvm_s390_gib_destroy(); - if (kvm_s390_pci_interp_allowed()) + if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) kvm_s390_pci_exit(); debug_unregister(kvm_s390_dbf); debug_unregister(kvm_s390_dbf_uv); diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index 90aaba80696a..c50c1645c0ae 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -672,23 +672,31 @@ int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args) int kvm_s390_pci_init(void) { + zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm; + zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm; + + if (!kvm_s390_pci_interp_allowed()) + return 0; + aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL); if (!aift) return -ENOMEM; spin_lock_init(&aift->gait_lock); mutex_init(&aift->aift_lock); - zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm; - zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm; return 0; } void kvm_s390_pci_exit(void) { - mutex_destroy(&aift->aift_lock); zpci_kvm_hook.kvm_register = NULL; zpci_kvm_hook.kvm_unregister = NULL; + if (!kvm_s390_pci_interp_allowed()) + return; + + mutex_destroy(&aift->aift_lock); + kfree(aift); } From 561cafebb2cf97b0927b4fb0eba22de6200f682e Mon Sep 17 00:00:00 2001 From: Jinrong Liang Date: Tue, 2 Aug 2022 15:12:40 +0800 Subject: [PATCH 06/10] selftests: kvm: Fix a compile error in selftests/kvm/rseq_test.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following warning appears when executing: make -C tools/testing/selftests/kvm rseq_test.c: In function ‘main’: rseq_test.c:237:33: warning: implicit declaration of function ‘gettid’; did you mean ‘getgid’? [-Wimplicit-function-declaration] (void *)(unsigned long)gettid()); ^~~~~~ getgid /usr/bin/ld: /tmp/ccr5mMko.o: in function `main': ../kvm/tools/testing/selftests/kvm/rseq_test.c:237: undefined reference to `gettid' collect2: error: ld returned 1 exit status make: *** [../lib.mk:173: ../kvm/tools/testing/selftests/kvm/rseq_test] Error 1 Use the more compatible syscall(SYS_gettid) instead of gettid() to fix it. More subsequent reuse may cause it to be wrapped in a lib file. Signed-off-by: Jinrong Liang Message-Id: <20220802071240.84626-1-cloudliang@tencent.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/rseq_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index fac248a43666..6f88da7e60be 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -227,7 +227,7 @@ int main(int argc, char *argv[]) ucall_init(vm, NULL); pthread_create(&migration_thread, NULL, migration_worker, - (void *)(unsigned long)gettid()); + (void *)(unsigned long)syscall(SYS_gettid)); for (i = 0; !done; i++) { vcpu_run(vcpu); From 604f533262aea08671a74c2fd630c7deb9eb3c0d Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 7 Sep 2022 16:06:57 +0800 Subject: [PATCH 07/10] KVM: x86/mmu: add missing update to max_mmu_rmap_size The update to statistic max_mmu_rmap_size is unintentionally removed by commit 4293ddb788c1 ("KVM: x86/mmu: Remove redundant spte present check in mmu_set_spte"). Add missing update to it or max_mmu_rmap_size will always be nonsensical 0. Fixes: 4293ddb788c1 ("KVM: x86/mmu: Remove redundant spte present check in mmu_set_spte") Signed-off-by: Miaohe Lin Message-Id: <20220907080657.42898-1-linmiaohe@huawei.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e418ef3ecfcb..3552e6af3684 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1596,6 +1596,8 @@ static void __rmap_add(struct kvm *kvm, rmap_head = gfn_to_rmap(gfn, sp->role.level, slot); rmap_count = pte_list_add(cache, spte, rmap_head); + if (rmap_count > kvm->stat.max_mmu_rmap_size) + kvm->stat.max_mmu_rmap_size = rmap_count; if (rmap_count > RMAP_RECYCLE_THRESHOLD) { kvm_zap_all_rmap_sptes(kvm, rmap_head); kvm_flush_remote_tlbs_with_address( From ee519b3a2ae3027c341bce829ee8c51f4f494f5b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 24 Aug 2022 03:30:55 +0000 Subject: [PATCH 08/10] KVM: x86: Reinstate kvm_vcpu_arch.guest_supported_xcr0 Reinstate the per-vCPU guest_supported_xcr0 by partially reverting commit 988896bb6182; the implicit assessment that guest_supported_xcr0 is always the same as guest_fpu.fpstate->user_xfeatures was incorrect. kvm_vcpu_after_set_cpuid() isn't the only place that sets user_xfeatures, as user_xfeatures is set to fpu_user_cfg.default_features when guest_fpu is allocated via fpu_alloc_guest_fpstate() => __fpstate_reset(). guest_supported_xcr0 on the other hand is zero-allocated. If userspace never invokes KVM_SET_CPUID2, supported XCR0 will be '0', whereas the allowed user XFEATURES will be non-zero. Practically speaking, the edge case likely doesn't matter as no sane userspace will live migrate a VM without ever doing KVM_SET_CPUID2. The primary motivation is to prepare for KVM intentionally and explicitly setting bits in user_xfeatures that are not set in guest_supported_xcr0. Because KVM_{G,S}ET_XSAVE can be used to svae/restore FP+SSE state even if the host doesn't support XSAVE, KVM needs to set the FP+SSE bits in user_xfeatures even if they're not allowed in XCR0, e.g. because XCR0 isn't exposed to the guest. At that point, the simplest fix is to track the two things separately (allowed save/restore vs. allowed XCR0). Fixes: 988896bb6182 ("x86/kvm/fpu: Remove kvm_vcpu_arch.guest_supported_xcr0") Cc: stable@vger.kernel.org Cc: Leonardo Bras Signed-off-by: Sean Christopherson Message-Id: <20220824033057.3576315-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/cpuid.c | 5 ++--- arch/x86/kvm/x86.c | 9 ++------- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2c96c43c313a..aa381ab69a19 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -729,6 +729,7 @@ struct kvm_vcpu_arch { struct fpu_guest guest_fpu; u64 xcr0; + u64 guest_supported_xcr0; struct kvm_pio_request pio; void *pio_data; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 75dcf7a72605..2e0f27ad736a 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -315,7 +315,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; struct kvm_cpuid_entry2 *best; - u64 guest_supported_xcr0; best = kvm_find_cpuid_entry(vcpu, 1); if (best && apic) { @@ -327,10 +326,10 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_apic_set_version(vcpu); } - guest_supported_xcr0 = + vcpu->arch.guest_supported_xcr0 = cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); - vcpu->arch.guest_fpu.fpstate->user_xfeatures = guest_supported_xcr0; + vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0; kvm_update_pv_runtime(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 43a6a7efc6ec..c95cf18a796c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1011,15 +1011,10 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state); -static inline u64 kvm_guest_supported_xcr0(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.guest_fpu.fpstate->user_xfeatures; -} - #ifdef CONFIG_X86_64 static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu) { - return kvm_guest_supported_xcr0(vcpu) & XFEATURE_MASK_USER_DYNAMIC; + return vcpu->arch.guest_supported_xcr0 & XFEATURE_MASK_USER_DYNAMIC; } #endif @@ -1042,7 +1037,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) * saving. However, xcr0 bit 0 is always set, even if the * emulated CPU does not support XSAVE (see kvm_vcpu_reset()). */ - valid_bits = kvm_guest_supported_xcr0(vcpu) | XFEATURE_MASK_FP; + valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP; if (xcr0 & ~valid_bits) return 1; From a1020a25e69755a8a1a37735d674b91d6f02939f Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 24 Aug 2022 03:30:56 +0000 Subject: [PATCH 09/10] KVM: x86: Always enable legacy FP/SSE in allowed user XFEATURES Allow FP and SSE state to be saved and restored via KVM_{G,SET}_XSAVE on XSAVE-capable hosts even if their bits are not exposed to the guest via XCR0. Failing to allow FP+SSE first showed up as a QEMU live migration failure, where migrating a VM from a pre-XSAVE host, e.g. Nehalem, to an XSAVE host failed due to KVM rejecting KVM_SET_XSAVE. However, the bug also causes problems even when migrating between XSAVE-capable hosts as KVM_GET_SAVE won't set any bits in user_xfeatures if XSAVE isn't exposed to the guest, i.e. KVM will fail to actually migrate FP+SSE. Because KVM_{G,S}ET_XSAVE are designed to allowing migrating between hosts with and without XSAVE, KVM_GET_XSAVE on a non-XSAVE (by way of fpu_copy_guest_fpstate_to_uabi()) always sets the FP+SSE bits in the header so that KVM_SET_XSAVE will work even if the new host supports XSAVE. Fixes: ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0") bz: https://bugzilla.redhat.com/show_bug.cgi?id=2079311 Cc: stable@vger.kernel.org Cc: Leonardo Bras Signed-off-by: Dr. David Alan Gilbert [sean: add comment, massage changelog] Signed-off-by: Sean Christopherson Message-Id: <20220824033057.3576315-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 2e0f27ad736a..4c1c2c06e96b 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -329,7 +329,13 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vcpu->arch.guest_supported_xcr0 = cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); - vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0; + /* + * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if + * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't + * supported by the host. + */ + vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 | + XFEATURE_MASK_FPSSE; kvm_update_pv_runtime(vcpu); From 50b2d49bafa16e6311ab2da82f5aafc5f9ada99b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 24 Aug 2022 03:30:57 +0000 Subject: [PATCH 10/10] KVM: x86: Inject #UD on emulated XSETBV if XSAVES isn't enabled Inject #UD when emulating XSETBV if CR4.OSXSAVE is not set. This also covers the "XSAVE not supported" check, as setting CR4.OSXSAVE=1 #GPs if XSAVE is not supported (and userspace gets to keep the pieces if it forces incoherent vCPU state). Add a comment to kvm_emulate_xsetbv() to call out that the CPU checks CR4.OSXSAVE before checking for intercepts. AMD'S APM implies that #UD has priority (says that intercepts are checked before #GP exceptions), while Intel's SDM says nothing about interception priority. However, testing on hardware shows that both AMD and Intel CPUs prioritize the #UD over interception. Fixes: 02d4160fbd76 ("x86: KVM: add xsetbv to the emulator") Cc: stable@vger.kernel.org Cc: Vitaly Kuznetsov Signed-off-by: Sean Christopherson Message-Id: <20220824033057.3576315-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 3 +++ arch/x86/kvm/x86.c | 1 + 2 files changed, 4 insertions(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d5ec3a2ed5a4..aacb28c83e43 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4132,6 +4132,9 @@ static int em_xsetbv(struct x86_emulate_ctxt *ctxt) { u32 eax, ecx, edx; + if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE)) + return emulate_ud(ctxt); + eax = reg_read(ctxt, VCPU_REGS_RAX); edx = reg_read(ctxt, VCPU_REGS_RDX); ecx = reg_read(ctxt, VCPU_REGS_RCX); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c95cf18a796c..b0c47b41c264 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1065,6 +1065,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu) { + /* Note, #UD due to CR4.OSXSAVE=0 has priority over the intercept. */ if (static_call(kvm_x86_get_cpl)(vcpu) != 0 || __kvm_set_xcr(vcpu, kvm_rcx_read(vcpu), kvm_read_edx_eax(vcpu))) { kvm_inject_gp(vcpu, 0);