mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-13 22:19:17 -04:00
Merge tag 'kvm-x86-sev-6.17' of https://github.com/kvm-x86/linux into HEAD
KVM SEV cache maintenance changes for 6.17

- Drop a superfluous WBINVD (on all CPUs!) when destroying a VM.
- Use WBNOINVD instead of WBINVD when possible, for SEV cache maintenance, e.g. to minimize collateral damage when reclaiming memory from an SEV guest.
- When reclaiming memory from an SEV guest, only do cache flushes on CPUs that have ever run a vCPU for the guest, i.e. don't flush the caches for CPUs that can't possibly have cache lines with dirty, encrypted data.
This commit is contained in:
@@ -117,6 +117,7 @@ static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
|
||||
*/
|
||||
down_write(&sev_deactivate_lock);
|
||||
|
||||
/* SNP firmware requires use of WBINVD for ASID recycling. */
|
||||
wbinvd_on_all_cpus();
|
||||
|
||||
if (sev_snp_enabled)
|
||||
@@ -446,7 +447,12 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
|
||||
init_args.probe = false;
|
||||
ret = sev_platform_init(&init_args);
|
||||
if (ret)
|
||||
goto e_free;
|
||||
goto e_free_asid;
|
||||
|
||||
if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
|
||||
ret = -ENOMEM;
|
||||
goto e_free_asid;
|
||||
}
|
||||
|
||||
/* This needs to happen after SEV/SNP firmware initialization. */
|
||||
if (vm_type == KVM_X86_SNP_VM) {
|
||||
@@ -464,6 +470,8 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
|
||||
return 0;
|
||||
|
||||
e_free:
|
||||
free_cpumask_var(sev->have_run_cpus);
|
||||
e_free_asid:
|
||||
argp->error = init_args.error;
|
||||
sev_asid_free(sev);
|
||||
sev->asid = 0;
|
||||
@@ -708,6 +716,33 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
|
||||
}
|
||||
}
|
||||
|
||||
static void sev_writeback_caches(struct kvm *kvm)
|
||||
{
|
||||
/*
|
||||
* Note, the caller is responsible for ensuring correctness if the mask
|
||||
* can be modified, e.g. if a CPU could be doing VMRUN.
|
||||
*/
|
||||
if (cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Ensure that all dirty guest tagged cache entries are written back
|
||||
* before releasing the pages back to the system for use. CLFLUSH will
|
||||
* not do this without SME_COHERENT, and flushing many cache lines
|
||||
* individually is slower than blasting WBINVD for large VMs, so issue
|
||||
* WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported)
|
||||
* on CPUs that have done VMRUN, i.e. may have dirtied data using the
|
||||
* VM's ASID.
|
||||
*
|
||||
* For simplicity, never remove CPUs from the bitmap. Ideally, KVM
|
||||
* would clear the mask when flushing caches, but doing so requires
|
||||
* serializing multiple calls and having responding CPUs (to the IPI)
|
||||
* mark themselves as still running if they are running (or about to
|
||||
* run) a vCPU for the VM.
|
||||
*/
|
||||
wbnoinvd_on_cpus_mask(to_kvm_sev_info(kvm)->have_run_cpus);
|
||||
}
|
||||
|
||||
static unsigned long get_num_contig_pages(unsigned long idx,
|
||||
struct page **inpages, unsigned long npages)
|
||||
{
|
||||
@@ -2037,6 +2072,17 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
|
||||
if (ret)
|
||||
goto out_source_vcpu;
|
||||
|
||||
/*
|
||||
* Allocate a new have_run_cpus for the destination, i.e. don't copy
|
||||
* the set of CPUs from the source. If a CPU was used to run a vCPU in
|
||||
* the source VM but is never used for the destination VM, then the CPU
|
||||
* can only have cached memory that was accessible to the source VM.
|
||||
*/
|
||||
if (!zalloc_cpumask_var(&dst_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
|
||||
ret = -ENOMEM;
|
||||
goto out_source_vcpu;
|
||||
}
|
||||
|
||||
sev_migrate_from(kvm, source_kvm);
|
||||
kvm_vm_dead(source_kvm);
|
||||
cg_cleanup_sev = src_sev;
|
||||
@@ -2694,12 +2740,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
|
||||
goto failed;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure that all guest tagged cache entries are flushed before
|
||||
* releasing the pages back to the system for use. CLFLUSH will
|
||||
* not do this, so issue a WBINVD.
|
||||
*/
|
||||
wbinvd_on_all_cpus();
|
||||
sev_writeback_caches(kvm);
|
||||
|
||||
__unregister_enc_region_locked(kvm, region);
|
||||
|
||||
@@ -2741,13 +2782,18 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
|
||||
goto e_unlock;
|
||||
}
|
||||
|
||||
mirror_sev = to_kvm_sev_info(kvm);
|
||||
if (!zalloc_cpumask_var(&mirror_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
|
||||
ret = -ENOMEM;
|
||||
goto e_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* The mirror kvm holds an enc_context_owner ref so its asid can't
|
||||
* disappear until we're done with it
|
||||
*/
|
||||
source_sev = to_kvm_sev_info(source_kvm);
|
||||
kvm_get_kvm(source_kvm);
|
||||
mirror_sev = to_kvm_sev_info(kvm);
|
||||
list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms);
|
||||
|
||||
/* Set enc_context_owner and copy its encryption context over */
|
||||
@@ -2809,7 +2855,13 @@ void sev_vm_destroy(struct kvm *kvm)
|
||||
|
||||
WARN_ON(!list_empty(&sev->mirror_vms));
|
||||
|
||||
/* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
|
||||
free_cpumask_var(sev->have_run_cpus);
|
||||
|
||||
/*
|
||||
* If this is a mirror VM, remove it from the owner's list of mirrors
|
||||
* and skip ASID cleanup (the ASID is tied to the lifetime of the owner).
|
||||
* Note, mirror VMs don't support registering encrypted regions.
|
||||
*/
|
||||
if (is_mirroring_enc_context(kvm)) {
|
||||
struct kvm *owner_kvm = sev->enc_context_owner;
|
||||
|
||||
@@ -2820,12 +2872,6 @@ void sev_vm_destroy(struct kvm *kvm)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure that all guest tagged cache entries are flushed before
|
||||
* releasing the pages back to the system for use. CLFLUSH will
|
||||
* not do this, so issue a WBINVD.
|
||||
*/
|
||||
wbinvd_on_all_cpus();
|
||||
|
||||
/*
|
||||
* if userspace was terminated before unregistering the memory regions
|
||||
@@ -3095,30 +3141,29 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
|
||||
|
||||
/*
|
||||
* VM Page Flush takes a host virtual address and a guest ASID. Fall
|
||||
* back to WBINVD if this faults so as not to make any problems worse
|
||||
* by leaving stale encrypted data in the cache.
|
||||
* back to full writeback of caches if this faults so as not to make
|
||||
* any problems worse by leaving stale encrypted data in the cache.
|
||||
*/
|
||||
if (WARN_ON_ONCE(wrmsrq_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
|
||||
goto do_wbinvd;
|
||||
goto do_sev_writeback_caches;
|
||||
|
||||
return;
|
||||
|
||||
do_wbinvd:
|
||||
wbinvd_on_all_cpus();
|
||||
do_sev_writeback_caches:
|
||||
sev_writeback_caches(vcpu->kvm);
|
||||
}
|
||||
|
||||
void sev_guest_memory_reclaimed(struct kvm *kvm)
|
||||
{
|
||||
/*
|
||||
* With SNP+gmem, private/encrypted memory is unreachable via the
|
||||
* hva-based mmu notifiers, so these events are only actually
|
||||
* pertaining to shared pages where there is no need to perform
|
||||
* the WBINVD to flush associated caches.
|
||||
* hva-based mmu notifiers, i.e. these events are explicitly scoped to
|
||||
* shared pages, where there's no need to flush caches.
|
||||
*/
|
||||
if (!sev_guest(kvm) || sev_snp_guest(kvm))
|
||||
return;
|
||||
|
||||
wbinvd_on_all_cpus();
|
||||
sev_writeback_caches(kvm);
|
||||
}
|
||||
|
||||
void sev_free_vcpu(struct kvm_vcpu *vcpu)
|
||||
@@ -3450,6 +3495,15 @@ int pre_sev_run(struct vcpu_svm *svm, int cpu)
|
||||
if (sev_es_guest(kvm) && !VALID_PAGE(svm->vmcb->control.vmsa_pa))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* To optimize cache flushes when memory is reclaimed from an SEV VM,
|
||||
* track physical CPUs that enter the guest for SEV VMs and thus can
|
||||
* have encrypted, dirty data in the cache, and flush caches only for
|
||||
* CPUs that have entered the guest.
|
||||
*/
|
||||
if (!cpumask_test_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus))
|
||||
cpumask_set_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus);
|
||||
|
||||
/* Assign the asid allocated with this SEV guest */
|
||||
svm->asid = asid;
|
||||
|
||||
@@ -3882,9 +3936,9 @@ void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
|
||||
* From this point forward, the VMSA will always be a guest-mapped page
|
||||
* rather than the initial one allocated by KVM in svm->sev_es.vmsa. In
|
||||
* theory, svm->sev_es.vmsa could be free'd and cleaned up here, but
|
||||
* that involves cleanups like wbinvd_on_all_cpus() which would ideally
|
||||
* be handled during teardown rather than guest boot. Deferring that
|
||||
* also allows the existing logic for SEV-ES VMSAs to be re-used with
|
||||
* that involves cleanups like flushing caches, which would ideally be
|
||||
* handled during teardown rather than guest boot. Deferring that also
|
||||
* allows the existing logic for SEV-ES VMSAs to be re-used with
|
||||
* minimal SNP-specific changes.
|
||||
*/
|
||||
svm->sev_es.snp_has_guest_vmsa = true;
|
||||
@@ -4875,7 +4929,7 @@ void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
|
||||
|
||||
/*
|
||||
* SEV-ES avoids host/guest cache coherency issues through
|
||||
* WBINVD hooks issued via MMU notifiers during run-time, and
|
||||
* WBNOINVD hooks issued via MMU notifiers during run-time, and
|
||||
* KVM's VM destroy path at shutdown. Those MMU notifier events
|
||||
* don't cover gmem since there is no requirement to map pages
|
||||
* to a HVA in order to use them for a running guest. While the
|
||||
|
||||
@@ -110,6 +110,7 @@ struct kvm_sev_info {
|
||||
void *guest_req_buf; /* Bounce buffer for SNP Guest Request input */
|
||||
void *guest_resp_buf; /* Bounce buffer for SNP Guest Request output */
|
||||
struct mutex guest_req_mutex; /* Must acquire before using bounce buffers */
|
||||
cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */
|
||||
};
|
||||
|
||||
#define SEV_POLICY_NODBG BIT_ULL(0)
|
||||
|
||||
@@ -4994,11 +4994,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
|
||||
return r;
|
||||
}
|
||||
|
||||
static void wbinvd_ipi(void *garbage)
|
||||
{
|
||||
wbinvd();
|
||||
}
|
||||
|
||||
static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_arch_has_noncoherent_dma(vcpu->kvm);
|
||||
@@ -5022,8 +5017,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
if (kvm_x86_call(has_wbinvd_exit)())
|
||||
cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
|
||||
else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
|
||||
smp_call_function_single(vcpu->cpu,
|
||||
wbinvd_ipi, NULL, 1);
|
||||
wbinvd_on_cpu(vcpu->cpu);
|
||||
}
|
||||
|
||||
kvm_x86_call(vcpu_load)(vcpu, cpu);
|
||||
|
||||
Reference in New Issue
Block a user