Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "arm64:

   - Add the pKVM side of the workaround for ARM's erratum 4193714,
     provided that the EL3 firmware does its part of the job. KVM will
     refuse to initialise otherwise

   - Correctly handle 52-bit VAs for guest EL2 stage-1 translations when
     running under NV with E2H==0

   - Correctly deal with permission faults in guest_memfd memslots

   - Fix the steal-time selftest after the infrastructure was reworked

   - Make sure the host cannot pass a nonsensical clock update to the
     EL2 tracing infrastructure

   - Appoint Steffen Eiden as a reviewer in anticipation of the KVM/s390
     ability to run arm64 guests, which will inevitably lead to arm64
     code being directly used on s390

   - Make sure that EL2 is configured with both exception entry and exit
     being Context Synchronization Events

   - Handle the current vcpu being NULL on EL2 panic

   - Fix the selftest_vcpu memcache being empty at the point of donation
     or sharing

   - Check that the memcache has enough capacity before starting down
     the share/donate path

   - Fix __deactivate_fgt() to use its macro parameter rather than a
     variable captured from the expansion context

  s390:

   - Fix array overrun with large amounts of PCI devices

  x86:

   - Never use L0's PAUSE loop exiting while L2 is running, since it's
     unlikely that a nested guest will help solve the hypervisor's
     spinlock contention

   - Fix emulation of MOVNTDQA

   - Fix typo in Xen hypercall tracepoint

   - Add back an optimization that was inadvertently dropped by a recent
     bug fix

   - Add module parameter to disable CET, whose implementation seems to
     have issues. For now it remains enabled by default

  Generic:

   - Reject offset causing an unsigned overflow in kvm_reset_dirty_gfn()

  Documentation:

   - Update stale links

  Selftests:

   - Fix guest_memfd_test with host page size > guest page size"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (22 commits)
  KVM: VMX: introduce module parameter to disable CET
  KVM: x86: Swap the dst and src operand for MOVNTDQA
  KVM: x86: use again the flush argument of __link_shadow_page()
  KVM: selftests: Ensure gmem file sizes are multiple of host page size
  Documentation: kvm: update links in the references section of AMD Memory Encryption
  KVM: nSVM: Never use L0's PAUSE loop exiting while L2 is running
  KVM: x86: Fix Xen hypercall tracepoint argument assignment
  KVM: Reject wrapped offset in kvm_reset_dirty_gfn()
  KVM: arm64: Pre-check vcpu memcache for host->guest donate
  KVM: arm64: Pre-check vcpu memcache for host->guest share
  KVM: arm64: Seed pkvm_ownership_selftest vcpu memcache
  KVM: arm64: Fix __deactivate_fgt macro parameter typo
  KVM: arm64: Guard against NULL vcpu on VHE hyp panic path
  KVM: arm64: Make EL2 exception entry and exit context-synchronization events
  MAINTAINERS: Add Steffen as reviewer for KVM/arm64
  KVM: arm64: Remove potential UB on nvhe tracing clock update
  KVM: selftests: arm64: Fix steal_time test after UAPI refactoring
  KVM: arm64: Handle permission faults with guest_memfd
  KVM: arm64: nv: Consider the DS bit when translating TCR_EL2
  KVM: arm64: Work around C1-Pro erratum 4193714 for protected guests
  ...
Linus Torvalds
2026-05-13 11:53:51 -07:00
24 changed files with 204 additions and 65 deletions


@@ -656,8 +656,8 @@ References
 See [white-paper]_, [api-spec]_, [amd-apm]_, [kvm-forum]_, and [snp-fw-abi]_
 for more info.
 
-.. [white-paper] https://developer.amd.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf
-.. [api-spec] https://support.amd.com/TechDocs/55766_SEV-KM_API_Specification.pdf
-.. [amd-apm] https://support.amd.com/TechDocs/24593.pdf (section 15.34)
+.. [white-paper] https://docs.amd.com/v/u/en-US/memory-encryption-white-paper
+.. [api-spec] https://docs.amd.com/v/u/en-US/55766_PUB_3.24_SEV_API
+.. [amd-apm] https://docs.amd.com/v/u/en-US/24593_3.44_APM_Vol2 (section 15.34)
 .. [kvm-forum] https://www.linux-kvm.org/images/7/74/02x08A-Thomas_Lendacky-AMDs_Virtualizatoin_Memory_Encryption_Technology.pdf
-.. [snp-fw-abi] https://www.amd.com/system/files/TechDocs/56860.pdf
+.. [snp-fw-abi] https://www.amd.com/content/dam/amd/en/documents/developer/56860.pdf


@@ -14055,6 +14055,7 @@ KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
 M:	Marc Zyngier <maz@kernel.org>
 M:	Oliver Upton <oupton@kernel.org>
 R:	Joey Gouly <joey.gouly@arm.com>
+R:	Steffen Eiden <seiden@linux.ibm.com>
 R:	Suzuki K Poulose <suzuki.poulose@arm.com>
 R:	Zenghui Yu <yuzenghui@huawei.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)


@@ -23,6 +23,7 @@ static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2)
 static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr)
 {
 	return TCR_EPD1_MASK | /* disable TTBR1_EL1 */
+	       ((tcr & TCR_EL2_DS) ? TCR_DS : 0) |
 	       ((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) |
 	       tcr_el2_ps_to_tcr_el1_ips(tcr) |
 	       (tcr & TCR_EL2_TG0_MASK) |


@@ -844,7 +844,7 @@
 #define INIT_SCTLR_EL2_MMU_ON \
 	(SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \
 	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \
-	 SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
+	 SCTLR_ELx_ITFSB | SCTLR_ELx_EIS | SCTLR_ELx_EOS | SCTLR_EL2_RES1)
 
 #define INIT_SCTLR_EL2_MMU_OFF \
 	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)


@@ -4,6 +4,7 @@
  * Author: Christoffer Dall <c.dall@virtualopensystems.com>
  */
 
+#include <linux/arm-smccc.h>
 #include <linux/bug.h>
 #include <linux/cpu_pm.h>
 #include <linux/errno.h>
@@ -2638,6 +2639,22 @@ static int init_pkvm_host_sve_state(void)
 	return 0;
 }
 
+static int pkvm_check_sme_dvmsync_fw_call(void)
+{
+	struct arm_smccc_res res;
+
+	if (!cpus_have_final_cap(ARM64_WORKAROUND_4193714))
+		return 0;
+
+	arm_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res);
+	if (res.a0) {
+		kvm_err("pKVM requires firmware support for C1-Pro erratum 4193714\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
 /*
  * Finalizes the initialization of hyp mode, once everything else is initialized
  * and the initialization process cannot fail.
@@ -2838,6 +2855,10 @@ static int __init init_hyp_mode(void)
 	if (err)
 		goto out_err;
 
+	err = pkvm_check_sme_dvmsync_fw_call();
+	if (err)
+		goto out_err;
+
 	err = kvm_hyp_init_protection(hyp_va_bits);
 	if (err) {
 		kvm_err("Failed to init hyp memory protection\n");

@@ -245,7 +245,7 @@ static inline void __activate_traps_ich_hfgxtr(struct kvm_vcpu *vcpu)
 	__activate_fgt(hctxt, vcpu, ICH_HFGITR_EL2);
 }
 
-#define __deactivate_fgt(htcxt, vcpu, reg) \
+#define __deactivate_fgt(hctxt, vcpu, reg) \
 	do { \
 		write_sysreg_s(ctxt_sys_reg(hctxt, reg), \
 			       SYS_ ## reg); \
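
The typo was benign only by accident: the macro body refers to hctxt, so with the parameter misspelled htcxt the body silently bound to whatever hctxt was in scope at the expansion site. A standalone sketch of that failure mode (hypothetical names, plain C, not kernel code):

	#include <stdio.h>

	/* The 'htcxt' parameter is never used; the body captures whatever
	 * 'hctxt' happens to be visible where the macro is expanded. */
	#define show(htcxt)	printf("%d\n", hctxt)

	int main(void)
	{
		int hctxt = 1;
		int other = 2;

		show(other);	/* compiles cleanly, but prints 1, not 2 */
		return 0;
	}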


@@ -35,6 +35,9 @@ void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc)
 	struct clock_data *clock = &trace_clock_data;
 	u64 bank = clock->cur ^ 1;
 
+	if (!mult || shift >= 64)
+		return;
+
 	clock->data[bank].mult = mult;
 	clock->data[bank].shift = shift;
 	clock->data[bank].epoch_ns = epoch_ns;
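
The new guard matters on two counts: a zero mult would freeze the trace clock, and a shift of 64 or more makes the usual cycles-to-nanoseconds conversion undefined behaviour in C. A minimal sketch of that conversion, assuming the standard kernel mult/shift scheme (the hyp code layers epoch handling on top of this):

	#include <stdint.h>

	/* Classic clocksource-style scaling: ns = (cycles * mult) >> shift.
	 * Shifting a 64-bit value by >= 64 is undefined behaviour, hence the
	 * range check before host-supplied values are accepted. */
	static inline uint64_t cycles_to_ns(uint64_t cycles, uint32_t mult, uint32_t shift)
	{
		return (cycles * mult) >> shift;
	}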


@@ -5,6 +5,7 @@
  */
 
 #include <linux/kvm_host.h>
+#include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
@@ -14,6 +15,7 @@
 #include <hyp/fault.h>
 
+#include <nvhe/arm-smccc.h>
 #include <nvhe/gfp.h>
 #include <nvhe/memory.h>
 #include <nvhe/mem_protect.h>
@@ -29,6 +31,19 @@ static struct hyp_pool host_s2_pool;
 static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
 #define current_vm (*this_cpu_ptr(&__current_vm))
 
+static void pkvm_sme_dvmsync_fw_call(void)
+{
+	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714)) {
+		struct arm_smccc_res res;
+
+		/*
+		 * Ignore the return value. Probing for the workaround
+		 * availability took place in init_hyp_mode().
+		 */
+		hyp_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res);
+	}
+}
+
 static void guest_lock_component(struct pkvm_hyp_vm *vm)
 {
 	hyp_spin_lock(&vm->lock);
@@ -574,8 +589,14 @@ static int host_stage2_set_owner_metadata_locked(phys_addr_t addr, u64 size,
 	ret = host_stage2_try(kvm_pgtable_stage2_annotate, &host_mmu.pgt,
 			      addr, size, &host_s2_pool,
 			      KVM_HOST_INVALID_PTE_TYPE_DONATION, annotation);
-	if (!ret)
+	if (!ret) {
+		/*
+		 * After stage2 maintenance has happened, but before the page
+		 * owner has changed.
+		 */
+		pkvm_sme_dvmsync_fw_call();
 		__host_update_page_state(addr, size, PKVM_NOPAGE);
+	}
 
 	return ret;
 }
@@ -1369,6 +1390,22 @@ int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm)
 	return ret && ret != -EHWPOISON ? ret : 0;
 }
 
+/*
+ * share/donate install at most one stage-2 leaf (PAGE_SIZE, or one
+ * KVM_PGTABLE_LAST_LEVEL - 1 block for share). kvm_mmu_cache_min_pages()
+ * bounds the worst-case allocation: exact for the PAGE_SIZE leaf,
+ * conservative by one for the block.
+ */
+static int __guest_check_pgtable_memcache(struct pkvm_hyp_vcpu *vcpu)
+{
+	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+
+	if (vcpu->vcpu.arch.pkvm_memcache.nr_pages < kvm_mmu_cache_min_pages(vm->pgt.mmu))
+		return -ENOMEM;
+
+	return 0;
+}
+
 int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
 {
 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
@@ -1388,6 +1425,10 @@ int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
 	if (ret)
 		goto unlock;
 
+	ret = __guest_check_pgtable_memcache(vcpu);
+	if (ret)
+		goto unlock;
+
 	meta = host_stage2_encode_gfn_meta(vm, gfn);
 	WARN_ON(host_stage2_set_owner_metadata_locked(phys, PAGE_SIZE,
 						      PKVM_ID_GUEST, meta));
@@ -1453,6 +1494,10 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu
 		}
 	}
 
+	ret = __guest_check_pgtable_memcache(vcpu);
+	if (ret)
+		goto unlock;
+
 	for_each_hyp_page(page, phys, size) {
 		set_host_state(page, PKVM_PAGE_SHARED_OWNED);
 		page->host_share_guest_count++;


@@ -752,16 +752,30 @@ static struct pkvm_hyp_vcpu selftest_vcpu = {
 struct pkvm_hyp_vcpu *init_selftest_vm(void *virt)
 {
 	struct hyp_page *p = hyp_virt_to_page(virt);
+	unsigned long min_pages, seeded = 0;
 	int i;
 
 	selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
 	WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt));
 
+	/*
+	 * Mirror pkvm_refill_memcache() for the share/donate pre-checks;
+	 * the selftest invokes those functions directly and would
+	 * otherwise see an empty memcache.
+	 */
+	min_pages = kvm_mmu_cache_min_pages(&selftest_vm.kvm.arch.mmu);
+
 	for (i = 0; i < pkvm_selftest_pages(); i++) {
 		if (p[i].refcount)
 			continue;
 		p[i].refcount = 1;
-		hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
+		if (seeded < min_pages) {
+			push_hyp_memcache(&selftest_vcpu.vcpu.arch.pkvm_memcache,
+					  hyp_page_to_virt(&p[i]), hyp_virt_to_phys);
+			seeded++;
+		} else {
+			hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
+		}
 	}
 
 	selftest_vm.kvm.arch.pkvm.handle = __pkvm_reserve_vm();


@@ -663,7 +663,8 @@ static void __noreturn __hyp_call_panic(u64 spsr, u64 elr, u64 par)
 	host_ctxt = host_data_ptr(host_ctxt);
 	vcpu = host_ctxt->__hyp_running_vcpu;
 
-	__deactivate_traps(vcpu);
+	if (vcpu)
+		__deactivate_traps(vcpu);
+
 	sysreg_restore_host_state_vhe(host_ctxt);
 
 	panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n",


@@ -1576,21 +1576,24 @@ struct kvm_s2_fault_desc {
 static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
 {
 	bool write_fault, exec_fault;
+	bool perm_fault = kvm_vcpu_trap_is_permission_fault(s2fd->vcpu);
 	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
 	struct kvm_pgtable *pgt = s2fd->vcpu->arch.hw_mmu->pgt;
 	unsigned long mmu_seq;
 	struct page *page;
 	struct kvm *kvm = s2fd->vcpu->kvm;
-	void *memcache;
+	void *memcache = NULL;
 	kvm_pfn_t pfn;
 	gfn_t gfn;
 	int ret;
 
-	memcache = get_mmu_memcache(s2fd->vcpu);
-	ret = topup_mmu_memcache(s2fd->vcpu, memcache);
-	if (ret)
-		return ret;
+	if (!perm_fault) {
+		memcache = get_mmu_memcache(s2fd->vcpu);
+		ret = topup_mmu_memcache(s2fd->vcpu, memcache);
+		if (ret)
+			return ret;
+	}
 
 	if (s2fd->nested)
 		gfn = kvm_s2_trans_output(s2fd->nested) >> PAGE_SHIFT;
@@ -1631,9 +1634,19 @@ static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
 		goto out_unlock;
 	}
 
-	ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE,
-						 __pfn_to_phys(pfn), prot,
-						 memcache, flags);
+	if (perm_fault) {
+		/*
+		 * Drop the SW bits in favour of those stored in the
+		 * PTE, which will be preserved.
+		 */
+		prot &= ~KVM_NV_GUEST_MAP_SZ;
+		ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, s2fd->fault_ipa,
+								 prot, flags);
+	} else {
+		ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE,
+							 __pfn_to_phys(pfn), prot,
+							 memcache, flags);
+	}
 
 out_unlock:
 	kvm_release_faultin_page(kvm, page, !!ret, prot & KVM_PGTABLE_PROT_W);


@@ -3310,8 +3310,7 @@ static void aen_host_forward(unsigned long si)
 	struct zpci_gaite *gaite;
 	struct kvm *kvm;
 
-	gaite = (struct zpci_gaite *)aift->gait +
-		(si * sizeof(struct zpci_gaite));
+	gaite = aift->gait + si;
 	if (gaite->count == 0)
 		return;
 
 	if (gaite->aisb != 0)


@@ -166,7 +166,7 @@ static int kvm_zpci_set_airq(struct zpci_dev *zdev)
 	fib.fmt0.noi = airq_iv_end(zdev->aibv);
 	fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
 	fib.fmt0.aibvo = 0;
-	fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+	fib.fmt0.aisb = virt_to_phys(aift->sbv->vector) + (zdev->aisb / 64) * 8;
 	fib.fmt0.aisbo = zdev->aisb & 63;
 	fib.gd = zdev->gisa;
@@ -290,8 +290,7 @@ static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
 				   phys_to_virt(fib->fmt0.aibv));
 
 	spin_lock_irq(&aift->gait_lock);
-	gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
-						   sizeof(struct zpci_gaite));
+	gaite = aift->gait + zdev->aisb;
 
 	/* If assist not requested, host will get all alerts */
 	if (assist)
@@ -309,7 +308,7 @@ static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
 
 	/* Update guest FIB for re-issue */
 	fib->fmt0.aisbo = zdev->aisb & 63;
-	fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+	fib->fmt0.aisb = virt_to_phys(aift->sbv->vector) + (zdev->aisb / 64) * 8;
 	fib->fmt0.isc = gisc;
 
 	/* Save some guest fib values in the host for later use */
@@ -357,8 +356,7 @@ static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
 	if (zdev->kzdev->fib.fmt0.aibv == 0)
 		goto out;
 
 	spin_lock_irq(&aift->gait_lock);
-	gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
-						   sizeof(struct zpci_gaite));
+	gaite = aift->gait + zdev->aisb;
 	isc = gaite->gisc;
 	gaite->count--;
 	if (gaite->count == 0) {
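
These gaite fixes (and the aen_host_forward() one above) are all the same C pitfall: arithmetic on a typed pointer already scales by the element size, so the cast-and-multiply form indexed si * sizeof(struct zpci_gaite) elements instead of si. The aisb lines look like the same class of slip, with the byte offset moved out of the scaled pointer expression and applied to the physical address as plain integer arithmetic. A standalone sketch of the indexing bug:

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	struct gaite { uint64_t word[4]; };	/* stand-in for struct zpci_gaite */

	int main(void)
	{
		size_t si = 2;

		/* Pointer arithmetic scales by sizeof(*ptr), so the old form
		 * advanced si * sizeof(struct gaite) elements past the base. */
		printf("intended element index: %zu\n", si);				/* 2 */
		printf("buggy element index:    %zu\n", si * sizeof(struct gaite));	/* 64 */
		return 0;
	}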


@@ -4481,7 +4481,7 @@ static const struct opcode opcode_map_0f_38[256] = {
 	X16(N), X16(N),
 	/* 0x20 - 0x2f */
 	X8(N),
-	X2(N), GP(SrcReg | DstMem | ModRM | Mov | Aligned, &pfx_0f_e7_0f_38_2a), N, N, N, N, N,
+	X2(N), GP(SrcMem | DstReg | ModRM | Mov | Aligned, &pfx_0f_e7_0f_38_2a), N, N, N, N, N,
 	/* 0x30 - 0x7f */
 	X16(N), X16(N), X16(N), X16(N), X16(N),
 	/* 0x80 - 0xef */


@@ -2526,6 +2526,23 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
 	__shadow_walk_next(iterator, *iterator->sptep);
 }
 
+/*
+ * Note: while normally KVM uses a "bool flush" return value to let
+ * the caller batch flushes, __link_shadow_page() flushes immediately
+ * before populating the parent PTE with the new shadow page. The
+ * typical callers, direct_map() and FNAME(fetch)(), are not going
+ * to zap more than one huge SPTE anyway.
+ *
+ * The only exception, where @flush can be false, is when a huge SPTE
+ * is replaced with a shadow page SPTE with a fully populated page table,
+ * which can happen from shadow_mmu_split_huge_page(). In this case,
+ * no memory is unmapped across the change to the page tables and no
+ * immediate flush is needed for correctness.
+ *
+ * Even in that case, calls to kvm_mmu_commit_zap_page() are not
+ * batched. Doing so would require adding an invalid_list argument
+ * all the way down to __walk_slot_rmaps().
+ */
 static void __link_shadow_page(struct kvm *kvm,
 			       struct kvm_mmu_memory_cache *cache, u64 *sptep,
 			       struct kvm_mmu_page *sp, bool flush)
@@ -2541,8 +2558,10 @@ static void __link_shadow_page(struct kvm *kvm,
 		parent_sp = sptep_to_sp(sptep);
 		WARN_ON_ONCE(parent_sp->role.level == PG_LEVEL_4K);
 
-		mmu_page_zap_pte(kvm, parent_sp, sptep, &invalid_list);
-		kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, true);
+		if (mmu_page_zap_pte(kvm, parent_sp, sptep, &invalid_list))
+			kvm_mmu_commit_zap_page(kvm, &invalid_list);
+		else if (flush)
+			kvm_flush_remote_tlbs_sptep(kvm, sptep);
 	}
 
 	spte = make_nonleaf_spte(sp->spt, sp_ad_disabled(sp));


@@ -160,6 +160,16 @@ void nested_vmcb02_recalc_intercepts(struct vcpu_svm *svm)
 	if (!intercept_smi)
 		vmcb_clr_intercept(&vmcb02->control, INTERCEPT_SMI);
 
+	/*
+	 * Intercept PAUSE if and only if L1 wants to. KVM intercepts PAUSE so
+	 * that a vCPU that may be spinning waiting for a lock can be scheduled
+	 * out in favor of the vCPU that holds said lock. KVM doesn't support
+	 * yielding across L2 vCPUs, as KVM has limited visibility into which
+	 * L2 vCPUs are in the same L2 VM, i.e. may be contending for locks.
+	 */
+	if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE))
+		vmcb_clr_intercept(&vmcb02->control, INTERCEPT_PAUSE);
+
 	if (nested_vmcb_needs_vls_intercept(svm)) {
 		/*
 		 * If the virtual VMLOAD/VMSAVE is not enabled for the L2,
@@ -819,7 +829,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
 	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
 	struct vmcb *vmcb01 = svm->vmcb01.ptr;
 	struct kvm_vcpu *vcpu = &svm->vcpu;
-	u32 pause_count12, pause_thresh12;
 
 	nested_svm_transition_tlb_flush(vcpu);
@@ -947,31 +956,13 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
 		vmcb02->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
 
 	if (guest_cpu_cap_has(vcpu, X86_FEATURE_PAUSEFILTER))
-		pause_count12 = vmcb12_ctrl->pause_filter_count;
+		vmcb02->control.pause_filter_count = vmcb12_ctrl->pause_filter_count;
 	else
-		pause_count12 = 0;
+		vmcb02->control.pause_filter_count = 0;
+
 	if (guest_cpu_cap_has(vcpu, X86_FEATURE_PFTHRESHOLD))
-		pause_thresh12 = vmcb12_ctrl->pause_filter_thresh;
+		vmcb02->control.pause_filter_thresh = vmcb12_ctrl->pause_filter_thresh;
 	else
-		pause_thresh12 = 0;
-	if (kvm_pause_in_guest(svm->vcpu.kvm)) {
-		/* use guest values since host doesn't intercept PAUSE */
-		vmcb02->control.pause_filter_count = pause_count12;
-		vmcb02->control.pause_filter_thresh = pause_thresh12;
-	} else {
-		/* start from host values otherwise */
-		vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
-		vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
-		/* ... but ensure filtering is disabled if so requested. */
-		if (vmcb12_is_intercept(vmcb12_ctrl, INTERCEPT_PAUSE)) {
-			if (!pause_count12)
-				vmcb02->control.pause_filter_count = 0;
-			if (!pause_thresh12)
-				vmcb02->control.pause_filter_thresh = 0;
-		}
-	}
+		vmcb02->control.pause_filter_thresh = 0;
 
 	/*
 	 * Take ALLOW_LARGER_RAP from vmcb12 even though it should be safe to
@@ -1298,12 +1289,6 @@ void nested_svm_vmexit(struct vcpu_svm *svm)
 	/* in case we halted in L2 */
 	kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
 
-	if (!kvm_pause_in_guest(vcpu->kvm)) {
-		vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
-		vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
-	}
-
 	/*
 	 * Invalidate last_bus_lock_rip unless KVM is still waiting for the
 	 * guest to make forward progress before re-enabling bus lock detection.


@@ -913,7 +913,15 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	int old = control->pause_filter_count;
 
-	if (kvm_pause_in_guest(vcpu->kvm))
+	/* Adjusting pause_filter_count makes no sense if PLE is disabled. */
+	WARN_ON_ONCE(kvm_pause_in_guest(vcpu->kvm));
+
+	/*
+	 * While running L2, KVM should intercept PAUSE if and only if L1 wants
+	 * to intercept PAUSE, and L1's intercept should take priority, i.e.
+	 * KVM should never handle a PAUSE intercept from L2.
+	 */
+	if (WARN_ON_ONCE(is_guest_mode(vcpu)))
 		return;
 
 	control->pause_filter_count = __grow_ple_window(old,
@@ -934,7 +942,10 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	int old = control->pause_filter_count;
 
-	if (kvm_pause_in_guest(vcpu->kvm))
+	/* Adjusting pause_filter_count makes no sense if PLE is disabled. */
+	WARN_ON_ONCE(kvm_pause_in_guest(vcpu->kvm));
+
+	if (is_guest_mode(vcpu))
 		return;
 
 	control->pause_filter_count =


@@ -154,7 +154,7 @@ TRACE_EVENT(kvm_xen_hypercall,
 		__entry->a2 = a2;
 		__entry->a3 = a3;
 		__entry->a4 = a4;
-		__entry->a4 = a5;
+		__entry->a5 = a5;
 	),
 
 	TP_printk("cpl %d nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 %lx",


@@ -14,6 +14,7 @@ extern bool __read_mostly flexpriority_enabled;
 extern bool __read_mostly enable_ept;
 extern bool __read_mostly enable_unrestricted_guest;
 extern bool __read_mostly enable_ept_ad_bits;
+extern bool __read_mostly enable_cet;
 extern bool __read_mostly enable_pml;
 extern int __read_mostly pt_mode;


@@ -108,6 +108,9 @@ module_param_named(unrestricted_guest,
 bool __read_mostly enable_ept_ad_bits = 1;
 module_param_named(eptad, enable_ept_ad_bits, bool, 0444);
 
+bool __read_mostly enable_cet = 1;
+module_param_named(cet, enable_cet, bool, 0444);
+
 static bool __read_mostly emulate_invalid_guest_state = true;
 module_param(emulate_invalid_guest_state, bool, 0444);
@@ -4476,7 +4479,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 	 * SSP is reloaded from IA32_PL3_SSP. Check SDM Vol.2A/B Chapter
 	 * 3 and 4 for details.
 	 */
-	if (cpu_has_load_cet_ctrl()) {
+	if (enable_cet) {
 		vmcs_writel(HOST_S_CET, kvm_host.s_cet);
 		vmcs_writel(HOST_SSP, 0);
 		vmcs_writel(HOST_INTR_SSP_TABLE, 0);
@@ -4532,6 +4535,10 @@ static u32 vmx_get_initial_vmentry_ctrl(void)
 	if (vmx_pt_mode_is_system())
 		vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
 				  VM_ENTRY_LOAD_IA32_RTIT_CTL);
+
+	if (!enable_cet)
+		vmentry_ctrl &= ~VM_ENTRY_LOAD_CET_STATE;
+
 	/*
 	 * IA32e mode, and loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically.
 	 */
@@ -4546,6 +4553,9 @@ static u32 vmx_get_initial_vmexit_ctrl(void)
 {
 	u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
 
+	if (!enable_cet)
+		vmexit_ctrl &= ~VM_EXIT_LOAD_CET_STATE;
+
 	/*
 	 * Not used by KVM and never set in vmcs01 or vmcs02, but emulated for
 	 * nested virtualization and thus allowed to be set in vmcs12.
@@ -8155,7 +8165,7 @@ static __init void vmx_set_cpu_caps(void)
 	 * VMX_BASIC[bit56] == 0, inject #CP at VMX entry with error code
 	 * fails, so disable CET in this case too.
 	 */
-	if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest ||
+	if (!enable_cet || !enable_unrestricted_guest ||
 	    !cpu_has_vmx_basic_no_hw_errcode_cc()) {
 		kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
 		kvm_cpu_cap_clear(X86_FEATURE_IBT);
@@ -8630,6 +8640,9 @@ __init int vmx_hardware_setup(void)
 	    !cpu_has_vmx_invept_global())
 		enable_ept = 0;
 
+	if (!cpu_has_load_cet_ctrl())
+		enable_cet = 0;
+
 	/* NX support is required for shadow paging. */
 	if (!enable_ept && !boot_cpu_has(X86_FEATURE_NX)) {
 		pr_err_ratelimited("NX (Execute Disable) not supported\n");
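
Usage note: module_param_named(cet, enable_cet, bool, 0444) registers the knob read-only at runtime, so it has to be given at load time, e.g. kvm_intel.cet=0 on the kernel command line (assuming the usual kvm_intel module naming); vmx_hardware_setup() then clears it anyway when the CPU lacks the CET entry/exit controls.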


@@ -105,6 +105,12 @@
 			   ARM_SMCCC_SMC_32, \
 			   0, 0x3fff)
 
+/* C1-Pro erratum 4193714: SME DVMSync early acknowledgement */
+#define ARM_SMCCC_CPU_WORKAROUND_4193714 \
+	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+			   ARM_SMCCC_SMC_32, \
+			   ARM_SMCCC_OWNER_CPU, 0x10)
+
 #define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID \
 	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
 			   ARM_SMCCC_SMC_32, \
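
For reference, a sketch of how the new function ID decomposes under the SMCCC encoding (bit positions per ARM_SMCCC_CALL_VAL; the value 1 for ARM_SMCCC_OWNER_CPU is an assumption here):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t fast  = 1u << 31;	/* ARM_SMCCC_FAST_CALL */
		uint32_t smc32 = 0u << 30;	/* ARM_SMCCC_SMC_32 */
		uint32_t owner = 1u << 24;	/* ARM_SMCCC_OWNER_CPU (assumed 1) */
		uint32_t func  = 0x10;

		/* prints 0x81000010 */
		printf("0x%08x\n", (unsigned)(fast | smc32 | owner | func));
		return 0;
	}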


@@ -510,7 +510,12 @@ static void test_guest_memfd_guest(void)
 		    "Default VM type should support INIT_SHARED, supported flags = 0x%x",
 		    vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
 
-	size = vm->page_size;
+	/*
+	 * Use the max of the host or guest page size for all operations, as
+	 * KVM requires guest_memfd files and memslots to be sized to multiples
+	 * of the host page size.
+	 */
+	size = max_t(size_t, vm->page_size, page_size);
 	fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP |
 				   GUEST_MEMFD_FLAG_INIT_SHARED);
 	vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0);
@@ -519,7 +524,7 @@ static void test_guest_memfd_guest(void)
 	memset(mem, 0xaa, size);
 	kvm_munmap(mem, size);
 
-	virt_pg_map(vm, gpa, gpa);
+	virt_map(vm, gpa, gpa, size / vm->page_size);
 	vcpu_args_set(vcpu, 2, gpa, size);
 
 	vcpu_run(vcpu);


@@ -220,6 +220,8 @@ static void check_steal_time_uapi(void)
 	};
 
 	vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, 1, 0);
+	virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, 1);
+
 	st_ipa = (ulong)ST_GPA_BASE | 1;
 	ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);


@@ -63,7 +63,8 @@ static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
 	memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
 
-	if (!memslot || (offset + __fls(mask)) >= memslot->npages)
+	if (!memslot || offset >= memslot->npages ||
+	    offset + __fls(mask) >= memslot->npages)
 		return;
 
 	KVM_MMU_LOCK(kvm);
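
The shape of the bug, as a standalone sketch: __fls(mask) is the bit number of mask's highest set bit, and with a hostile offset near U64_MAX the sum offset + __fls(mask) wraps to a small value, so the old single comparison passed and the reset walked dirty bits outside the memslot. Checking offset by itself first closes the wrap:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t npages = 512;
		uint64_t offset = UINT64_MAX - 2;	/* hostile dirty-ring entry */
		unsigned int fls = 5;			/* __fls(mask) for mask = 0x3f */

		/* Old check: the sum wraps to 2, which is < npages. */
		printf("wrapped sum: %llu\n", (unsigned long long)(offset + fls));

		/* Fixed check: reject the offset before trusting the sum. */
		if (offset >= npages || offset + fls >= npages)
			printf("rejected\n");
		return 0;
	}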