mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-15 22:31:47 -04:00
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
"arm64:
- Add the pKVM side of the workaround for ARM's erratum 4193714,
provided that the EL3 firmware does its part of the job. KVM will
refuse to initialise otherwise
- Correctly handle 52bit VAs for guest EL2 stage-1 translations when
running under NV with E2H==0
- Correctly deal with permission faults in guest_memfd memslots
- Fix the steal-time selftest after the infrastructure was reworked
- Make sure the host cannot pass a nonsensical clock update to the
EL2 tracing infrastructure
- Appoint Steffen Eiden as a reviewer in anticipation of the KVM/s390
ability to run arm64 guests, which will inevitably lead to arm64
code being directly used on s390
- Make sure that EL2 is configured with both exception entry and exit
being Context Synchronization Events
- Handle the current vcpu being NULL on EL2 panic
- Fix the selftest_vcpu memcache being empty at the point of donation
or sharing
- Check that the memcache has enough capacity before engaging on the
share/donate path
- Fix __deactivate_fgt() to use its parameter rather than a variable
in the macro context
s390:
- Fix array overrun with large numbers of PCI devices
x86:
- Never use L0's PAUSE loop exiting while L2 is running, since it's
unlikely that a nested guest will help solve the hypervisor's
spinlock contention
- Fix emulation of MOVNTDQA
- Fix typo in Xen hypercall tracepoint
- Add back an optimization that was left behind when recently fixing
a bug
- Add module parameter to disable CET, whose implementation seems to
have issues. For now it remains enabled by default
Generic:
- Reject offset causing an unsigned overflow in kvm_reset_dirty_gfn()
Documentation:
- Update stale links
Selftests:
- Fix guest_memfd_test with host page size > guest page size"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (22 commits)
KVM: VMX: introduce module parameter to disable CET
KVM: x86: Swap the dst and src operand for MOVNTDQA
KVM: x86: use again the flush argument of __link_shadow_page()
KVM: selftests: Ensure gmem file sizes are multiple of host page size
Documentation: kvm: update links in the references section of AMD Memory Encryption
KVM: nSVM: Never use L0's PAUSE loop exiting while L2 is running
KVM: x86: Fix Xen hypercall tracepoint argument assignment
KVM: Reject wrapped offset in kvm_reset_dirty_gfn()
KVM: arm64: Pre-check vcpu memcache for host->guest donate
KVM: arm64: Pre-check vcpu memcache for host->guest share
KVM: arm64: Seed pkvm_ownership_selftest vcpu memcache
KVM: arm64: Fix __deactivate_fgt macro parameter typo
KVM: arm64: Guard against NULL vcpu on VHE hyp panic path
KVM: arm64: Make EL2 exception entry and exit context-synchronization events
MAINTAINERS: Add Steffen as reviewer for KVM/arm64
KVM: arm64: Remove potential UB on nvhe tracing clock update
KVM: selftests: arm64: Fix steal_time test after UAPI refactoring
KVM: arm64: Handle permission faults with guest_memfd
KVM: arm64: nv: Consider the DS bit when translating TCR_EL2
KVM: arm64: Work around C1-Pro erratum 4193714 for protected guests
...
This commit is contained in:
@@ -656,8 +656,8 @@ References
|
||||
See [white-paper]_, [api-spec]_, [amd-apm]_, [kvm-forum]_, and [snp-fw-abi]_
|
||||
for more info.
|
||||
|
||||
.. [white-paper] https://developer.amd.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf
|
||||
.. [api-spec] https://support.amd.com/TechDocs/55766_SEV-KM_API_Specification.pdf
|
||||
.. [amd-apm] https://support.amd.com/TechDocs/24593.pdf (section 15.34)
|
||||
.. [white-paper] https://docs.amd.com/v/u/en-US/memory-encryption-white-paper
|
||||
.. [api-spec] https://docs.amd.com/v/u/en-US/55766_PUB_3.24_SEV_API
|
||||
.. [amd-apm] https://docs.amd.com/v/u/en-US/24593_3.44_APM_Vol2 (section 15.34)
|
||||
.. [kvm-forum] https://www.linux-kvm.org/images/7/74/02x08A-Thomas_Lendacky-AMDs_Virtualizatoin_Memory_Encryption_Technology.pdf
|
||||
.. [snp-fw-abi] https://www.amd.com/system/files/TechDocs/56860.pdf
|
||||
.. [snp-fw-abi] https://www.amd.com/content/dam/amd/en/documents/developer/56860.pdf
|
||||
|
||||
@@ -14055,6 +14055,7 @@ KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
|
||||
M: Marc Zyngier <maz@kernel.org>
|
||||
M: Oliver Upton <oupton@kernel.org>
|
||||
R: Joey Gouly <joey.gouly@arm.com>
|
||||
R: Steffen Eiden <seiden@linux.ibm.com>
|
||||
R: Suzuki K Poulose <suzuki.poulose@arm.com>
|
||||
R: Zenghui Yu <yuzenghui@huawei.com>
|
||||
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
|
||||
|
||||
@@ -23,6 +23,7 @@ static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2)
|
||||
static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr)
|
||||
{
|
||||
return TCR_EPD1_MASK | /* disable TTBR1_EL1 */
|
||||
((tcr & TCR_EL2_DS) ? TCR_DS : 0) |
|
||||
((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) |
|
||||
tcr_el2_ps_to_tcr_el1_ips(tcr) |
|
||||
(tcr & TCR_EL2_TG0_MASK) |
|
||||
|
||||
@@ -844,7 +844,7 @@
|
||||
#define INIT_SCTLR_EL2_MMU_ON \
|
||||
(SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \
|
||||
SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \
|
||||
SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
|
||||
SCTLR_ELx_ITFSB | SCTLR_ELx_EIS | SCTLR_ELx_EOS | SCTLR_EL2_RES1)
|
||||
|
||||
#define INIT_SCTLR_EL2_MMU_OFF \
|
||||
(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* Author: Christoffer Dall <c.dall@virtualopensystems.com>
|
||||
*/
|
||||
|
||||
#include <linux/arm-smccc.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/cpu_pm.h>
|
||||
#include <linux/errno.h>
|
||||
@@ -2638,6 +2639,22 @@ static int init_pkvm_host_sve_state(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pkvm_check_sme_dvmsync_fw_call(void)
|
||||
{
|
||||
struct arm_smccc_res res;
|
||||
|
||||
if (!cpus_have_final_cap(ARM64_WORKAROUND_4193714))
|
||||
return 0;
|
||||
|
||||
arm_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res);
|
||||
if (res.a0) {
|
||||
kvm_err("pKVM requires firmware support for C1-Pro erratum 4193714\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Finalizes the initialization of hyp mode, once everything else is initialized
|
||||
* and the initialization process cannot fail.
|
||||
@@ -2838,6 +2855,10 @@ static int __init init_hyp_mode(void)
|
||||
if (err)
|
||||
goto out_err;
|
||||
|
||||
err = pkvm_check_sme_dvmsync_fw_call();
|
||||
if (err)
|
||||
goto out_err;
|
||||
|
||||
err = kvm_hyp_init_protection(hyp_va_bits);
|
||||
if (err) {
|
||||
kvm_err("Failed to init hyp memory protection\n");
|
||||
|
||||
@@ -245,7 +245,7 @@ static inline void __activate_traps_ich_hfgxtr(struct kvm_vcpu *vcpu)
|
||||
__activate_fgt(hctxt, vcpu, ICH_HFGITR_EL2);
|
||||
}
|
||||
|
||||
#define __deactivate_fgt(htcxt, vcpu, reg) \
|
||||
#define __deactivate_fgt(hctxt, vcpu, reg) \
|
||||
do { \
|
||||
write_sysreg_s(ctxt_sys_reg(hctxt, reg), \
|
||||
SYS_ ## reg); \
|
||||
|
||||
@@ -35,6 +35,9 @@ void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc)
|
||||
struct clock_data *clock = &trace_clock_data;
|
||||
u64 bank = clock->cur ^ 1;
|
||||
|
||||
if (!mult || shift >= 64)
|
||||
return;
|
||||
|
||||
clock->data[bank].mult = mult;
|
||||
clock->data[bank].shift = shift;
|
||||
clock->data[bank].epoch_ns = epoch_ns;
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
@@ -14,6 +15,7 @@
|
||||
|
||||
#include <hyp/fault.h>
|
||||
|
||||
#include <nvhe/arm-smccc.h>
|
||||
#include <nvhe/gfp.h>
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
@@ -29,6 +31,19 @@ static struct hyp_pool host_s2_pool;
|
||||
static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
|
||||
#define current_vm (*this_cpu_ptr(&__current_vm))
|
||||
|
||||
static void pkvm_sme_dvmsync_fw_call(void)
|
||||
{
|
||||
if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714)) {
|
||||
struct arm_smccc_res res;
|
||||
|
||||
/*
|
||||
* Ignore the return value. Probing for the workaround
|
||||
* availability took place in init_hyp_mode().
|
||||
*/
|
||||
hyp_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res);
|
||||
}
|
||||
}
|
||||
|
||||
static void guest_lock_component(struct pkvm_hyp_vm *vm)
|
||||
{
|
||||
hyp_spin_lock(&vm->lock);
|
||||
@@ -574,8 +589,14 @@ static int host_stage2_set_owner_metadata_locked(phys_addr_t addr, u64 size,
|
||||
ret = host_stage2_try(kvm_pgtable_stage2_annotate, &host_mmu.pgt,
|
||||
addr, size, &host_s2_pool,
|
||||
KVM_HOST_INVALID_PTE_TYPE_DONATION, annotation);
|
||||
if (!ret)
|
||||
if (!ret) {
|
||||
/*
|
||||
* After stage2 maintenance has happened, but before the page
|
||||
* owner has changed.
|
||||
*/
|
||||
pkvm_sme_dvmsync_fw_call();
|
||||
__host_update_page_state(addr, size, PKVM_NOPAGE);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1369,6 +1390,22 @@ int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm)
|
||||
return ret && ret != -EHWPOISON ? ret : 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* share/donate install at most one stage-2 leaf (PAGE_SIZE, or one
|
||||
* KVM_PGTABLE_LAST_LEVEL - 1 block for share). kvm_mmu_cache_min_pages()
|
||||
* bounds the worst-case allocation: exact for the PAGE_SIZE leaf,
|
||||
* conservative by one for the block.
|
||||
*/
|
||||
static int __guest_check_pgtable_memcache(struct pkvm_hyp_vcpu *vcpu)
|
||||
{
|
||||
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
|
||||
|
||||
if (vcpu->vcpu.arch.pkvm_memcache.nr_pages < kvm_mmu_cache_min_pages(vm->pgt.mmu))
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
|
||||
{
|
||||
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
|
||||
@@ -1388,6 +1425,10 @@ int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = __guest_check_pgtable_memcache(vcpu);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
meta = host_stage2_encode_gfn_meta(vm, gfn);
|
||||
WARN_ON(host_stage2_set_owner_metadata_locked(phys, PAGE_SIZE,
|
||||
PKVM_ID_GUEST, meta));
|
||||
@@ -1453,6 +1494,10 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu
|
||||
}
|
||||
}
|
||||
|
||||
ret = __guest_check_pgtable_memcache(vcpu);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
for_each_hyp_page(page, phys, size) {
|
||||
set_host_state(page, PKVM_PAGE_SHARED_OWNED);
|
||||
page->host_share_guest_count++;
|
||||
|
||||
@@ -752,16 +752,30 @@ static struct pkvm_hyp_vcpu selftest_vcpu = {
|
||||
struct pkvm_hyp_vcpu *init_selftest_vm(void *virt)
|
||||
{
|
||||
struct hyp_page *p = hyp_virt_to_page(virt);
|
||||
unsigned long min_pages, seeded = 0;
|
||||
int i;
|
||||
|
||||
selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
|
||||
WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt));
|
||||
|
||||
/*
|
||||
* Mirror pkvm_refill_memcache() for the share/donate pre-checks;
|
||||
* the selftest invokes those functions directly and would
|
||||
* otherwise see an empty memcache.
|
||||
*/
|
||||
min_pages = kvm_mmu_cache_min_pages(&selftest_vm.kvm.arch.mmu);
|
||||
|
||||
for (i = 0; i < pkvm_selftest_pages(); i++) {
|
||||
if (p[i].refcount)
|
||||
continue;
|
||||
p[i].refcount = 1;
|
||||
hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
|
||||
if (seeded < min_pages) {
|
||||
push_hyp_memcache(&selftest_vcpu.vcpu.arch.pkvm_memcache,
|
||||
hyp_page_to_virt(&p[i]), hyp_virt_to_phys);
|
||||
seeded++;
|
||||
} else {
|
||||
hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
|
||||
}
|
||||
}
|
||||
|
||||
selftest_vm.kvm.arch.pkvm.handle = __pkvm_reserve_vm();
|
||||
|
||||
@@ -663,7 +663,8 @@ static void __noreturn __hyp_call_panic(u64 spsr, u64 elr, u64 par)
|
||||
host_ctxt = host_data_ptr(host_ctxt);
|
||||
vcpu = host_ctxt->__hyp_running_vcpu;
|
||||
|
||||
__deactivate_traps(vcpu);
|
||||
if (vcpu)
|
||||
__deactivate_traps(vcpu);
|
||||
sysreg_restore_host_state_vhe(host_ctxt);
|
||||
|
||||
panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n",
|
||||
|
||||
@@ -1576,21 +1576,24 @@ struct kvm_s2_fault_desc {
|
||||
static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
|
||||
{
|
||||
bool write_fault, exec_fault;
|
||||
bool perm_fault = kvm_vcpu_trap_is_permission_fault(s2fd->vcpu);
|
||||
enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
|
||||
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
|
||||
struct kvm_pgtable *pgt = s2fd->vcpu->arch.hw_mmu->pgt;
|
||||
unsigned long mmu_seq;
|
||||
struct page *page;
|
||||
struct kvm *kvm = s2fd->vcpu->kvm;
|
||||
void *memcache;
|
||||
void *memcache = NULL;
|
||||
kvm_pfn_t pfn;
|
||||
gfn_t gfn;
|
||||
int ret;
|
||||
|
||||
memcache = get_mmu_memcache(s2fd->vcpu);
|
||||
ret = topup_mmu_memcache(s2fd->vcpu, memcache);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!perm_fault) {
|
||||
memcache = get_mmu_memcache(s2fd->vcpu);
|
||||
ret = topup_mmu_memcache(s2fd->vcpu, memcache);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (s2fd->nested)
|
||||
gfn = kvm_s2_trans_output(s2fd->nested) >> PAGE_SHIFT;
|
||||
@@ -1631,9 +1634,19 @@ static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE,
|
||||
__pfn_to_phys(pfn), prot,
|
||||
memcache, flags);
|
||||
if (perm_fault) {
|
||||
/*
|
||||
* Drop the SW bits in favour of those stored in the
|
||||
* PTE, which will be preserved.
|
||||
*/
|
||||
prot &= ~KVM_NV_GUEST_MAP_SZ;
|
||||
ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, s2fd->fault_ipa,
|
||||
prot, flags);
|
||||
} else {
|
||||
ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE,
|
||||
__pfn_to_phys(pfn), prot,
|
||||
memcache, flags);
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
kvm_release_faultin_page(kvm, page, !!ret, prot & KVM_PGTABLE_PROT_W);
|
||||
|
||||
@@ -3310,8 +3310,7 @@ static void aen_host_forward(unsigned long si)
|
||||
struct zpci_gaite *gaite;
|
||||
struct kvm *kvm;
|
||||
|
||||
gaite = (struct zpci_gaite *)aift->gait +
|
||||
(si * sizeof(struct zpci_gaite));
|
||||
gaite = aift->gait + si;
|
||||
if (gaite->count == 0)
|
||||
return;
|
||||
if (gaite->aisb != 0)
|
||||
|
||||
@@ -166,7 +166,7 @@ static int kvm_zpci_set_airq(struct zpci_dev *zdev)
|
||||
fib.fmt0.noi = airq_iv_end(zdev->aibv);
|
||||
fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
|
||||
fib.fmt0.aibvo = 0;
|
||||
fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
|
||||
fib.fmt0.aisb = virt_to_phys(aift->sbv->vector) + (zdev->aisb / 64) * 8;
|
||||
fib.fmt0.aisbo = zdev->aisb & 63;
|
||||
fib.gd = zdev->gisa;
|
||||
|
||||
@@ -290,8 +290,7 @@ static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
|
||||
phys_to_virt(fib->fmt0.aibv));
|
||||
|
||||
spin_lock_irq(&aift->gait_lock);
|
||||
gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
|
||||
sizeof(struct zpci_gaite));
|
||||
gaite = aift->gait + zdev->aisb;
|
||||
|
||||
/* If assist not requested, host will get all alerts */
|
||||
if (assist)
|
||||
@@ -309,7 +308,7 @@ static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
|
||||
|
||||
/* Update guest FIB for re-issue */
|
||||
fib->fmt0.aisbo = zdev->aisb & 63;
|
||||
fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
|
||||
fib->fmt0.aisb = virt_to_phys(aift->sbv->vector) + (zdev->aisb / 64) * 8;
|
||||
fib->fmt0.isc = gisc;
|
||||
|
||||
/* Save some guest fib values in the host for later use */
|
||||
@@ -357,8 +356,7 @@ static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
|
||||
if (zdev->kzdev->fib.fmt0.aibv == 0)
|
||||
goto out;
|
||||
spin_lock_irq(&aift->gait_lock);
|
||||
gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
|
||||
sizeof(struct zpci_gaite));
|
||||
gaite = aift->gait + zdev->aisb;
|
||||
isc = gaite->gisc;
|
||||
gaite->count--;
|
||||
if (gaite->count == 0) {
|
||||
|
||||
@@ -4481,7 +4481,7 @@ static const struct opcode opcode_map_0f_38[256] = {
|
||||
X16(N), X16(N),
|
||||
/* 0x20 - 0x2f */
|
||||
X8(N),
|
||||
X2(N), GP(SrcReg | DstMem | ModRM | Mov | Aligned, &pfx_0f_e7_0f_38_2a), N, N, N, N, N,
|
||||
X2(N), GP(SrcMem | DstReg | ModRM | Mov | Aligned, &pfx_0f_e7_0f_38_2a), N, N, N, N, N,
|
||||
/* 0x30 - 0x7f */
|
||||
X16(N), X16(N), X16(N), X16(N), X16(N),
|
||||
/* 0x80 - 0xef */
|
||||
|
||||
@@ -2526,6 +2526,23 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
|
||||
__shadow_walk_next(iterator, *iterator->sptep);
|
||||
}
|
||||
|
||||
/*
|
||||
* Note: while normally KVM uses a "bool flush" return value to let
|
||||
* the caller batch flushes, __link_shadow_page() flushes immediately
|
||||
* before populating the parent PTE with the new shadow page. The
|
||||
* typical callers, direct_map() and FNAME(fetch)(), are not going
|
||||
* to zap more than one huge SPTE anyway.
|
||||
*
|
||||
* The only exception, where @flush can be false, is when a huge SPTE
|
||||
* is replaced with a shadow page SPTE with a fully populated page table,
|
||||
* which can happen from shadow_mmu_split_huge_page(). In this case,
|
||||
* no memory is unmapped across the change to the page tables and no
|
||||
* immediate flush is needed for correctness.
|
||||
*
|
||||
* Even in that case, calls to kvm_mmu_commit_zap_page() are not
|
||||
* batched. Doing so would require adding an invalid_list argument
|
||||
* all the way down to __walk_slot_rmaps().
|
||||
*/
|
||||
static void __link_shadow_page(struct kvm *kvm,
|
||||
struct kvm_mmu_memory_cache *cache, u64 *sptep,
|
||||
struct kvm_mmu_page *sp, bool flush)
|
||||
@@ -2541,8 +2558,10 @@ static void __link_shadow_page(struct kvm *kvm,
|
||||
parent_sp = sptep_to_sp(sptep);
|
||||
WARN_ON_ONCE(parent_sp->role.level == PG_LEVEL_4K);
|
||||
|
||||
mmu_page_zap_pte(kvm, parent_sp, sptep, &invalid_list);
|
||||
kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, true);
|
||||
if (mmu_page_zap_pte(kvm, parent_sp, sptep, &invalid_list))
|
||||
kvm_mmu_commit_zap_page(kvm, &invalid_list);
|
||||
else if (flush)
|
||||
kvm_flush_remote_tlbs_sptep(kvm, sptep);
|
||||
}
|
||||
|
||||
spte = make_nonleaf_spte(sp->spt, sp_ad_disabled(sp));
|
||||
|
||||
@@ -160,6 +160,16 @@ void nested_vmcb02_recalc_intercepts(struct vcpu_svm *svm)
|
||||
if (!intercept_smi)
|
||||
vmcb_clr_intercept(&vmcb02->control, INTERCEPT_SMI);
|
||||
|
||||
/*
|
||||
* Intercept PAUSE if and only if L1 wants to. KVM intercepts PAUSE so
|
||||
* that a vCPU that may be spinning waiting for a lock can be scheduled
|
||||
* out in favor of the vCPU that holds said lock. KVM doesn't support
|
||||
* yielding across L2 vCPUs, as KVM has limited visibility into which
|
||||
* L2 vCPUs are in the same L2 VM, i.e. may be contending for locks.
|
||||
*/
|
||||
if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE))
|
||||
vmcb_clr_intercept(&vmcb02->control, INTERCEPT_PAUSE);
|
||||
|
||||
if (nested_vmcb_needs_vls_intercept(svm)) {
|
||||
/*
|
||||
* If the virtual VMLOAD/VMSAVE is not enabled for the L2,
|
||||
@@ -819,7 +829,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
|
||||
struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
|
||||
struct vmcb *vmcb01 = svm->vmcb01.ptr;
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
u32 pause_count12, pause_thresh12;
|
||||
|
||||
nested_svm_transition_tlb_flush(vcpu);
|
||||
|
||||
@@ -947,31 +956,13 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
|
||||
vmcb02->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
|
||||
|
||||
if (guest_cpu_cap_has(vcpu, X86_FEATURE_PAUSEFILTER))
|
||||
pause_count12 = vmcb12_ctrl->pause_filter_count;
|
||||
vmcb02->control.pause_filter_count = vmcb12_ctrl->pause_filter_count;
|
||||
else
|
||||
pause_count12 = 0;
|
||||
vmcb02->control.pause_filter_count = 0;
|
||||
if (guest_cpu_cap_has(vcpu, X86_FEATURE_PFTHRESHOLD))
|
||||
pause_thresh12 = vmcb12_ctrl->pause_filter_thresh;
|
||||
vmcb02->control.pause_filter_thresh = vmcb12_ctrl->pause_filter_thresh;
|
||||
else
|
||||
pause_thresh12 = 0;
|
||||
if (kvm_pause_in_guest(svm->vcpu.kvm)) {
|
||||
/* use guest values since host doesn't intercept PAUSE */
|
||||
vmcb02->control.pause_filter_count = pause_count12;
|
||||
vmcb02->control.pause_filter_thresh = pause_thresh12;
|
||||
|
||||
} else {
|
||||
/* start from host values otherwise */
|
||||
vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
|
||||
vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
|
||||
|
||||
/* ... but ensure filtering is disabled if so requested. */
|
||||
if (vmcb12_is_intercept(vmcb12_ctrl, INTERCEPT_PAUSE)) {
|
||||
if (!pause_count12)
|
||||
vmcb02->control.pause_filter_count = 0;
|
||||
if (!pause_thresh12)
|
||||
vmcb02->control.pause_filter_thresh = 0;
|
||||
}
|
||||
}
|
||||
vmcb02->control.pause_filter_thresh = 0;
|
||||
|
||||
/*
|
||||
* Take ALLOW_LARGER_RAP from vmcb12 even though it should be safe to
|
||||
@@ -1298,12 +1289,6 @@ void nested_svm_vmexit(struct vcpu_svm *svm)
|
||||
/* in case we halted in L2 */
|
||||
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
|
||||
|
||||
if (!kvm_pause_in_guest(vcpu->kvm)) {
|
||||
vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
|
||||
vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Invalidate last_bus_lock_rip unless KVM is still waiting for the
|
||||
* guest to make forward progress before re-enabling bus lock detection.
|
||||
|
||||
@@ -913,7 +913,15 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
|
||||
struct vmcb_control_area *control = &svm->vmcb->control;
|
||||
int old = control->pause_filter_count;
|
||||
|
||||
if (kvm_pause_in_guest(vcpu->kvm))
|
||||
/* Adjusting pause_filter_count makes no sense if PLE is disabled. */
|
||||
WARN_ON_ONCE(kvm_pause_in_guest(vcpu->kvm));
|
||||
|
||||
/*
|
||||
* While running L2, KVM should intercept PAUSE if and only if L1 wants
|
||||
* to intercept PAUSE, and L1's intercept should take priority, i.e.
|
||||
* KVM should never handle a PAUSE intercept from L2.
|
||||
*/
|
||||
if (WARN_ON_ONCE(is_guest_mode(vcpu)))
|
||||
return;
|
||||
|
||||
control->pause_filter_count = __grow_ple_window(old,
|
||||
@@ -934,7 +942,10 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
|
||||
struct vmcb_control_area *control = &svm->vmcb->control;
|
||||
int old = control->pause_filter_count;
|
||||
|
||||
if (kvm_pause_in_guest(vcpu->kvm))
|
||||
/* Adjusting pause_filter_count makes no sense if PLE is disabled. */
|
||||
WARN_ON_ONCE(kvm_pause_in_guest(vcpu->kvm));
|
||||
|
||||
if (is_guest_mode(vcpu))
|
||||
return;
|
||||
|
||||
control->pause_filter_count =
|
||||
|
||||
@@ -154,7 +154,7 @@ TRACE_EVENT(kvm_xen_hypercall,
|
||||
__entry->a2 = a2;
|
||||
__entry->a3 = a3;
|
||||
__entry->a4 = a4;
|
||||
__entry->a4 = a5;
|
||||
__entry->a5 = a5;
|
||||
),
|
||||
|
||||
TP_printk("cpl %d nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 %lx",
|
||||
|
||||
@@ -14,6 +14,7 @@ extern bool __read_mostly flexpriority_enabled;
|
||||
extern bool __read_mostly enable_ept;
|
||||
extern bool __read_mostly enable_unrestricted_guest;
|
||||
extern bool __read_mostly enable_ept_ad_bits;
|
||||
extern bool __read_mostly enable_cet;
|
||||
extern bool __read_mostly enable_pml;
|
||||
extern int __read_mostly pt_mode;
|
||||
|
||||
|
||||
@@ -108,6 +108,9 @@ module_param_named(unrestricted_guest,
|
||||
bool __read_mostly enable_ept_ad_bits = 1;
|
||||
module_param_named(eptad, enable_ept_ad_bits, bool, 0444);
|
||||
|
||||
bool __read_mostly enable_cet = 1;
|
||||
module_param_named(cet, enable_cet, bool, 0444);
|
||||
|
||||
static bool __read_mostly emulate_invalid_guest_state = true;
|
||||
module_param(emulate_invalid_guest_state, bool, 0444);
|
||||
|
||||
@@ -4476,7 +4479,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
|
||||
* SSP is reloaded from IA32_PL3_SSP. Check SDM Vol.2A/B Chapter
|
||||
* 3 and 4 for details.
|
||||
*/
|
||||
if (cpu_has_load_cet_ctrl()) {
|
||||
if (enable_cet) {
|
||||
vmcs_writel(HOST_S_CET, kvm_host.s_cet);
|
||||
vmcs_writel(HOST_SSP, 0);
|
||||
vmcs_writel(HOST_INTR_SSP_TABLE, 0);
|
||||
@@ -4532,6 +4535,10 @@ static u32 vmx_get_initial_vmentry_ctrl(void)
|
||||
if (vmx_pt_mode_is_system())
|
||||
vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
|
||||
VM_ENTRY_LOAD_IA32_RTIT_CTL);
|
||||
|
||||
if (!enable_cet)
|
||||
vmentry_ctrl &= ~VM_ENTRY_LOAD_CET_STATE;
|
||||
|
||||
/*
|
||||
* IA32e mode, and loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically.
|
||||
*/
|
||||
@@ -4546,6 +4553,9 @@ static u32 vmx_get_initial_vmexit_ctrl(void)
|
||||
{
|
||||
u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
|
||||
|
||||
if (!enable_cet)
|
||||
vmexit_ctrl &= ~VM_EXIT_LOAD_CET_STATE;
|
||||
|
||||
/*
|
||||
* Not used by KVM and never set in vmcs01 or vmcs02, but emulated for
|
||||
* nested virtualization and thus allowed to be set in vmcs12.
|
||||
@@ -8155,7 +8165,7 @@ static __init void vmx_set_cpu_caps(void)
|
||||
* VMX_BASIC[bit56] == 0, inject #CP at VMX entry with error code
|
||||
* fails, so disable CET in this case too.
|
||||
*/
|
||||
if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest ||
|
||||
if (!enable_cet || !enable_unrestricted_guest ||
|
||||
!cpu_has_vmx_basic_no_hw_errcode_cc()) {
|
||||
kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
|
||||
kvm_cpu_cap_clear(X86_FEATURE_IBT);
|
||||
@@ -8630,6 +8640,9 @@ __init int vmx_hardware_setup(void)
|
||||
!cpu_has_vmx_invept_global())
|
||||
enable_ept = 0;
|
||||
|
||||
if (!cpu_has_load_cet_ctrl())
|
||||
enable_cet = 0;
|
||||
|
||||
/* NX support is required for shadow paging. */
|
||||
if (!enable_ept && !boot_cpu_has(X86_FEATURE_NX)) {
|
||||
pr_err_ratelimited("NX (Execute Disable) not supported\n");
|
||||
|
||||
@@ -105,6 +105,12 @@
|
||||
ARM_SMCCC_SMC_32, \
|
||||
0, 0x3fff)
|
||||
|
||||
/* C1-Pro erratum 4193714: SME DVMSync early acknowledgement */
|
||||
#define ARM_SMCCC_CPU_WORKAROUND_4193714 \
|
||||
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
|
||||
ARM_SMCCC_SMC_32, \
|
||||
ARM_SMCCC_OWNER_CPU, 0x10)
|
||||
|
||||
#define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID \
|
||||
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
|
||||
ARM_SMCCC_SMC_32, \
|
||||
|
||||
@@ -510,7 +510,12 @@ static void test_guest_memfd_guest(void)
|
||||
"Default VM type should support INIT_SHARED, supported flags = 0x%x",
|
||||
vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
|
||||
|
||||
size = vm->page_size;
|
||||
/*
|
||||
* Use the max of the host or guest page size for all operations, as
|
||||
* KVM requires guest_memfd files and memslots to be sized to multiples
|
||||
* of the host page size.
|
||||
*/
|
||||
size = max_t(size_t, vm->page_size, page_size);
|
||||
fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP |
|
||||
GUEST_MEMFD_FLAG_INIT_SHARED);
|
||||
vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0);
|
||||
@@ -519,7 +524,7 @@ static void test_guest_memfd_guest(void)
|
||||
memset(mem, 0xaa, size);
|
||||
kvm_munmap(mem, size);
|
||||
|
||||
virt_pg_map(vm, gpa, gpa);
|
||||
virt_map(vm, gpa, gpa, size / vm->page_size);
|
||||
vcpu_args_set(vcpu, 2, gpa, size);
|
||||
vcpu_run(vcpu);
|
||||
|
||||
|
||||
@@ -220,6 +220,8 @@ static void check_steal_time_uapi(void)
|
||||
};
|
||||
|
||||
vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
|
||||
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, 1, 0);
|
||||
virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, 1);
|
||||
|
||||
st_ipa = (ulong)ST_GPA_BASE | 1;
|
||||
ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
|
||||
|
||||
@@ -63,7 +63,8 @@ static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
|
||||
|
||||
memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
|
||||
|
||||
if (!memslot || (offset + __fls(mask)) >= memslot->npages)
|
||||
if (!memslot || offset >= memslot->npages ||
|
||||
offset + __fls(mask) >= memslot->npages)
|
||||
return;
|
||||
|
||||
KVM_MMU_LOCK(kvm);
|
||||
|
||||
Reference in New Issue
Block a user