diff --git a/MAINTAINERS b/MAINTAINERS
index 882214b0e7db..3fe4ea59199c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14052,6 +14052,7 @@ KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
 M:	Marc Zyngier
 M:	Oliver Upton
 R:	Joey Gouly
+R:	Steffen Eiden
 R:	Suzuki K Poulose
 R:	Zenghui Yu
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 091544e6af44..dc2957662ff2 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -23,6 +23,7 @@ static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2)
 static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr)
 {
 	return TCR_EPD1_MASK |				/* disable TTBR1_EL1 */
+	       ((tcr & TCR_EL2_DS) ? TCR_DS : 0) |
 	       ((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) |
 	       tcr_el2_ps_to_tcr_el1_ips(tcr) |
 	       (tcr & TCR_EL2_TG0_MASK) |
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 736561480f36..7aa08d59d494 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -844,7 +844,7 @@
 #define INIT_SCTLR_EL2_MMU_ON						\
 	(SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I |	\
 	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 |		\
-	 SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
+	 SCTLR_ELx_ITFSB | SCTLR_ELx_EIS | SCTLR_ELx_EOS | SCTLR_EL2_RES1)
 
 #define INIT_SCTLR_EL2_MMU_OFF \
 	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 8bb2c7422cc8..34c9950884d5 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -4,6 +4,7 @@
  * Author: Christoffer Dall
  */
 
+#include
 #include
 #include
 #include
@@ -2638,6 +2639,22 @@ static int init_pkvm_host_sve_state(void)
 	return 0;
 }
 
+static int pkvm_check_sme_dvmsync_fw_call(void)
+{
+	struct arm_smccc_res res;
+
+	if (!cpus_have_final_cap(ARM64_WORKAROUND_4193714))
+		return 0;
+
+	arm_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res);
+	if (res.a0) {
+		kvm_err("pKVM requires firmware support for C1-Pro erratum 4193714\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
 /*
  * Finalizes the initialization of hyp mode, once everything else is initialized
  * and the initialziation process cannot fail.
@@ -2838,6 +2855,10 @@ static int __init init_hyp_mode(void)
 	if (err)
 		goto out_err;
 
+	err = pkvm_check_sme_dvmsync_fw_call();
+	if (err)
+		goto out_err;
+
 	err = kvm_hyp_init_protection(hyp_va_bits);
 	if (err) {
 		kvm_err("Failed to init hyp memory protection\n");
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 98b2976837b1..bf0eb5e43427 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -245,7 +245,7 @@ static inline void __activate_traps_ich_hfgxtr(struct kvm_vcpu *vcpu)
 		__activate_fgt(hctxt, vcpu, ICH_HFGITR_EL2);
 }
 
-#define __deactivate_fgt(htcxt, vcpu, reg)				\
+#define __deactivate_fgt(hctxt, vcpu, reg)				\
 	do {								\
 		write_sysreg_s(ctxt_sys_reg(hctxt, reg),		\
 			       SYS_ ## reg);				\
diff --git a/arch/arm64/kvm/hyp/nvhe/clock.c b/arch/arm64/kvm/hyp/nvhe/clock.c
index 32fc4313fe43..a7fc61976fd0 100644
--- a/arch/arm64/kvm/hyp/nvhe/clock.c
+++ b/arch/arm64/kvm/hyp/nvhe/clock.c
@@ -35,6 +35,9 @@ void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc)
 	struct clock_data *clock = &trace_clock_data;
 	u64 bank = clock->cur ^ 1;
 
+	if (!mult || shift >= 64)
+		return;
+
 	clock->data[bank].mult = mult;
 	clock->data[bank].shift = shift;
 	clock->data[bank].epoch_ns = epoch_ns;
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 28a471d1927c..25f04629014e 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -5,6 +5,7 @@
  */
 
 #include
+
 #include
 #include
 #include
@@ -14,6 +15,7 @@
 
 #include
 
+#include
 #include
 #include
@@ -29,6 +31,19 @@ static struct hyp_pool host_s2_pool;
 static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
 #define current_vm (*this_cpu_ptr(&__current_vm))
 
+static void pkvm_sme_dvmsync_fw_call(void)
+{
+	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714)) {
+		struct arm_smccc_res res;
+
+		/*
+		 * Ignore the return value. Probing for the workaround
+		 * availability took place in init_hyp_mode().
+		 */
+		hyp_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res);
+	}
+}
+
 static void guest_lock_component(struct pkvm_hyp_vm *vm)
 {
 	hyp_spin_lock(&vm->lock);
@@ -574,8 +589,14 @@ static int host_stage2_set_owner_metadata_locked(phys_addr_t addr, u64 size,
 	ret = host_stage2_try(kvm_pgtable_stage2_annotate, &host_mmu.pgt,
 			      addr, size, &host_s2_pool,
 			      KVM_HOST_INVALID_PTE_TYPE_DONATION, annotation);
-	if (!ret)
+	if (!ret) {
+		/*
+		 * After stage2 maintenance has happened, but before the page
+		 * owner has changed.
+		 */
+		pkvm_sme_dvmsync_fw_call();
 		__host_update_page_state(addr, size, PKVM_NOPAGE);
+	}
 
 	return ret;
 }
@@ -1369,6 +1390,22 @@ int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm)
 	return ret && ret != -EHWPOISON ? ret : 0;
 }
 
+/*
+ * share/donate install at most one stage-2 leaf (PAGE_SIZE, or one
+ * KVM_PGTABLE_LAST_LEVEL - 1 block for share). kvm_mmu_cache_min_pages()
+ * bounds the worst-case allocation: exact for the PAGE_SIZE leaf,
+ * conservative by one for the block.
+ */
+static int __guest_check_pgtable_memcache(struct pkvm_hyp_vcpu *vcpu)
+{
+	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+
+	if (vcpu->vcpu.arch.pkvm_memcache.nr_pages < kvm_mmu_cache_min_pages(vm->pgt.mmu))
+		return -ENOMEM;
+
+	return 0;
+}
+
 int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
 {
 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
@@ -1388,6 +1425,10 @@ int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
 	if (ret)
 		goto unlock;
 
+	ret = __guest_check_pgtable_memcache(vcpu);
+	if (ret)
+		goto unlock;
+
 	meta = host_stage2_encode_gfn_meta(vm, gfn);
 	WARN_ON(host_stage2_set_owner_metadata_locked(phys, PAGE_SIZE,
 						      PKVM_ID_GUEST, meta));
@@ -1453,6 +1494,10 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu
 		}
 	}
 
+	ret = __guest_check_pgtable_memcache(vcpu);
+	if (ret)
+		goto unlock;
+
 	for_each_hyp_page(page, phys, size) {
 		set_host_state(page, PKVM_PAGE_SHARED_OWNED);
 		page->host_share_guest_count++;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index e7496eb85628..eb1c10120f9f 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -752,16 +752,30 @@ static struct pkvm_hyp_vcpu selftest_vcpu = {
 struct pkvm_hyp_vcpu *init_selftest_vm(void *virt)
 {
 	struct hyp_page *p = hyp_virt_to_page(virt);
+	unsigned long min_pages, seeded = 0;
 	int i;
 
 	selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
 	WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt));
 
+	/*
+	 * Mirror pkvm_refill_memcache() for the share/donate pre-checks;
+	 * the selftest invokes those functions directly and would
+	 * otherwise see an empty memcache.
+	 */
+	min_pages = kvm_mmu_cache_min_pages(&selftest_vm.kvm.arch.mmu);
+
 	for (i = 0; i < pkvm_selftest_pages(); i++) {
 		if (p[i].refcount)
 			continue;
 		p[i].refcount = 1;
-		hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
+		if (seeded < min_pages) {
+			push_hyp_memcache(&selftest_vcpu.vcpu.arch.pkvm_memcache,
+					  hyp_page_to_virt(&p[i]), hyp_virt_to_phys);
+			seeded++;
+		} else {
+			hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
+		}
 	}
 
 	selftest_vm.kvm.arch.pkvm.handle = __pkvm_reserve_vm();
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 9db3f11a4754..1e8995add14f 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -663,7 +663,8 @@ static void __noreturn __hyp_call_panic(u64 spsr, u64 elr, u64 par)
 	host_ctxt = host_data_ptr(host_ctxt);
 	vcpu = host_ctxt->__hyp_running_vcpu;
 
-	__deactivate_traps(vcpu);
+	if (vcpu)
+		__deactivate_traps(vcpu);
 	sysreg_restore_host_state_vhe(host_ctxt);
 
 	panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n",
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d089c107d9b7..4da9281312eb 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1576,21 +1576,24 @@ struct kvm_s2_fault_desc {
 static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
 {
 	bool write_fault, exec_fault;
+	bool perm_fault = kvm_vcpu_trap_is_permission_fault(s2fd->vcpu);
 	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
 	struct kvm_pgtable *pgt = s2fd->vcpu->arch.hw_mmu->pgt;
 	unsigned long mmu_seq;
 	struct page *page;
 	struct kvm *kvm = s2fd->vcpu->kvm;
-	void *memcache;
+	void *memcache = NULL;
 	kvm_pfn_t pfn;
 	gfn_t gfn;
 	int ret;
 
-	memcache = get_mmu_memcache(s2fd->vcpu);
-	ret = topup_mmu_memcache(s2fd->vcpu, memcache);
-	if (ret)
-		return ret;
+	if (!perm_fault) {
+		memcache = get_mmu_memcache(s2fd->vcpu);
+		ret = topup_mmu_memcache(s2fd->vcpu, memcache);
+		if (ret)
+			return ret;
+	}
 
 	if (s2fd->nested)
 		gfn = kvm_s2_trans_output(s2fd->nested) >> PAGE_SHIFT;
@@ -1631,9 +1634,19 @@ static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
 		goto out_unlock;
 	}
 
-	ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE,
-						 __pfn_to_phys(pfn), prot,
-						 memcache, flags);
+	if (perm_fault) {
+		/*
+		 * Drop the SW bits in favour of those stored in the
+		 * PTE, which will be preserved.
+		 */
+		prot &= ~KVM_NV_GUEST_MAP_SZ;
+		ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, s2fd->fault_ipa,
+								 prot, flags);
+	} else {
+		ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE,
+							 __pfn_to_phys(pfn), prot,
+							 memcache, flags);
+	}
 
 out_unlock:
 	kvm_release_faultin_page(kvm, page, !!ret, prot & KVM_PGTABLE_PROT_W);
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 50b47eba7d01..e7195750d21b 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -105,6 +105,12 @@
 			   ARM_SMCCC_SMC_32,				\
 			   0, 0x3fff)
 
+/* C1-Pro erratum 4193714: SME DVMSync early acknowledgement */
+#define ARM_SMCCC_CPU_WORKAROUND_4193714				\
+	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,				\
+			   ARM_SMCCC_SMC_32,				\
+			   ARM_SMCCC_OWNER_CPU, 0x10)
+
 #define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID				\
 	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,				\
 			   ARM_SMCCC_SMC_32,				\
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index 7df2bc8eec02..76fcdd1fd3cb 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -220,6 +220,8 @@ static void check_steal_time_uapi(void)
 	};
 
 	vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, 1, 0);
+	virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, 1);
 
 	st_ipa = (ulong)ST_GPA_BASE | 1;
 	ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);