diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 12adab97e7f2..c762919a2072 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -731,6 +731,8 @@ struct vcpu_reset_state {
 	bool		reset;
 };
 
+struct vncr_tlb;
+
 struct kvm_vcpu_arch {
 	struct kvm_cpu_context ctxt;
 
@@ -825,6 +827,9 @@ struct kvm_vcpu_arch {
 
 	/* Per-vcpu CCSIDR override or NULL */
 	u32 *ccsidr;
+
+	/* Per-vcpu TLB for VNCR_EL2 -- NULL when !NV */
+	struct vncr_tlb	*vncr_tlb;
 };
 
 /*
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 9d56fd946e5e..98b3d6b58966 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -333,4 +333,7 @@ struct s1_walk_result {
 int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
 		       struct s1_walk_result *wr, u64 va);
 
+/* VNCR management */
+int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
+
 #endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 68fec8c95fee..528743587360 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -843,6 +843,10 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 		return ret;
 
 	if (vcpu_has_nv(vcpu)) {
+		ret = kvm_vcpu_allocate_vncr_tlb(vcpu);
+		if (ret)
+			return ret;
+
 		ret = kvm_vgic_vcpu_nv_init(vcpu);
 		if (ret)
 			return ret;
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 0513f1367219..806e9cf6049a 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -16,6 +16,24 @@
 
 #include "sys_regs.h"
 
+struct vncr_tlb {
+	/* The guest's VNCR_EL2 */
+	u64			gva;
+	struct s1_walk_info	wi;
+	struct s1_walk_result	wr;
+
+	u64			hpa;
+
+	/* -1 when not mapped on a CPU */
+	int			cpu;
+
+	/*
+	 * true if the TLB is valid. Can only be changed with the
+	 * mmu_lock held.
+	 */
+	bool			valid;
+};
+
 /*
  * Ratio of live shadow S2 MMU per vcpu. This is a trade-off between
  * memory usage and potential number of different sets of S2 PTs in
@@ -811,6 +829,60 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
 	kvm_uninit_stage2_mmu(kvm);
 }
 
+/*
+ * Dealing with VNCR_EL2 exposed by the *guest* is a complicated matter:
+ *
+ * - We introduce an internal representation of a vcpu-private TLB,
+ *   representing the mapping between the guest VA contained in VNCR_EL2,
+ *   the IPA the guest's EL2 PTs point to, and the actual PA this lives at.
+ *
+ * - On translation fault from a nested VNCR access, we create such a TLB.
+ *   If there is no mapping to describe, the guest inherits the fault.
+ *   Crucially, no actual mapping is done at this stage.
+ *
+ * - On vcpu_load() in a non-HYP context with HCR_EL2.NV==1, if the above
+ *   TLB exists, we map it in the fixmap for this CPU, and run with it. We
+ *   have to respect the permissions dictated by the guest, but not the
+ *   memory type (FWB is a must).
+ *
+ * - Note that we usually don't do a vcpu_load() on the back of a fault
+ *   (unless we are preempted), so the resolution of a translation fault
+ *   must go via a request that will map the VNCR page in the fixmap.
+ *   vcpu_load() might as well use the same mechanism.
+ *
+ * - On vcpu_put() in a non-HYP context with HCR_EL2.NV==1, if the TLB was
+ *   mapped, we unmap it. Yes it is that simple. The TLB still exists
+ *   though, and may be reused at a later load.
+ *
+ * - On permission fault, we simply forward the fault to the guest's EL2.
+ *   Get out of my way.
+ *
+ * - On any TLBI for the EL2&0 translation regime, we must find any TLB that
+ *   intersects with the TLBI request, invalidate it, and unmap the page
+ *   from the fixmap. Because we need to look at all the vcpu-private TLBs,
+ *   this requires some wide-ranging locking to ensure that nothing races
+ *   against it. This may require some refcounting to avoid the search when
+ *   no such TLB is present.
+ *
+ * - On MMU notifiers, we must invalidate our TLB in a similar way, but
+ *   looking at the IPA instead. The funny part is that there may not be a
+ *   stage-2 mapping for this page if L1 hasn't accessed it using LD/ST
+ *   instructions.
+ */
+
+int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu)
+{
+	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY))
+		return 0;
+
+	vcpu->arch.vncr_tlb = kzalloc(sizeof(*vcpu->arch.vncr_tlb),
+				      GFP_KERNEL_ACCOUNT);
+	if (!vcpu->arch.vncr_tlb)
+		return -ENOMEM;
+
+	return 0;
+}
+
 /*
  * Our emulated CPU doesn't support all the possible features. For the
  * sake of simplicity (and probably mental sanity), wipe out a number
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 965e1429b9f6..959532422d3a 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -159,6 +159,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
 	kfree(sve_state);
 	free_page((unsigned long)vcpu->arch.ctxt.vncr_array);
+	kfree(vcpu->arch.vncr_tlb);
 	kfree(vcpu->arch.ccsidr);
 }
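
To make the life cycle described in the nested.c comment block concrete, here is a minimal sketch of the vcpu_load()/vcpu_put() side of the machinery, written against the struct vncr_tlb fields introduced above. It is illustration only, not part of the patch: this patch only adds the data structure and its allocation, and kvm_vcpu_load_vncr(), kvm_vcpu_put_vncr(), vncr_fixmap_map() and vncr_fixmap_unmap() are hypothetical names standing in for whatever helpers the rest of the series provides.

/*
 * Sketch of the load/put flow. Assumes ->cpu was set to -1 after
 * allocation (kzalloc() leaves it 0, which is a valid CPU number).
 */
static void kvm_vcpu_load_vncr(struct kvm_vcpu *vcpu)
{
	struct vncr_tlb *vt = vcpu->arch.vncr_tlb;

	/* Nothing to do until a translation has been cached */
	if (!vt || !vt->valid)
		return;

	/*
	 * Map the host page backing the guest's VNCR_EL2 page into this
	 * CPU's private fixmap slot, and record which CPU holds the
	 * mapping so that a TLBI or MMU notifier can find and undo it.
	 */
	vncr_fixmap_map(vt->hpa);		/* hypothetical helper */
	vt->cpu = smp_processor_id();
}

static void kvm_vcpu_put_vncr(struct kvm_vcpu *vcpu)
{
	struct vncr_tlb *vt = vcpu->arch.vncr_tlb;

	if (!vt || vt->cpu == -1)
		return;

	/* Drop the fixmap mapping; the TLB stays valid for a later load */
	vncr_fixmap_unmap();			/* hypothetical helper */
	vt->cpu = -1;
}

Note how the cpu field doubles as the "is it mapped" state: -1 means no fixmap mapping exists, while any other value names the CPU whose fixmap slot must be torn down when a TLBI or MMU notifier invalidates the entry.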