From 493b01de726d02e835c510d01df6880fa28d41b7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 22 Apr 2025 13:26:10 +0100 Subject: [PATCH 1/3] KVM: arm64: Fix PAR_EL1.{PTW,S} reporting on AT S1E* When an AT S1E* operation fails, we need to report whether the translation failed at S2, and whether this was during a S1 PTW. But these two bits are not independent. PAR_EL1.PTW can only be set of PAR_EL1.S is also set, and PAR_EL1.S can only be set on its own when the full S1 PTW has succeeded, but that the access itself is reporting a fault at S2. As a result, it makes no sense to carry both ptw and s2 as parameters to fail_s1_walk(), and they should be unified. This fixes a number of cases where we were reporting PTW=1 *and* S=0, which makes no sense. Link: https://lore.kernel.org/r/20250422122612.2675672-2-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index f74a66ce3064..3a4568e2de91 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -60,11 +60,11 @@ struct s1_walk_result { bool failed; }; -static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2) +static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool s1ptw) { wr->fst = fst; - wr->ptw = ptw; - wr->s2 = s2; + wr->ptw = s1ptw; + wr->s2 = s1ptw; wr->failed = true; } @@ -345,11 +345,11 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi, return 0; addrsz: /* Address Size Fault level 0 */ - fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false); + fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false); return -EFAULT; transfault_l0: /* Translation Fault level 0 */ - fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false); + fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false); return -EFAULT; } @@ -380,13 +380,13 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, if (ret) { fail_s1_walk(wr, (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level, - true, true); + true); return ret; } if (!kvm_s2_trans_readable(&s2_trans)) { fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level), - true, true); + true); return -EPERM; } @@ -396,8 +396,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc)); if (ret) { - fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), - true, false); + fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false); return ret; } @@ -468,10 +467,10 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, return 0; addrsz: - fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false); + fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), false); return -EINVAL; transfault: - fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false); + fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), false); return -ENOENT; } @@ -1198,7 +1197,7 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) } if (perm_fail) - fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false); + fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false); compute_par: return compute_par_s1(vcpu, &wr, wi.regime); From ed648ab8043aab3135490d6c01f1c889c4bac62c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 22 Apr 2025 13:26:11 +0100 Subject: [PATCH 2/3] KVM: arm64: Teach address translation about access faults It appears that our S1 PTW is completely oblivious of access faults. Teach the S1 translation code about it. Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20250422122612.2675672-3-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 3a4568e2de91..c40583edebc4 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -456,6 +456,11 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, if (check_output_size(desc & GENMASK(47, va_bottom), wi)) goto addrsz; + if (!(desc & PTE_AF)) { + fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false); + return -EACCES; + } + va_bottom += contiguous_bit_shift(desc, wi, level); wr->failed = false; @@ -1209,7 +1214,8 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) * If the translation is unsuccessful, the value may only contain * PAR_EL1.F, and cannot be taken at face value. It isn't an * indication of the translation having failed, only that the fast - * path did not succeed, *unless* it indicates a S1 permission fault. + * path did not succeed, *unless* it indicates a S1 permission or + * access fault. */ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) { @@ -1312,19 +1318,29 @@ static bool par_check_s1_perm_fault(u64 par) !(par & SYS_PAR_EL1_S)); } +static bool par_check_s1_access_fault(u64 par) +{ + u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par); + + return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS && + !(par & SYS_PAR_EL1_S)); +} + void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) { u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr); /* - * If PAR_EL1 reports that AT failed on a S1 permission fault, we - * know for sure that the PTW was able to walk the S1 tables and - * there's nothing else to do. + * If PAR_EL1 reports that AT failed on a S1 permission or access + * fault, we know for sure that the PTW was able to walk the S1 + * tables and there's nothing else to do. * * If AT failed for any other reason, then we must walk the guest S1 * to emulate the instruction. */ - if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) + if ((par & SYS_PAR_EL1_F) && + !par_check_s1_perm_fault(par) && + !par_check_s1_access_fault(par)) par = handle_at_slow(vcpu, op, vaddr); vcpu_write_sys_reg(vcpu, par, PAR_EL1); From 3e4d597220587593dba505f5a7e932309155c54d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 22 Apr 2025 13:26:12 +0100 Subject: [PATCH 3/3] KVM: arm64: Don't feed uninitialised data to HCR_EL2 When the guest executes an AT S1E{0,1} from EL2, and that its HCR_EL2.{E2H,TGE}=={1,1}, then this is a pure S1 translation that doesn't involve a guest-supplied S2, and the full S1 context is already in place. This allows us to take a shortcut and avoid save/restoring a bunch of registers. However, we set HCR_EL2 to a value suitable for the use of AT in guest context. And we do so by using the value that we saved. Or not. In the case described above, we restore whatever junk was on the stack, and carry on with it until the next entry. Needless to say, this is completely broken. But this also triggers the realisation that saving HCR_EL2 is a bit pointless. We are always in host context at the point where reach this code, and what we program to enter the guest is a known value (vcpu->arch.hcr_el2). Drop the pointless save/restore, and wrap the AT operations with writes that switch between guest and host values for HCR_EL2. Reported-by: D Scott Phillips Link: https://lore.kernel.org/r/20250422122612.2675672-4-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index c40583edebc4..7a5267f43b51 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -492,7 +492,6 @@ struct mmu_config { u64 sctlr; u64 vttbr; u64 vtcr; - u64 hcr; }; static void __mmu_config_save(struct mmu_config *config) @@ -515,13 +514,10 @@ static void __mmu_config_save(struct mmu_config *config) config->sctlr = read_sysreg_el1(SYS_SCTLR); config->vttbr = read_sysreg(vttbr_el2); config->vtcr = read_sysreg(vtcr_el2); - config->hcr = read_sysreg(hcr_el2); } static void __mmu_config_restore(struct mmu_config *config) { - write_sysreg(config->hcr, hcr_el2); - /* * ARM errata 1165522 and 1530923 require TGE to be 1 before * we update the guest state. @@ -1271,8 +1267,8 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) __load_stage2(mmu, mmu->arch); skip_mmu_switch: - /* Clear TGE, enable S2 translation, we're rolling */ - write_sysreg((config.hcr & ~HCR_TGE) | HCR_VM, hcr_el2); + /* Temporarily switch back to guest context */ + write_sysreg(vcpu->arch.hcr_el2, hcr_el2); isb(); switch (op) { @@ -1304,6 +1300,8 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) if (!fail) par = read_sysreg_par(); + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))) __mmu_config_restore(&config);