From d41571c7097a21a5e188b401a3976b563be3fbe4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:09 +0100 Subject: [PATCH 01/36] arm64: Drop SKL0/SKL1 from TCR2_EL2 Despite what the documentation says, TCR2_EL2.{SKL0,SKL1} do not exist, and the corresponding information is in the respective TTBRx_EL2. This is a leftover from a development version of the architecture. This change makes TCR2_EL2 similar to TCR2_EL1 in that respect. Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-2-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/tools/sysreg | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 8d637ac4b7c6..ee3adec6a7c8 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -2819,8 +2819,7 @@ Field 13 AMEC1 Field 12 AMEC0 Field 11 HAFT Field 10 PTTWI -Field 9:8 SKL1 -Field 7:6 SKL0 +Res0 9:6 Field 5 D128 Field 4 AIE Field 3 POE From 5792349d0cce03fa37243a3bbcca78d2338d1f53 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:10 +0100 Subject: [PATCH 02/36] arm64: Remove VNCR definition for PIRE0_EL2 As of the ARM ARM Known Issues document 102105_K.a_04_en, D22677 fixes a problem with the PIRE0_EL2 register, resulting in its removal from the VNCR page (it had no purpose being there the first place). Follow the architecture update by removing this offset. Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-3-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/vncr_mapping.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h index 06f8ec0906a6..4f9bbd4d6c26 100644 --- a/arch/arm64/include/asm/vncr_mapping.h +++ b/arch/arm64/include/asm/vncr_mapping.h @@ -50,7 +50,6 @@ #define VNCR_VBAR_EL1 0x250 #define VNCR_TCR2_EL1 0x270 #define VNCR_PIRE0_EL1 0x290 -#define VNCR_PIRE0_EL2 0x298 #define VNCR_PIR_EL1 0x2A0 #define VNCR_POR_EL1 0x2A8 #define VNCR_ICH_LR0_EL2 0x400 From 4ecda4c67961234e634295334fe69aea574c8e61 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:11 +0100 Subject: [PATCH 03/36] arm64: Add encoding for PIRE0_EL2 PIRE0_EL2 is the equivalent of PIRE0_EL1 for the EL2&0 translation regime, and it is sorely missing from the sysreg file. Add the sucker. Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20241023145345.1613824-4-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/tools/sysreg | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index ee3adec6a7c8..3c812fd28eca 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -2882,6 +2882,10 @@ Sysreg PIRE0_EL12 3 5 10 2 2 Fields PIRx_ELx EndSysreg +Sysreg PIRE0_EL2 3 4 10 2 2 +Fields PIRx_ELx +EndSysreg + Sysreg PIR_EL1 3 0 10 2 3 Fields PIRx_ELx EndSysreg From a5c870d0939b7a878edacb70831b8b32cbfed593 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:12 +0100 Subject: [PATCH 04/36] KVM: arm64: Drop useless struct s2_mmu in __kvm_at_s1e2() __kvm_at_s1e2() contains the definition of an s2_mmu for the current context, but doesn't make any use of it. Drop it. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-5-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 39f0e87a340e..f04677127fbc 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -992,12 +992,9 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) * switching context behind everybody's back, disable interrupts... */ scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) { - struct kvm_s2_mmu *mmu; u64 val, hcr; bool fail; - mmu = &vcpu->kvm->arch.mmu; - val = hcr = read_sysreg(hcr_el2); val &= ~HCR_TGE; val |= HCR_VM; From dfeb91686992f5a973d09b66ddedf458de1acf08 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:13 +0100 Subject: [PATCH 05/36] KVM: arm64: nv: Add missing EL2->EL1 mappings in get_el2_to_el1_mapping() As KVM has grown a bunch of new system register for NV, it appears that we are missing them in the get_el2_to_el1_mapping() list. Most of them are not crucial as they don't tend to be accessed via vcpu_read_sys_reg() and vcpu_write_sys_reg(). Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-6-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index ff8c4e1b847e..ae0ecd8ae671 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -110,6 +110,14 @@ static bool get_el2_to_el1_mapping(unsigned int reg, PURE_EL2_SYSREG( RVBAR_EL2 ); PURE_EL2_SYSREG( TPIDR_EL2 ); PURE_EL2_SYSREG( HPFAR_EL2 ); + PURE_EL2_SYSREG( HCRX_EL2 ); + PURE_EL2_SYSREG( HFGRTR_EL2 ); + PURE_EL2_SYSREG( HFGWTR_EL2 ); + PURE_EL2_SYSREG( HFGITR_EL2 ); + PURE_EL2_SYSREG( HDFGRTR_EL2 ); + PURE_EL2_SYSREG( HDFGWTR_EL2 ); + PURE_EL2_SYSREG( HAFGRTR_EL2 ); + PURE_EL2_SYSREG( CNTVOFF_EL2 ); PURE_EL2_SYSREG( CNTHCTL_EL2 ); MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1, translate_sctlr_el2_to_sctlr_el1 ); @@ -130,6 +138,7 @@ static bool get_el2_to_el1_mapping(unsigned int reg, MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL ); MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL ); MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL ); + MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL ); default: return false; } From 164b5e20cdf6038f1b38867d2f6252ec6f10c356 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:14 +0100 Subject: [PATCH 06/36] KVM: arm64: nv: Handle CNTHCTL_EL2 specially Accessing CNTHCTL_EL2 is fraught with danger if running with HCR_EL2.E2H=1: half of the bits are held in CNTKCTL_EL1, and thus can be changed behind our back, while the rest lives in the CNTHCTL_EL2 shadow copy that is memory-based. Yes, this is a lot of fun! Make sure that we merge the two on read access, while we can write to CNTKCTL_EL1 in a more straightforward manner. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-7-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 28 ++++++++++++++++++++++++++++ include/kvm/arm_arch_timer.h | 3 +++ 2 files changed, 31 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index ae0ecd8ae671..9fb2847f5abc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -157,6 +157,21 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) if (!is_hyp_ctxt(vcpu)) goto memory_read; + /* + * CNTHCTL_EL2 requires some special treatment to + * account for the bits that can be set via CNTKCTL_EL1. + */ + switch (reg) { + case CNTHCTL_EL2: + if (vcpu_el2_e2h_is_set(vcpu)) { + val = read_sysreg_el1(SYS_CNTKCTL); + val &= CNTKCTL_VALID_BITS; + val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS; + return val; + } + break; + } + /* * If this register does not have an EL1 counterpart, * then read the stored EL2 version. @@ -207,6 +222,19 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) */ __vcpu_sys_reg(vcpu, reg) = val; + switch (reg) { + case CNTHCTL_EL2: + /* + * If E2H=0, CNHTCTL_EL2 is a pure shadow register. + * Otherwise, some of the bits are backed by + * CNTKCTL_EL1, while the rest is kept in memory. + * Yes, this is fun stuff. + */ + if (vcpu_el2_e2h_is_set(vcpu)) + write_sysreg_el1(val, SYS_CNTKCTL); + return; + } + /* No EL1 counterpart? We're done here.? */ if (reg == el1r) return; diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index c819c5d16613..fd650a8789b9 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -147,6 +147,9 @@ u64 timer_get_cval(struct arch_timer_context *ctxt); void kvm_timer_cpu_up(void); void kvm_timer_cpu_down(void); +/* CNTKCTL_EL1 valid bits as of DDI0487J.a */ +#define CNTKCTL_VALID_BITS (BIT(17) | GENMASK_ULL(9, 0)) + static inline bool has_cntpoff(void) { return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); From b9527b38c66730061c245e353dab42ef7dda33c6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:15 +0100 Subject: [PATCH 07/36] KVM: arm64: nv: Save/Restore vEL2 sysregs Whenever we need to restore the guest's system registers to the CPU, we now need to take care of the EL2 system registers as well. Most of them are accessed via traps only, but some have an immediate effect and also a guest running in VHE mode would expect them to be accessible via their EL1 encoding, which we do not trap. For vEL2 we write the virtual EL2 registers with an identical format directly into their EL1 counterpart, and translate the few registers that have a different format for the same effect on the execution when running a non-VHE guest guest hypervisor. Based on an initial patch from Andre Przywara, rewritten many times since. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-8-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 5 +- arch/arm64/kvm/hyp/nvhe/sysreg-sr.c | 2 +- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 136 ++++++++++++++++++++- 3 files changed, 138 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 1579a3c08a36..d67628d01bf5 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -152,9 +152,10 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); } -static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) +static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt, + u64 mpidr) { - write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2); + write_sysreg(mpidr, vmpidr_el2); if (has_vhe() || !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c index 29305022bc04..dba101565de3 100644 --- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c @@ -28,7 +28,7 @@ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) { - __sysreg_restore_el1_state(ctxt); + __sysreg_restore_el1_state(ctxt, ctxt_sys_reg(ctxt, MPIDR_EL1)); __sysreg_restore_common_state(ctxt); __sysreg_restore_user_state(ctxt); __sysreg_restore_el2_return_state(ctxt); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index e12bd7d6d2dc..922aac39b021 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -15,6 +15,107 @@ #include #include +static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) +{ + /* These registers are common with EL1 */ + __vcpu_sys_reg(vcpu, PAR_EL1) = read_sysreg(par_el1); + __vcpu_sys_reg(vcpu, TPIDR_EL1) = read_sysreg(tpidr_el1); + + __vcpu_sys_reg(vcpu, ESR_EL2) = read_sysreg_el1(SYS_ESR); + __vcpu_sys_reg(vcpu, AFSR0_EL2) = read_sysreg_el1(SYS_AFSR0); + __vcpu_sys_reg(vcpu, AFSR1_EL2) = read_sysreg_el1(SYS_AFSR1); + __vcpu_sys_reg(vcpu, FAR_EL2) = read_sysreg_el1(SYS_FAR); + __vcpu_sys_reg(vcpu, MAIR_EL2) = read_sysreg_el1(SYS_MAIR); + __vcpu_sys_reg(vcpu, VBAR_EL2) = read_sysreg_el1(SYS_VBAR); + __vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR); + __vcpu_sys_reg(vcpu, AMAIR_EL2) = read_sysreg_el1(SYS_AMAIR); + + /* + * In VHE mode those registers are compatible between EL1 and EL2, + * and the guest uses the _EL1 versions on the CPU naturally. + * So we save them into their _EL2 versions here. + * For nVHE mode we trap accesses to those registers, so our + * _EL2 copy in sys_regs[] is always up-to-date and we don't need + * to save anything here. + */ + if (vcpu_el2_e2h_is_set(vcpu)) { + u64 val; + + /* + * We don't save CPTR_EL2, as accesses to CPACR_EL1 + * are always trapped, ensuring that the in-memory + * copy is always up-to-date. A small blessing... + */ + __vcpu_sys_reg(vcpu, SCTLR_EL2) = read_sysreg_el1(SYS_SCTLR); + __vcpu_sys_reg(vcpu, TTBR0_EL2) = read_sysreg_el1(SYS_TTBR0); + __vcpu_sys_reg(vcpu, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1); + __vcpu_sys_reg(vcpu, TCR_EL2) = read_sysreg_el1(SYS_TCR); + + /* + * The EL1 view of CNTKCTL_EL1 has a bunch of RES0 bits where + * the interesting CNTHCTL_EL2 bits live. So preserve these + * bits when reading back the guest-visible value. + */ + val = read_sysreg_el1(SYS_CNTKCTL); + val &= CNTKCTL_VALID_BITS; + __vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS; + __vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val; + } + + __vcpu_sys_reg(vcpu, SP_EL2) = read_sysreg(sp_el1); + __vcpu_sys_reg(vcpu, ELR_EL2) = read_sysreg_el1(SYS_ELR); + __vcpu_sys_reg(vcpu, SPSR_EL2) = read_sysreg_el1(SYS_SPSR); +} + +static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) +{ + u64 val; + + /* These registers are common with EL1 */ + write_sysreg(__vcpu_sys_reg(vcpu, PAR_EL1), par_el1); + write_sysreg(__vcpu_sys_reg(vcpu, TPIDR_EL1), tpidr_el1); + + write_sysreg(__vcpu_sys_reg(vcpu, MPIDR_EL1), vmpidr_el2); + write_sysreg_el1(__vcpu_sys_reg(vcpu, MAIR_EL2), SYS_MAIR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, VBAR_EL2), SYS_VBAR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, CONTEXTIDR_EL2), SYS_CONTEXTIDR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, AMAIR_EL2), SYS_AMAIR); + + if (vcpu_el2_e2h_is_set(vcpu)) { + /* + * In VHE mode those registers are compatible between + * EL1 and EL2. + */ + write_sysreg_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2), SYS_SCTLR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, CPTR_EL2), SYS_CPACR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2), SYS_TTBR0); + write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR1_EL2), SYS_TTBR1); + write_sysreg_el1(__vcpu_sys_reg(vcpu, TCR_EL2), SYS_TCR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, CNTHCTL_EL2), SYS_CNTKCTL); + } else { + /* + * CNTHCTL_EL2 only affects EL1 when running nVHE, so + * no need to restore it. + */ + val = translate_sctlr_el2_to_sctlr_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2)); + write_sysreg_el1(val, SYS_SCTLR); + val = translate_cptr_el2_to_cpacr_el1(__vcpu_sys_reg(vcpu, CPTR_EL2)); + write_sysreg_el1(val, SYS_CPACR); + val = translate_ttbr0_el2_to_ttbr0_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2)); + write_sysreg_el1(val, SYS_TTBR0); + val = translate_tcr_el2_to_tcr_el1(__vcpu_sys_reg(vcpu, TCR_EL2)); + write_sysreg_el1(val, SYS_TCR); + } + + write_sysreg_el1(__vcpu_sys_reg(vcpu, ESR_EL2), SYS_ESR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, AFSR0_EL2), SYS_AFSR0); + write_sysreg_el1(__vcpu_sys_reg(vcpu, AFSR1_EL2), SYS_AFSR1); + write_sysreg_el1(__vcpu_sys_reg(vcpu, FAR_EL2), SYS_FAR); + write_sysreg(__vcpu_sys_reg(vcpu, SP_EL2), sp_el1); + write_sysreg_el1(__vcpu_sys_reg(vcpu, ELR_EL2), SYS_ELR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, SPSR_EL2), SYS_SPSR); +} + /* * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and * pstate, which are handled as part of the el2 return state) on every @@ -66,6 +167,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; + u64 mpidr; host_ctxt = host_data_ptr(host_ctxt); __sysreg_save_user_state(host_ctxt); @@ -89,7 +191,29 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu) */ __sysreg32_restore_state(vcpu); __sysreg_restore_user_state(guest_ctxt); - __sysreg_restore_el1_state(guest_ctxt); + + if (unlikely(__is_hyp_ctxt(guest_ctxt))) { + __sysreg_restore_vel2_state(vcpu); + } else { + if (vcpu_has_nv(vcpu)) { + /* + * Use the guest hypervisor's VPIDR_EL2 when in a + * nested state. The hardware value of MIDR_EL1 gets + * restored on put. + */ + write_sysreg(ctxt_sys_reg(guest_ctxt, VPIDR_EL2), vpidr_el2); + + /* + * As we're restoring a nested guest, set the value + * provided by the guest hypervisor. + */ + mpidr = ctxt_sys_reg(guest_ctxt, VMPIDR_EL2); + } else { + mpidr = ctxt_sys_reg(guest_ctxt, MPIDR_EL1); + } + + __sysreg_restore_el1_state(guest_ctxt, mpidr); + } vcpu_set_flag(vcpu, SYSREGS_ON_CPU); } @@ -112,12 +236,20 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu) host_ctxt = host_data_ptr(host_ctxt); - __sysreg_save_el1_state(guest_ctxt); + if (unlikely(__is_hyp_ctxt(guest_ctxt))) + __sysreg_save_vel2_state(vcpu); + else + __sysreg_save_el1_state(guest_ctxt); + __sysreg_save_user_state(guest_ctxt); __sysreg32_save_state(vcpu); /* Restore host user state */ __sysreg_restore_user_state(host_ctxt); + /* If leaving a nesting guest, restore MIDR_EL1 default view */ + if (vcpu_has_nv(vcpu)) + write_sysreg(read_cpuid_id(), vpidr_el2); + vcpu_clear_flag(vcpu, SYSREGS_ON_CPU); } From 14ca930d828b6bd0538a0c7e101b52319ae7ad35 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:16 +0100 Subject: [PATCH 08/36] KVM: arm64: Correctly access TCR2_EL1, PIR_EL1, PIRE0_EL1 with VHE For code that accesses any of the guest registers for emulation purposes, it is crucial to know where the most up-to-date data is. While this is pretty clear for nVHE (memory is the sole repository), things are a lot muddier for VHE, as depending on the SYSREGS_ON_CPU flag, registers can either be loaded on the HW or be in memory. Even worse with NV, where the loaded state is by definition partial. For these reasons, KVM offers the vcpu_read_sys_reg() and vcpu_write_sys_reg() primitives that always do the right thing. However, these primitive must know what register to access, and this is the role of the __vcpu_read_sys_reg_from_cpu() and __vcpu_write_sys_reg_to_cpu() helpers. As it turns out, TCR2_EL1, PIR_EL1, PIRE0_EL1 and not described in the latter helpers, meaning that the AT code cannot use them to emulate S1PIE. Add the three registers to the (long) list. Fixes: 86f9de9db178 ("KVM: arm64: Save/restore PIE registers") Signed-off-by: Marc Zyngier Cc: Joey Gouly Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20241023145345.1613824-9-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bf64fed9820e..b89f19b4195d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1037,6 +1037,9 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; + case TCR2_EL1: *val = read_sysreg_s(SYS_TCR2_EL12); break; + case PIR_EL1: *val = read_sysreg_s(SYS_PIR_EL12); break; + case PIRE0_EL1: *val = read_sysreg_s(SYS_PIRE0_EL12); break; case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; @@ -1083,6 +1086,9 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; + case TCR2_EL1: write_sysreg_s(val, SYS_TCR2_EL12); break; + case PIR_EL1: write_sysreg_s(val, SYS_PIR_EL12); break; + case PIRE0_EL1: write_sysreg_s(val, SYS_PIRE0_EL12); break; case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; From a0162020095e2a34a59fc64c07183cc039e759f6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:17 +0100 Subject: [PATCH 09/36] KVM: arm64: Extend masking facility to arbitrary registers We currently only use the masking (RES0/RES1) facility for VNCR registers, as they are memory-based and thus easy to sanitise. But we could apply the same thing to other registers if we: - split the sanitisation from __VNCR_START__ - apply the sanitisation when reading from a HW register This involves a new "marker" in the vcpu_sysreg enum, which defines the point at which the sanitisation applies (the VNCR registers being of course after this marker). Whle we are at it, rename kvm_vcpu_sanitise_vncr_reg() to kvm_vcpu_apply_reg_masks(), which is vaguely more explicit, and harden set_sysreg_masks() against setting masks for random registers... Signed-off-by: Marc Zyngier Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20241023145345.1613824-10-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 19 +++++++++++++------ arch/arm64/kvm/nested.c | 12 ++++++++---- arch/arm64/kvm/sys_regs.c | 3 +++ 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b89f19b4195d..c753ab31dcea 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -374,7 +374,7 @@ struct kvm_arch { u64 ctr_el0; - /* Masks for VNCR-baked sysregs */ + /* Masks for VNCR-backed and general EL2 sysregs */ struct kvm_sysreg_masks *sysreg_masks; /* @@ -408,6 +408,9 @@ struct kvm_vcpu_fault_info { r = __VNCR_START__ + ((VNCR_ ## r) / 8), \ __after_##r = __MAX__(__before_##r - 1, r) +#define MARKER(m) \ + m, __after_##m = m - 1 + enum vcpu_sysreg { __INVALID_SYSREG__, /* 0 is reserved as an invalid value */ MPIDR_EL1, /* MultiProcessor Affinity Register */ @@ -494,7 +497,11 @@ enum vcpu_sysreg { CNTHV_CTL_EL2, CNTHV_CVAL_EL2, - __VNCR_START__, /* Any VNCR-capable reg goes after this point */ + /* Anything from this can be RES0/RES1 sanitised */ + MARKER(__SANITISED_REG_START__), + + /* Any VNCR-capable reg goes after this point */ + MARKER(__VNCR_START__), VNCR(SCTLR_EL1),/* System Control Register */ VNCR(ACTLR_EL1),/* Auxiliary Control Register */ @@ -554,7 +561,7 @@ struct kvm_sysreg_masks { struct { u64 res0; u64 res1; - } mask[NR_SYS_REGS - __VNCR_START__]; + } mask[NR_SYS_REGS - __SANITISED_REG_START__]; }; struct kvm_cpu_context { @@ -1002,13 +1009,13 @@ static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) #define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) -u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *, enum vcpu_sysreg); +u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64); #define __vcpu_sys_reg(v,r) \ (*({ \ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ u64 *__r = __ctxt_sys_reg(ctxt, (r)); \ - if (vcpu_has_nv((v)) && (r) >= __VNCR_START__) \ - *__r = kvm_vcpu_sanitise_vncr_reg((v), (r)); \ + if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ + *__r = kvm_vcpu_apply_reg_masks((v), (r), *__r);\ __r; \ })) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index c4b17d90fc49..26103d6514bd 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -933,15 +933,15 @@ static void limit_nv_id_regs(struct kvm *kvm) kvm_set_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1, val); } -u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg sr) +u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *vcpu, + enum vcpu_sysreg sr, u64 v) { - u64 v = ctxt_sys_reg(&vcpu->arch.ctxt, sr); struct kvm_sysreg_masks *masks; masks = vcpu->kvm->arch.sysreg_masks; if (masks) { - sr -= __VNCR_START__; + sr -= __SANITISED_REG_START__; v &= ~masks->mask[sr].res0; v |= masks->mask[sr].res1; @@ -952,7 +952,11 @@ u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg sr) static void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1) { - int i = sr - __VNCR_START__; + int i = sr - __SANITISED_REG_START__; + + BUILD_BUG_ON(!__builtin_constant_p(sr)); + BUILD_BUG_ON(sr < __SANITISED_REG_START__); + BUILD_BUG_ON(sr >= NR_SYS_REGS); kvm->arch.sysreg_masks->mask[i].res0 = res0; kvm->arch.sysreg_masks->mask[i].res1 = res1; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 9fb2847f5abc..9115a75b2c61 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -189,6 +189,9 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) /* Get the current version of the EL1 counterpart. */ WARN_ON(!__vcpu_read_sys_reg_from_cpu(el1r, &val)); + if (reg >= __SANITISED_REG_START__) + val = kvm_vcpu_apply_reg_masks(vcpu, reg, val); + return val; } From 9ae424d2a1ae144c83ffd7c9e2f408ae5acfb634 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:18 +0100 Subject: [PATCH 10/36] arm64: Define ID_AA64MMFR1_EL1.HAFDBS advertising FEAT_HAFT This definition is missing, and we are going to need it to sanitise TCR2_ELx. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-11-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/tools/sysreg | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 3c812fd28eca..8db4431093b2 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1688,6 +1688,7 @@ UnsignedEnum 3:0 HAFDBS 0b0000 NI 0b0001 AF 0b0010 DBM + 0b0011 HAFT EndEnum EndSysreg From 69c19e047dfee63f3e5b06b4ad288bbad32fe8f0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:19 +0100 Subject: [PATCH 11/36] KVM: arm64: Add TCR2_EL2 to the sysreg arrays Add the TCR2_EL2 register to the per-vcpu sysreg register array, the sysreg descriptor array, and advertise it as mapped to TCR2_EL1 for NV purposes. Access to this register is conditional based on ID_AA64MMFR3_EL1.TCRX being advertised. Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-12-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/sys_regs.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c753ab31dcea..b41de132fa4e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -499,6 +499,7 @@ enum vcpu_sysreg { /* Anything from this can be RES0/RES1 sanitised */ MARKER(__SANITISED_REG_START__), + TCR2_EL2, /* Extended Translation Control Register (EL2) */ /* Any VNCR-capable reg goes after this point */ MARKER(__VNCR_START__), diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 9115a75b2c61..a435b78606c0 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -134,6 +134,7 @@ static bool get_el2_to_el1_mapping(unsigned int reg, MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1, NULL ); MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1, NULL ); MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1, NULL ); + MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1, NULL ); MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL ); MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL ); MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL ); @@ -452,6 +453,18 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, return true; } +static bool access_tcr2_el2(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, TCRX, IMP)) { + kvm_inject_undefined(vcpu); + return false; + } + + return access_rw(vcpu, p, r); +} + static bool access_actlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -2866,6 +2879,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(TTBR0_EL2, access_rw, reset_val, 0), EL2_REG(TTBR1_EL2, access_rw, reset_val, 0), EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1), + EL2_REG(TCR2_EL2, access_tcr2_el2, reset_val, TCR2_EL2_RES1), EL2_REG_VNCR(VTTBR_EL2, reset_val, 0), EL2_REG_VNCR(VTCR_EL2, reset_val, 0), From ad4f6ef0fa19d0418e4087fd6783679c3fdfa888 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:20 +0100 Subject: [PATCH 12/36] KVM: arm64: Sanitise TCR2_EL2 TCR2_EL2 is a bag of control bits, all of which are only valid if certain features are present, and RES0 otherwise. Describe these constraints and register them with the masking infrastructure. Signed-off-by: Marc Zyngier Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20241023145345.1613824-13-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 26103d6514bd..1c8a6aa907df 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1204,6 +1204,28 @@ int kvm_init_nv_sysregs(struct kvm *kvm) res0 |= ~(res0 | res1); set_sysreg_masks(kvm, HAFGRTR_EL2, res0, res1); + /* TCR2_EL2 */ + res0 = TCR2_EL2_RES0; + res1 = TCR2_EL2_RES1; + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, D128, IMP)) + res0 |= (TCR2_EL2_DisCH0 | TCR2_EL2_DisCH1 | TCR2_EL2_D128); + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, MEC, IMP)) + res0 |= TCR2_EL2_AMEC1 | TCR2_EL2_AMEC0; + if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, HAFDBS, HAFT)) + res0 |= TCR2_EL2_HAFT; + if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP)) + res0 |= TCR2_EL2_PTTWI | TCR2_EL2_PnCH; + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, AIE, IMP)) + res0 |= TCR2_EL2_AIE; + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP)) + res0 |= TCR2_EL2_POE | TCR2_EL2_E0POE; + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) + res0 |= TCR2_EL2_PIE; + if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, VH, IMP)) + res0 |= (TCR2_EL2_E0POE | TCR2_EL2_D128 | + TCR2_EL2_AMEC1 | TCR2_EL2_DisCH0 | TCR2_EL2_DisCH1); + set_sysreg_masks(kvm, TCR2_EL2, res0, res1); + /* SCTLR_EL1 */ res0 = SCTLR_EL1_RES0; res1 = SCTLR_EL1_RES1; From 5055938452ede673baf13bda4fe84d8fac2bee76 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:21 +0100 Subject: [PATCH 13/36] KVM: arm64: Add save/restore for TCR2_EL2 Like its EL1 equivalent, TCR2_EL2 gets context-switched. This is made conditional on FEAT_TCRX being adversised. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-14-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 922aac39b021..cdbf52bfc483 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -51,6 +51,9 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) __vcpu_sys_reg(vcpu, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1); __vcpu_sys_reg(vcpu, TCR_EL2) = read_sysreg_el1(SYS_TCR); + if (ctxt_has_tcrx(&vcpu->arch.ctxt)) + __vcpu_sys_reg(vcpu, TCR2_EL2) = read_sysreg_el1(SYS_TCR2); + /* * The EL1 view of CNTKCTL_EL1 has a bunch of RES0 bits where * the interesting CNTHCTL_EL2 bits live. So preserve these @@ -107,6 +110,10 @@ static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) write_sysreg_el1(val, SYS_TCR); } + if (ctxt_has_tcrx(&vcpu->arch.ctxt)) + write_sysreg_el1(__vcpu_sys_reg(vcpu, TCR2_EL2), SYS_TCR2); + + write_sysreg_el1(__vcpu_sys_reg(vcpu, ESR_EL2), SYS_ESR); write_sysreg_el1(__vcpu_sys_reg(vcpu, AFSR0_EL2), SYS_AFSR0); write_sysreg_el1(__vcpu_sys_reg(vcpu, AFSR1_EL2), SYS_AFSR1); From 5f8d5a15ef5a6ebf2c568101c20d4880a970a874 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:22 +0100 Subject: [PATCH 14/36] KVM: arm64: Add PIR{,E0}_EL2 to the sysreg arrays Add the FEAT_S1PIE EL2 registers to the per-vcpu sysreg register array. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-15-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/kvm/sys_regs.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b41de132fa4e..553fb07f2153 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -478,6 +478,8 @@ enum vcpu_sysreg { TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */ TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */ TCR_EL2, /* Translation Control Register (EL2) */ + PIRE0_EL2, /* Permission Indirection Register 0 (EL2) */ + PIR_EL2, /* Permission Indirection Register 1 (EL2) */ SPSR_EL2, /* EL2 saved program status register */ ELR_EL2, /* EL2 exception link register */ AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a435b78606c0..9cbcd69d45fc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -135,6 +135,8 @@ static bool get_el2_to_el1_mapping(unsigned int reg, MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1, NULL ); MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1, NULL ); MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1, NULL ); + MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1, NULL ); + MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1, NULL ); MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL ); MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL ); MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL ); From b3ad940a088761fd183dccd65c6ee20b360e8c4b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:23 +0100 Subject: [PATCH 15/36] KVM: arm64: Add save/restore for PIR{,E0}_EL2 Like their EL1 equivalent, the EL2-specific FEAT_S1PIE registers are context-switched. This is made conditional on both FEAT_TCRX and FEAT_S1PIE being adversised. Note that this change only makes sense if read together with the issue D22677 contained in 102105_K.a_04_en. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-16-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index cdbf52bfc483..a603966726f6 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -51,9 +51,15 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) __vcpu_sys_reg(vcpu, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1); __vcpu_sys_reg(vcpu, TCR_EL2) = read_sysreg_el1(SYS_TCR); - if (ctxt_has_tcrx(&vcpu->arch.ctxt)) + if (ctxt_has_tcrx(&vcpu->arch.ctxt)) { __vcpu_sys_reg(vcpu, TCR2_EL2) = read_sysreg_el1(SYS_TCR2); + if (ctxt_has_s1pie(&vcpu->arch.ctxt)) { + __vcpu_sys_reg(vcpu, PIRE0_EL2) = read_sysreg_el1(SYS_PIRE0); + __vcpu_sys_reg(vcpu, PIR_EL2) = read_sysreg_el1(SYS_PIR); + } + } + /* * The EL1 view of CNTKCTL_EL1 has a bunch of RES0 bits where * the interesting CNTHCTL_EL2 bits live. So preserve these @@ -110,9 +116,14 @@ static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) write_sysreg_el1(val, SYS_TCR); } - if (ctxt_has_tcrx(&vcpu->arch.ctxt)) + if (ctxt_has_tcrx(&vcpu->arch.ctxt)) { write_sysreg_el1(__vcpu_sys_reg(vcpu, TCR2_EL2), SYS_TCR2); + if (ctxt_has_s1pie(&vcpu->arch.ctxt)) { + write_sysreg_el1(__vcpu_sys_reg(vcpu, PIR_EL2), SYS_PIR); + write_sysreg_el1(__vcpu_sys_reg(vcpu, PIRE0_EL2), SYS_PIRE0); + } + } write_sysreg_el1(__vcpu_sys_reg(vcpu, ESR_EL2), SYS_ESR); write_sysreg_el1(__vcpu_sys_reg(vcpu, AFSR0_EL2), SYS_AFSR0); From 874ae1d48e607d41ae08fa72a9ed76cb62651085 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:24 +0100 Subject: [PATCH 16/36] KVM: arm64: Handle PIR{,E0}_EL2 traps Add the FEAT_S1PIE EL2 registers the sysreg descriptor array so that they can be handled as a trap. Access to these registers is conditional based on ID_AA64MMFR3_EL1.S1PIE being advertised. Similarly to other other changes, PIRE0_EL2 is guaranteed to trap thanks to the D22677 update to the architecture. Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-17-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 9cbcd69d45fc..2d30266a4c65 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -369,6 +369,18 @@ static bool access_rw(struct kvm_vcpu *vcpu, return true; } +static bool check_s1pie_access_rw(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) { + kvm_inject_undefined(vcpu); + return false; + } + + return access_rw(vcpu, p, r); +} + /* * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). */ @@ -2909,6 +2921,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(HPFAR_EL2, access_rw, reset_val, 0), EL2_REG(MAIR_EL2, access_rw, reset_val, 0), + EL2_REG(PIRE0_EL2, check_s1pie_access_rw, reset_val, 0), + EL2_REG(PIR_EL2, check_s1pie_access_rw, reset_val, 0), EL2_REG(AMAIR_EL2, access_rw, reset_val, 0), EL2_REG(VBAR_EL2, access_rw, reset_val, 0), From 23e7a34c8397d1ecff430b3500ad5c8bdea10a5c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:26 +0100 Subject: [PATCH 17/36] KVM: arm64: Add AT fast-path support for S1PIE Emulating AT using AT instructions requires that the live state matches the translation regime the AT instruction targets. If targeting the EL1&0 translation regime and that S1PIE is supported, we also need to restore that state (covering TCR2_EL1, PIR_EL1, and PIRE0_EL1). Add the required system register switcheroo. Signed-off-by: Marc Zyngier Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20241023145345.1613824-19-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index f04677127fbc..b9d0992e9197 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -412,6 +412,9 @@ struct mmu_config { u64 ttbr1; u64 tcr; u64 mair; + u64 tcr2; + u64 pir; + u64 pire0; u64 sctlr; u64 vttbr; u64 vtcr; @@ -424,6 +427,13 @@ static void __mmu_config_save(struct mmu_config *config) config->ttbr1 = read_sysreg_el1(SYS_TTBR1); config->tcr = read_sysreg_el1(SYS_TCR); config->mair = read_sysreg_el1(SYS_MAIR); + if (cpus_have_final_cap(ARM64_HAS_TCR2)) { + config->tcr2 = read_sysreg_el1(SYS_TCR2); + if (cpus_have_final_cap(ARM64_HAS_S1PIE)) { + config->pir = read_sysreg_el1(SYS_PIR); + config->pire0 = read_sysreg_el1(SYS_PIRE0); + } + } config->sctlr = read_sysreg_el1(SYS_SCTLR); config->vttbr = read_sysreg(vttbr_el2); config->vtcr = read_sysreg(vtcr_el2); @@ -444,6 +454,13 @@ static void __mmu_config_restore(struct mmu_config *config) write_sysreg_el1(config->ttbr1, SYS_TTBR1); write_sysreg_el1(config->tcr, SYS_TCR); write_sysreg_el1(config->mair, SYS_MAIR); + if (cpus_have_final_cap(ARM64_HAS_TCR2)) { + write_sysreg_el1(config->tcr2, SYS_TCR2); + if (cpus_have_final_cap(ARM64_HAS_S1PIE)) { + write_sysreg_el1(config->pir, SYS_PIR); + write_sysreg_el1(config->pire0, SYS_PIRE0); + } + } write_sysreg_el1(config->sctlr, SYS_SCTLR); write_sysreg(config->vttbr, vttbr_el2); write_sysreg(config->vtcr, vtcr_el2); @@ -914,6 +931,13 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1); write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR); write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR); + if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, TCRX, IMP)) { + write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2); + if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) { + write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR); + write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0); + } + } write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR); __load_stage2(mmu, mmu->arch); From 4967b87a9ff7cb19bd85dd985616e08d0f08b07b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:27 +0100 Subject: [PATCH 18/36] KVM: arm64: Split S1 permission evaluation into direct and hierarchical parts The AArch64.S1DirectBasePermissions() pseudocode deals with both direct and hierarchical S1 permission evaluation. While this is probably convenient in the pseudocode, we would like a bit more flexibility to slot things like indirect permissions. To that effect, split the two permission check parts out of handle_at_slow() and into their own functions. The permissions are passed around as part of the walk_result structure. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-20-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 174 ++++++++++++++++++++++++++------------------ 1 file changed, 104 insertions(+), 70 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index b9d0992e9197..adcfce3f67f0 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -37,6 +37,12 @@ struct s1_walk_result { u8 APTable; bool UXNTable; bool PXNTable; + bool ur; + bool uw; + bool ux; + bool pr; + bool pw; + bool px; }; struct { u8 fst; @@ -764,11 +770,103 @@ static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime) return sctlr & SCTLR_EL1_EPAN; } +static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu, + struct s1_walk_info *wi, + struct s1_walk_result *wr) +{ + /* Non-hierarchical part of AArch64.S1DirectBasePermissions() */ + if (wi->regime != TR_EL2) { + switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) { + case 0b00: + wr->pr = wr->pw = true; + wr->ur = wr->uw = false; + break; + case 0b01: + wr->pr = wr->pw = wr->ur = wr->uw = true; + break; + case 0b10: + wr->pr = true; + wr->pw = wr->ur = wr->uw = false; + break; + case 0b11: + wr->pr = wr->ur = true; + wr->pw = wr->uw = false; + break; + } + + /* We don't use px for anything yet, but hey... */ + wr->px = !((wr->desc & PTE_PXN) || wr->uw); + wr->ux = !(wr->desc & PTE_UXN); + } else { + wr->ur = wr->uw = wr->ux = false; + + if (!(wr->desc & PTE_RDONLY)) { + wr->pr = wr->pw = true; + } else { + wr->pr = true; + wr->pw = false; + } + + /* XN maps to UXN */ + wr->px = !(wr->desc & PTE_UXN); + } +} + +static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu, + struct s1_walk_info *wi, + struct s1_walk_result *wr) +{ + /* Hierarchical part of AArch64.S1DirectBasePermissions() */ + if (wi->regime != TR_EL2) { + switch (wr->APTable) { + case 0b00: + break; + case 0b01: + wr->ur = wr->uw = false; + break; + case 0b10: + wr->pw = wr->uw = false; + break; + case 0b11: + wr->pw = wr->ur = wr->uw = false; + break; + } + + wr->px &= !wr->PXNTable; + wr->ux &= !wr->UXNTable; + } else { + if (wr->APTable & BIT(1)) + wr->pw = false; + + /* XN maps to UXN */ + wr->px &= !wr->UXNTable; + } +} + +static void compute_s1_permissions(struct kvm_vcpu *vcpu, u32 op, + struct s1_walk_info *wi, + struct s1_walk_result *wr) +{ + compute_s1_direct_permissions(vcpu, wi, wr); + + if (!wi->hpd) + compute_s1_hierarchical_permissions(vcpu, wi, wr); + + if (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) { + bool pan; + + pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT; + pan &= wr->ur || wr->uw || (pan3_enabled(vcpu, wi->regime) && wr->ux); + wr->pw &= !pan; + wr->pr &= !pan; + } +} + static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) { - bool perm_fail, ur, uw, ux, pr, pw, px; struct s1_walk_result wr = {}; struct s1_walk_info wi = {}; + bool perm_fail = false; int ret, idx; ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr); @@ -787,88 +885,24 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) if (ret) goto compute_par; - /* FIXME: revisit when adding indirect permission support */ - /* AArch64.S1DirectBasePermissions() */ - if (wi.regime != TR_EL2) { - switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr.desc)) { - case 0b00: - pr = pw = true; - ur = uw = false; - break; - case 0b01: - pr = pw = ur = uw = true; - break; - case 0b10: - pr = true; - pw = ur = uw = false; - break; - case 0b11: - pr = ur = true; - pw = uw = false; - break; - } - - switch (wr.APTable) { - case 0b00: - break; - case 0b01: - ur = uw = false; - break; - case 0b10: - pw = uw = false; - break; - case 0b11: - pw = ur = uw = false; - break; - } - - /* We don't use px for anything yet, but hey... */ - px = !((wr.desc & PTE_PXN) || wr.PXNTable || uw); - ux = !((wr.desc & PTE_UXN) || wr.UXNTable); - - if (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) { - bool pan; - - pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT; - pan &= ur || uw || (pan3_enabled(vcpu, wi.regime) && ux); - pw &= !pan; - pr &= !pan; - } - } else { - ur = uw = ux = false; - - if (!(wr.desc & PTE_RDONLY)) { - pr = pw = true; - } else { - pr = true; - pw = false; - } - - if (wr.APTable & BIT(1)) - pw = false; - - /* XN maps to UXN */ - px = !((wr.desc & PTE_UXN) || wr.UXNTable); - } - - perm_fail = false; + compute_s1_permissions(vcpu, op, &wi, &wr); switch (op) { case OP_AT_S1E1RP: case OP_AT_S1E1R: case OP_AT_S1E2R: - perm_fail = !pr; + perm_fail = !wr.pr; break; case OP_AT_S1E1WP: case OP_AT_S1E1W: case OP_AT_S1E2W: - perm_fail = !pw; + perm_fail = !wr.pw; break; case OP_AT_S1E0R: - perm_fail = !ur; + perm_fail = !wr.ur; break; case OP_AT_S1E0W: - perm_fail = !uw; + perm_fail = !wr.uw; break; case OP_AT_S1E1A: case OP_AT_S1E2A: From 5e21b297872237a96a23b637e670548987a09bb9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:28 +0100 Subject: [PATCH 19/36] KVM: arm64: Disable hierarchical permissions when S1PIE is enabled S1PIE implicitly disables hierarchical permissions, as specified in R_JHSVW, by making TCR_ELx.HPDn RES1. Add a predicate for S1PIE being enabled for a given translation regime, and emulate this behaviour by forcing the hpd field to true if S1PIE is enabled for that translation regime. Signed-off-by: Marc Zyngier Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20241023145345.1613824-21-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index adcfce3f67f0..f5bd750288ff 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -93,6 +93,23 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o } } +static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime) +{ + if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) + return false; + + switch (regime) { + case TR_EL2: + case TR_EL20: + return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE; + case TR_EL10: + return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) && + (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1x_PIE); + default: + BUG(); + } +} + static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi, struct s1_walk_result *wr, u64 va) { @@ -186,6 +203,8 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi, (va55 ? FIELD_GET(TCR_HPD1, tcr) : FIELD_GET(TCR_HPD0, tcr))); + /* R_JHSVW */ + wi->hpd |= s1pie_enabled(vcpu, wi->regime); /* Someone was silly enough to encode TG0/TG1 differently */ if (va55) { From 364c081029a68b47e0ecb475a0cf337a89c9f960 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:29 +0100 Subject: [PATCH 20/36] KVM: arm64: Implement AT S1PIE support It doesn't take much effort to implement S1PIE support in AT. It is only a matter of using the AArch64.S1IndirectBasePermissions() encodings for the permission, ignoring GCS which has no impact on AT, and enforce FEAT_PAN3 being enabled as this is a requirement of FEAT_S1PIE. Signed-off-by: Marc Zyngier Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20241023145345.1613824-22-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 117 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 116 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index f5bd750288ff..3d93ed179560 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -781,6 +781,9 @@ static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime) if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3)) return false; + if (s1pie_enabled(vcpu, regime)) + return true; + if (regime == TR_EL10) sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); else @@ -862,11 +865,123 @@ static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu, } } +#define perm_idx(v, r, i) ((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf) + +#define set_priv_perms(wr, r, w, x) \ + do { \ + (wr)->pr = (r); \ + (wr)->pw = (w); \ + (wr)->px = (x); \ + } while (0) + +#define set_unpriv_perms(wr, r, w, x) \ + do { \ + (wr)->ur = (r); \ + (wr)->uw = (w); \ + (wr)->ux = (x); \ + } while (0) + +/* Similar to AArch64.S1IndirectBasePermissions(), without GCS */ +#define set_perms(w, wr, ip) \ + do { \ + /* R_LLZDZ */ \ + switch ((ip)) { \ + case 0b0000: \ + set_ ## w ## _perms((wr), false, false, false); \ + break; \ + case 0b0001: \ + set_ ## w ## _perms((wr), true , false, false); \ + break; \ + case 0b0010: \ + set_ ## w ## _perms((wr), false, false, true ); \ + break; \ + case 0b0011: \ + set_ ## w ## _perms((wr), true , false, true ); \ + break; \ + case 0b0100: \ + set_ ## w ## _perms((wr), false, false, false); \ + break; \ + case 0b0101: \ + set_ ## w ## _perms((wr), true , true , false); \ + break; \ + case 0b0110: \ + set_ ## w ## _perms((wr), true , true , true ); \ + break; \ + case 0b0111: \ + set_ ## w ## _perms((wr), true , true , true ); \ + break; \ + case 0b1000: \ + set_ ## w ## _perms((wr), true , false, false); \ + break; \ + case 0b1001: \ + set_ ## w ## _perms((wr), true , false, false); \ + break; \ + case 0b1010: \ + set_ ## w ## _perms((wr), true , false, true ); \ + break; \ + case 0b1011: \ + set_ ## w ## _perms((wr), false, false, false); \ + break; \ + case 0b1100: \ + set_ ## w ## _perms((wr), true , true , false); \ + break; \ + case 0b1101: \ + set_ ## w ## _perms((wr), false, false, false); \ + break; \ + case 0b1110: \ + set_ ## w ## _perms((wr), true , true , true ); \ + break; \ + case 0b1111: \ + set_ ## w ## _perms((wr), false, false, false); \ + break; \ + } \ + } while (0) + +static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu, + struct s1_walk_info *wi, + struct s1_walk_result *wr) +{ + u8 up, pp, idx; + + idx = pte_pi_index(wr->desc); + + switch (wi->regime) { + case TR_EL10: + pp = perm_idx(vcpu, PIR_EL1, idx); + up = perm_idx(vcpu, PIRE0_EL1, idx); + break; + case TR_EL20: + pp = perm_idx(vcpu, PIR_EL2, idx); + up = perm_idx(vcpu, PIRE0_EL2, idx); + break; + case TR_EL2: + pp = perm_idx(vcpu, PIR_EL2, idx); + up = 0; + break; + } + + set_perms(priv, wr, pp); + + if (wi->regime != TR_EL2) + set_perms(unpriv, wr, up); + else + set_unpriv_perms(wr, false, false, false); + + /* R_VFPJF */ + if (wr->px && wr->uw) { + set_priv_perms(wr, false, false, false); + set_unpriv_perms(wr, false, false, false); + } +} + static void compute_s1_permissions(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi, struct s1_walk_result *wr) { - compute_s1_direct_permissions(vcpu, wi, wr); + if (!s1pie_enabled(vcpu, wi->regime)) + compute_s1_direct_permissions(vcpu, wi, wr); + else + compute_s1_indirect_permissions(vcpu, wi, wr); if (!wi->hpd) compute_s1_hierarchical_permissions(vcpu, wi, wr); From ee3a9a0643c58f61d2227ba819e13dbb552aff11 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:30 +0100 Subject: [PATCH 21/36] KVM: arm64: Add a composite EL2 visibility helper We are starting to have a bunch of visibility helpers checking for EL2 + something else, and we are going to add more. Simplify things somehow by introducing a helper that implement extractly that by taking a visibility helper as a parameter, and convert the existing ones to that. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-23-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 2d30266a4c65..9938c768c5ac 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2303,16 +2303,18 @@ static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) return __vcpu_sys_reg(vcpu, r->reg) = val; } +static unsigned int __el2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + unsigned int (*fn)(const struct kvm_vcpu *, + const struct sys_reg_desc *)) +{ + return el2_visibility(vcpu, rd) ?: fn(vcpu, rd); +} + static unsigned int sve_el2_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - unsigned int r; - - r = el2_visibility(vcpu, rd); - if (r) - return r; - - return sve_visibility(vcpu, rd); + return __el2_visibility(vcpu, rd, sve_visibility); } static bool access_zcr_el2(struct kvm_vcpu *vcpu, From 997eeecafebaef668b76264800c185544a432839 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 23 Oct 2024 15:53:31 +0100 Subject: [PATCH 22/36] KVM: arm64: Define helper for EL2 registers with custom visibility In preparation for adding more visibility filtering for EL2 registers add a helper macro like EL2_REG() which allows specification of a custom visibility operation. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20240822-kvm-arm64-hide-pie-regs-v2-1-376624fa829c@kernel.org Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-24-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 9938c768c5ac..a2648a66b88d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2171,6 +2171,15 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, .val = v, \ } +#define EL2_REG_FILTERED(name, acc, rst, v, filter) { \ + SYS_DESC(SYS_##name), \ + .access = acc, \ + .reset = rst, \ + .reg = name, \ + .visibility = filter, \ + .val = v, \ +} + #define EL2_REG_VNCR(name, rst, v) EL2_REG(name, bad_vncr_trap, rst, v) #define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v) @@ -2887,8 +2896,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG_VNCR(HFGITR_EL2, reset_val, 0), EL2_REG_VNCR(HACR_EL2, reset_val, 0), - { SYS_DESC(SYS_ZCR_EL2), .access = access_zcr_el2, .reset = reset_val, - .visibility = sve_el2_visibility, .reg = ZCR_EL2 }, + EL2_REG_FILTERED(ZCR_EL2, access_zcr_el2, reset_val, 0, + sve_el2_visibility), EL2_REG_VNCR(HCRX_EL2, reset_val, 0), From 0fcb4eea5345531118ca6f46391e88b12fbc35e4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 23 Oct 2024 15:53:32 +0100 Subject: [PATCH 23/36] KVM: arm64: Hide TCR2_EL1 from userspace when disabled for guests When the guest does not support FEAT_TCR2 we should not allow any access to it in order to ensure that we do not create spurious issues with guest migration. Add a visibility operation for it. Fixes: fbff56068232 ("KVM: arm64: Save/restore TCR2_EL1") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20240822-kvm-arm64-hide-pie-regs-v2-2-376624fa829c@kernel.org [maz: simplify by using __el2_visibility(), kvm_has_tcr2() throughout] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-25-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/at.c | 2 +- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 2 +- arch/arm64/kvm/nested.c | 2 +- arch/arm64/kvm/sys_regs.c | 27 ++++++++++++++++++---- 5 files changed, 28 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 553fb07f2153..fb5dd6a287eb 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1519,4 +1519,7 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); (system_supports_fpmr() && \ kvm_has_feat((k), ID_AA64PFR2_EL1, FPMR, IMP)) +#define kvm_has_tcr2(k) \ + (kvm_has_feat((k), ID_AA64MMFR3_EL1, TCRX, IMP)) + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 3d93ed179560..a9f665d5ceb0 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1099,7 +1099,7 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1); write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR); write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR); - if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, TCRX, IMP)) { + if (kvm_has_tcr2(vcpu->kvm)) { write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2); if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) { write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR); diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index d67628d01bf5..c92c2c0b86aa 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -69,7 +69,7 @@ static inline bool ctxt_has_tcrx(struct kvm_cpu_context *ctxt) return false; vcpu = ctxt_to_vcpu(ctxt); - return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, TCRX, IMP); + return kvm_has_tcr2(kern_hyp_va(vcpu->kvm)); } static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 1c8a6aa907df..a4e085dab3ba 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1054,7 +1054,7 @@ int kvm_init_nv_sysregs(struct kvm *kvm) res0 |= HCRX_EL2_PTTWI; if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, SCTLRX, IMP)) res0 |= HCRX_EL2_SCTLR2En; - if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, TCRX, IMP)) + if (!kvm_has_tcr2(kvm)) res0 |= HCRX_EL2_TCR2En; if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP)) res0 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a2648a66b88d..d4b5faacf066 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -446,7 +446,7 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, u64 val, mask, shift; if (reg_to_encoding(r) == SYS_TCR2_EL1 && - !kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, TCRX, IMP)) + !kvm_has_tcr2(vcpu->kvm)) return undef_access(vcpu, p, r); BUG_ON(!p->is_write); @@ -471,7 +471,7 @@ static bool access_tcr2_el2(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, TCRX, IMP)) { + if (!kvm_has_tcr2(vcpu->kvm)) { kvm_inject_undefined(vcpu); return false; } @@ -2357,6 +2357,21 @@ static unsigned int s1poe_visibility(const struct kvm_vcpu *vcpu, return REG_HIDDEN; } +static unsigned int tcr2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (kvm_has_tcr2(vcpu->kvm)) + return 0; + + return REG_HIDDEN; +} + +static unsigned int tcr2_el2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + return __el2_visibility(vcpu, rd, tcr2_visibility); +} + /* * Architected system registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 @@ -2567,7 +2582,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, { SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 }, { SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 }, - { SYS_DESC(SYS_TCR2_EL1), access_vm_reg, reset_val, TCR2_EL1, 0 }, + { SYS_DESC(SYS_TCR2_EL1), access_vm_reg, reset_val, TCR2_EL1, 0, + .visibility = tcr2_visibility }, PTRAUTH_KEY(APIA), PTRAUTH_KEY(APIB), @@ -2904,7 +2920,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(TTBR0_EL2, access_rw, reset_val, 0), EL2_REG(TTBR1_EL2, access_rw, reset_val, 0), EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1), - EL2_REG(TCR2_EL2, access_tcr2_el2, reset_val, TCR2_EL2_RES1), + EL2_REG_FILTERED(TCR2_EL2, access_tcr2_el2, reset_val, TCR2_EL2_RES1, + tcr2_el2_visibility), EL2_REG_VNCR(VTTBR_EL2, reset_val, 0), EL2_REG_VNCR(VTCR_EL2, reset_val, 0), @@ -4800,7 +4817,7 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu) if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP)) vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2); - if (kvm_has_feat(kvm, ID_AA64MMFR3_EL1, TCRX, IMP)) + if (kvm_has_tcr2(kvm)) vcpu->arch.hcrx_el2 |= HCRX_EL2_TCR2En; if (kvm_has_fpmr(kvm)) From a68cddbe47efdac10855e5f154cbd6c87b792ba2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 23 Oct 2024 15:53:33 +0100 Subject: [PATCH 24/36] KVM: arm64: Hide S1PIE registers from userspace when disabled for guests When the guest does not support S1PIE we should not allow any access to the system registers it adds in order to ensure that we do not create spurious issues with guest migration. Add a visibility operation for these registers. Fixes: 86f9de9db178 ("KVM: arm64: Save/restore PIE registers") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20240822-kvm-arm64-hide-pie-regs-v2-3-376624fa829c@kernel.org [maz: simplify by using __el2_visibility(), kvm_has_s1pie() throughout] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-26-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/at.c | 4 +-- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 2 +- arch/arm64/kvm/nested.c | 4 +-- arch/arm64/kvm/sys_regs.c | 31 +++++++++++++++++----- 5 files changed, 33 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index fb5dd6a287eb..063463d05644 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1522,4 +1522,7 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); #define kvm_has_tcr2(k) \ (kvm_has_feat((k), ID_AA64MMFR3_EL1, TCRX, IMP)) +#define kvm_has_s1pie(k) \ + (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1PIE, IMP)) + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index a9f665d5ceb0..de7109111e40 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -95,7 +95,7 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime) { - if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) + if (!kvm_has_s1pie(vcpu->kvm)) return false; switch (regime) { @@ -1101,7 +1101,7 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR); if (kvm_has_tcr2(vcpu->kvm)) { write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2); - if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) { + if (kvm_has_s1pie(vcpu->kvm)) { write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR); write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0); } diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index c92c2c0b86aa..a306ea70502c 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -58,7 +58,7 @@ static inline bool ctxt_has_s1pie(struct kvm_cpu_context *ctxt) return false; vcpu = ctxt_to_vcpu(ctxt); - return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, S1PIE, IMP); + return kvm_has_s1pie(kern_hyp_va(vcpu->kvm)); } static inline bool ctxt_has_tcrx(struct kvm_cpu_context *ctxt) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index a4e085dab3ba..9838b92dde5f 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1105,7 +1105,7 @@ int kvm_init_nv_sysregs(struct kvm *kvm) res0 |= (HFGxTR_EL2_nSMPRI_EL1 | HFGxTR_EL2_nTPIDR2_EL0); if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP)) res0 |= HFGxTR_EL2_nRCWMASK_EL1; - if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) + if (!kvm_has_s1pie(kvm)) res0 |= (HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1); if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP)) res0 |= (HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nPOR_EL1); @@ -1219,7 +1219,7 @@ int kvm_init_nv_sysregs(struct kvm *kvm) res0 |= TCR2_EL2_AIE; if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP)) res0 |= TCR2_EL2_POE | TCR2_EL2_E0POE; - if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) + if (!kvm_has_s1pie(kvm)) res0 |= TCR2_EL2_PIE; if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, VH, IMP)) res0 |= (TCR2_EL2_E0POE | TCR2_EL2_D128 | diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d4b5faacf066..46aa3d54b528 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -373,7 +373,7 @@ static bool check_s1pie_access_rw(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) { + if (!kvm_has_s1pie(vcpu->kvm)) { kvm_inject_undefined(vcpu); return false; } @@ -2372,6 +2372,21 @@ static unsigned int tcr2_el2_visibility(const struct kvm_vcpu *vcpu, return __el2_visibility(vcpu, rd, tcr2_visibility); } +static unsigned int s1pie_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (kvm_has_s1pie(vcpu->kvm)) + return 0; + + return REG_HIDDEN; +} + +static unsigned int s1pie_el2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + return __el2_visibility(vcpu, rd, s1pie_visibility); +} + /* * Architected system registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 @@ -2637,8 +2652,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi }, { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, - { SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1 }, - { SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1 }, + { SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1, + .visibility = s1pie_visibility }, + { SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1, + .visibility = s1pie_visibility }, { SYS_DESC(SYS_POR_EL1), NULL, reset_unknown, POR_EL1, .visibility = s1poe_visibility }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, @@ -2949,8 +2966,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(HPFAR_EL2, access_rw, reset_val, 0), EL2_REG(MAIR_EL2, access_rw, reset_val, 0), - EL2_REG(PIRE0_EL2, check_s1pie_access_rw, reset_val, 0), - EL2_REG(PIR_EL2, check_s1pie_access_rw, reset_val, 0), + EL2_REG_FILTERED(PIRE0_EL2, check_s1pie_access_rw, reset_val, 0, + s1pie_el2_visibility), + EL2_REG_FILTERED(PIR_EL2, check_s1pie_access_rw, reset_val, 0, + s1pie_el2_visibility), EL2_REG(AMAIR_EL2, access_rw, reset_val, 0), EL2_REG(VBAR_EL2, access_rw, reset_val, 0), @@ -4867,7 +4886,7 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu) kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_ATS1E1RP | HFGITR_EL2_ATS1E1WP); - if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) + if (!kvm_has_s1pie(kvm)) kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1); From b4824120303f18dfa2b4b6bf303240172117925b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:34 +0100 Subject: [PATCH 25/36] KVM: arm64: Rely on visibility to let PIR*_ELx/TCR2_ELx UNDEF With a visibility defined for these registers, there is no need to check again for S1PIE or TCRX being implemented as perform_access() already handles it. Reviewed-by: Mark Brown Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-27-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 34 +++------------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 46aa3d54b528..e0fbe71d5890 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -369,18 +369,6 @@ static bool access_rw(struct kvm_vcpu *vcpu, return true; } -static bool check_s1pie_access_rw(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - if (!kvm_has_s1pie(vcpu->kvm)) { - kvm_inject_undefined(vcpu); - return false; - } - - return access_rw(vcpu, p, r); -} - /* * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). */ @@ -445,10 +433,6 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, bool was_enabled = vcpu_has_cache_enabled(vcpu); u64 val, mask, shift; - if (reg_to_encoding(r) == SYS_TCR2_EL1 && - !kvm_has_tcr2(vcpu->kvm)) - return undef_access(vcpu, p, r); - BUG_ON(!p->is_write); get_access_mask(r, &mask, &shift); @@ -467,18 +451,6 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, return true; } -static bool access_tcr2_el2(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - if (!kvm_has_tcr2(vcpu->kvm)) { - kvm_inject_undefined(vcpu); - return false; - } - - return access_rw(vcpu, p, r); -} - static bool access_actlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -2937,7 +2909,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(TTBR0_EL2, access_rw, reset_val, 0), EL2_REG(TTBR1_EL2, access_rw, reset_val, 0), EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1), - EL2_REG_FILTERED(TCR2_EL2, access_tcr2_el2, reset_val, TCR2_EL2_RES1, + EL2_REG_FILTERED(TCR2_EL2, access_rw, reset_val, TCR2_EL2_RES1, tcr2_el2_visibility), EL2_REG_VNCR(VTTBR_EL2, reset_val, 0), EL2_REG_VNCR(VTCR_EL2, reset_val, 0), @@ -2966,9 +2938,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(HPFAR_EL2, access_rw, reset_val, 0), EL2_REG(MAIR_EL2, access_rw, reset_val, 0), - EL2_REG_FILTERED(PIRE0_EL2, check_s1pie_access_rw, reset_val, 0, + EL2_REG_FILTERED(PIRE0_EL2, access_rw, reset_val, 0, s1pie_el2_visibility), - EL2_REG_FILTERED(PIR_EL2, check_s1pie_access_rw, reset_val, 0, + EL2_REG_FILTERED(PIR_EL2, access_rw, reset_val, 0, s1pie_el2_visibility), EL2_REG(AMAIR_EL2, access_rw, reset_val, 0), From b9ed7e5dfbe9b77639fd4ff042bef94fb232b94d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:35 +0100 Subject: [PATCH 26/36] arm64: Add encoding for POR_EL2 POR_EL2 is the equivalent of POR_EL1 for the EL2&0 translation regime, and it is sorely missing from the sysreg file. Add the sucker. Signed-off-by: Marc Zyngier Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20241023145345.1613824-28-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/tools/sysreg | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 8db4431093b2..a33136243bdf 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -2907,6 +2907,10 @@ Sysreg POR_EL1 3 0 10 2 4 Fields PIRx_ELx EndSysreg +Sysreg POR_EL2 3 4 10 2 4 +Fields PIRx_ELx +EndSysreg + Sysreg POR_EL12 3 5 10 2 4 Fields PIRx_ELx EndSysreg From b17d8aa2012617c51c5478f1be69044adb602d53 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:36 +0100 Subject: [PATCH 27/36] KVM: arm64: Drop bogus CPTR_EL2.E0POE trap routing It took me some time to realise it, but CPTR_EL2.E0POE does not apply to a guest, only to EL0 when InHost(). And when InHost(), CPCR_EL2 is mapped to CPACR_EL1, maning that the E0POE bit naturally takes effect without any trap. To sum it up, this trap bit is better left ignored, we will never have to hanedle it. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-29-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/emulate-nested.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 05b6435d02a9..ddcbaa983de3 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -79,7 +79,6 @@ enum cgt_group_id { CGT_MDCR_E2TB, CGT_MDCR_TDCC, - CGT_CPACR_E0POE, CGT_CPTR_TAM, CGT_CPTR_TCPAC, @@ -362,12 +361,6 @@ static const struct trap_bits coarse_trap_bits[] = { .mask = MDCR_EL2_TDCC, .behaviour = BEHAVE_FORWARD_ANY, }, - [CGT_CPACR_E0POE] = { - .index = CPTR_EL2, - .value = CPACR_ELx_E0POE, - .mask = CPACR_ELx_E0POE, - .behaviour = BEHAVE_FORWARD_ANY, - }, [CGT_CPTR_TAM] = { .index = CPTR_EL2, .value = CPTR_EL2_TAM, @@ -1141,7 +1134,6 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { SR_TRAP(SYS_AMEVTYPER1_EL0(13), CGT_CPTR_TAM), SR_TRAP(SYS_AMEVTYPER1_EL0(14), CGT_CPTR_TAM), SR_TRAP(SYS_AMEVTYPER1_EL0(15), CGT_CPTR_TAM), - SR_TRAP(SYS_POR_EL0, CGT_CPACR_E0POE), /* op0=2, op1=1, and CRn<0b1000 */ SR_RANGE_TRAP(sys_reg(2, 1, 0, 0, 0), sys_reg(2, 1, 7, 15, 7), CGT_CPTR_TTA), From f7575530df436c39d07e79d6eef721c6e7b35bb4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:37 +0100 Subject: [PATCH 28/36] KVM: arm64: Subject S1PIE/S1POE registers to HCR_EL2.{TVM,TRVM} All the El0/EL1 S1PIE/S1POE system register are caught by the HCR_EL2 TVM and TRVM bits. Reflect this in the coarse grained trap table. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-30-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/emulate-nested.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index ddcbaa983de3..0ab090553354 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -704,6 +704,10 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { SR_TRAP(SYS_MAIR_EL1, CGT_HCR_TVM_TRVM), SR_TRAP(SYS_AMAIR_EL1, CGT_HCR_TVM_TRVM), SR_TRAP(SYS_CONTEXTIDR_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_PIR_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_PIRE0_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_POR_EL0, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_POR_EL1, CGT_HCR_TVM_TRVM), SR_TRAP(SYS_TCR2_EL1, CGT_HCR_TVM_TRVM_HCRX_TCR2En), SR_TRAP(SYS_DC_ZVA, CGT_HCR_TDZ), SR_TRAP(SYS_DC_GVA, CGT_HCR_TDZ), From 26e89dccdf6328b608baa35d84cc36d915769f50 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:38 +0100 Subject: [PATCH 29/36] KVM: arm64: Add kvm_has_s1poe() helper Just like we have kvm_has_s1pie(), add its S1POE counterpart, making the code slightly more readable. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-31-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 2 +- arch/arm64/kvm/nested.c | 4 ++-- arch/arm64/kvm/sys_regs.c | 4 ++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 063463d05644..9269d893f85b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1525,4 +1525,7 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); #define kvm_has_s1pie(k) \ (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1PIE, IMP)) +#define kvm_has_s1poe(k) \ + (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index a306ea70502c..a651c43ad679 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -80,7 +80,7 @@ static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt) return false; vcpu = ctxt_to_vcpu(ctxt); - return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, S1POE, IMP); + return kvm_has_s1poe(kern_hyp_va(vcpu->kvm)); } static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 9838b92dde5f..be88a36da071 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1107,7 +1107,7 @@ int kvm_init_nv_sysregs(struct kvm *kvm) res0 |= HFGxTR_EL2_nRCWMASK_EL1; if (!kvm_has_s1pie(kvm)) res0 |= (HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1); - if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP)) + if (!kvm_has_s1poe(kvm)) res0 |= (HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nPOR_EL1); if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) res0 |= HFGxTR_EL2_nS2POR_EL1; @@ -1217,7 +1217,7 @@ int kvm_init_nv_sysregs(struct kvm *kvm) res0 |= TCR2_EL2_PTTWI | TCR2_EL2_PnCH; if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, AIE, IMP)) res0 |= TCR2_EL2_AIE; - if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP)) + if (!kvm_has_s1poe(kvm)) res0 |= TCR2_EL2_POE | TCR2_EL2_E0POE; if (!kvm_has_s1pie(kvm)) res0 |= TCR2_EL2_PIE; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e0fbe71d5890..d46663f27fba 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2323,7 +2323,7 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu, static unsigned int s1poe_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1POE, IMP)) + if (kvm_has_s1poe(vcpu->kvm)) return 0; return REG_HIDDEN; @@ -4862,7 +4862,7 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu) kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1); - if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP)) + if (!kvm_has_s1poe(kvm)) kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPOR_EL1 | HFGxTR_EL2_nPOR_EL0); From 5970e9903f03560ce9829297e302463acdc26bc0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:39 +0100 Subject: [PATCH 30/36] KVM: arm64: Add basic support for POR_EL2 S1POE support implies support for POR_EL2, which we provide by - adding it to the vcpu_sysreg enum - advertising it as mapped to its EL1 counterpart in get_el2_to_el1_mapping - wiring it in the sys_reg_desc table with the correct visibility - handling POR_EL1 in __vcpu_{read,write}_sys_reg_from_cpu() Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-32-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/sys_regs.c | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 9269d893f85b..28ea7e72477c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -480,6 +480,7 @@ enum vcpu_sysreg { TCR_EL2, /* Translation Control Register (EL2) */ PIRE0_EL2, /* Permission Indirection Register 0 (EL2) */ PIR_EL2, /* Permission Indirection Register 1 (EL2) */ + POR_EL2, /* Permission Overlay Register 2 (EL2) */ SPSR_EL2, /* EL2 saved program status register */ ELR_EL2, /* EL2 exception link register */ AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */ @@ -1050,6 +1051,7 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) case TCR2_EL1: *val = read_sysreg_s(SYS_TCR2_EL12); break; case PIR_EL1: *val = read_sysreg_s(SYS_PIR_EL12); break; case PIRE0_EL1: *val = read_sysreg_s(SYS_PIRE0_EL12); break; + case POR_EL1: *val = read_sysreg_s(SYS_POR_EL12); break; case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; @@ -1099,6 +1101,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) case TCR2_EL1: write_sysreg_s(val, SYS_TCR2_EL12); break; case PIR_EL1: write_sysreg_s(val, SYS_PIR_EL12); break; case PIRE0_EL1: write_sysreg_s(val, SYS_PIRE0_EL12); break; + case POR_EL1: write_sysreg_s(val, SYS_POR_EL12); break; case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d46663f27fba..4f4fe84fb0e8 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -137,6 +137,7 @@ static bool get_el2_to_el1_mapping(unsigned int reg, MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1, NULL ); MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1, NULL ); MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1, NULL ); + MAPPED_EL2_SYSREG(POR_EL2, POR_EL1, NULL ); MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL ); MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL ); MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL ); @@ -2329,6 +2330,12 @@ static unsigned int s1poe_visibility(const struct kvm_vcpu *vcpu, return REG_HIDDEN; } +static unsigned int s1poe_el2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + return __el2_visibility(vcpu, rd, s1poe_visibility); +} + static unsigned int tcr2_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { @@ -2942,6 +2949,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { s1pie_el2_visibility), EL2_REG_FILTERED(PIR_EL2, access_rw, reset_val, 0, s1pie_el2_visibility), + EL2_REG_FILTERED(POR_EL2, access_rw, reset_val, 0, + s1poe_el2_visibility), EL2_REG(AMAIR_EL2, access_rw, reset_val, 0), EL2_REG(VBAR_EL2, access_rw, reset_val, 0), From de5c2827fb44ae3074638e373bcea64ac9107689 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:40 +0100 Subject: [PATCH 31/36] KVM: arm64: Add save/restore support for POR_EL2 POR_EL2 needs saving when the guest is VHE, and restoring in any case. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-33-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index a603966726f6..5f78a39053a7 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -58,6 +58,9 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) __vcpu_sys_reg(vcpu, PIRE0_EL2) = read_sysreg_el1(SYS_PIRE0); __vcpu_sys_reg(vcpu, PIR_EL2) = read_sysreg_el1(SYS_PIR); } + + if (ctxt_has_s1poe(&vcpu->arch.ctxt)) + __vcpu_sys_reg(vcpu, POR_EL2) = read_sysreg_el1(SYS_POR); } /* @@ -123,6 +126,9 @@ static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) write_sysreg_el1(__vcpu_sys_reg(vcpu, PIR_EL2), SYS_PIR); write_sysreg_el1(__vcpu_sys_reg(vcpu, PIRE0_EL2), SYS_PIRE0); } + + if (ctxt_has_s1poe(&vcpu->arch.ctxt)) + write_sysreg_el1(__vcpu_sys_reg(vcpu, POR_EL2), SYS_POR); } write_sysreg_el1(__vcpu_sys_reg(vcpu, ESR_EL2), SYS_ESR); From 846c993df98278ce18b80183d78b5c1ca8b14f5c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:41 +0100 Subject: [PATCH 32/36] KVM: arm64: Add POE save/restore for AT emulation fast-path Just like the other extensions affecting address translation, we must save/restore POE so that an out-of-context translation context can be restored and used with the AT instructions. Signed-off-by: Marc Zyngier Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20241023145345.1613824-34-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index de7109111e40..ef1643faedeb 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -440,6 +440,8 @@ struct mmu_config { u64 tcr2; u64 pir; u64 pire0; + u64 por_el0; + u64 por_el1; u64 sctlr; u64 vttbr; u64 vtcr; @@ -458,6 +460,10 @@ static void __mmu_config_save(struct mmu_config *config) config->pir = read_sysreg_el1(SYS_PIR); config->pire0 = read_sysreg_el1(SYS_PIRE0); } + if (system_supports_poe()) { + config->por_el1 = read_sysreg_el1(SYS_POR); + config->por_el0 = read_sysreg_s(SYS_POR_EL0); + } } config->sctlr = read_sysreg_el1(SYS_SCTLR); config->vttbr = read_sysreg(vttbr_el2); @@ -485,6 +491,10 @@ static void __mmu_config_restore(struct mmu_config *config) write_sysreg_el1(config->pir, SYS_PIR); write_sysreg_el1(config->pire0, SYS_PIRE0); } + if (system_supports_poe()) { + write_sysreg_el1(config->por_el1, SYS_POR); + write_sysreg_s(config->por_el0, SYS_POR_EL0); + } } write_sysreg_el1(config->sctlr, SYS_SCTLR); write_sysreg(config->vttbr, vttbr_el2); @@ -1105,6 +1115,10 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR); write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0); } + if (kvm_has_s1poe(vcpu->kvm)) { + write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR); + write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0); + } } write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR); __load_stage2(mmu, mmu->arch); From 8a9b304d7e2276a0ebb60a23cdcf7d348052752f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:42 +0100 Subject: [PATCH 33/36] KVM: arm64: Disable hierarchical permissions when POE is enabled The hierarchical permissions must be disabled when POE is enabled in the translation regime used for a given table walk. We store the two enable bits in the s1_walk_info structure so that they can be retrieved down the line, as they will be useful. Signed-off-by: Marc Zyngier Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20241023145345.1613824-35-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index ef1643faedeb..8d1dc6327ec5 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -24,6 +24,8 @@ struct s1_walk_info { unsigned int txsz; int sl; bool hpd; + bool e0poe; + bool poe; bool be; bool s2; }; @@ -110,6 +112,34 @@ static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime) } } +static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi) +{ + u64 val; + + if (!kvm_has_s1poe(vcpu->kvm)) { + wi->poe = wi->e0poe = false; + return; + } + + switch (wi->regime) { + case TR_EL2: + case TR_EL20: + val = vcpu_read_sys_reg(vcpu, TCR2_EL2); + wi->poe = val & TCR2_EL2_POE; + wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE); + break; + case TR_EL10: + if (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) { + wi->poe = wi->e0poe = false; + return; + } + + val = __vcpu_sys_reg(vcpu, TCR2_EL1); + wi->poe = val & TCR2_EL1x_POE; + wi->e0poe = val & TCR2_EL1x_E0POE; + } +} + static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi, struct s1_walk_result *wr, u64 va) { @@ -206,6 +236,12 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi, /* R_JHSVW */ wi->hpd |= s1pie_enabled(vcpu, wi->regime); + /* Do we have POE? */ + compute_s1poe(vcpu, wi); + + /* R_BVXDG */ + wi->hpd |= (wi->poe || wi->e0poe); + /* Someone was silly enough to encode TG0/TG1 differently */ if (va55) { wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr); From 7cd5c2796cb039aaa43d3f16d875bb7d60b2c1b0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:43 +0100 Subject: [PATCH 34/36] KVM: arm64: Make PAN conditions part of the S1 walk context Move the conditions describing PAN as part of the s1_walk_info structure, in an effort to declutter the permission processing. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-36-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 8d1dc6327ec5..2ab2c3578c3a 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -26,6 +26,7 @@ struct s1_walk_info { bool hpd; bool e0poe; bool poe; + bool pan; bool be; bool s2; }; @@ -151,6 +152,8 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi, wi->regime = compute_translation_regime(vcpu, op); as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W); + wi->pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) && + (*vcpu_cpsr(vcpu) & PSR_PAN_BIT); va55 = va & BIT(55); @@ -1020,10 +1023,12 @@ static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu, } } -static void compute_s1_permissions(struct kvm_vcpu *vcpu, u32 op, +static void compute_s1_permissions(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, struct s1_walk_result *wr) { + bool pan; + if (!s1pie_enabled(vcpu, wi->regime)) compute_s1_direct_permissions(vcpu, wi, wr); else @@ -1032,14 +1037,10 @@ static void compute_s1_permissions(struct kvm_vcpu *vcpu, u32 op, if (!wi->hpd) compute_s1_hierarchical_permissions(vcpu, wi, wr); - if (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) { - bool pan; - - pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT; - pan &= wr->ur || wr->uw || (pan3_enabled(vcpu, wi->regime) && wr->ux); - wr->pw &= !pan; - wr->pr &= !pan; - } + pan = wi->pan && (wr->ur || wr->uw || + (pan3_enabled(vcpu, wi->regime) && wr->ux)); + wr->pw &= !pan; + wr->pr &= !pan; } static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) @@ -1065,7 +1066,7 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) if (ret) goto compute_par; - compute_s1_permissions(vcpu, op, &wi, &wr); + compute_s1_permissions(vcpu, &wi, &wr); switch (op) { case OP_AT_S1E1RP: From e39ce7033c70df5e402b47dd248137878df6c771 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:44 +0100 Subject: [PATCH 35/36] KVM: arm64: Handle stage-1 permission overlays We now have the intrastructure in place to emulate S1POE: - direct permissions are always overlay-capable - indirect permissions are overlay-capable if the permissions are in the 0b0xxx range - the overlays are strictly substractive Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-37-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 2ab2c3578c3a..d300cd1a0d8a 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -40,9 +40,11 @@ struct s1_walk_result { u8 APTable; bool UXNTable; bool PXNTable; + bool uov; bool ur; bool uw; bool ux; + bool pov; bool pr; bool pw; bool px; @@ -881,6 +883,9 @@ static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu, /* XN maps to UXN */ wr->px = !(wr->desc & PTE_UXN); } + + wr->pov = wi->poe; + wr->uov = wi->e0poe; } static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu, @@ -1016,6 +1021,9 @@ static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu, else set_unpriv_perms(wr, false, false, false); + wr->pov = wi->poe && !(pp & BIT(3)); + wr->uov = wi->e0poe && !(up & BIT(3)); + /* R_VFPJF */ if (wr->px && wr->uw) { set_priv_perms(wr, false, false, false); @@ -1023,6 +1031,48 @@ static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu, } } +static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu, + struct s1_walk_info *wi, + struct s1_walk_result *wr) +{ + u8 idx, pov_perms, uov_perms; + + idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc); + + switch (wi->regime) { + case TR_EL10: + pov_perms = perm_idx(vcpu, POR_EL1, idx); + uov_perms = perm_idx(vcpu, POR_EL0, idx); + break; + case TR_EL20: + pov_perms = perm_idx(vcpu, POR_EL2, idx); + uov_perms = perm_idx(vcpu, POR_EL0, idx); + break; + case TR_EL2: + pov_perms = perm_idx(vcpu, POR_EL2, idx); + uov_perms = 0; + break; + } + + if (pov_perms & ~POE_RXW) + pov_perms = POE_NONE; + + if (wi->poe && wr->pov) { + wr->pr &= pov_perms & POE_R; + wr->px &= pov_perms & POE_X; + wr->pw &= pov_perms & POE_W; + } + + if (uov_perms & ~POE_RXW) + uov_perms = POE_NONE; + + if (wi->e0poe && wr->uov) { + wr->ur &= uov_perms & POE_R; + wr->ux &= uov_perms & POE_X; + wr->uw &= uov_perms & POE_W; + } +} + static void compute_s1_permissions(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, struct s1_walk_result *wr) @@ -1037,6 +1087,9 @@ static void compute_s1_permissions(struct kvm_vcpu *vcpu, if (!wi->hpd) compute_s1_hierarchical_permissions(vcpu, wi, wr); + if (wi->poe || wi->e0poe) + compute_s1_overlay_permissions(vcpu, wi, wr); + pan = wi->pan && (wr->ur || wr->uw || (pan3_enabled(vcpu, wi->regime) && wr->ux)); wr->pw &= !pan; From 1c6801d565eca7654732812b0c45ed898e64f238 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Oct 2024 15:53:45 +0100 Subject: [PATCH 36/36] KVM: arm64: Handle WXN attribute Until now, we didn't really care about WXN as it didn't have an effect on the R/W permissions (only the execution could be droppped), and therefore not of interest for AT. However, with S1POE, WXN can revoke the Write permission if an overlay is active and that execution is allowed. This *is* relevant to AT. Add full handling of WXN so that we correctly handle this case. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241023145345.1613824-38-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/at.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index d300cd1a0d8a..8c5d7990e5b3 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -40,10 +40,12 @@ struct s1_walk_result { u8 APTable; bool UXNTable; bool PXNTable; + bool uwxn; bool uov; bool ur; bool uw; bool ux; + bool pwxn; bool pov; bool pr; bool pw; @@ -847,6 +849,8 @@ static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, struct s1_walk_result *wr) { + bool wxn; + /* Non-hierarchical part of AArch64.S1DirectBasePermissions() */ if (wi->regime != TR_EL2) { switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) { @@ -884,6 +888,17 @@ static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu, wr->px = !(wr->desc & PTE_UXN); } + switch (wi->regime) { + case TR_EL2: + case TR_EL20: + wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN); + break; + case TR_EL10: + wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN); + break; + } + + wr->pwxn = wr->uwxn = wxn; wr->pov = wi->poe; wr->uov = wi->e0poe; } @@ -935,6 +950,16 @@ static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu, (wr)->ux = (x); \ } while (0) +#define set_priv_wxn(wr, v) \ + do { \ + (wr)->pwxn = (v); \ + } while (0) + +#define set_unpriv_wxn(wr, v) \ + do { \ + (wr)->uwxn = (v); \ + } while (0) + /* Similar to AArch64.S1IndirectBasePermissions(), without GCS */ #define set_perms(w, wr, ip) \ do { \ @@ -989,6 +1014,10 @@ static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu, set_ ## w ## _perms((wr), false, false, false); \ break; \ } \ + \ + /* R_HJYGR */ \ + set_ ## w ## _wxn((wr), ((ip) == 0b0110)); \ + \ } while (0) static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu, @@ -1090,6 +1119,22 @@ static void compute_s1_permissions(struct kvm_vcpu *vcpu, if (wi->poe || wi->e0poe) compute_s1_overlay_permissions(vcpu, wi, wr); + /* R_QXXPC */ + if (wr->pwxn) { + if (!wr->pov && wr->pw) + wr->px = false; + if (wr->pov && wr->px) + wr->pw = false; + } + + /* R_NPBXC */ + if (wr->uwxn) { + if (!wr->uov && wr->uw) + wr->ux = false; + if (wr->uov && wr->ux) + wr->uw = false; + } + pan = wi->pan && (wr->ur || wr->uw || (pan3_enabled(vcpu, wi->regime) && wr->ux)); wr->pw &= !pan;