From ccab51d69b1478b549ad0bbb38f556ab3bfb47ab Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Tue, 14 Apr 2026 11:02:31 +0100 Subject: [PATCH 01/11] KVM: arm64: Re-allow hyp tracing HVCs for [nh]VHE The introduction of __KVM_HOST_SMCCC_FUNC_MAX_NO_PKVM excluded hyp tracing HVCs from the common [nh]VHE/pKVM list. Re-allow them. Signed-off-by: Vincent Donnefort Link: https://patch.msgid.link/20260414100231.1859687-1-vdonnefort@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 16 ++++++++-------- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 37414440cee7..11dcdf434971 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -72,6 +72,14 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range, __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, + __KVM_HOST_SMCCC_FUNC___tracing_load, + __KVM_HOST_SMCCC_FUNC___tracing_unload, + __KVM_HOST_SMCCC_FUNC___tracing_enable, + __KVM_HOST_SMCCC_FUNC___tracing_swap_reader, + __KVM_HOST_SMCCC_FUNC___tracing_update_clock, + __KVM_HOST_SMCCC_FUNC___tracing_reset, + __KVM_HOST_SMCCC_FUNC___tracing_enable_event, + __KVM_HOST_SMCCC_FUNC___tracing_write_event, __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v5_save_apr, @@ -100,14 +108,6 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load, __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put, __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid, - __KVM_HOST_SMCCC_FUNC___tracing_load, - __KVM_HOST_SMCCC_FUNC___tracing_unload, - __KVM_HOST_SMCCC_FUNC___tracing_enable, - __KVM_HOST_SMCCC_FUNC___tracing_swap_reader, - __KVM_HOST_SMCCC_FUNC___tracing_update_clock, - __KVM_HOST_SMCCC_FUNC___tracing_reset, - __KVM_HOST_SMCCC_FUNC___tracing_enable_event, - __KVM_HOST_SMCCC_FUNC___tracing_write_event, }; #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 73f2e0221e70..8f7582d57ab5 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -709,6 +709,14 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_tlb_flush_vmid_range), HANDLE_FUNC(__kvm_flush_cpu_context), HANDLE_FUNC(__kvm_timer_set_cntvoff), + HANDLE_FUNC(__tracing_load), + HANDLE_FUNC(__tracing_unload), + HANDLE_FUNC(__tracing_enable), + HANDLE_FUNC(__tracing_swap_reader), + HANDLE_FUNC(__tracing_update_clock), + HANDLE_FUNC(__tracing_reset), + HANDLE_FUNC(__tracing_enable_event), + HANDLE_FUNC(__tracing_write_event), HANDLE_FUNC(__vgic_v3_save_aprs), HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs), HANDLE_FUNC(__vgic_v5_save_apr), @@ -735,14 +743,6 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__pkvm_vcpu_load), HANDLE_FUNC(__pkvm_vcpu_put), HANDLE_FUNC(__pkvm_tlb_flush_vmid), - HANDLE_FUNC(__tracing_load), - HANDLE_FUNC(__tracing_unload), - HANDLE_FUNC(__tracing_enable), - HANDLE_FUNC(__tracing_swap_reader), - HANDLE_FUNC(__tracing_update_clock), - HANDLE_FUNC(__tracing_reset), - HANDLE_FUNC(__tracing_enable_event), - HANDLE_FUNC(__tracing_write_event), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) From f05799491d6a2a29d8e15f4451e685c4a6e13d8f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Apr 2026 17:05:28 +0100 Subject: [PATCH 02/11] KVM: arm64: pkvm: Adopt MARKER() to define host hypercall ranges The EL2 code defines ranges of host hypercalls that are either enabled at boot-time only, used by [nh]VHE KVM, or reserved to pKVM. The way these ranges are delineated is error prone, as the enum symbols defining the limits are expressed in terms of actual function symbols. This means that should a new function be added, special care must be taken to also update the limit symbol. Improve this by reusing the mechanism introduced for the vcpu_sysreg enum, which uses a MARKER() macro and some extra trickery to make the limit symbol standalone. Crucially, the limit symbol has the same value as the *following* symbol. The handle_host_hcall() function is then updated to make use of the new limit definitions and get rid of the brittle default upper limit. This allows for some more strict checks at build time, and the removal of an comparison at run time. Tested-by: Fuad Tabba Reviewed-by: Fuad Tabba Link: https://patch.msgid.link/20260414160528.2218858-1-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 12 ++++++++++-- arch/arm64/include/asm/kvm_host.h | 3 --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 10 +++++----- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 11dcdf434971..043495f7fc78 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -50,6 +50,9 @@ #include +#define MARKER(m) \ + m, __after_##m = m - 1 + enum __kvm_host_smccc_func { /* Hypercalls that are unavailable once pKVM has finalised. */ /* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */ @@ -59,8 +62,10 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs, __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs, __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config, + + MARKER(__KVM_HOST_SMCCC_FUNC_MIN_PKVM), + __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize, - __KVM_HOST_SMCCC_FUNC_MIN_PKVM = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize, /* Hypercalls that are always available and common to [nh]VHE/pKVM. */ __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc, @@ -84,7 +89,8 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v5_save_apr, __KVM_HOST_SMCCC_FUNC___vgic_v5_restore_vmcr_apr, - __KVM_HOST_SMCCC_FUNC_MAX_NO_PKVM = __KVM_HOST_SMCCC_FUNC___vgic_v5_restore_vmcr_apr, + + MARKER(__KVM_HOST_SMCCC_FUNC_PKVM_ONLY), /* Hypercalls that are available only when pKVM has finalised. */ __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp, @@ -108,6 +114,8 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load, __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put, __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid, + + MARKER(__KVM_HOST_SMCCC_FUNC_MAX) }; #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 851f6171751c..44211e86f5eb 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -450,9 +450,6 @@ struct kvm_vcpu_fault_info { r = __VNCR_START__ + ((VNCR_ ## r) / 8), \ __after_##r = __MAX__(__before_##r - 1, r) -#define MARKER(m) \ - m, __after_##m = m - 1 - enum vcpu_sysreg { __INVALID_SYSREG__, /* 0 is reserved as an invalid value */ MPIDR_EL1, /* MultiProcessor Affinity Register */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 8f7582d57ab5..1de9c70599c6 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -748,9 +748,11 @@ static const hcall_t host_hcall[] = { static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(unsigned long, id, host_ctxt, 0); - unsigned long hcall_min = 0, hcall_max = -1; + unsigned long hcall_min = 0, hcall_max = __KVM_HOST_SMCCC_FUNC_MAX; hcall_t hfn; + BUILD_BUG_ON(ARRAY_SIZE(host_hcall) != __KVM_HOST_SMCCC_FUNC_MAX); + /* * If pKVM has been initialised then reject any calls to the * early "privileged" hypercalls. Note that we cannot reject @@ -763,16 +765,14 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) if (static_branch_unlikely(&kvm_protected_mode_initialized)) { hcall_min = __KVM_HOST_SMCCC_FUNC_MIN_PKVM; } else { - hcall_max = __KVM_HOST_SMCCC_FUNC_MAX_NO_PKVM; + hcall_max = __KVM_HOST_SMCCC_FUNC_PKVM_ONLY; } id &= ~ARM_SMCCC_CALL_HINTS; id -= KVM_HOST_SMCCC_ID(0); - if (unlikely(id < hcall_min || id > hcall_max || - id >= ARRAY_SIZE(host_hcall))) { + if (unlikely(id < hcall_min || id >= hcall_max)) goto inval; - } hfn = host_hcall[id]; if (unlikely(!hfn)) From a0e6ae45af17e8b27958830595799c702ffbab8d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 7 Apr 2026 21:27:02 +0100 Subject: [PATCH 03/11] KVM: arm64: vgic: Fix IIDR revision field extracted from wrong value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The uaccess write handlers for GICD_IIDR in both GICv2 and GICv3 extract the revision field from 'reg' (the current IIDR value read back from the emulated distributor) instead of 'val' (the value userspace is trying to write). This means userspace can never actually change the implementation revision — the extracted value is always the current one. Fix the FIELD_GET to use 'val' so that userspace can select a different revision for migration compatibility. Fixes: 49a1a2c70a7f ("KVM: arm64: vgic-v3: Advertise GICR_CTLR.{IR, CES} as a new GICD_IIDR revision") Signed-off-by: David Woodhouse Link: https://patch.msgid.link/20260407210949.2076251-2-dwmw2@infradead.org Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/kvm/vgic/vgic-mmio-v2.c | 2 +- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c index 406845b3117c..0643e333db35 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -91,7 +91,7 @@ static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu, * migration from old kernels to new kernels with legacy * userspace. */ - reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg); + reg = FIELD_GET(GICD_IIDR_REVISION_MASK, val); switch (reg) { case KVM_VGIC_IMP_REV_2: case KVM_VGIC_IMP_REV_3: diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 89edb84d1ac6..5913a20d8301 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -194,7 +194,7 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu, if ((reg ^ val) & ~GICD_IIDR_REVISION_MASK) return -EINVAL; - reg = FIELD_GET(GICD_IIDR_REVISION_MASK, reg); + reg = FIELD_GET(GICD_IIDR_REVISION_MASK, val); switch (reg) { case KVM_VGIC_IMP_REV_2: case KVM_VGIC_IMP_REV_3: From 480ea48cad873b49a1fabd07c0847c3cf1c32286 Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Wed, 8 Apr 2026 11:41:18 +0000 Subject: [PATCH 04/11] KVM: arm64: Reject non compliant SMCCC function calls in pKVM Prevent the propagation of a function-id that has the top bits set since this is not compliant with the SMCCC spec and can overlap with the already known function-id decoders. (eg. if we invoke an smc with 0xffffffffc4000012 it will be decoded as a PSCI reset call). Instead, make it clear that we don't support it and return an error. Signed-off-by: Sebastian Ene Link: https://patch.msgid.link/20260408114118.422604-1-sebastianene@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 1de9c70599c6..06db299c37a8 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -805,6 +805,10 @@ static void handle_host_smc(struct kvm_cpu_context *host_ctxt) } func_id &= ~ARM_SMCCC_CALL_HINTS; + if (upper_32_bits(func_id)) { + cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED; + goto exit_skip_instr; + } handled = kvm_host_psci_handler(host_ctxt, func_id); if (!handled) From 7fe2cd4e1a3ad230d8fcc00cc99c4bcce4412a75 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 24 Apr 2026 09:49:03 +0100 Subject: [PATCH 05/11] KVM: arm64: Fix FEAT_Debugv8p9 to check DebugVer, not PMUVer FEAT_Debugv8p9 is incorrectly defined against ID_AA64DFR0_EL1.PMUVer instead of ID_AA64DFR0_EL1.DebugVer. All three consumers of the macro gate features that are architecturally tied to FEAT_Debugv8p9 (DebugVer = 0b1011, DDI0487 M.b A2.2.10): - HDFGRTR2_EL2.nMDSELR_EL1, HDFGWTR2_EL2.nMDSELR_EL1: MDSELR_EL1 is present only when FEAT_Debugv8p9 is implemented (D24.3.21). - MDCR_EL2.EBWE: the Extended Breakpoint and Watchpoint Enable bit is RES0 unless FEAT_Debugv8p9 is implemented (D24.3.17). Neither register has any dependency on PMUVer. FEAT_Debugv8p9 and FEAT_PMUv3p9 are independent. Per DDI0487 M.b A2.2.10, FEAT_Debugv8p9 is unconditionally mandatory from Armv8.9, whereas FEAT_PMUv3p9 is mandatory only when FEAT_PMUv3 is implemented. An Armv8.9 CPU without a PMU has DebugVer = 0b1011 but PMUVer = 0b0000, so the wrong field check would cause KVM to incorrectly treat EBWE and MDSELR_EL1 as RES0 on such hardware. Fixes: 4bc0fe089840 ("KVM: arm64: Add sanitisation for FEAT_FGT2 registers") Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-2-tabba@google.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/kvm/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index f35b8dddd7c1..093290b366e6 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -192,7 +192,7 @@ struct reg_feat_map_desc { #define FEAT_SRMASK ID_AA64MMFR4_EL1, SRMASK, IMP #define FEAT_PoPS ID_AA64MMFR4_EL1, PoPS, IMP #define FEAT_PFAR ID_AA64PFR1_EL1, PFAR, IMP -#define FEAT_Debugv8p9 ID_AA64DFR0_EL1, PMUVer, V3P9 +#define FEAT_Debugv8p9 ID_AA64DFR0_EL1, DebugVer, V8P9 #define FEAT_PMUv3_SS ID_AA64DFR0_EL1, PMSS, IMP #define FEAT_SEBEP ID_AA64DFR0_EL1, SEBEP, IMP #define FEAT_EBEP ID_AA64DFR1_EL1, EBEP, IMP From 2a623408112626d2625a6f00aed665861d59665c Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 24 Apr 2026 09:49:04 +0100 Subject: [PATCH 06/11] KVM: arm64: Fix typo in feature check comments Revists -> Revisit. The following patch will add another similar line. No functional change intended. Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-3-tabba@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/config.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index 093290b366e6..a722ea178f68 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -283,7 +283,7 @@ static bool feat_anerr(struct kvm *kvm) static bool feat_sme_smps(struct kvm *kvm) { /* - * Revists this if KVM ever supports SME -- this really should + * Revisit this if KVM ever supports SME -- this really should * look at the guest's view of SMIDR_EL1. Funnily enough, this * is not captured in the JSON file, but only as a note in the * ARM ARM. @@ -295,7 +295,7 @@ static bool feat_sme_smps(struct kvm *kvm) static bool feat_spe_fds(struct kvm *kvm) { /* - * Revists this if KVM ever supports SPE -- this really should + * Revisit this if KVM ever supports SPE -- this really should * look at the guest's view of PMSIDR_EL1. */ return (kvm_has_feat(kvm, FEAT_SPEv1p4) && @@ -305,7 +305,7 @@ static bool feat_spe_fds(struct kvm *kvm) static bool feat_trbe_mpam(struct kvm *kvm) { /* - * Revists this if KVM ever supports both MPAM and TRBE -- + * Revisit this if KVM ever supports both MPAM and TRBE -- * this really should look at the guest's view of TRBIDR_EL1. */ return (kvm_has_feat(kvm, FEAT_TRBE) && From 08d715338287a1affb4c7ad5733decef4558a5c8 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 24 Apr 2026 09:49:05 +0100 Subject: [PATCH 07/11] KVM: arm64: Fix FEAT_SPE_FnE to use PMSIDR_EL1.FnE, not PMSVer FEAT_SPE_FnE is architecturally detected via PMSIDR_EL1.FnE [6], not ID_AA64DFR0_EL1.PMSVer. The FEAT_X macro form (register, field, value) cannot encode a PMSIDR_EL1-based feature, so FEAT_SPE_FnE was defined identically to FEAT_SPEv1p2 (ID_AA64DFR0_EL1, PMSVer, V1P2), producing a duplicate that used PMSVer >= V1P2 as a proxy. Replace the macro with feat_spe_fne(), following the same pattern as the sibling feat_spe_fds(): guard on FEAT_SPEv1p2 and read PMSIDR_EL1.FnE [6] directly. Wire the two NEEDS_FEAT consumers to use the new function. Remove the now-unused FEAT_SPE_FnE macro. Fixes: 63d423a7635b ("KVM: arm64: Switch to table-driven FGU configuration") Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-4-tabba@google.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/kvm/config.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index a722ea178f68..0622162b089e 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -131,7 +131,6 @@ struct reg_feat_map_desc { } #define FEAT_SPE ID_AA64DFR0_EL1, PMSVer, IMP -#define FEAT_SPE_FnE ID_AA64DFR0_EL1, PMSVer, V1P2 #define FEAT_BRBE ID_AA64DFR0_EL1, BRBE, IMP #define FEAT_TRC_SR ID_AA64DFR0_EL1, TraceVer, IMP #define FEAT_PMUv3 ID_AA64DFR0_EL1, PMUVer, IMP @@ -302,6 +301,16 @@ static bool feat_spe_fds(struct kvm *kvm) (read_sysreg_s(SYS_PMSIDR_EL1) & PMSIDR_EL1_FDS)); } +static bool feat_spe_fne(struct kvm *kvm) +{ + /* + * Revisit this if KVM ever supports SPE -- this really should + * look at the guest's view of PMSIDR_EL1. + */ + return (kvm_has_feat(kvm, FEAT_SPEv1p2) && + (read_sysreg_s(SYS_PMSIDR_EL1) & PMSIDR_EL1_FnE)); +} + static bool feat_trbe_mpam(struct kvm *kvm) { /* @@ -537,7 +546,7 @@ static const struct reg_bits_to_feat_map hdfgrtr_feat_map[] = { HDFGRTR_EL2_PMBPTR_EL1 | HDFGRTR_EL2_PMBLIMITR_EL1, FEAT_SPE), - NEEDS_FEAT(HDFGRTR_EL2_nPMSNEVFR_EL1, FEAT_SPE_FnE), + NEEDS_FEAT(HDFGRTR_EL2_nPMSNEVFR_EL1, feat_spe_fne), NEEDS_FEAT(HDFGRTR_EL2_nBRBDATA | HDFGRTR_EL2_nBRBCTL | HDFGRTR_EL2_nBRBIDR, @@ -605,7 +614,7 @@ static const struct reg_bits_to_feat_map hdfgwtr_feat_map[] = { HDFGWTR_EL2_PMBPTR_EL1 | HDFGWTR_EL2_PMBLIMITR_EL1, FEAT_SPE), - NEEDS_FEAT(HDFGWTR_EL2_nPMSNEVFR_EL1, FEAT_SPE_FnE), + NEEDS_FEAT(HDFGWTR_EL2_nPMSNEVFR_EL1, feat_spe_fne), NEEDS_FEAT(HDFGWTR_EL2_nBRBDATA | HDFGWTR_EL2_nBRBCTL, FEAT_BRBE), From d89fdda7dd8a488f922e1175e6782f781ba8a23b Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 24 Apr 2026 09:49:06 +0100 Subject: [PATCH 08/11] KVM: arm64: Fix kvm_vcpu_initialized() macro parameter The macro is defined with parameter 'v' but the body references the literal token 'vcpu' instead, causing it to silently operate on whatever 'vcpu' resolves to in the caller's scope rather than the value passed by the caller. All current call sites happen to use a variable named 'vcpu', so the bug is latent. Fixes: e016333745c7 ("KVM: arm64: Only reset vCPU-scoped feature ID regs once") Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-5-tabba@google.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 44211e86f5eb..65eead8362e0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1545,7 +1545,7 @@ static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature) #define kvm_vcpu_has_feature(k, f) __vcpu_has_feature(&(k)->arch, (f)) #define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f)) -#define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED) +#define kvm_vcpu_initialized(v) vcpu_get_flag(v, VCPU_INITIALIZED) int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM From 73b9c1e5da84cd69b1a86e374e450817cd051371 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Fri, 24 Apr 2026 09:49:07 +0100 Subject: [PATCH 09/11] KVM: arm64: Fix pin leak and publication ordering in __pkvm_init_vcpu() Two bugs exist in the vCPU initialisation path: 1. If a check fails after hyp_pin_shared_mem() succeeds, the cleanup path jumps to 'unlock' without calling unpin_host_vcpu() or unpin_host_sve_state(), permanently leaking pin references on the host vCPU and SVE state pages. Extract a register_hyp_vcpu() helper that performs the checks and the store. When register_hyp_vcpu() returns an error, call unpin_host_vcpu() and unpin_host_sve_state() inline before falling through to the existing 'unlock' label. 2. register_hyp_vcpu() publishes the new vCPU pointer into 'hyp_vm->vcpus[]' with a bare store, allowing a concurrent caller of pkvm_load_hyp_vcpu() to observe a partially initialised vCPU object. Ensure the store uses smp_store_release() and the load uses smp_load_acquire(). While 'vm_table_lock' currently serialises the store and the load, these barriers ensure the reader sees the fully initialised 'hyp_vcpu' object even if there were a lockless path or if the lock's own ordering guarantees were insufficient for nested object initialization. Fixes: 49af6ddb8e5c ("KVM: arm64: Add infrastructure to create and track pKVM instances at EL2") Reported-by: Ben Simner Co-developed-by: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-6-tabba@google.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 38 ++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 7ed96d64d611..e7496eb85628 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -266,7 +266,8 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle, if (hyp_vm->kvm.created_vcpus <= vcpu_idx) goto unlock; - hyp_vcpu = hyp_vm->vcpus[vcpu_idx]; + /* Pairs with smp_store_release() in register_hyp_vcpu(). */ + hyp_vcpu = smp_load_acquire(&hyp_vm->vcpus[vcpu_idx]); if (!hyp_vcpu) goto unlock; @@ -860,12 +861,30 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, * the page-aligned size of 'struct pkvm_hyp_vcpu'. * Return 0 on success, negative error code on failure. */ +static int register_hyp_vcpu(struct pkvm_hyp_vm *hyp_vm, + struct pkvm_hyp_vcpu *hyp_vcpu) +{ + unsigned int idx = hyp_vcpu->vcpu.vcpu_idx; + + if (idx >= hyp_vm->kvm.created_vcpus) + return -EINVAL; + + if (hyp_vm->vcpus[idx]) + return -EINVAL; + + /* + * Ensure the hyp_vcpu is initialised before publishing it to + * the vCPU-load path via 'hyp_vm->vcpus[]'. + */ + smp_store_release(&hyp_vm->vcpus[idx], hyp_vcpu); + return 0; +} + int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, unsigned long vcpu_hva) { struct pkvm_hyp_vcpu *hyp_vcpu; struct pkvm_hyp_vm *hyp_vm; - unsigned int idx; int ret; hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu)); @@ -884,18 +903,11 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, if (ret) goto unlock; - idx = hyp_vcpu->vcpu.vcpu_idx; - if (idx >= hyp_vm->kvm.created_vcpus) { - ret = -EINVAL; - goto unlock; + ret = register_hyp_vcpu(hyp_vm, hyp_vcpu); + if (ret) { + unpin_host_vcpu(host_vcpu); + unpin_host_sve_state(hyp_vcpu); } - - if (hyp_vm->vcpus[idx]) { - ret = -EINVAL; - goto unlock; - } - - hyp_vm->vcpus[idx] = hyp_vcpu; unlock: hyp_spin_unlock(&vm_table_lock); From 5bb0aed57ba944f8c201e4e82ec066e0187e0f85 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 24 Apr 2026 09:49:08 +0100 Subject: [PATCH 10/11] KVM: arm64: Fix initialisation order in __pkvm_init_finalise() fix_host_ownership() walks the hypervisor's stage-1 page-table to adjust the host's stage-2 accordingly. Any such adjustment that requires cache maintenance operations depends on the per-CPU hyp fixmap being present. However, fix_host_ownership() is currently called before fix_hyp_pgtable_refcnt() and hyp_create_fixmap(), so the fixmap does not yet exist when it runs. This is benign today because the host stage-2 starts empty and no CMOs are needed, but it becomes a latent crash as soon as fix_host_ownership() is extended to operate on a non-empty page-table. Reorder the calls so that fix_hyp_pgtable_refcnt() and hyp_create_fixmap() complete before fix_host_ownership() is invoked. Fixes: 0d16d12eb26e ("KVM: arm64: Fix-up hyp stage-1 refcounts for all pages mapped at EL2") Signed-off-by: Quentin Perret Signed-off-by: Fuad Tabba Link: https://patch.msgid.link/20260424084908.370776-7-tabba@google.com Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/kvm/hyp/nvhe/setup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index d8e5b563fd3d..d461981616d9 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -312,10 +312,6 @@ void __noreturn __pkvm_init_finalise(void) }; pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; - ret = fix_host_ownership(); - if (ret) - goto out; - ret = fix_hyp_pgtable_refcnt(); if (ret) goto out; @@ -324,6 +320,10 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; + ret = fix_host_ownership(); + if (ret) + goto out; + ret = hyp_ffa_init(ffa_proxy_pages); if (ret) goto out; From 4ce98bf0865c349e7026ad9c14f48da264920953 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 23 Apr 2026 17:36:07 +0100 Subject: [PATCH 11/11] KVM: arm64: Wake-up from WFI when iqrchip is in userspace It appears that there is nothing in the wake-up path that evaluates whether the in-kernel interrupts are pending unless we have a vgic. This means that the userspace irqchip support has been broken for about four years, and nobody noticed. It was also broken before as we wouldn't wake-up on a PMU interrupt, but hey, who cares... It is probably time to remove the feature altogether, because it was a terrible idea 10 years ago, and it still is. Fixes: b57de4ffd7c6d ("KVM: arm64: Simplify kvm_cpu_has_pending_timer()") Link: https://patch.msgid.link/20260423163607.486345-1-maz@kernel.org Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org --- arch/arm64/kvm/arm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 176cbe8baad3..8bb2c7422cc8 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -824,6 +824,10 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF | HCR_VSE); + irq_lines |= (!irqchip_in_kernel(v->kvm) && + (kvm_timer_should_notify_user(v) || + kvm_pmu_should_notify_user(v))); + return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) && !kvm_arm_vcpu_stopped(v) && !v->arch.pause); }