mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-12-28 05:34:13 -05:00
x86/tdx: Fix arch_safe_halt() execution for TDX VMs
Direct HLT instruction execution causes #VEs for TDX VMs which are routed to hypervisor via TDCALL. If HLT is executed in STI-shadow, resulting #VE handler will enable interrupts before TDCALL is routed to hypervisor leading to missed wakeup events, as current TDX spec doesn't expose interruptibility state information to allow #VE handler to selectively enable interrupts. Commit bfe6ed0c67 ("x86/tdx: Add HLT support for TDX guests") prevented the idle routines from executing HLT instruction in STI-shadow. But it missed the paravirt routine which can be reached via this path as an example: kvm_wait() => safe_halt() => raw_safe_halt() => arch_safe_halt() => irq.safe_halt() => pv_native_safe_halt() To reliably handle arch_safe_halt() for TDX VMs, introduce explicit dependency on CONFIG_PARAVIRT and override paravirt halt()/safe_halt() routines with TDX-safe versions that execute direct TDCALL and needed interrupt flag updates. Executing direct TDCALL brings in additional benefit of avoiding HLT related #VEs altogether. As tested by Ryan Afranji: "Tested with the specjbb2015 benchmark. It has heavy lock contention which leads to many halt calls. TDX VMs suffered a poor score before this patchset. Verified the major performance improvement with this patchset applied." Fixes: bfe6ed0c67 ("x86/tdx: Add HLT support for TDX guests") Signed-off-by: Vishal Annapurve <vannapurve@google.com> Signed-off-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Tested-by: Ryan Afranji <afranji@google.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Brian Gerst <brgerst@gmail.com> Cc: Juergen Gross <jgross@suse.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250228014416.3925664-3-vannapurve@google.com
This commit is contained in:
committed by
Ingo Molnar
parent
22cc5ca5de
commit
9f98a4f4e7
@@ -878,6 +878,7 @@ config INTEL_TDX_GUEST
|
||||
depends on X86_64 && CPU_SUP_INTEL
|
||||
depends on X86_X2APIC
|
||||
depends on EFI_STUB
|
||||
depends on PARAVIRT
|
||||
select ARCH_HAS_CC_PLATFORM
|
||||
select X86_MEM_ENCRYPT
|
||||
select X86_MCE
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include <asm/ia32.h>
|
||||
#include <asm/insn.h>
|
||||
#include <asm/insn-eval.h>
|
||||
#include <asm/paravirt_types.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/set_memory.h>
|
||||
#include <asm/traps.h>
|
||||
@@ -398,7 +399,7 @@ static int handle_halt(struct ve_info *ve)
|
||||
return ve_instr_len(ve);
|
||||
}
|
||||
|
||||
void __cpuidle tdx_safe_halt(void)
|
||||
void __cpuidle tdx_halt(void)
|
||||
{
|
||||
const bool irq_disabled = false;
|
||||
|
||||
@@ -409,6 +410,16 @@ void __cpuidle tdx_safe_halt(void)
|
||||
WARN_ONCE(1, "HLT instruction emulation failed\n");
|
||||
}
|
||||
|
||||
static void __cpuidle tdx_safe_halt(void)
|
||||
{
|
||||
tdx_halt();
|
||||
/*
|
||||
* "__cpuidle" section doesn't support instrumentation, so stick
|
||||
* with raw_* variant that avoids tracing hooks.
|
||||
*/
|
||||
raw_local_irq_enable();
|
||||
}
|
||||
|
||||
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
|
||||
{
|
||||
struct tdx_module_args args = {
|
||||
@@ -1109,6 +1120,19 @@ void __init tdx_early_init(void)
|
||||
x86_platform.guest.enc_kexec_begin = tdx_kexec_begin;
|
||||
x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
|
||||
|
||||
/*
|
||||
* Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that
|
||||
* will enable interrupts before HLT TDCALL invocation if executed
|
||||
* in STI-shadow, possibly resulting in missed wakeup events.
|
||||
*
|
||||
* Modify all possible HLT execution paths to use TDX specific routines
|
||||
* that directly execute TDCALL and toggle the interrupt state as
|
||||
* needed after TDCALL completion. This also reduces HLT related #VEs
|
||||
* in addition to having a reliable halt logic execution.
|
||||
*/
|
||||
pv_ops.irq.safe_halt = tdx_safe_halt;
|
||||
pv_ops.irq.halt = tdx_halt;
|
||||
|
||||
/*
|
||||
* TDX intercepts the RDMSR to read the X2APIC ID in the parallel
|
||||
* bringup low level code. That raises #VE which cannot be handled
|
||||
|
||||
@@ -58,7 +58,7 @@ void tdx_get_ve_info(struct ve_info *ve);
|
||||
|
||||
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
|
||||
|
||||
void tdx_safe_halt(void);
|
||||
void tdx_halt(void);
|
||||
|
||||
bool tdx_early_handle_ve(struct pt_regs *regs);
|
||||
|
||||
@@ -72,7 +72,7 @@ void __init tdx_dump_td_ctls(u64 td_ctls);
|
||||
#else
|
||||
|
||||
static inline void tdx_early_init(void) { };
|
||||
static inline void tdx_safe_halt(void) { };
|
||||
static inline void tdx_halt(void) { };
|
||||
|
||||
static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
|
||||
|
||||
|
||||
@@ -939,7 +939,7 @@ void __init select_idle_routine(void)
|
||||
static_call_update(x86_idle, mwait_idle);
|
||||
} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
|
||||
pr_info("using TDX aware idle routine\n");
|
||||
static_call_update(x86_idle, tdx_safe_halt);
|
||||
static_call_update(x86_idle, tdx_halt);
|
||||
} else {
|
||||
static_call_update(x86_idle, default_idle);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user