mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-03 20:34:23 -04:00
Merge tag 'kvm-x86-vmx-6.15' of https://github.com/kvm-x86/linux into HEAD
KVM VMX changes for 6.15 - Fix a bug where KVM unnecessarily reads XFD_ERR from hardware and thus modifies the vCPU's XFD_ERR on a #NM due to CR0.TS=1. - Pass XFD_ERR as a pseudo-payload when injecting #NM as a preparatory step for upcoming FRED virtualization support. - Decouple the EPT entry RWX protection bit macros from the EPT Violation bits as a general cleanup, and in anticipation of adding support for emulating Mode-Based Execution (MBEC). - Reject KVM_RUN if userspace manages to gain control and stuff invalid guest state while KVM is in the middle of emulating nested VM-Enter. - Add a macro to handle KVM's sanity checks on entry/exit VMCS control pairs in anticipation of adding sanity checks for secondary exit controls (the primary field is out of bits).
This commit is contained in:
@@ -580,18 +580,22 @@ enum vm_entry_failure_code {
|
||||
/*
|
||||
* Exit Qualifications for EPT Violations
|
||||
*/
|
||||
#define EPT_VIOLATION_ACC_READ_BIT 0
|
||||
#define EPT_VIOLATION_ACC_WRITE_BIT 1
|
||||
#define EPT_VIOLATION_ACC_INSTR_BIT 2
|
||||
#define EPT_VIOLATION_RWX_SHIFT 3
|
||||
#define EPT_VIOLATION_GVA_IS_VALID_BIT 7
|
||||
#define EPT_VIOLATION_GVA_TRANSLATED_BIT 8
|
||||
#define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT)
|
||||
#define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT)
|
||||
#define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT)
|
||||
#define EPT_VIOLATION_RWX_MASK (VMX_EPT_RWX_MASK << EPT_VIOLATION_RWX_SHIFT)
|
||||
#define EPT_VIOLATION_GVA_IS_VALID (1 << EPT_VIOLATION_GVA_IS_VALID_BIT)
|
||||
#define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
|
||||
#define EPT_VIOLATION_ACC_READ BIT(0)
|
||||
#define EPT_VIOLATION_ACC_WRITE BIT(1)
|
||||
#define EPT_VIOLATION_ACC_INSTR BIT(2)
|
||||
#define EPT_VIOLATION_PROT_READ BIT(3)
|
||||
#define EPT_VIOLATION_PROT_WRITE BIT(4)
|
||||
#define EPT_VIOLATION_PROT_EXEC BIT(5)
|
||||
#define EPT_VIOLATION_PROT_MASK (EPT_VIOLATION_PROT_READ | \
|
||||
EPT_VIOLATION_PROT_WRITE | \
|
||||
EPT_VIOLATION_PROT_EXEC)
|
||||
#define EPT_VIOLATION_GVA_IS_VALID BIT(7)
|
||||
#define EPT_VIOLATION_GVA_TRANSLATED BIT(8)
|
||||
|
||||
#define EPT_VIOLATION_RWX_TO_PROT(__epte) (((__epte) & VMX_EPT_RWX_MASK) << 3)
|
||||
|
||||
static_assert(EPT_VIOLATION_RWX_TO_PROT(VMX_EPT_RWX_MASK) ==
|
||||
(EPT_VIOLATION_PROT_READ | EPT_VIOLATION_PROT_WRITE | EPT_VIOLATION_PROT_EXEC));
|
||||
|
||||
/*
|
||||
* Exit Qualifications for NOTIFY VM EXIT
|
||||
|
||||
@@ -510,8 +510,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
|
||||
* Note, pte_access holds the raw RWX bits from the EPTE, not
|
||||
* ACC_*_MASK flags!
|
||||
*/
|
||||
walker->fault.exit_qualification |= (pte_access & VMX_EPT_RWX_MASK) <<
|
||||
EPT_VIOLATION_RWX_SHIFT;
|
||||
walker->fault.exit_qualification |= EPT_VIOLATION_RWX_TO_PROT(pte_access);
|
||||
}
|
||||
#endif
|
||||
walker->fault.address = addr;
|
||||
|
||||
@@ -2578,6 +2578,34 @@ static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
|
||||
return ctl_opt & allowed;
|
||||
}
|
||||
|
||||
#define vmx_check_entry_exit_pairs(pairs, entry_controls, exit_controls) \
|
||||
({ \
|
||||
int i, r = 0; \
|
||||
\
|
||||
BUILD_BUG_ON(sizeof(pairs[0].entry_control) != sizeof(entry_controls)); \
|
||||
BUILD_BUG_ON(sizeof(pairs[0].exit_control) != sizeof(exit_controls)); \
|
||||
\
|
||||
for (i = 0; i < ARRAY_SIZE(pairs); i++) { \
|
||||
typeof(entry_controls) n_ctrl = pairs[i].entry_control; \
|
||||
typeof(exit_controls) x_ctrl = pairs[i].exit_control; \
|
||||
\
|
||||
if (!(entry_controls & n_ctrl) == !(exit_controls & x_ctrl)) \
|
||||
continue; \
|
||||
\
|
||||
pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, " \
|
||||
"entry = %llx (%llx), exit = %llx (%llx)\n", \
|
||||
(u64)(entry_controls & n_ctrl), (u64)n_ctrl, \
|
||||
(u64)(exit_controls & x_ctrl), (u64)x_ctrl); \
|
||||
\
|
||||
if (error_on_inconsistent_vmcs_config) \
|
||||
r = -EIO; \
|
||||
\
|
||||
entry_controls &= ~n_ctrl; \
|
||||
exit_controls &= ~x_ctrl; \
|
||||
} \
|
||||
r; \
|
||||
})
|
||||
|
||||
static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
|
||||
struct vmx_capability *vmx_cap)
|
||||
{
|
||||
@@ -2589,7 +2617,6 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
|
||||
u32 _vmentry_control = 0;
|
||||
u64 basic_msr;
|
||||
u64 misc_msr;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* LOAD/SAVE_DEBUG_CONTROLS are absent because both are mandatory.
|
||||
@@ -2693,22 +2720,9 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
|
||||
&_vmentry_control))
|
||||
return -EIO;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) {
|
||||
u32 n_ctrl = vmcs_entry_exit_pairs[i].entry_control;
|
||||
u32 x_ctrl = vmcs_entry_exit_pairs[i].exit_control;
|
||||
|
||||
if (!(_vmentry_control & n_ctrl) == !(_vmexit_control & x_ctrl))
|
||||
continue;
|
||||
|
||||
pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, entry = %x, exit = %x\n",
|
||||
_vmentry_control & n_ctrl, _vmexit_control & x_ctrl);
|
||||
|
||||
if (error_on_inconsistent_vmcs_config)
|
||||
return -EIO;
|
||||
|
||||
_vmentry_control &= ~n_ctrl;
|
||||
_vmexit_control &= ~x_ctrl;
|
||||
}
|
||||
if (vmx_check_entry_exit_pairs(vmcs_entry_exit_pairs,
|
||||
_vmentry_control, _vmexit_control))
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
* Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
|
||||
@@ -5211,6 +5225,12 @@ bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
|
||||
(kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
|
||||
}
|
||||
|
||||
static bool is_xfd_nm_fault(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.guest_fpu.fpstate->xfd &&
|
||||
!kvm_is_cr0_bit_set(vcpu, X86_CR0_TS);
|
||||
}
|
||||
|
||||
static int handle_exception_nmi(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
@@ -5237,7 +5257,8 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
|
||||
* point.
|
||||
*/
|
||||
if (is_nm_fault(intr_info)) {
|
||||
kvm_queue_exception(vcpu, NM_VECTOR);
|
||||
kvm_queue_exception_p(vcpu, NM_VECTOR,
|
||||
is_xfd_nm_fault(vcpu) ? vcpu->arch.guest_fpu.xfd_err : 0);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -5817,7 +5838,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
|
||||
error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
|
||||
? PFERR_FETCH_MASK : 0;
|
||||
/* ept page table entry is present? */
|
||||
error_code |= (exit_qualification & EPT_VIOLATION_RWX_MASK)
|
||||
error_code |= (exit_qualification & EPT_VIOLATION_PROT_MASK)
|
||||
? PFERR_PRESENT_MASK : 0;
|
||||
|
||||
if (error_code & EPT_VIOLATION_GVA_IS_VALID)
|
||||
@@ -5871,11 +5892,35 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
|
||||
/*
|
||||
* Returns true if emulation is required (due to the vCPU having invalid state
|
||||
* with unrestricted guest mode disabled) and KVM can't faithfully emulate the
|
||||
* current vCPU state.
|
||||
*/
|
||||
static bool vmx_unhandleable_emulation_required(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
|
||||
return vmx->emulation_required && !vmx->rmode.vm86_active &&
|
||||
if (!vmx->emulation_required)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* It is architecturally impossible for emulation to be required when a
|
||||
* nested VM-Enter is pending completion, as VM-Enter will VM-Fail if
|
||||
* guest state is invalid and unrestricted guest is disabled, i.e. KVM
|
||||
* should synthesize VM-Fail instead of emulating L2 code. This path is
|
||||
* only reachable if userspace modifies L2 guest state after KVM has
|
||||
* performed the nested VM-Enter consistency checks.
|
||||
*/
|
||||
if (vmx->nested.nested_run_pending)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* KVM only supports emulating exceptions if the vCPU is in Real Mode.
|
||||
* If emulation is required, KVM can't perform a successful VM-Enter to
|
||||
* inject the exception.
|
||||
*/
|
||||
return !vmx->rmode.vm86_active &&
|
||||
(kvm_is_exception_pending(vcpu) || vcpu->arch.exception.injected);
|
||||
}
|
||||
|
||||
@@ -5898,7 +5943,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
|
||||
if (!kvm_emulate_instruction(vcpu, 0))
|
||||
return 0;
|
||||
|
||||
if (vmx_emulation_required_with_pending_exception(vcpu)) {
|
||||
if (vmx_unhandleable_emulation_required(vcpu)) {
|
||||
kvm_prepare_emulation_failure_exit(vcpu);
|
||||
return 0;
|
||||
}
|
||||
@@ -5922,7 +5967,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
|
||||
|
||||
int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vmx_emulation_required_with_pending_exception(vcpu)) {
|
||||
if (vmx_unhandleable_emulation_required(vcpu)) {
|
||||
kvm_prepare_emulation_failure_exit(vcpu);
|
||||
return 0;
|
||||
}
|
||||
@@ -6997,16 +7042,15 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
|
||||
* MSR value is not clobbered by the host activity before the guest
|
||||
* has chance to consume it.
|
||||
*
|
||||
* Do not blindly read xfd_err here, since this exception might
|
||||
* be caused by L1 interception on a platform which doesn't
|
||||
* support xfd at all.
|
||||
* Update the guest's XFD_ERR if and only if XFD is enabled, as the #NM
|
||||
* interception may have been caused by L1 interception. Per the SDM,
|
||||
* XFD_ERR is not modified for non-XFD #NM, i.e. if CR0.TS=1.
|
||||
*
|
||||
* Do it conditionally upon guest_fpu::xfd. xfd_err matters
|
||||
* only when xfd contains a non-zero value.
|
||||
*
|
||||
* Queuing exception is done in vmx_handle_exit. See comment there.
|
||||
* Note, XFD_ERR is updated _before_ the #NM interception check, i.e.
|
||||
* unlike CR2 and DR6, the value is not a payload that is attached to
|
||||
* the #NM exception.
|
||||
*/
|
||||
if (vcpu->arch.guest_fpu.fpstate->xfd)
|
||||
if (is_xfd_nm_fault(vcpu))
|
||||
rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user