diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h
index ad2cc82abf79..f0aa6996811f 100644
--- a/arch/x86/include/asm/kvm-x86-pmu-ops.h
+++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h
@@ -24,6 +24,8 @@ KVM_X86_PMU_OP_OPTIONAL(deliver_pmi)
 KVM_X86_PMU_OP_OPTIONAL(cleanup)
 KVM_X86_PMU_OP_OPTIONAL(write_global_ctrl)
+KVM_X86_PMU_OP(mediated_load)
+KVM_X86_PMU_OP(mediated_put)
 
 #undef KVM_X86_PMU_OP
 #undef KVM_X86_PMU_OP_OPTIONAL
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3d0a0950d20a..4d3566bb1a93 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1219,6 +1219,7 @@
 #define MSR_CORE_PERF_GLOBAL_STATUS	0x0000038e
 #define MSR_CORE_PERF_GLOBAL_CTRL	0x0000038f
 #define MSR_CORE_PERF_GLOBAL_OVF_CTRL	0x00000390
+#define MSR_CORE_PERF_GLOBAL_STATUS_SET	0x00000391
 
 #define MSR_PERF_METRICS		0x00000329
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 24f5c14715ef..f6387c67b25c 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -880,10 +880,13 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			diff = pmu->global_ctrl ^ data;
 			pmu->global_ctrl = data;
 			reprogram_counters(pmu, diff);
-
-			if (kvm_vcpu_has_mediated_pmu(vcpu))
-				kvm_pmu_call(write_global_ctrl)(data);
 		}
+		/*
+		 * Unconditionally forward writes to vendor code, i.e. to the
+		 * VMC{B,S}, as pmu->global_ctrl is per-VCPU, not per-VMC{B,S}.
+		 */
+		if (kvm_vcpu_has_mediated_pmu(vcpu))
+			kvm_pmu_call(write_global_ctrl)(data);
 		break;
 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 		/*
@@ -1244,3 +1247,124 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
 	kfree(filter);
 	return r;
 }
+
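+/*
+ * Translate a counter/selector index into its MSR address using the vendor
+ * provided base and stride: Intel's PMC, event selector, and fixed counter
+ * MSRs are each contiguous (stride of 1), whereas AMD interleaves each
+ * PERF_CTLx/PERF_CTRx pair (stride of 2).  AMD has no fixed counters, so its
+ * FIXED_COUNTER_BASE is simply '0' and the fixed counter loops are no-ops.
+ */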
+static __always_inline u32 fixed_counter_msr(u32 idx)
+{
+	return kvm_pmu_ops.FIXED_COUNTER_BASE + idx * kvm_pmu_ops.MSR_STRIDE;
+}
+
+static __always_inline u32 gp_counter_msr(u32 idx)
+{
+	return kvm_pmu_ops.GP_COUNTER_BASE + idx * kvm_pmu_ops.MSR_STRIDE;
+}
+
+static __always_inline u32 gp_eventsel_msr(u32 idx)
+{
+	return kvm_pmu_ops.GP_EVENTSEL_BASE + idx * kvm_pmu_ops.MSR_STRIDE;
+}
+
+static void kvm_pmu_load_guest_pmcs(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc;
+	u32 i;
+
+	/*
+	 * No need to zero out unexposed GP/fixed counters/selectors since
+	 * RDPMC is intercepted if hardware has counters that aren't visible
+	 * to the guest (KVM will inject #GP as appropriate).
+	 */
+	for (i = 0; i < pmu->nr_arch_gp_counters; i++) {
+		pmc = &pmu->gp_counters[i];
+
+		wrmsrq(gp_counter_msr(i), pmc->counter);
+		wrmsrq(gp_eventsel_msr(i), pmc->eventsel_hw);
+	}
+	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
+		pmc = &pmu->fixed_counters[i];
+
+		wrmsrq(fixed_counter_msr(i), pmc->counter);
+	}
+}
+
+void kvm_mediated_pmu_load(struct kvm_vcpu *vcpu)
+{
+	if (!kvm_vcpu_has_mediated_pmu(vcpu) ||
+	    KVM_BUG_ON(!lapic_in_kernel(vcpu), vcpu->kvm))
+		return;
+
+	lockdep_assert_irqs_disabled();
+
+	perf_load_guest_context();
+
+	/*
+	 * Explicitly clear PERF_GLOBAL_CTRL, as "loading" the guest's context
+	 * disables all individual counters (if any were enabled), but doesn't
+	 * globally disable the entire PMU.  Loading event selectors and PMCs
+	 * with guest values while PERF_GLOBAL_CTRL is non-zero will generate
+	 * unexpected events and PMIs.
+	 *
+	 * VMX will enable/disable counters at VM-Enter/VM-Exit by atomically
+	 * loading PERF_GLOBAL_CTRL; SVM effectively performs the switch by
+	 * configuring all events to be GUEST_ONLY.  Clear PERF_GLOBAL_CTRL
+	 * even for SVM to minimize the damage if a perf event is left enabled,
+	 * and to ensure a consistent starting state.
+	 */
+	wrmsrq(kvm_pmu_ops.PERF_GLOBAL_CTRL, 0);
+
+	perf_load_guest_lvtpc(kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVTPC));
+
+	kvm_pmu_load_guest_pmcs(vcpu);
+
+	kvm_pmu_call(mediated_load)(vcpu);
+}
+
+static void kvm_pmu_put_guest_pmcs(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc;
+	u32 i;
+
+	/*
+	 * Clear selectors and counters to ensure hardware doesn't count using
+	 * guest controls when the host (perf) restores its state.
+	 */
+	for (i = 0; i < pmu->nr_arch_gp_counters; i++) {
+		pmc = &pmu->gp_counters[i];
+
+		pmc->counter = rdpmc(i);
+		if (pmc->counter)
+			wrmsrq(gp_counter_msr(i), 0);
+		if (pmc->eventsel_hw)
+			wrmsrq(gp_eventsel_msr(i), 0);
+	}
+
+	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
+		pmc = &pmu->fixed_counters[i];
+
+		pmc->counter = rdpmc(INTEL_PMC_FIXED_RDPMC_BASE | i);
+		if (pmc->counter)
+			wrmsrq(fixed_counter_msr(i), 0);
+	}
+}
+
+void kvm_mediated_pmu_put(struct kvm_vcpu *vcpu)
+{
+	if (!kvm_vcpu_has_mediated_pmu(vcpu) ||
+	    KVM_BUG_ON(!lapic_in_kernel(vcpu), vcpu->kvm))
+		return;
+
+	lockdep_assert_irqs_disabled();
+
+	/*
+	 * Defer handling of PERF_GLOBAL_CTRL to vendor code.  On Intel, it's
+	 * atomically cleared on VM-Exit, i.e. doesn't need to be cleared here.
+	 */
+	kvm_pmu_call(mediated_put)(vcpu);
+
+	kvm_pmu_put_guest_pmcs(vcpu);
+
+	perf_put_guest_lvtpc();
+
+	perf_put_guest_context();
+}
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 9a199109d672..25b583da9ee2 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -38,11 +38,19 @@ struct kvm_pmu_ops {
 	void (*cleanup)(struct kvm_vcpu *vcpu);
 
 	bool (*is_mediated_pmu_supported)(struct x86_pmu_capability *host_pmu);
+	void (*mediated_load)(struct kvm_vcpu *vcpu);
+	void (*mediated_put)(struct kvm_vcpu *vcpu);
 	void (*write_global_ctrl)(u64 global_ctrl);
 
 	const u64 EVENTSEL_EVENT;
 	const int MAX_NR_GP_COUNTERS;
 	const int MIN_NR_GP_COUNTERS;
+
+	const u32 PERF_GLOBAL_CTRL;
+	const u32 GP_EVENTSEL_BASE;
+	const u32 GP_COUNTER_BASE;
+	const u32 FIXED_COUNTER_BASE;
+	const u32 MSR_STRIDE;
 };
 
 void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops);
@@ -240,6 +248,8 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
 void kvm_pmu_instruction_retired(struct kvm_vcpu *vcpu);
 void kvm_pmu_branch_retired(struct kvm_vcpu *vcpu);
+void kvm_mediated_pmu_load(struct kvm_vcpu *vcpu);
+void kvm_mediated_pmu_put(struct kvm_vcpu *vcpu);
 
 bool is_vmware_backdoor_pmc(u32 pmc_idx);
 bool kvm_need_perf_global_ctrl_intercept(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 6d5f791126b1..7aa298eeb072 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -234,6 +234,32 @@ static bool amd_pmu_is_mediated_pmu_supported(struct x86_pmu_capability *host_pmu)
 	return host_pmu->version >= 2;
 }
 
+static void amd_mediated_pmu_load(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	u64 global_status;
+
+	rdmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, global_status);
+	/* Clear the host's global_status MSR if it's non-zero. */
+	if (global_status)
+		wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, global_status);
+
+	wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET, pmu->global_status);
+	wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, pmu->global_ctrl);
+}
+
+static void amd_mediated_pmu_put(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
+	wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
+	rdmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, pmu->global_status);
+
+	/* Clear the guest's global status bits if non-zero. */
+	if (pmu->global_status)
+		wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, pmu->global_status);
+}
+
 struct kvm_pmu_ops amd_pmu_ops __initdata = {
 	.rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc,
 	.msr_idx_to_pmc = amd_msr_idx_to_pmc,
@@ -245,8 +271,16 @@ struct kvm_pmu_ops amd_pmu_ops __initdata = {
 	.init = amd_pmu_init,
 
 	.is_mediated_pmu_supported = amd_pmu_is_mediated_pmu_supported,
+	.mediated_load = amd_mediated_pmu_load,
+	.mediated_put = amd_mediated_pmu_put,
 
 	.EVENTSEL_EVENT = AMD64_EVENTSEL_EVENT,
 	.MAX_NR_GP_COUNTERS = KVM_MAX_NR_AMD_GP_COUNTERS,
 	.MIN_NR_GP_COUNTERS = AMD64_NUM_COUNTERS,
+
+	.PERF_GLOBAL_CTRL = MSR_AMD64_PERF_CNTR_GLOBAL_CTL,
+	.GP_EVENTSEL_BASE = MSR_F15H_PERF_CTL0,
+	.GP_COUNTER_BASE = MSR_F15H_PERF_CTR0,
+	.FIXED_COUNTER_BASE = 0,
+	.MSR_STRIDE = 2,
 };
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index dca45f5151f9..1a616eb3ff1c 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4367,6 +4367,9 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
 	vcpu->arch.regs_avail &= ~SVM_REGS_LAZY_LOAD_SET;
 
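+	/*
+	 * With a mediated PMU, writes to PERF_CNTR_GLOBAL_CTL may not be
+	 * intercepted, in which case the guest can modify the MSR at will;
+	 * refresh KVM's snapshot from hardware before the guest's PMU state
+	 * is put.
+	 */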
+	if (!msr_write_intercepted(vcpu, MSR_AMD64_PERF_CNTR_GLOBAL_CTL))
+		rdmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, vcpu_to_pmu(vcpu)->global_ctrl);
+
 	trace_kvm_exit(vcpu, KVM_ISA_SVM);
 
 	svm_complete_interrupts(vcpu);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 855240678300..55249fa4db95 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -792,6 +792,42 @@ static void intel_pmu_write_global_ctrl(u64 global_ctrl)
 	vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, global_ctrl);
 }
+
+static void intel_mediated_pmu_load(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	u64 global_status, toggle;
+
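+	/*
+	 * Sync the guest's view of GLOBAL_STATUS into hardware: clear the
+	 * bits that are set in hardware but not in the guest's virtual
+	 * status, and set the bits the guest should see but that hardware
+	 * has since cleared.
+	 */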
+	rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, global_status);
+	toggle = pmu->global_status ^ global_status;
+	if (global_status & toggle)
+		wrmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, global_status & toggle);
+	if (pmu->global_status & toggle)
+		wrmsrq(MSR_CORE_PERF_GLOBAL_STATUS_SET, pmu->global_status & toggle);
+
+	wrmsrq(MSR_CORE_PERF_FIXED_CTR_CTRL, pmu->fixed_ctr_ctrl_hw);
+}
+
+static void intel_mediated_pmu_put(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
+	/* MSR_CORE_PERF_GLOBAL_CTRL is already saved at VM-Exit. */
+	rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, pmu->global_status);
+
+	/* Clear hardware MSR_CORE_PERF_GLOBAL_STATUS, if it's non-zero. */
+	if (pmu->global_status)
+		wrmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, pmu->global_status);
+
+	/*
+	 * Clear hardware FIXED_CTR_CTRL to avoid information leakage and to
+	 * avoid accidentally enabling fixed counters (based on guest state)
+	 * while running in the host, e.g. when setting global ctrl.
+	 */
+	if (pmu->fixed_ctr_ctrl_hw)
+		wrmsrq(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
+}
+
 struct kvm_pmu_ops intel_pmu_ops __initdata = {
 	.rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
 	.msr_idx_to_pmc = intel_msr_idx_to_pmc,
@@ -805,9 +841,17 @@ struct kvm_pmu_ops intel_pmu_ops __initdata = {
 	.cleanup = intel_pmu_cleanup,
 
 	.is_mediated_pmu_supported = intel_pmu_is_mediated_pmu_supported,
+	.mediated_load = intel_mediated_pmu_load,
+	.mediated_put = intel_mediated_pmu_put,
 	.write_global_ctrl = intel_pmu_write_global_ctrl,
 
 	.EVENTSEL_EVENT = ARCH_PERFMON_EVENTSEL_EVENT,
 	.MAX_NR_GP_COUNTERS = KVM_MAX_NR_INTEL_GP_COUNTERS,
 	.MIN_NR_GP_COUNTERS = 1,
+
+	.PERF_GLOBAL_CTRL = MSR_CORE_PERF_GLOBAL_CTRL,
+	.GP_EVENTSEL_BASE = MSR_P6_EVNTSEL0,
+	.GP_COUNTER_BASE = MSR_IA32_PMC0,
+	.FIXED_COUNTER_BASE = MSR_CORE_PERF_FIXED_CTR0,
+	.MSR_STRIDE = 1,
 };
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 76e86eb358df..589a309259f4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11334,6 +11334,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		run_flags |= KVM_RUN_LOAD_DEBUGCTL;
 	vcpu->arch.host_debugctl = debug_ctl;
 
+	kvm_mediated_pmu_load(vcpu);
+
 	guest_timing_enter_irqoff();
 
 	/*
@@ -11372,6 +11374,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	kvm_load_host_pkru(vcpu);
 
+	kvm_mediated_pmu_put(vcpu);
+
 	/*
 	 * Do this here before restoring debug registers on the host. And
 	 * since we do this before handling the vmexit, a DR access vmexit