From 658fa7b1c47a857af484c5c5dff8d0164b7c7bfb Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:52 +0530 Subject: [PATCH 01/20] ACPI: CPPC: Add cppc_get_perf() API to read performance controls Add cppc_get_perf() function to read values of performance control registers including desired_perf, min_perf, max_perf, energy_perf, and auto_sel. This provides a read interface to complement the existing cppc_set_perf() write interface for performance control registers. Note that auto_sel is read by cppc_get_perf() but not written by cppc_set_perf() to avoid unintended mode changes during performance updates. It can be updated with existing dedicated cppc_set_auto_sel() API. Use cppc_get_perf() in cppc_cpufreq_get_cpu_data() to initialize perf_ctrls with current hardware register values during cpufreq policy initialization. Signed-off-by: Sumit Gupta Reviewed-by: Pierre Gondois Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/20260206142658.72583-2-sumitg@nvidia.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 80 ++++++++++++++++++++++++++++++++++ drivers/cpufreq/cppc_cpufreq.c | 6 +++ include/acpi/cppc_acpi.h | 5 +++ 3 files changed, 91 insertions(+) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index f0e513e9ed5d..5122e99bddb0 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1738,6 +1738,86 @@ int cppc_set_enable(int cpu, bool enable) } EXPORT_SYMBOL_GPL(cppc_set_enable); +/** + * cppc_get_perf - Get a CPU's performance controls. + * @cpu: CPU for which to get performance controls. + * @perf_ctrls: ptr to cppc_perf_ctrls. See cppc_acpi.h + * + * Return: 0 for success with perf_ctrls, -ERRNO otherwise. 
+ */ +int cppc_get_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) +{ + struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); + struct cpc_register_resource *desired_perf_reg, + *min_perf_reg, *max_perf_reg, + *energy_perf_reg, *auto_sel_reg; + u64 desired_perf = 0, min = 0, max = 0, energy_perf = 0, auto_sel = 0; + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); + struct cppc_pcc_data *pcc_ss_data = NULL; + int ret = 0, regs_in_pcc = 0; + + if (!cpc_desc) { + pr_debug("No CPC descriptor for CPU:%d\n", cpu); + return -ENODEV; + } + + if (!perf_ctrls) { + pr_debug("Invalid perf_ctrls pointer\n"); + return -EINVAL; + } + + desired_perf_reg = &cpc_desc->cpc_regs[DESIRED_PERF]; + min_perf_reg = &cpc_desc->cpc_regs[MIN_PERF]; + max_perf_reg = &cpc_desc->cpc_regs[MAX_PERF]; + energy_perf_reg = &cpc_desc->cpc_regs[ENERGY_PERF]; + auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE]; + + /* Are any of the regs PCC ?*/ + if (CPC_IN_PCC(desired_perf_reg) || CPC_IN_PCC(min_perf_reg) || + CPC_IN_PCC(max_perf_reg) || CPC_IN_PCC(energy_perf_reg) || + CPC_IN_PCC(auto_sel_reg)) { + if (pcc_ss_id < 0) { + pr_debug("Invalid pcc_ss_id for CPU:%d\n", cpu); + return -ENODEV; + } + pcc_ss_data = pcc_data[pcc_ss_id]; + regs_in_pcc = 1; + down_write(&pcc_ss_data->pcc_lock); + /* Ring doorbell once to update PCC subspace */ + if (send_pcc_cmd(pcc_ss_id, CMD_READ) < 0) { + ret = -EIO; + goto out_err; + } + } + + /* Read optional elements if present */ + if (CPC_SUPPORTED(max_perf_reg)) + cpc_read(cpu, max_perf_reg, &max); + perf_ctrls->max_perf = max; + + if (CPC_SUPPORTED(min_perf_reg)) + cpc_read(cpu, min_perf_reg, &min); + perf_ctrls->min_perf = min; + + if (CPC_SUPPORTED(desired_perf_reg)) + cpc_read(cpu, desired_perf_reg, &desired_perf); + perf_ctrls->desired_perf = desired_perf; + + if (CPC_SUPPORTED(energy_perf_reg)) + cpc_read(cpu, energy_perf_reg, &energy_perf); + perf_ctrls->energy_perf = energy_perf; + + if (CPC_SUPPORTED(auto_sel_reg)) + cpc_read(cpu, auto_sel_reg, 
&auto_sel); + perf_ctrls->auto_sel = (bool)auto_sel; + +out_err: + if (regs_in_pcc) + up_write(&pcc_ss_data->pcc_lock); + return ret; +} +EXPORT_SYMBOL_GPL(cppc_get_perf); + /** * cppc_set_perf - Set a CPU's performance controls. * @cpu: CPU for which to set performance controls. diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 011f35cb47b9..a61a24e0dcae 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -594,6 +594,12 @@ static struct cppc_cpudata *cppc_cpufreq_get_cpu_data(unsigned int cpu) goto free_mask; } + ret = cppc_get_perf(cpu, &cpu_data->perf_ctrls); + if (ret) { + pr_debug("Err reading CPU%d perf ctrls: ret:%d\n", cpu, ret); + goto free_mask; + } + return cpu_data; free_mask: diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 4d644f03098e..3fc796c0d902 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -151,6 +151,7 @@ extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); extern int cppc_get_highest_perf(int cpunum, u64 *highest_perf); extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); +extern int cppc_get_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_enable(int cpu, bool enable); extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); @@ -193,6 +194,10 @@ static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ { return -EOPNOTSUPP; } +static inline int cppc_get_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) +{ + return -EOPNOTSUPP; +} static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) { return -EOPNOTSUPP; From b3e45fb2db9d8a733e94b315f1272e2c4468ed4b Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:53 +0530 Subject: [PATCH 02/20] ACPI: CPPC: Warn on missing 
mandatory DESIRED_PERF register Add a warning during CPPC processor probe if the Desired Performance register is not supported when it should be. As per 8.4.6.1.2.3 section of ACPI 6.6 specification, "The Desired Performance Register is optional only when OSPM indicates support for CPPC2 in the platform-wide _OSC capabilities and the Autonomous Selection Enable field is encoded as an Integer with a value of 1." In other words: - In CPPC v1, DESIRED_PERF is mandatory - In CPPC v2, it becomes optional only when AUTO_SEL_ENABLE is supported This helps detect firmware configuration issues early during boot. Link: https://lore.kernel.org/lkml/9fa21599-004a-4af8-acc2-190fd0404e35@nvidia.com/ Suggested-by: Pierre Gondois Signed-off-by: Sumit Gupta Reviewed-by: Pierre Gondois Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/20260206142658.72583-3-sumitg@nvidia.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 5122e99bddb0..3f758d2944e2 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -853,6 +853,16 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) } per_cpu(cpu_pcc_subspace_idx, pr->id) = pcc_subspace_id; + /* + * In CPPC v1, DESIRED_PERF is mandatory. In CPPC v2, it is optional + * only when AUTO_SEL_ENABLE is supported. + */ + if (!CPC_SUPPORTED(&cpc_ptr->cpc_regs[DESIRED_PERF]) && + (!osc_sb_cppc2_support_acked || + !CPC_SUPPORTED(&cpc_ptr->cpc_regs[AUTO_SEL_ENABLE]))) + pr_warn("Desired perf. register is mandatory if CPPC v2 is not supported " + "or autonomous selection is disabled\n"); + /* * Initialize the remaining cpc_regs as unsupported. 
* Example: In case FW exposes CPPC v2, the below loop will initialize From 38428a680026c52a1fc64212325d161974c3e4cf Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:54 +0530 Subject: [PATCH 03/20] ACPI: CPPC: Extend cppc_set_epp_perf() for FFH/SystemMemory Extend cppc_set_epp_perf() to write both auto_sel and energy_perf registers when they are in FFH or SystemMemory address space. This keeps the behavior consistent with PCC case where both registers are already updated together, but was missing for FFH/SystemMemory. Signed-off-by: Sumit Gupta Reviewed-by: Pierre Gondois Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/20260206142658.72583-4-sumitg@nvidia.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 3f758d2944e2..94a7ffa8be3c 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1571,6 +1571,8 @@ int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) struct cpc_register_resource *auto_sel_reg; struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); struct cppc_pcc_data *pcc_ss_data = NULL; + bool autosel_ffh_sysmem; + bool epp_ffh_sysmem; int ret; if (!cpc_desc) { @@ -1581,6 +1583,11 @@ int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE]; epp_set_reg = &cpc_desc->cpc_regs[ENERGY_PERF]; + epp_ffh_sysmem = CPC_SUPPORTED(epp_set_reg) && + (CPC_IN_FFH(epp_set_reg) || CPC_IN_SYSTEM_MEMORY(epp_set_reg)); + autosel_ffh_sysmem = CPC_SUPPORTED(auto_sel_reg) && + (CPC_IN_FFH(auto_sel_reg) || CPC_IN_SYSTEM_MEMORY(auto_sel_reg)); + if (CPC_IN_PCC(epp_set_reg) || CPC_IN_PCC(auto_sel_reg)) { if (pcc_ss_id < 0) { pr_debug("Invalid pcc_ss_id for CPU:%d\n", cpu); @@ -1606,11 +1613,22 @@ int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool 
enable) ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE); up_write(&pcc_ss_data->pcc_lock); } else if (osc_cpc_flexible_adr_space_confirmed && - CPC_SUPPORTED(epp_set_reg) && CPC_IN_FFH(epp_set_reg)) { - ret = cpc_write(cpu, epp_set_reg, perf_ctrls->energy_perf); + (epp_ffh_sysmem || autosel_ffh_sysmem)) { + if (autosel_ffh_sysmem) { + ret = cpc_write(cpu, auto_sel_reg, enable); + if (ret) + return ret; + } + + if (epp_ffh_sysmem) { + ret = cpc_write(cpu, epp_set_reg, + perf_ctrls->energy_perf); + if (ret) + return ret; + } } else { ret = -ENOTSUPP; - pr_debug("_CPC in PCC and _CPC in FFH are not supported\n"); + pr_debug("_CPC in PCC/FFH/SystemMemory are not supported\n"); } return ret; From 24ad4c6c136bdaa4c92c5c5948856752ce3e9f76 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:55 +0530 Subject: [PATCH 04/20] cpufreq: CPPC: Update cached perf_ctrls on sysfs write Update the cached perf_ctrls values when writing via sysfs to keep them in sync with hardware registers: - store_auto_select(): update perf_ctrls.auto_sel - store_energy_performance_preference_val(): update perf_ctrls.energy_perf This ensures consistent cached values after sysfs writes, which complements the cppc_get_perf() initialization during policy setup. Signed-off-by: Sumit Gupta Reviewed-by: Pierre Gondois Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/20260206142658.72583-5-sumitg@nvidia.com Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cppc_cpufreq.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index a61a24e0dcae..ebb5746df220 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -855,6 +855,7 @@ static ssize_t show_auto_select(struct cpufreq_policy *policy, char *buf) static ssize_t store_auto_select(struct cpufreq_policy *policy, const char *buf, size_t count) { + struct cppc_cpudata *cpu_data = policy->driver_data; bool val; int ret; @@ -866,6 +867,8 @@ static ssize_t store_auto_select(struct cpufreq_policy *policy, if (ret) return ret; + cpu_data->perf_ctrls.auto_sel = val; + return count; } @@ -916,8 +919,32 @@ static ssize_t store_##_name(struct cpufreq_policy *policy, \ CPPC_CPUFREQ_ATTR_RW_U64(auto_act_window, cppc_get_auto_act_window, cppc_set_auto_act_window) -CPPC_CPUFREQ_ATTR_RW_U64(energy_performance_preference_val, - cppc_get_epp_perf, cppc_set_epp) +static ssize_t +show_energy_performance_preference_val(struct cpufreq_policy *policy, char *buf) +{ + return cppc_cpufreq_sysfs_show_u64(policy->cpu, cppc_get_epp_perf, buf); +} + +static ssize_t +store_energy_performance_preference_val(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + struct cppc_cpudata *cpu_data = policy->driver_data; + u64 val; + int ret; + + ret = kstrtou64(buf, 0, &val); + if (ret) + return ret; + + ret = cppc_set_epp(policy->cpu, val); + if (ret) + return ret; + + cpu_data->perf_ctrls.energy_perf = val; + + return count; +} cpufreq_freq_attr_ro(freqdomain_cpus); cpufreq_freq_attr_rw(auto_select); From ea3db45ae476889a1ba0ab3617e6afdeeefbda3d Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:56 +0530 Subject: [PATCH 05/20] cpufreq: cppc: Update MIN_PERF/MAX_PERF in target callbacks Update MIN_PERF and MAX_PERF registers from policy->min and policy->max in the .target() and .fast_switch() callbacks. 
This allows controlling performance bounds via standard scaling_min_freq and scaling_max_freq sysfs interfaces. Similar to intel_cpufreq which updates HWP min/max limits in .target(), cppc_cpufreq now programs MIN_PERF/MAX_PERF along with DESIRED_PERF. Since MIN_PERF/MAX_PERF can be updated even when auto_sel is disabled, they are updated unconditionally. Also program MIN_PERF/MAX_PERF in store_auto_select() when enabling autonomous selection so the platform uses correct bounds immediately. Suggested-by: Rafael J. Wysocki Signed-off-by: Sumit Gupta Link: https://patch.msgid.link/20260206142658.72583-6-sumitg@nvidia.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cppc_cpufreq.c | 41 +++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index ebb5746df220..8a8cf76828ee 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -287,6 +287,21 @@ static inline void cppc_freq_invariance_exit(void) } #endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */ +static void cppc_cpufreq_update_perf_limits(struct cppc_cpudata *cpu_data, + struct cpufreq_policy *policy) +{ + struct cppc_perf_caps *caps = &cpu_data->perf_caps; + u32 min_perf, max_perf; + + min_perf = cppc_khz_to_perf(caps, policy->min); + max_perf = cppc_khz_to_perf(caps, policy->max); + + cpu_data->perf_ctrls.min_perf = + clamp_t(u32, min_perf, caps->lowest_perf, caps->highest_perf); + cpu_data->perf_ctrls.max_perf = + clamp_t(u32, max_perf, caps->lowest_perf, caps->highest_perf); +} + static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) @@ -298,6 +313,8 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, cpu_data->perf_ctrls.desired_perf = cppc_khz_to_perf(&cpu_data->perf_caps, target_freq); + cppc_cpufreq_update_perf_limits(cpu_data, policy); + freqs.old = policy->cur; freqs.new = target_freq; @@ -322,8 
+339,9 @@ static unsigned int cppc_cpufreq_fast_switch(struct cpufreq_policy *policy, desired_perf = cppc_khz_to_perf(&cpu_data->perf_caps, target_freq); cpu_data->perf_ctrls.desired_perf = desired_perf; - ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); + cppc_cpufreq_update_perf_limits(cpu_data, policy); + ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); if (ret) { pr_debug("Failed to set target on CPU:%d. ret:%d\n", cpu, ret); @@ -869,6 +887,27 @@ static ssize_t store_auto_select(struct cpufreq_policy *policy, cpu_data->perf_ctrls.auto_sel = val; + if (val) { + u32 old_min_perf = cpu_data->perf_ctrls.min_perf; + u32 old_max_perf = cpu_data->perf_ctrls.max_perf; + + /* + * When enabling autonomous selection, program MIN_PERF and + * MAX_PERF from current policy limits so that the platform + * uses the correct performance bounds immediately. + */ + cppc_cpufreq_update_perf_limits(cpu_data, policy); + + ret = cppc_set_perf(policy->cpu, &cpu_data->perf_ctrls); + if (ret) { + cpu_data->perf_ctrls.min_perf = old_min_perf; + cpu_data->perf_ctrls.max_perf = old_max_perf; + cppc_set_auto_sel(policy->cpu, false); + cpu_data->perf_ctrls.auto_sel = false; + return ret; + } + } + return count; } From 13c45a26635fa51a68911aa57e6778bdad18b103 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:57 +0530 Subject: [PATCH 06/20] ACPI: CPPC: add APIs and sysfs interface for perf_limited Add sysfs interface to read/write the Performance Limited register. The Performance Limited register indicates to the OS that an unpredictable event (like thermal throttling) has limited processor performance. It contains two sticky bits set by the platform: - Bit 0 (Desired_Excursion): Set when delivered performance is constrained below desired performance. Not used when Autonomous Selection is enabled. - Bit 1 (Minimum_Excursion): Set when delivered performance is constrained below minimum performance. These bits remain set until OSPM explicitly clears them. 
The write operation accepts a bitmask of bits to clear: - Write 0x1 to clear bit 0 - Write 0x2 to clear bit 1 - Write 0x3 to clear both bits This enables users to detect if platform throttling impacted a workload. Users clear the register before execution, run the workload, then check afterward - if set, hardware throttling occurred during that time window. The interface is exposed as: /sys/devices/system/cpu/cpuX/cpufreq/perf_limited Signed-off-by: Sumit Gupta Reviewed-by: Pierre Gondois Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/20260206142658.72583-7-sumitg@nvidia.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 56 ++++++++++++++++++++++++++++++++++ drivers/cpufreq/cppc_cpufreq.c | 5 +++ include/acpi/cppc_acpi.h | 15 +++++++++ 3 files changed, 76 insertions(+) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 94a7ffa8be3c..53a6ffd995a1 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1978,6 +1978,62 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) } EXPORT_SYMBOL_GPL(cppc_set_perf); +/** + * cppc_get_perf_limited - Get the Performance Limited register value. + * @cpu: CPU from which to get Performance Limited register. + * @perf_limited: Pointer to store the Performance Limited value. + * + * The returned value contains sticky status bits indicating platform-imposed + * performance limitations. + * + * Return: 0 for success, -EIO on failure, -EOPNOTSUPP if not supported. + */ +int cppc_get_perf_limited(int cpu, u64 *perf_limited) +{ + return cppc_get_reg_val(cpu, PERF_LIMITED, perf_limited); +} +EXPORT_SYMBOL_GPL(cppc_get_perf_limited); + +/** + * cppc_set_perf_limited() - Clear bits in the Performance Limited register. + * @cpu: CPU on which to write register. + * @bits_to_clear: Bitmask of bits to clear in the perf_limited register. 
+ * + * The Performance Limited register contains two sticky bits set by platform: + * - Bit 0 (Desired_Excursion): Set when delivered performance is constrained + * below desired performance. Not used when Autonomous Selection is enabled. + * - Bit 1 (Minimum_Excursion): Set when delivered performance is constrained + * below minimum performance. + * + * These bits are sticky and remain set until OSPM explicitly clears them. + * This function only allows clearing bits (the platform sets them). + * + * Return: 0 for success, -EINVAL for invalid bits, -EIO on register + * access failure, -EOPNOTSUPP if not supported. + */ +int cppc_set_perf_limited(int cpu, u64 bits_to_clear) +{ + u64 current_val, new_val; + int ret; + + /* Only bits 0 and 1 are valid */ + if (bits_to_clear & ~CPPC_PERF_LIMITED_MASK) + return -EINVAL; + + if (!bits_to_clear) + return 0; + + ret = cppc_get_perf_limited(cpu, &current_val); + if (ret) + return ret; + + /* Clear the specified bits */ + new_val = current_val & ~bits_to_clear; + + return cppc_set_reg_val(cpu, PERF_LIMITED, new_val); +} +EXPORT_SYMBOL_GPL(cppc_set_perf_limited); + /** + * cppc_get_transition_latency - returns frequency transition latency in ns + * @cpu_num: CPU number for per_cpu(). 
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 8a8cf76828ee..94d489a4c90d 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -985,16 +985,21 @@ store_energy_performance_preference_val(struct cpufreq_policy *policy, return count; } +CPPC_CPUFREQ_ATTR_RW_U64(perf_limited, cppc_get_perf_limited, + cppc_set_perf_limited) + cpufreq_freq_attr_ro(freqdomain_cpus); cpufreq_freq_attr_rw(auto_select); cpufreq_freq_attr_rw(auto_act_window); cpufreq_freq_attr_rw(energy_performance_preference_val); +cpufreq_freq_attr_rw(perf_limited); static struct freq_attr *cppc_cpufreq_attr[] = { &freqdomain_cpus, &auto_select, &auto_act_window, &energy_performance_preference_val, + &perf_limited, NULL, }; diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 3fc796c0d902..f7afa20b8ad9 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -42,6 +42,11 @@ #define CPPC_EPP_PERFORMANCE_PREF 0x00 #define CPPC_EPP_ENERGY_EFFICIENCY_PREF 0xFF +#define CPPC_PERF_LIMITED_DESIRED_EXCURSION BIT(0) +#define CPPC_PERF_LIMITED_MINIMUM_EXCURSION BIT(1) +#define CPPC_PERF_LIMITED_MASK (CPPC_PERF_LIMITED_DESIRED_EXCURSION | \ + CPPC_PERF_LIMITED_MINIMUM_EXCURSION) + /* Each register has the folowing format. 
*/ struct cpc_reg { u8 descriptor; @@ -174,6 +179,8 @@ extern int cppc_get_auto_act_window(int cpu, u64 *auto_act_window); extern int cppc_set_auto_act_window(int cpu, u64 auto_act_window); extern int cppc_get_auto_sel(int cpu, bool *enable); extern int cppc_set_auto_sel(int cpu, bool enable); +extern int cppc_get_perf_limited(int cpu, u64 *perf_limited); +extern int cppc_set_perf_limited(int cpu, u64 bits_to_clear); extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf); extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator); extern int amd_detect_prefcore(bool *detected); @@ -270,6 +277,14 @@ static inline int cppc_set_auto_sel(int cpu, bool enable) { return -EOPNOTSUPP; } +static inline int cppc_get_perf_limited(int cpu, u64 *perf_limited) +{ + return -EOPNOTSUPP; +} +static inline int cppc_set_perf_limited(int cpu, u64 bits_to_clear) +{ + return -EOPNOTSUPP; +} static inline int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf) { return -ENODEV; From 856250ba2e810e772dc95b3234ebf0d6393a51d9 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:58 +0530 Subject: [PATCH 07/20] cpufreq: CPPC: Add sysfs documentation for perf_limited Add ABI documentation for the Performance Limited Register sysfs interface in the cppc_cpufreq driver. Signed-off-by: Sumit Gupta Reviewed-by: Randy Dunlap Reviewed-by: Pierre Gondois Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/20260206142658.72583-8-sumitg@nvidia.com Signed-off-by: Rafael J. 
Wysocki --- .../ABI/testing/sysfs-devices-system-cpu | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 3a05604c21bf..82d10d556cc8 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -327,6 +327,24 @@ Description: Energy performance preference This file is only present if the cppc-cpufreq driver is in use. +What: /sys/devices/system/cpu/cpuX/cpufreq/perf_limited +Date: February 2026 +Contact: linux-pm@vger.kernel.org +Description: Performance Limited + + Read to check if platform throttling (thermal/power/current + limits) caused delivered performance to fall below the + requested level. A non-zero value indicates throttling occurred. + + Write the bitmask of bits to clear: + + - 0x1 = clear bit 0 (desired performance excursion) + - 0x2 = clear bit 1 (minimum performance excursion) + - 0x3 = clear both bits + + The platform sets these bits; OSPM can only clear them. + + This file is only present if the cppc-cpufreq driver is in use. What: /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1} Date: August 2008 From 8505bfb4e4eca28ef1b20d3369435ec2d6a125c6 Mon Sep 17 00:00:00 2001 From: Pengjie Zhang Date: Fri, 13 Feb 2026 18:09:35 +0800 Subject: [PATCH 08/20] ACPI: CPPC: Move reference performance to capabilities Currently, the `Reference Performance` register is read every time the CPU frequency is sampled in `cppc_get_perf_ctrs()`. This function is on the hot path of the cppc_cpufreq driver. Reference Performance indicates the performance level that corresponds to the Reference Counter incrementing and is not expected to change dynamically during runtime (unlike the Delivered and Reference counters). 
Reading this register in the hot path incurs unnecessary overhead, particularly on platforms where CPC registers are located in the PCC (Platform Communication Channel) subspace. This patch moves `reference_perf` from the dynamic feedback counters structure (`cppc_perf_fb_ctrs`) to the static capabilities structure (`cppc_perf_caps`). Signed-off-by: Pengjie Zhang [ rjw: Changelog adjustment ] Link: https://patch.msgid.link/20260213100935.19111-1-zhangpengjie2@huawei.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 55 +++++++++++++++------------------- drivers/cpufreq/cppc_cpufreq.c | 21 +++++++------ include/acpi/cppc_acpi.h | 2 +- 3 files changed, 37 insertions(+), 41 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 53a6ffd995a1..07bbf5b366a4 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -177,12 +177,12 @@ __ATTR(_name, 0444, show_##_name, NULL) show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, highest_perf); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_perf); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, nominal_perf); +show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, reference_perf); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_nonlinear_perf); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, guaranteed_perf); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_freq); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, nominal_freq); -show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, reference_perf); show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time); /* Check for valid access_width, otherwise, fallback to using bit_width */ @@ -1352,9 +1352,10 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); struct cpc_register_resource *highest_reg, *lowest_reg, - *lowest_non_linear_reg, *nominal_reg, *guaranteed_reg, - *low_freq_reg = NULL, 
*nom_freq_reg = NULL; - u64 high, low, guaranteed, nom, min_nonlinear, low_f = 0, nom_f = 0; + *lowest_non_linear_reg, *nominal_reg, *reference_reg, + *guaranteed_reg, *low_freq_reg = NULL, *nom_freq_reg = NULL; + u64 high, low, guaranteed, nom, ref, min_nonlinear, + low_f = 0, nom_f = 0; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); struct cppc_pcc_data *pcc_ss_data = NULL; int ret = 0, regs_in_pcc = 0; @@ -1368,6 +1369,7 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) lowest_reg = &cpc_desc->cpc_regs[LOWEST_PERF]; lowest_non_linear_reg = &cpc_desc->cpc_regs[LOW_NON_LINEAR_PERF]; nominal_reg = &cpc_desc->cpc_regs[NOMINAL_PERF]; + reference_reg = &cpc_desc->cpc_regs[REFERENCE_PERF]; low_freq_reg = &cpc_desc->cpc_regs[LOWEST_FREQ]; nom_freq_reg = &cpc_desc->cpc_regs[NOMINAL_FREQ]; guaranteed_reg = &cpc_desc->cpc_regs[GUARANTEED_PERF]; @@ -1375,6 +1377,7 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) /* Are any of the regs PCC ?*/ if (CPC_IN_PCC(highest_reg) || CPC_IN_PCC(lowest_reg) || CPC_IN_PCC(lowest_non_linear_reg) || CPC_IN_PCC(nominal_reg) || + (CPC_SUPPORTED(reference_reg) && CPC_IN_PCC(reference_reg)) || CPC_IN_PCC(low_freq_reg) || CPC_IN_PCC(nom_freq_reg) || CPC_IN_PCC(guaranteed_reg)) { if (pcc_ss_id < 0) { @@ -1400,6 +1403,17 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) cpc_read(cpunum, nominal_reg, &nom); perf_caps->nominal_perf = nom; + /* + * If reference perf register is not supported then we should + * use the nominal perf value + */ + if (CPC_SUPPORTED(reference_reg)) { + cpc_read(cpunum, reference_reg, &ref); + perf_caps->reference_perf = ref; + } else { + perf_caps->reference_perf = nom; + } + if (guaranteed_reg->type != ACPI_TYPE_BUFFER || IS_NULL_REG(&guaranteed_reg->cpc_entry.reg)) { perf_caps->guaranteed_perf = 0; @@ -1411,7 +1425,7 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) cpc_read(cpunum, lowest_non_linear_reg, &min_nonlinear); 
perf_caps->lowest_nonlinear_perf = min_nonlinear; - if (!high || !low || !nom || !min_nonlinear) + if (!high || !low || !nom || !ref || !min_nonlinear) ret = -EFAULT; /* Read optional lowest and nominal frequencies if present */ @@ -1441,20 +1455,10 @@ EXPORT_SYMBOL_GPL(cppc_get_perf_caps); bool cppc_perf_ctrs_in_pcc_cpu(unsigned int cpu) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); - struct cpc_register_resource *ref_perf_reg; - - /* - * If reference perf register is not supported then we should use the - * nominal perf value - */ - ref_perf_reg = &cpc_desc->cpc_regs[REFERENCE_PERF]; - if (!CPC_SUPPORTED(ref_perf_reg)) - ref_perf_reg = &cpc_desc->cpc_regs[NOMINAL_PERF]; return CPC_IN_PCC(&cpc_desc->cpc_regs[DELIVERED_CTR]) || CPC_IN_PCC(&cpc_desc->cpc_regs[REFERENCE_CTR]) || - CPC_IN_PCC(&cpc_desc->cpc_regs[CTR_WRAP_TIME]) || - CPC_IN_PCC(ref_perf_reg); + CPC_IN_PCC(&cpc_desc->cpc_regs[CTR_WRAP_TIME]); } EXPORT_SYMBOL_GPL(cppc_perf_ctrs_in_pcc_cpu); @@ -1491,10 +1495,10 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); struct cpc_register_resource *delivered_reg, *reference_reg, - *ref_perf_reg, *ctr_wrap_reg; + *ctr_wrap_reg; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); struct cppc_pcc_data *pcc_ss_data = NULL; - u64 delivered, reference, ref_perf, ctr_wrap_time; + u64 delivered, reference, ctr_wrap_time; int ret = 0, regs_in_pcc = 0; if (!cpc_desc) { @@ -1504,19 +1508,11 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) delivered_reg = &cpc_desc->cpc_regs[DELIVERED_CTR]; reference_reg = &cpc_desc->cpc_regs[REFERENCE_CTR]; - ref_perf_reg = &cpc_desc->cpc_regs[REFERENCE_PERF]; ctr_wrap_reg = &cpc_desc->cpc_regs[CTR_WRAP_TIME]; - /* - * If reference perf register is not supported then we should - * use the nominal perf value - */ - if (!CPC_SUPPORTED(ref_perf_reg)) - ref_perf_reg = &cpc_desc->cpc_regs[NOMINAL_PERF]; - /* Are 
any of the regs PCC ?*/ if (CPC_IN_PCC(delivered_reg) || CPC_IN_PCC(reference_reg) || - CPC_IN_PCC(ctr_wrap_reg) || CPC_IN_PCC(ref_perf_reg)) { + CPC_IN_PCC(ctr_wrap_reg)) { if (pcc_ss_id < 0) { pr_debug("Invalid pcc_ss_id\n"); return -ENODEV; @@ -1533,8 +1529,6 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) cpc_read(cpunum, delivered_reg, &delivered); cpc_read(cpunum, reference_reg, &reference); - cpc_read(cpunum, ref_perf_reg, &ref_perf); - /* * Per spec, if ctr_wrap_time optional register is unsupported, then the * performance counters are assumed to never wrap during the lifetime of @@ -1544,14 +1538,13 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) if (CPC_SUPPORTED(ctr_wrap_reg)) cpc_read(cpunum, ctr_wrap_reg, &ctr_wrap_time); - if (!delivered || !reference || !ref_perf) { + if (!delivered || !reference) { ret = -EFAULT; goto out_err; } perf_fb_ctrs->delivered = delivered; perf_fb_ctrs->reference = reference; - perf_fb_ctrs->reference_perf = ref_perf; perf_fb_ctrs->wraparound_time = ctr_wrap_time; out_err: if (regs_in_pcc) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 94d489a4c90d..5dfb109cf1f4 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -50,7 +50,8 @@ struct cppc_freq_invariance { static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv); static struct kthread_worker *kworker_fie; -static int cppc_perf_from_fbctrs(struct cppc_perf_fb_ctrs *fb_ctrs_t0, +static int cppc_perf_from_fbctrs(u64 reference_perf, + struct cppc_perf_fb_ctrs *fb_ctrs_t0, struct cppc_perf_fb_ctrs *fb_ctrs_t1); /** @@ -70,7 +71,7 @@ static void __cppc_scale_freq_tick(struct cppc_freq_invariance *cppc_fi) struct cppc_perf_fb_ctrs fb_ctrs = {0}; struct cppc_cpudata *cpu_data; unsigned long local_freq_scale; - u64 perf; + u64 perf, ref_perf; cpu_data = cppc_fi->cpu_data; @@ -79,7 +80,9 @@ static void __cppc_scale_freq_tick(struct 
cppc_freq_invariance *cppc_fi) return; } - perf = cppc_perf_from_fbctrs(&cppc_fi->prev_perf_fb_ctrs, &fb_ctrs); + ref_perf = cpu_data->perf_caps.reference_perf; + perf = cppc_perf_from_fbctrs(ref_perf, + &cppc_fi->prev_perf_fb_ctrs, &fb_ctrs); if (!perf) return; @@ -747,13 +750,11 @@ static inline u64 get_delta(u64 t1, u64 t0) return (u32)t1 - (u32)t0; } -static int cppc_perf_from_fbctrs(struct cppc_perf_fb_ctrs *fb_ctrs_t0, +static int cppc_perf_from_fbctrs(u64 reference_perf, + struct cppc_perf_fb_ctrs *fb_ctrs_t0, struct cppc_perf_fb_ctrs *fb_ctrs_t1) { u64 delta_reference, delta_delivered; - u64 reference_perf; - - reference_perf = fb_ctrs_t0->reference_perf; delta_reference = get_delta(fb_ctrs_t1->reference, fb_ctrs_t0->reference); @@ -790,7 +791,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0}; struct cppc_cpudata *cpu_data; - u64 delivered_perf; + u64 delivered_perf, reference_perf; int ret; if (!policy) @@ -807,7 +808,9 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) return 0; } - delivered_perf = cppc_perf_from_fbctrs(&fb_ctrs_t0, &fb_ctrs_t1); + reference_perf = cpu_data->perf_caps.reference_perf; + delivered_perf = cppc_perf_from_fbctrs(reference_perf, + &fb_ctrs_t0, &fb_ctrs_t1); if (!delivered_perf) goto out_invalid_counters; diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index f7afa20b8ad9..d8e405becdc3 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -121,6 +121,7 @@ struct cppc_perf_caps { u32 guaranteed_perf; u32 highest_perf; u32 nominal_perf; + u32 reference_perf; u32 lowest_perf; u32 lowest_nonlinear_perf; u32 lowest_freq; @@ -138,7 +139,6 @@ struct cppc_perf_ctrls { struct cppc_perf_fb_ctrs { u64 reference; u64 delivered; - u64 reference_perf; u64 wraparound_time; }; From 638a95168fd53a911201681cd5e55c7965b20733 Mon Sep 17 00:00:00 
2001 From: Jingkai Tan Date: Thu, 5 Mar 2026 21:38:31 +0000 Subject: [PATCH 09/20] ACPI: processor: idle: Add missing bounds check in flatten_lpi_states() The inner loop in flatten_lpi_states() that combines composite LPI states can increment flat_state_cnt multiple times within the loop. The condition that guards this (checks bounds against ACPI_PROCESSOR_MAX_POWER) occurs at the top of the outer loop. flat_state_cnt might exceed ACPI_PROCESSOR_MAX_POWER if it is incremented multiple times within the inner loop between outer loop iterations. Add a bounds check after the increment inside the inner loop so that it breaks out when flat_state_cnt reaches ACPI_PROCESSOR_MAX_POWER. The existing check in the outer loop will then handle the warning. Signed-off-by: Jingkai Tan Reviewed-by: Sudeep Holla Link: https://patch.msgid.link/20260305213831.53985-1-contact@jingk.ai Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index f6c72e3a2be1..d4753420ae0b 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1068,6 +1068,8 @@ static unsigned int flatten_lpi_states(struct acpi_processor *pr, stash_composite_state(curr_level, flpi); flat_state_cnt++; flpi++; + if (flat_state_cnt >= ACPI_PROCESSOR_MAX_POWER) + break; } } } From be473f0591f183990a998edee02161b319047eaa Mon Sep 17 00:00:00 2001 From: Pengjie Zhang Date: Wed, 11 Mar 2026 15:13:34 +0800 Subject: [PATCH 10/20] ACPI: CPPC: Fix uninitialized ref variable in cppc_get_perf_caps() Commit 8505bfb4e4ec ("ACPI: CPPC: Move reference performance to capabilities") introduced a logical error when retrieving the reference performance. On platforms lacking the reference performance register, the fallback logic leaves the local 'ref' variable uninitialized (0). This causes the subsequent sanity check to incorrectly return -EFAULT, breaking amd_pstate initialization. 
Fix this by assigning 'ref = nom' in the fallback path. Fixes: 8505bfb4e4ec ("ACPI: CPPC: Move reference performance to capabilities") Reported-by: Nathan Chancellor Closes: https://lore.kernel.org/all/20260310003026.GA2639793@ax162/ Tested-by: Nathan Chancellor Signed-off-by: Pengjie Zhang [ rjw: Subject tweak ] Link: https://patch.msgid.link/20260311071334.1494960-1-zhangpengjie2@huawei.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 07bbf5b366a4..5ad922eb937a 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1407,12 +1407,11 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) * If reference perf register is not supported then we should * use the nominal perf value */ - if (CPC_SUPPORTED(reference_reg)) { + if (CPC_SUPPORTED(reference_reg)) cpc_read(cpunum, reference_reg, &ref); - perf_caps->reference_perf = ref; - } else { - perf_caps->reference_perf = nom; - } + else + ref = nom; + perf_caps->reference_perf = ref; if (guaranteed_reg->type != ACPI_TYPE_BUFFER || IS_NULL_REG(&guaranteed_reg->cpc_entry.reg)) { From db19103ea847ed139da59a2fb71773081c12cd40 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Wed, 11 Mar 2026 14:50:36 +0800 Subject: [PATCH 11/20] ACPI: processor: idle: Remove redundant cstate check in acpi_processor_power_init The function acpi_processor_cstate_first_run_checks() is responsible for updating max_cstate and performing initial hardware validation. Currently, this function is invoked within acpi_processor_power_init(). However, the initialization flow already ensures this is called during acpi_processor_register_idle_driver(). Therefore, the call in acpi_processor_power_init() is redundant and effectively performs no work, so remove it. 
Signed-off-by: Huisong Li Link: https://patch.msgid.link/20260311065038.4151558-2-lihuisong@huawei.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index d4753420ae0b..758dbe79cdaf 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1411,8 +1411,6 @@ void acpi_processor_power_init(struct acpi_processor *pr) if (disabled_by_idle_boot_param()) return; - acpi_processor_cstate_first_run_checks(); - if (!acpi_processor_get_power_info(pr)) pr->flags.power_setup_done = 1; From 1f23194c8b8208bf3a43beb6c97d4c843197b6f6 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Wed, 11 Mar 2026 14:50:37 +0800 Subject: [PATCH 12/20] ACPI: processor: idle: Move max_cstate update out of the loop The acpi_processor_cstate_first_run_checks() function, which updates max_cstate on certain platforms, only needs to be executed once. Move this call outside of the loop to avoid redundant executions. Signed-off-by: Huisong Li Link: https://patch.msgid.link/20260311065038.4151558-3-lihuisong@huawei.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 758dbe79cdaf..625f80fe0b98 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1359,6 +1359,8 @@ void acpi_processor_register_idle_driver(void) int ret = -ENODEV; int cpu; + acpi_processor_cstate_first_run_checks(); + /* * ACPI idle driver is used by all possible CPUs. * Use the processor power info of one in them to set up idle states. 
@@ -1370,7 +1372,6 @@ void acpi_processor_register_idle_driver(void) if (!pr) continue; - acpi_processor_cstate_first_run_checks(); ret = acpi_processor_get_power_info(pr); if (!ret) { pr->flags.power_setup_done = 1; From 4d613fb1ea0516e1f69d3a4ebfbf2572d5da5368 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Wed, 11 Mar 2026 14:50:38 +0800 Subject: [PATCH 13/20] ACPI: processor: idle: Remove redundant static variable and rename cstate check function The function acpi_processor_cstate_first_run_checks() is currently called only once during initialization in acpi_processor_register_idle_driver(). Since its execution is already limited by the caller's lifecycle, the internal static 'first_run' variable is redundant and can be safely removed. Additionally, the current function name is no longer descriptive of its behavior, so rename the function to acpi_processor_update_max_cstate() to better reflect its actual purpose. Signed-off-by: Huisong Li Link: https://patch.msgid.link/20260311065038.4151558-4-lihuisong@huawei.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/processor_idle.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 625f80fe0b98..45b5d17443cf 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -819,19 +819,13 @@ static void acpi_processor_setup_cstates(struct acpi_processor *pr) drv->state_count = count; } -static inline void acpi_processor_cstate_first_run_checks(void) +static inline void acpi_processor_update_max_cstate(void) { - static int first_run; - - if (first_run) - return; dmi_check_system(processor_power_dmi_table); max_cstate = acpi_processor_cstate_check(max_cstate); if (max_cstate < ACPI_C_STATES_MAX) pr_notice("processor limited to max C-state %d\n", max_cstate); - first_run++; - if (nocst) return; @@ -840,7 +834,7 @@ static inline void acpi_processor_cstate_first_run_checks(void) #else static inline int disabled_by_idle_boot_param(void) { return 0; } -static inline void acpi_processor_cstate_first_run_checks(void) { } +static inline void acpi_processor_update_max_cstate(void) { } static int acpi_processor_get_cstate_info(struct acpi_processor *pr) { return -ENODEV; @@ -1359,7 +1353,7 @@ void acpi_processor_register_idle_driver(void) int ret = -ENODEV; int cpu; - acpi_processor_cstate_first_run_checks(); + acpi_processor_update_max_cstate(); /* * ACPI idle driver is used by all possible CPUs. From 36cb728754ea4583f145ecacb6e4fb9a6d8e62d6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 17 Mar 2026 09:01:06 +0100 Subject: [PATCH 14/20] ACPI: processor: idle: Replace strlcat() with better alternative strlcpy() and strlcat() are confusing APIs and the former one already gone from the kernel. In preparation to kill strlcat() replace it with the better alternative. Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20260317080218.1814693-1-andriy.shevchenko@linux.intel.com Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/processor_idle.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 45b5d17443cf..479995a4c48a 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1010,9 +1010,7 @@ static bool combine_lpi_states(struct acpi_lpi_state *local, result->arch_flags = parent->arch_flags; result->index = parent->index; - strscpy(result->desc, local->desc, ACPI_CX_DESC_LEN); - strlcat(result->desc, "+", ACPI_CX_DESC_LEN); - strlcat(result->desc, parent->desc, ACPI_CX_DESC_LEN); + scnprintf(result->desc, ACPI_CX_DESC_LEN, "%s+%s", local->desc, parent->desc); return true; } From 0cc24977224a6c7d470860265a4990109f0a32ee Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Wed, 18 Mar 2026 15:20:05 +0530 Subject: [PATCH 15/20] ACPI: CPPC: Check cpc_read() return values consistently Callers of cpc_read() ignore its return value, which can lead to using uninitialized or stale values when the read fails. Fix this by consistently checking cpc_read() return values in cppc_get_perf_caps(), cppc_get_perf_ctrs(), and cppc_get_perf(). Link: https://lore.kernel.org/lkml/48bdf87e-39f1-402f-a7dc-1a0e1e7a819d@nvidia.com/ Suggested-by: Rafael J. Wysocki Signed-off-by: Sumit Gupta Link: https://patch.msgid.link/20260318095005.2437960-1-sumitg@nvidia.com Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/cppc_acpi.c | 99 +++++++++++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 27 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 5ad922eb937a..053fc6765a59 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1394,45 +1394,66 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) } } - cpc_read(cpunum, highest_reg, &high); + ret = cpc_read(cpunum, highest_reg, &high); + if (ret) + goto out_err; perf_caps->highest_perf = high; - cpc_read(cpunum, lowest_reg, &low); + ret = cpc_read(cpunum, lowest_reg, &low); + if (ret) + goto out_err; perf_caps->lowest_perf = low; - cpc_read(cpunum, nominal_reg, &nom); + ret = cpc_read(cpunum, nominal_reg, &nom); + if (ret) + goto out_err; perf_caps->nominal_perf = nom; /* * If reference perf register is not supported then we should * use the nominal perf value */ - if (CPC_SUPPORTED(reference_reg)) - cpc_read(cpunum, reference_reg, &ref); - else + if (CPC_SUPPORTED(reference_reg)) { + ret = cpc_read(cpunum, reference_reg, &ref); + if (ret) + goto out_err; + } else { ref = nom; + } perf_caps->reference_perf = ref; if (guaranteed_reg->type != ACPI_TYPE_BUFFER || IS_NULL_REG(&guaranteed_reg->cpc_entry.reg)) { perf_caps->guaranteed_perf = 0; } else { - cpc_read(cpunum, guaranteed_reg, &guaranteed); + ret = cpc_read(cpunum, guaranteed_reg, &guaranteed); + if (ret) + goto out_err; perf_caps->guaranteed_perf = guaranteed; } - cpc_read(cpunum, lowest_non_linear_reg, &min_nonlinear); + ret = cpc_read(cpunum, lowest_non_linear_reg, &min_nonlinear); + if (ret) + goto out_err; perf_caps->lowest_nonlinear_perf = min_nonlinear; - if (!high || !low || !nom || !ref || !min_nonlinear) + if (!high || !low || !nom || !ref || !min_nonlinear) { ret = -EFAULT; + goto out_err; + } /* Read optional lowest and nominal frequencies if present */ - if (CPC_SUPPORTED(low_freq_reg)) - cpc_read(cpunum, low_freq_reg, &low_f); + if 
(CPC_SUPPORTED(low_freq_reg)) { + ret = cpc_read(cpunum, low_freq_reg, &low_f); + if (ret) + goto out_err; + } - if (CPC_SUPPORTED(nom_freq_reg)) - cpc_read(cpunum, nom_freq_reg, &nom_f); + if (CPC_SUPPORTED(nom_freq_reg)) { + ret = cpc_read(cpunum, nom_freq_reg, &nom_f); + if (ret) + goto out_err; + } perf_caps->lowest_freq = low_f; perf_caps->nominal_freq = nom_f; @@ -1526,16 +1547,25 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) } } - cpc_read(cpunum, delivered_reg, &delivered); - cpc_read(cpunum, reference_reg, &reference); + ret = cpc_read(cpunum, delivered_reg, &delivered); + if (ret) + goto out_err; + + ret = cpc_read(cpunum, reference_reg, &reference); + if (ret) + goto out_err; + /* * Per spec, if ctr_wrap_time optional register is unsupported, then the * performance counters are assumed to never wrap during the lifetime of * platform */ ctr_wrap_time = (u64)(~((u64)0)); - if (CPC_SUPPORTED(ctr_wrap_reg)) - cpc_read(cpunum, ctr_wrap_reg, &ctr_wrap_time); + if (CPC_SUPPORTED(ctr_wrap_reg)) { + ret = cpc_read(cpunum, ctr_wrap_reg, &ctr_wrap_time); + if (ret) + goto out_err; + } if (!delivered || !reference) { ret = -EFAULT; @@ -1811,24 +1841,39 @@ int cppc_get_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) } /* Read optional elements if present */ - if (CPC_SUPPORTED(max_perf_reg)) - cpc_read(cpu, max_perf_reg, &max); + if (CPC_SUPPORTED(max_perf_reg)) { + ret = cpc_read(cpu, max_perf_reg, &max); + if (ret) + goto out_err; + } perf_ctrls->max_perf = max; - if (CPC_SUPPORTED(min_perf_reg)) - cpc_read(cpu, min_perf_reg, &min); + if (CPC_SUPPORTED(min_perf_reg)) { + ret = cpc_read(cpu, min_perf_reg, &min); + if (ret) + goto out_err; + } perf_ctrls->min_perf = min; - if (CPC_SUPPORTED(desired_perf_reg)) - cpc_read(cpu, desired_perf_reg, &desired_perf); + if (CPC_SUPPORTED(desired_perf_reg)) { + ret = cpc_read(cpu, desired_perf_reg, &desired_perf); + if (ret) + goto out_err; + } perf_ctrls->desired_perf = desired_perf; - if 
(CPC_SUPPORTED(energy_perf_reg)) - cpc_read(cpu, energy_perf_reg, &energy_perf); + if (CPC_SUPPORTED(energy_perf_reg)) { + ret = cpc_read(cpu, energy_perf_reg, &energy_perf); + if (ret) + goto out_err; + } perf_ctrls->energy_perf = energy_perf; - if (CPC_SUPPORTED(auto_sel_reg)) - cpc_read(cpu, auto_sel_reg, &auto_sel); + if (CPC_SUPPORTED(auto_sel_reg)) { + ret = cpc_read(cpu, auto_sel_reg, &auto_sel); + if (ret) + goto out_err; + } perf_ctrls->auto_sel = (bool)auto_sel; out_err: From 00154eede77616d5c95206728ee36a1e5bbe52e1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 31 Mar 2026 21:09:42 +0200 Subject: [PATCH 16/20] ACPI: processor: Rearrange and clean up acpi_processor_errata_piix4() In acpi_processor_errata_piix4() it is not necessary to use three struct pci_dev pointers. One is sufficient, so use it everywhere and drop the other two. Additionally, define the auxiliary local variables value1 and value2 in the code block in which they are used. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2846888.mvXUDI8C0e@rafael.j.wysocki --- drivers/acpi/acpi_processor.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index b1652cab631a..2afe798d1586 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -48,11 +48,6 @@ acpi_handle acpi_get_processor_handle(int cpu) static int acpi_processor_errata_piix4(struct pci_dev *dev) { - u8 value1 = 0; - u8 value2 = 0; - struct pci_dev *ide_dev = NULL, *isa_dev = NULL; - - if (!dev) return -EINVAL; @@ -108,16 +103,16 @@ static int acpi_processor_errata_piix4(struct pci_dev *dev) * each IDE controller's DMA status to make sure we catch all * DMA activity. 
*/ - ide_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, + dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB, PCI_ANY_ID, PCI_ANY_ID, NULL); - if (ide_dev) { - errata.piix4.bmisx = pci_resource_start(ide_dev, 4); + if (dev) { + errata.piix4.bmisx = pci_resource_start(dev, 4); if (errata.piix4.bmisx) - dev_dbg(&ide_dev->dev, + dev_dbg(&dev->dev, "Bus master activity detection (BM-IDE) erratum enabled\n"); - pci_dev_put(ide_dev); + pci_dev_put(dev); } /* @@ -129,18 +124,20 @@ static int acpi_processor_errata_piix4(struct pci_dev *dev) * disable C3 support if this is enabled, as some legacy * devices won't operate well if fast DMA is disabled. */ - isa_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, + dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_0, PCI_ANY_ID, PCI_ANY_ID, NULL); - if (isa_dev) { - pci_read_config_byte(isa_dev, 0x76, &value1); - pci_read_config_byte(isa_dev, 0x77, &value2); + if (dev) { + u8 value1 = 0, value2 = 0; + + pci_read_config_byte(dev, 0x76, &value1); + pci_read_config_byte(dev, 0x77, &value2); if ((value1 & 0x80) || (value2 & 0x80)) { errata.piix4.fdma = 1; - dev_dbg(&isa_dev->dev, + dev_dbg(&dev->dev, "Type-F DMA livelock erratum (C3 disabled)\n"); } - pci_dev_put(isa_dev); + pci_dev_put(dev); } break; From 02c68ed11ceed569ad2d029a4138aead8ff13229 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Fri, 3 Apr 2026 16:53:43 +0800 Subject: [PATCH 17/20] ACPI: processor: idle: Reset power_setup_done flag on initialization failure The 'power_setup_done' flag is a key indicator used across the ACPI processor driver to determine if cpuidle are properly configured and available for a given CPU. Currently, this flag is set during the early stages of initialization. However, if the subsequent registration of the cpuidle driver in acpi_processor_register_idle_driver() or the per-CPU device registration in acpi_processor_power_init() fails, this flag remains set. 
This may lead to some issues where other functions in ACPI idle driver use these flags. Fix this by explicitly resetting this flag to 0 in these error paths. Signed-off-by: Huisong Li Link: https://patch.msgid.link/20260403085343.866440-1-lihuisong@huawei.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 479995a4c48a..6172462c7b4e 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1379,6 +1379,7 @@ void acpi_processor_register_idle_driver(void) ret = cpuidle_register_driver(&acpi_idle_driver); if (ret) { + pr->flags.power_setup_done = 0; pr_debug("register %s failed.\n", acpi_idle_driver.name); return; } @@ -1387,7 +1388,16 @@ void acpi_processor_register_idle_driver(void) void acpi_processor_unregister_idle_driver(void) { + struct acpi_processor *pr; + int cpu; + cpuidle_unregister_driver(&acpi_idle_driver); + for_each_possible_cpu(cpu) { + pr = per_cpu(processors, cpu); + if (!pr) + continue; + pr->flags.power_setup_done = 0; + } } void acpi_processor_power_init(struct acpi_processor *pr) @@ -1424,6 +1434,7 @@ void acpi_processor_power_init(struct acpi_processor *pr) */ if (cpuidle_register_device(dev)) { per_cpu(acpi_cpuidle_device, pr->id) = NULL; + pr->flags.power_setup_done = 0; kfree(dev); } } From 47e6a863a88034be102bde11197f2ca1bc18cbaf Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Fri, 3 Apr 2026 17:02:53 +0800 Subject: [PATCH 18/20] ACPI: processor: idle: Fix NULL pointer dereference in hotplug path A cpuidle_device might fail to register during boot, but the system can continue to run. In such cases, acpi_processor_hotplug() can trigger a NULL pointer dereference when accessing the per-cpu acpi_cpuidle_device. So add NULL pointer check for the per-cpu acpi_cpuidle_device in acpi_processor_hotplug. 
Signed-off-by: Huisong Li Link: https://patch.msgid.link/20260403090253.998322-1-lihuisong@huawei.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 6172462c7b4e..c1cddb4a5887 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1267,16 +1267,15 @@ static int acpi_processor_get_power_info(struct acpi_processor *pr) int acpi_processor_hotplug(struct acpi_processor *pr) { + struct cpuidle_device *dev = per_cpu(acpi_cpuidle_device, pr->id); int ret = 0; - struct cpuidle_device *dev; if (disabled_by_idle_boot_param()) return 0; - if (!pr->flags.power_setup_done) + if (!pr->flags.power_setup_done || !dev) return -ENODEV; - dev = per_cpu(acpi_cpuidle_device, pr->id); cpuidle_pause_and_lock(); cpuidle_disable_device(dev); ret = acpi_processor_get_power_info(pr); From a4c6c18e93a1d24df9ab95794cef471c89daefe4 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Tue, 7 Apr 2026 16:11:40 +0800 Subject: [PATCH 19/20] cpuidle: Extract and export no-lock variants of cpuidle_unregister_device() The cpuidle_unregister_device() function always acquires the internal cpuidle_lock (or pause/resume idle) during their execution. However, in some power notification scenarios (e.g., when old idle states may become unavailable), it is necessary to efficiently disable cpuidle first, then remove and re-create all cpuidle devices for all CPUs. To avoid frequent lock overhead and ensure atomicity across the entire batch operation, the caller needs to hold the cpuidle_lock once outside the loop. To address this, extract the core logic into the new function cpuidle_unregister_device_no_lock() and export it. Signed-off-by: Huisong Li [ rjw: Added missing "inline", subject and changelog tweaks ] Link: https://patch.msgid.link/20260407081141.2493581-2-lihuisong@huawei.com Signed-off-by: Rafael J. 
Wysocki --- drivers/cpuidle/cpuidle.c | 28 ++++++++++++++++++---------- include/linux/cpuidle.h | 2 ++ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index c7876e9e024f..1a55542efead 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -714,6 +714,23 @@ int cpuidle_register_device(struct cpuidle_device *dev) EXPORT_SYMBOL_GPL(cpuidle_register_device); +void cpuidle_unregister_device_no_lock(struct cpuidle_device *dev) +{ + if (!dev || dev->registered == 0) + return; + + lockdep_assert_held(&cpuidle_lock); + + cpuidle_disable_device(dev); + + cpuidle_remove_sysfs(dev); + + __cpuidle_unregister_device(dev); + + cpuidle_coupled_unregister_device(dev); +} +EXPORT_SYMBOL_GPL(cpuidle_unregister_device_no_lock); + /** * cpuidle_unregister_device - unregisters a CPU's idle PM feature * @dev: the cpu @@ -724,18 +741,9 @@ void cpuidle_unregister_device(struct cpuidle_device *dev) return; cpuidle_pause_and_lock(); - - cpuidle_disable_device(dev); - - cpuidle_remove_sysfs(dev); - - __cpuidle_unregister_device(dev); - - cpuidle_coupled_unregister_device(dev); - + cpuidle_unregister_device_no_lock(dev); cpuidle_resume_and_unlock(); } - EXPORT_SYMBOL_GPL(cpuidle_unregister_device); /** diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 4073690504a7..a2485348def3 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -188,6 +188,7 @@ extern void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx, extern void cpuidle_unregister_driver(struct cpuidle_driver *drv); extern int cpuidle_register_device(struct cpuidle_device *dev); extern void cpuidle_unregister_device(struct cpuidle_device *dev); +extern void cpuidle_unregister_device_no_lock(struct cpuidle_device *dev); extern int cpuidle_register(struct cpuidle_driver *drv, const struct cpumask *const coupled_cpus); extern void cpuidle_unregister(struct cpuidle_driver *drv); @@ -226,6 +227,7 @@ 
static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { } static inline int cpuidle_register_device(struct cpuidle_device *dev) {return -ENODEV; } static inline void cpuidle_unregister_device(struct cpuidle_device *dev) { } +static inline void cpuidle_unregister_device_no_lock(struct cpuidle_device *dev) {} static inline int cpuidle_register(struct cpuidle_driver *drv, const struct cpumask *const coupled_cpus) {return -ENODEV; } From 07cba0de5598a427cc8a7161202ebf4e79e692bd Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Tue, 7 Apr 2026 16:11:41 +0800 Subject: [PATCH 20/20] ACPI: processor: idle: Reset cpuidle on C-state list changes When a power notification event occurs, existing ACPI idle states may become obsolete. The current implementation only performs a partial update, leaving critical cpuidle parameters, like target_residency_ns and exit_latency_ns, stale. Furthermore, per-CPU cpuidle_device data, including last_residency_ns, states_usage, and the disable flag, are not properly synchronized. Using these stale values leads to incorrect power management decisions. To ensure all parameters are correctly synchronized, modify the notification handling logic: 1. Unregister all cpuidle_device instances to ensure a clean slate. 2. Unregister and re-register the ACPI idle driver. This forces the framework to re-evaluate global state parameters and ensures the driver state matches the new hardware power profile. 3. Re-initialize power information and re-register cpuidle_device for all possible CPUs to restore functional idle management. This complete reset ensures that the cpuidle framework and the underlying ACPI states are perfectly synchronized after a power state change. Signed-off-by: Huisong Li [ rjw: Subject rewrite ] Link: https://patch.msgid.link/20260407081141.2493581-3-lihuisong@huawei.com Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/processor_idle.c | 51 +++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index c1cddb4a5887..ee5facccbe10 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1307,37 +1307,42 @@ int acpi_processor_power_state_has_changed(struct acpi_processor *pr) */ if (pr->id == 0 && cpuidle_get_driver() == &acpi_idle_driver) { - /* Protect against cpu-hotplug */ cpus_read_lock(); + + /* Unregister cpuidle device of all CPUs */ cpuidle_pause_and_lock(); - - /* Disable all cpuidle devices */ - for_each_online_cpu(cpu) { - _pr = per_cpu(processors, cpu); - if (!_pr || !_pr->flags.power_setup_done) - continue; + for_each_possible_cpu(cpu) { dev = per_cpu(acpi_cpuidle_device, cpu); - cpuidle_disable_device(dev); - } - - /* Populate Updated C-state information */ - acpi_processor_get_power_info(pr); - acpi_processor_setup_cpuidle_states(pr); - - /* Enable all cpuidle devices */ - for_each_online_cpu(cpu) { _pr = per_cpu(processors, cpu); - if (!_pr || !_pr->flags.power_setup_done) + if (!_pr || !_pr->flags.power || !dev) continue; - acpi_processor_get_power_info(_pr); - if (_pr->flags.power) { - dev = per_cpu(acpi_cpuidle_device, cpu); - acpi_processor_setup_cpuidle_dev(_pr, dev); - cpuidle_enable_device(dev); - } + + cpuidle_unregister_device_no_lock(dev); + kfree(dev); + _pr->flags.power = 0; } cpuidle_resume_and_unlock(); + + /* + * Unregister ACPI idle driver, reinitialize ACPI idle states + * and register ACPI idle driver again. + */ + acpi_processor_unregister_idle_driver(); + acpi_processor_register_idle_driver(); + + /* + * Reinitialize power information of all CPUs and re-register + * all cpuidle devices. Now idle states is ok to use, can enable + * cpuidle of each CPU safely one by one. 
+ */ + for_each_possible_cpu(cpu) { + _pr = per_cpu(processors, cpu); + if (!_pr) + continue; + acpi_processor_power_init(_pr); + } + cpus_read_unlock(); }