mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-04 20:53:07 -04:00
drm/i915/hwmon: Fix locking inversion in sysfs getter
In i915 hwmon sysfs getter path we now take a hwmon_lock, then acquire an
rpm wakeref. That results in lock inversion:
<4> [197.079335] ======================================================
<4> [197.085473] WARNING: possible circular locking dependency detected
<4> [197.091611] 6.8.0-rc7-Patchwork_129026v7-gc4dc92fb1152+ #1 Not tainted
<4> [197.098096] ------------------------------------------------------
<4> [197.104231] prometheus-node/839 is trying to acquire lock:
<4> [197.109680] ffffffff82764d80 (fs_reclaim){+.+.}-{0:0}, at: __kmalloc+0x9a/0x350
<4> [197.116939]
but task is already holding lock:
<4> [197.122730] ffff88811b772a40 (&hwmon->hwmon_lock){+.+.}-{3:3}, at: hwm_energy+0x4b/0x100 [i915]
<4> [197.131543]
which lock already depends on the new lock.
...
<4> [197.507922] Chain exists of:
fs_reclaim --> >->reset.mutex --> &hwmon->hwmon_lock
<4> [197.518528] Possible unsafe locking scenario:
<4> [197.524411] CPU0 CPU1
<4> [197.528916] ---- ----
<4> [197.533418] lock(&hwmon->hwmon_lock);
<4> [197.537237] lock(>->reset.mutex);
<4> [197.543376] lock(&hwmon->hwmon_lock);
<4> [197.549682] lock(fs_reclaim);
...
<4> [197.632548] Call Trace:
<4> [197.634990] <TASK>
<4> [197.637088] dump_stack_lvl+0x64/0xb0
<4> [197.640738] check_noncircular+0x15e/0x180
<4> [197.652968] check_prev_add+0xe9/0xce0
<4> [197.656705] __lock_acquire+0x179f/0x2300
<4> [197.660694] lock_acquire+0xd8/0x2d0
<4> [197.673009] fs_reclaim_acquire+0xa1/0xd0
<4> [197.680478] __kmalloc+0x9a/0x350
<4> [197.689063] acpi_ns_internalize_name.part.0+0x4a/0xb0
<4> [197.694170] acpi_ns_get_node_unlocked+0x60/0xf0
<4> [197.720608] acpi_ns_get_node+0x3b/0x60
<4> [197.724428] acpi_get_handle+0x57/0xb0
<4> [197.728164] acpi_has_method+0x20/0x50
<4> [197.731896] acpi_pci_set_power_state+0x43/0x120
<4> [197.736485] pci_power_up+0x24/0x1c0
<4> [197.740047] pci_pm_default_resume_early+0x9/0x30
<4> [197.744725] pci_pm_runtime_resume+0x2d/0x90
<4> [197.753911] __rpm_callback+0x3c/0x110
<4> [197.762586] rpm_callback+0x58/0x70
<4> [197.766064] rpm_resume+0x51e/0x730
<4> [197.769542] rpm_resume+0x267/0x730
<4> [197.773020] rpm_resume+0x267/0x730
<4> [197.776498] rpm_resume+0x267/0x730
<4> [197.779974] __pm_runtime_resume+0x49/0x90
<4> [197.784055] __intel_runtime_pm_get+0x19/0xa0 [i915]
<4> [197.789070] hwm_energy+0x55/0x100 [i915]
<4> [197.793183] hwm_read+0x9a/0x310 [i915]
<4> [197.797124] hwmon_attr_show+0x36/0x120
<4> [197.800946] dev_attr_show+0x15/0x60
<4> [197.804509] sysfs_kf_seq_show+0xb5/0x100
Acquire the wakeref before the lock and hold it as long as the lock is
also held. Follow that pattern across the whole source file where similar
lock inversion can happen.
v2: Keep hardware read under the lock so the whole operation of updating
energy from hardware is still atomic (Guenter),
- instead, acquire the rpm wakeref before the lock and hold it as long
as the lock is held,
- use the same aproach for other similar places across the i915_hwmon.c
source file (Rodrigo).
Fixes: 1b44019a93 ("drm/i915/guc: Disable PL1 power limit when loading GuC firmware")
Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: <stable@vger.kernel.org> # v6.5+
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240311203500.518675-2-janusz.krzysztofik@linux.intel.com
This commit is contained in:
committed by
Andi Shyti
parent
79263e4b3f
commit
71b2187714
@@ -72,12 +72,13 @@ hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat,
|
||||
struct intel_uncore *uncore = ddat->uncore;
|
||||
intel_wakeref_t wakeref;
|
||||
|
||||
mutex_lock(&hwmon->hwmon_lock);
|
||||
with_intel_runtime_pm(uncore->rpm, wakeref) {
|
||||
mutex_lock(&hwmon->hwmon_lock);
|
||||
|
||||
with_intel_runtime_pm(uncore->rpm, wakeref)
|
||||
intel_uncore_rmw(uncore, reg, clear, set);
|
||||
|
||||
mutex_unlock(&hwmon->hwmon_lock);
|
||||
mutex_unlock(&hwmon->hwmon_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -136,20 +137,21 @@ hwm_energy(struct hwm_drvdata *ddat, long *energy)
|
||||
else
|
||||
rgaddr = hwmon->rg.energy_status_all;
|
||||
|
||||
mutex_lock(&hwmon->hwmon_lock);
|
||||
with_intel_runtime_pm(uncore->rpm, wakeref) {
|
||||
mutex_lock(&hwmon->hwmon_lock);
|
||||
|
||||
with_intel_runtime_pm(uncore->rpm, wakeref)
|
||||
reg_val = intel_uncore_read(uncore, rgaddr);
|
||||
|
||||
if (reg_val >= ei->reg_val_prev)
|
||||
ei->accum_energy += reg_val - ei->reg_val_prev;
|
||||
else
|
||||
ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
|
||||
ei->reg_val_prev = reg_val;
|
||||
if (reg_val >= ei->reg_val_prev)
|
||||
ei->accum_energy += reg_val - ei->reg_val_prev;
|
||||
else
|
||||
ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
|
||||
ei->reg_val_prev = reg_val;
|
||||
|
||||
*energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
|
||||
hwmon->scl_shift_energy);
|
||||
mutex_unlock(&hwmon->hwmon_lock);
|
||||
*energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
|
||||
hwmon->scl_shift_energy);
|
||||
mutex_unlock(&hwmon->hwmon_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
@@ -404,6 +406,7 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
|
||||
|
||||
/* Block waiting for GuC reset to complete when needed */
|
||||
for (;;) {
|
||||
wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
|
||||
mutex_lock(&hwmon->hwmon_lock);
|
||||
|
||||
prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
|
||||
@@ -417,14 +420,13 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
|
||||
}
|
||||
|
||||
mutex_unlock(&hwmon->hwmon_lock);
|
||||
intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
|
||||
|
||||
schedule();
|
||||
}
|
||||
finish_wait(&ddat->waitq, &wait);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
|
||||
goto exit;
|
||||
|
||||
/* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */
|
||||
if (val == PL1_DISABLE) {
|
||||
@@ -444,9 +446,8 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
|
||||
intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
|
||||
PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval);
|
||||
exit:
|
||||
intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
|
||||
unlock:
|
||||
mutex_unlock(&hwmon->hwmon_lock);
|
||||
intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user