mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-12-27 10:01:39 -05:00
hung_task: add hung_task_sys_info sysctl to dump sys info on task-hung
When a task hang happens, developers may need different kinds of system information (call stacks, memory info, locks, etc.) to help with debugging. Add a 'hung_task_sys_info' sysctl knob that takes a human-readable string like "tasks,mem,timers,locks,ftrace,...", so that when a task hang happens, all requested information is dumped (refer to kernel/sys_info.c for more details). Meanwhile, the newly introduced sys_info() call is used to unify some existing info-dumping knobs. [feng.tang@linux.alibaba.com: maintain consistency with established behavior, per Lance and Petr] Link: https://lkml.kernel.org/r/aRncJo1mA5Zk77Hr@U-2FWC9VHC-2323.local Link: https://lkml.kernel.org/r/20251113111039.22701-3-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com> Suggested-by: Petr Mladek <pmladek@suse.com> Reviewed-by: Petr Mladek <pmladek@suse.com> Reviewed-by: Lance Yang <lance.yang@linux.dev> Cc: Jonathan Corbet <corbet@lwn.net> Cc: "Paul E . McKenney" <paulmck@kernel.org> Cc: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
@@ -422,6 +422,11 @@ the system boot.
|
|||||||
|
|
||||||
This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
|
This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
|
||||||
|
|
||||||
|
hung_task_sys_info
|
||||||
|
==================
|
||||||
|
A comma-separated list of extra system information to be dumped when a
|
||||||
|
hung task is detected, for example, "tasks,mem,timers,locks,...".
|
||||||
|
Refer to the 'panic_sys_info' section below for more details.
|
||||||
|
|
||||||
hung_task_timeout_secs
|
hung_task_timeout_secs
|
||||||
======================
|
======================
|
||||||
|
|||||||
@@ -24,6 +24,7 @@
|
|||||||
#include <linux/sched/sysctl.h>
|
#include <linux/sched/sysctl.h>
|
||||||
#include <linux/hung_task.h>
|
#include <linux/hung_task.h>
|
||||||
#include <linux/rwsem.h>
|
#include <linux/rwsem.h>
|
||||||
|
#include <linux/sys_info.h>
|
||||||
|
|
||||||
#include <trace/events/sched.h>
|
#include <trace/events/sched.h>
|
||||||
|
|
||||||
@@ -59,12 +60,17 @@ static unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
|
|||||||
static int __read_mostly sysctl_hung_task_warnings = 10;
|
static int __read_mostly sysctl_hung_task_warnings = 10;
|
||||||
|
|
||||||
static int __read_mostly did_panic;
|
static int __read_mostly did_panic;
|
||||||
static bool hung_task_show_lock;
|
|
||||||
static bool hung_task_call_panic;
|
static bool hung_task_call_panic;
|
||||||
static bool hung_task_show_all_bt;
|
|
||||||
|
|
||||||
static struct task_struct *watchdog_task;
|
static struct task_struct *watchdog_task;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A bitmask controlling which kinds of system info are printed when
|
||||||
|
* a hung task is detected (e.g. task, memory, lock info). Refer to
|
||||||
|
* include/linux/sys_info.h for the detailed bit definitions.
|
||||||
|
*/
|
||||||
|
static unsigned long hung_task_si_mask;
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
/*
|
/*
|
||||||
* Should we dump all CPUs backtraces in a hung task event?
|
* Should we dump all CPUs backtraces in a hung task event?
|
||||||
@@ -236,7 +242,6 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout,
|
|||||||
|
|
||||||
if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
|
if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
|
||||||
console_verbose();
|
console_verbose();
|
||||||
hung_task_show_lock = true;
|
|
||||||
hung_task_call_panic = true;
|
hung_task_call_panic = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -259,10 +264,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout,
|
|||||||
" disables this message.\n");
|
" disables this message.\n");
|
||||||
sched_show_task(t);
|
sched_show_task(t);
|
||||||
debug_show_blocker(t, timeout);
|
debug_show_blocker(t, timeout);
|
||||||
hung_task_show_lock = true;
|
|
||||||
|
|
||||||
if (sysctl_hung_task_all_cpu_backtrace)
|
|
||||||
hung_task_show_all_bt = true;
|
|
||||||
if (!sysctl_hung_task_warnings)
|
if (!sysctl_hung_task_warnings)
|
||||||
pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
|
pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
|
||||||
}
|
}
|
||||||
@@ -304,6 +306,8 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
|
|||||||
unsigned long last_break = jiffies;
|
unsigned long last_break = jiffies;
|
||||||
struct task_struct *g, *t;
|
struct task_struct *g, *t;
|
||||||
unsigned long prev_detect_count = sysctl_hung_task_detect_count;
|
unsigned long prev_detect_count = sysctl_hung_task_detect_count;
|
||||||
|
int need_warning = sysctl_hung_task_warnings;
|
||||||
|
unsigned long si_mask = hung_task_si_mask;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the system crashed already then all bets are off,
|
* If the system crashed already then all bets are off,
|
||||||
@@ -312,7 +316,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
|
|||||||
if (test_taint(TAINT_DIE) || did_panic)
|
if (test_taint(TAINT_DIE) || did_panic)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
hung_task_show_lock = false;
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
for_each_process_thread(g, t) {
|
for_each_process_thread(g, t) {
|
||||||
|
|
||||||
@@ -328,14 +332,19 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
|
|||||||
}
|
}
|
||||||
unlock:
|
unlock:
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
if (hung_task_show_lock)
|
|
||||||
debug_show_all_locks();
|
|
||||||
|
|
||||||
if (hung_task_show_all_bt) {
|
if (!(sysctl_hung_task_detect_count - prev_detect_count))
|
||||||
hung_task_show_all_bt = false;
|
return;
|
||||||
trigger_all_cpu_backtrace();
|
|
||||||
|
if (need_warning || hung_task_call_panic) {
|
||||||
|
si_mask |= SYS_INFO_LOCKS;
|
||||||
|
|
||||||
|
if (sysctl_hung_task_all_cpu_backtrace)
|
||||||
|
si_mask |= SYS_INFO_ALL_BT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sys_info(si_mask);
|
||||||
|
|
||||||
if (hung_task_call_panic)
|
if (hung_task_call_panic)
|
||||||
panic("hung_task: blocked tasks");
|
panic("hung_task: blocked tasks");
|
||||||
}
|
}
|
||||||
@@ -434,6 +443,13 @@ static const struct ctl_table hung_task_sysctls[] = {
|
|||||||
.mode = 0444,
|
.mode = 0444,
|
||||||
.proc_handler = proc_doulongvec_minmax,
|
.proc_handler = proc_doulongvec_minmax,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
.procname = "hung_task_sys_info",
|
||||||
|
.data = &hung_task_si_mask,
|
||||||
|
.maxlen = sizeof(hung_task_si_mask),
|
||||||
|
.mode = 0644,
|
||||||
|
.proc_handler = sysctl_sys_info_handler,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
static void __init hung_task_sysctl_init(void)
|
static void __init hung_task_sysctl_init(void)
|
||||||
|
|||||||
Reference in New Issue
Block a user