mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-16 00:51:51 -04:00
rv: Add sample hybrid monitor stall
Add a sample monitor to showcase hybrid/timed automata. The stall monitor identifies tasks stalled for longer than a threshold and reacts when that happens. Reviewed-by: Nam Cao <namcao@linutronix.de> Link: https://lore.kernel.org/r/20260330111010.153663-7-gmonaco@redhat.com Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
This commit is contained in:
@@ -16,3 +16,4 @@ Runtime verification (rv) tool
|
||||
rv-mon-wip
|
||||
rv-mon-wwnr
|
||||
rv-mon-sched
|
||||
rv-mon-stall
|
||||
|
||||
44
Documentation/tools/rv/rv-mon-stall.rst
Normal file
44
Documentation/tools/rv/rv-mon-stall.rst
Normal file
@@ -0,0 +1,44 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
============
|
||||
rv-mon-stall
|
||||
============
|
||||
--------------------
|
||||
Stalled task monitor
|
||||
--------------------
|
||||
|
||||
:Manual section: 1
|
||||
|
||||
SYNOPSIS
|
||||
========
|
||||
|
||||
**rv mon stall** [*OPTIONS*]
|
||||
|
||||
DESCRIPTION
|
||||
===========
|
||||
|
||||
The stalled task (**stall**) monitor is a sample per-task timed monitor that
|
||||
checks if tasks are scheduled within a defined threshold after they are ready.
|
||||
|
||||
See kernel documentation for further information about this monitor:
|
||||
<https://docs.kernel.org/trace/rv/monitor_stall.html>
|
||||
|
||||
OPTIONS
|
||||
=======
|
||||
|
||||
.. include:: common_ikm.rst
|
||||
|
||||
SEE ALSO
|
||||
========
|
||||
|
||||
**rv**\(1), **rv-mon**\(1)
|
||||
|
||||
Linux kernel *RV* documentation:
|
||||
<https://www.kernel.org/doc/html/latest/trace/rv/index.html>
|
||||
|
||||
AUTHOR
|
||||
======
|
||||
|
||||
Written by Gabriele Monaco <gmonaco@redhat.com>
|
||||
|
||||
.. include:: common_appendix.rst
|
||||
@@ -16,3 +16,4 @@ Runtime Verification
|
||||
monitor_wwnr.rst
|
||||
monitor_sched.rst
|
||||
monitor_rtapp.rst
|
||||
monitor_stall.rst
|
||||
|
||||
43
Documentation/trace/rv/monitor_stall.rst
Normal file
43
Documentation/trace/rv/monitor_stall.rst
Normal file
@@ -0,0 +1,43 @@
|
||||
Monitor stall
|
||||
=============
|
||||
|
||||
- Name: stall - stalled task monitor
|
||||
- Type: per-task hybrid automaton
|
||||
- Author: Gabriele Monaco <gmonaco@redhat.com>
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
The stalled task (stall) monitor is a sample per-task timed monitor that checks
|
||||
if tasks are scheduled within a defined threshold after they are ready::
|
||||
|
||||
|
|
||||
|
|
||||
v
|
||||
#==========================#
|
||||
+-----------------> H dequeued H
|
||||
| #==========================#
|
||||
| |
|
||||
sched_switch_wait | sched_wakeup;reset(clk)
|
||||
| v
|
||||
| +--------------------------+ <+
|
||||
| | enqueued | | sched_wakeup
|
||||
| | clk < threshold_jiffies | -+
|
||||
| +--------------------------+
|
||||
| | ^
|
||||
| sched_switch_in sched_switch_preempt;reset(clk)
|
||||
| v |
|
||||
| +--------------------------+
|
||||
+------------------ | running |
|
||||
+--------------------------+
|
||||
^ sched_switch_in |
|
||||
| sched_wakeup |
|
||||
+----------------------+
|
||||
|
||||
The threshold can be configured as a parameter by either booting with the
|
||||
``stall.threshold_jiffies=<new value>`` argument or writing a new value to
|
||||
``/sys/module/stall/parameters/threshold_jiffies``.
|
||||
|
||||
Specification
|
||||
-------------
|
||||
Graphviz Dot file in tools/verification/models/stall.dot
|
||||
@@ -78,6 +78,7 @@ source "kernel/trace/rv/monitors/pagefault/Kconfig"
|
||||
source "kernel/trace/rv/monitors/sleep/Kconfig"
|
||||
# Add new rtapp monitors here
|
||||
|
||||
source "kernel/trace/rv/monitors/stall/Kconfig"
|
||||
# Add new monitors here
|
||||
|
||||
config RV_REACTORS
|
||||
|
||||
@@ -17,6 +17,7 @@ obj-$(CONFIG_RV_MON_STS) += monitors/sts/sts.o
|
||||
obj-$(CONFIG_RV_MON_NRP) += monitors/nrp/nrp.o
|
||||
obj-$(CONFIG_RV_MON_SSSW) += monitors/sssw/sssw.o
|
||||
obj-$(CONFIG_RV_MON_OPID) += monitors/opid/opid.o
|
||||
obj-$(CONFIG_RV_MON_STALL) += monitors/stall/stall.o
|
||||
# Add new monitors here
|
||||
obj-$(CONFIG_RV_REACTORS) += rv_reactors.o
|
||||
obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o
|
||||
|
||||
13
kernel/trace/rv/monitors/stall/Kconfig
Normal file
13
kernel/trace/rv/monitors/stall/Kconfig
Normal file
@@ -0,0 +1,13 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
#
|
||||
config RV_MON_STALL
|
||||
depends on RV
|
||||
select HA_MON_EVENTS_ID
|
||||
bool "stall monitor"
|
||||
help
|
||||
Enable the stall sample monitor that illustrates the usage of hybrid
|
||||
automata monitors. It can be used to identify tasks stalled for
|
||||
longer than a threshold.
|
||||
|
||||
For further information, see:
|
||||
Documentation/trace/rv/monitor_stall.rst
|
||||
150
kernel/trace/rv/monitors/stall/stall.c
Normal file
150
kernel/trace/rv/monitors/stall/stall.c
Normal file
@@ -0,0 +1,150 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/tracepoint.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/rv.h>
|
||||
#include <rv/instrumentation.h>
|
||||
|
||||
#define MODULE_NAME "stall"
|
||||
|
||||
#include <trace/events/sched.h>
|
||||
#include <rv_trace.h>
|
||||
|
||||
#define RV_MON_TYPE RV_MON_PER_TASK
|
||||
#define HA_TIMER_TYPE HA_TIMER_WHEEL
|
||||
#include "stall.h"
|
||||
#include <rv/ha_monitor.h>
|
||||
|
||||
static u64 threshold_jiffies = 1000;
|
||||
module_param(threshold_jiffies, ullong, 0644);
|
||||
|
||||
static u64 ha_get_env(struct ha_monitor *ha_mon, enum envs_stall env, u64 time_ns)
|
||||
{
|
||||
if (env == clk_stall)
|
||||
return ha_get_clk_jiffy(ha_mon, env);
|
||||
return ENV_INVALID_VALUE;
|
||||
}
|
||||
|
||||
static void ha_reset_env(struct ha_monitor *ha_mon, enum envs_stall env, u64 time_ns)
|
||||
{
|
||||
if (env == clk_stall)
|
||||
ha_reset_clk_jiffy(ha_mon, env);
|
||||
}
|
||||
|
||||
static inline bool ha_verify_invariants(struct ha_monitor *ha_mon,
|
||||
enum states curr_state, enum events event,
|
||||
enum states next_state, u64 time_ns)
|
||||
{
|
||||
if (curr_state == enqueued_stall)
|
||||
return ha_check_invariant_jiffy(ha_mon, clk_stall, time_ns);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool ha_verify_guards(struct ha_monitor *ha_mon,
|
||||
enum states curr_state, enum events event,
|
||||
enum states next_state, u64 time_ns)
|
||||
{
|
||||
bool res = true;
|
||||
|
||||
if (curr_state == dequeued_stall && event == sched_wakeup_stall)
|
||||
ha_reset_env(ha_mon, clk_stall, time_ns);
|
||||
else if (curr_state == running_stall && event == sched_switch_preempt_stall)
|
||||
ha_reset_env(ha_mon, clk_stall, time_ns);
|
||||
return res;
|
||||
}
|
||||
|
||||
static inline void ha_setup_invariants(struct ha_monitor *ha_mon,
|
||||
enum states curr_state, enum events event,
|
||||
enum states next_state, u64 time_ns)
|
||||
{
|
||||
if (next_state == curr_state)
|
||||
return;
|
||||
if (next_state == enqueued_stall)
|
||||
ha_start_timer_jiffy(ha_mon, clk_stall, threshold_jiffies, time_ns);
|
||||
else if (curr_state == enqueued_stall)
|
||||
ha_cancel_timer(ha_mon);
|
||||
}
|
||||
|
||||
static bool ha_verify_constraint(struct ha_monitor *ha_mon,
|
||||
enum states curr_state, enum events event,
|
||||
enum states next_state, u64 time_ns)
|
||||
{
|
||||
if (!ha_verify_invariants(ha_mon, curr_state, event, next_state, time_ns))
|
||||
return false;
|
||||
|
||||
if (!ha_verify_guards(ha_mon, curr_state, event, next_state, time_ns))
|
||||
return false;
|
||||
|
||||
ha_setup_invariants(ha_mon, curr_state, event, next_state, time_ns);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void handle_sched_switch(void *data, bool preempt,
|
||||
struct task_struct *prev,
|
||||
struct task_struct *next,
|
||||
unsigned int prev_state)
|
||||
{
|
||||
if (!preempt && prev_state != TASK_RUNNING)
|
||||
da_handle_start_event(prev, sched_switch_wait_stall);
|
||||
else
|
||||
da_handle_event(prev, sched_switch_preempt_stall);
|
||||
da_handle_event(next, sched_switch_in_stall);
|
||||
}
|
||||
|
||||
static void handle_sched_wakeup(void *data, struct task_struct *p)
|
||||
{
|
||||
da_handle_event(p, sched_wakeup_stall);
|
||||
}
|
||||
|
||||
static int enable_stall(void)
|
||||
{
|
||||
int retval;
|
||||
|
||||
retval = da_monitor_init();
|
||||
if (retval)
|
||||
return retval;
|
||||
|
||||
rv_attach_trace_probe("stall", sched_switch, handle_sched_switch);
|
||||
rv_attach_trace_probe("stall", sched_wakeup, handle_sched_wakeup);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void disable_stall(void)
|
||||
{
|
||||
rv_this.enabled = 0;
|
||||
|
||||
rv_detach_trace_probe("stall", sched_switch, handle_sched_switch);
|
||||
rv_detach_trace_probe("stall", sched_wakeup, handle_sched_wakeup);
|
||||
|
||||
da_monitor_destroy();
|
||||
}
|
||||
|
||||
static struct rv_monitor rv_this = {
|
||||
.name = "stall",
|
||||
.description = "identify tasks stalled for longer than a threshold.",
|
||||
.enable = enable_stall,
|
||||
.disable = disable_stall,
|
||||
.reset = da_monitor_reset_all,
|
||||
.enabled = 0,
|
||||
};
|
||||
|
||||
static int __init register_stall(void)
|
||||
{
|
||||
return rv_register_monitor(&rv_this, NULL);
|
||||
}
|
||||
|
||||
static void __exit unregister_stall(void)
|
||||
{
|
||||
rv_unregister_monitor(&rv_this);
|
||||
}
|
||||
|
||||
module_init(register_stall);
|
||||
module_exit(unregister_stall);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
|
||||
MODULE_DESCRIPTION("stall: identify tasks stalled for longer than a threshold.");
|
||||
81
kernel/trace/rv/monitors/stall/stall.h
Normal file
81
kernel/trace/rv/monitors/stall/stall.h
Normal file
@@ -0,0 +1,81 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Automatically generated C representation of stall automaton
|
||||
* For further information about this format, see kernel documentation:
|
||||
* Documentation/trace/rv/deterministic_automata.rst
|
||||
*/
|
||||
|
||||
#define MONITOR_NAME stall
|
||||
|
||||
enum states_stall {
|
||||
dequeued_stall,
|
||||
enqueued_stall,
|
||||
running_stall,
|
||||
state_max_stall,
|
||||
};
|
||||
|
||||
#define INVALID_STATE state_max_stall
|
||||
|
||||
enum events_stall {
|
||||
sched_switch_in_stall,
|
||||
sched_switch_preempt_stall,
|
||||
sched_switch_wait_stall,
|
||||
sched_wakeup_stall,
|
||||
event_max_stall,
|
||||
};
|
||||
|
||||
enum envs_stall {
|
||||
clk_stall,
|
||||
env_max_stall,
|
||||
env_max_stored_stall = env_max_stall,
|
||||
};
|
||||
|
||||
_Static_assert(env_max_stored_stall <= MAX_HA_ENV_LEN, "Not enough slots");
|
||||
|
||||
struct automaton_stall {
|
||||
char *state_names[state_max_stall];
|
||||
char *event_names[event_max_stall];
|
||||
char *env_names[env_max_stall];
|
||||
unsigned char function[state_max_stall][event_max_stall];
|
||||
unsigned char initial_state;
|
||||
bool final_states[state_max_stall];
|
||||
};
|
||||
|
||||
static const struct automaton_stall automaton_stall = {
|
||||
.state_names = {
|
||||
"dequeued",
|
||||
"enqueued",
|
||||
"running",
|
||||
},
|
||||
.event_names = {
|
||||
"sched_switch_in",
|
||||
"sched_switch_preempt",
|
||||
"sched_switch_wait",
|
||||
"sched_wakeup",
|
||||
},
|
||||
.env_names = {
|
||||
"clk",
|
||||
},
|
||||
.function = {
|
||||
{
|
||||
INVALID_STATE,
|
||||
INVALID_STATE,
|
||||
INVALID_STATE,
|
||||
enqueued_stall,
|
||||
},
|
||||
{
|
||||
running_stall,
|
||||
INVALID_STATE,
|
||||
INVALID_STATE,
|
||||
enqueued_stall,
|
||||
},
|
||||
{
|
||||
running_stall,
|
||||
enqueued_stall,
|
||||
dequeued_stall,
|
||||
running_stall,
|
||||
},
|
||||
},
|
||||
.initial_state = dequeued_stall,
|
||||
.final_states = { 1, 0, 0 },
|
||||
};
|
||||
19
kernel/trace/rv/monitors/stall/stall_trace.h
Normal file
19
kernel/trace/rv/monitors/stall/stall_trace.h
Normal file
@@ -0,0 +1,19 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
/*
|
||||
* Snippet to be included in rv_trace.h
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_RV_MON_STALL
|
||||
DEFINE_EVENT(event_da_monitor_id, event_stall,
|
||||
TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state),
|
||||
TP_ARGS(id, state, event, next_state, final_state));
|
||||
|
||||
DEFINE_EVENT(error_da_monitor_id, error_stall,
|
||||
TP_PROTO(int id, char *state, char *event),
|
||||
TP_ARGS(id, state, event));
|
||||
|
||||
DEFINE_EVENT(error_env_da_monitor_id, error_env_stall,
|
||||
TP_PROTO(int id, char *state, char *event, char *env),
|
||||
TP_ARGS(id, state, event, env));
|
||||
#endif /* CONFIG_RV_MON_STALL */
|
||||
@@ -187,6 +187,7 @@ DECLARE_EVENT_CLASS(error_env_da_monitor_id,
|
||||
__get_str(env))
|
||||
);
|
||||
|
||||
#include <monitors/stall/stall_trace.h>
|
||||
// Add new monitors based on CONFIG_HA_MON_EVENTS_ID here
|
||||
|
||||
#endif
|
||||
|
||||
22
tools/verification/models/stall.dot
Normal file
22
tools/verification/models/stall.dot
Normal file
@@ -0,0 +1,22 @@
|
||||
digraph state_automaton {
|
||||
center = true;
|
||||
size = "7,11";
|
||||
{node [shape = circle] "enqueued"};
|
||||
{node [shape = plaintext, style=invis, label=""] "__init_dequeued"};
|
||||
{node [shape = doublecircle] "dequeued"};
|
||||
{node [shape = circle] "running"};
|
||||
"__init_dequeued" -> "dequeued";
|
||||
"enqueued" [label = "enqueued\nclk < threshold_jiffies"];
|
||||
"running" [label = "running"];
|
||||
"dequeued" [label = "dequeued", color = green3];
|
||||
"running" -> "running" [ label = "sched_switch_in\nsched_wakeup" ];
|
||||
"enqueued" -> "enqueued" [ label = "sched_wakeup" ];
|
||||
"enqueued" -> "running" [ label = "sched_switch_in" ];
|
||||
"running" -> "dequeued" [ label = "sched_switch_wait" ];
|
||||
"dequeued" -> "enqueued" [ label = "sched_wakeup;reset(clk)" ];
|
||||
"running" -> "enqueued" [ label = "sched_switch_preempt;reset(clk)" ];
|
||||
{ rank = min ;
|
||||
"__init_dequeued";
|
||||
"dequeued";
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user