rv: Add sample hybrid monitor stall

Add a sample monitor to showcase hybrid/timed automata.
The stall monitor identifies tasks stalled for longer than a threshold
and reacts when that happens.

Reviewed-by: Nam Cao <namcao@linutronix.de>
Link: https://lore.kernel.org/r/20260330111010.153663-7-gmonaco@redhat.com
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
This commit is contained in:
Gabriele Monaco
2026-03-30 13:10:04 +02:00
parent 708340c271
commit 13578a0871
12 changed files with 377 additions and 0 deletions

View File

@@ -16,3 +16,4 @@ Runtime verification (rv) tool
rv-mon-wip
rv-mon-wwnr
rv-mon-sched
rv-mon-stall

View File

@@ -0,0 +1,44 @@
.. SPDX-License-Identifier: GPL-2.0
============
rv-mon-stall
============
--------------------
Stalled task monitor
--------------------
:Manual section: 1
SYNOPSIS
========
**rv mon stall** [*OPTIONS*]
DESCRIPTION
===========
The stalled task (**stall**) monitor is a sample per-task timed monitor that
checks if tasks are scheduled within a defined threshold after they are ready.
See kernel documentation for further information about this monitor:
<https://docs.kernel.org/trace/rv/monitor_stall.html>
OPTIONS
=======
.. include:: common_ikm.rst
SEE ALSO
========
**rv**\(1), **rv-mon**\(1)
Linux kernel *RV* documentation:
<https://www.kernel.org/doc/html/latest/trace/rv/index.html>
AUTHOR
======
Written by Gabriele Monaco <gmonaco@redhat.com>
.. include:: common_appendix.rst

View File

@@ -16,3 +16,4 @@ Runtime Verification
monitor_wwnr.rst
monitor_sched.rst
monitor_rtapp.rst
monitor_stall.rst

View File

@@ -0,0 +1,43 @@
Monitor stall
=============
- Name: stall - stalled task monitor
- Type: per-task hybrid automaton
- Author: Gabriele Monaco <gmonaco@redhat.com>
Description
-----------
The stalled task (stall) monitor is a sample per-task timed monitor that checks
if tasks are scheduled within a defined threshold after they are ready::
|
|
v
#==========================#
+-----------------> H dequeued H
| #==========================#
| |
sched_switch_wait | sched_wakeup;reset(clk)
| v
| +--------------------------+ <+
| | enqueued | | sched_wakeup
| | clk < threshold_jiffies | -+
| +--------------------------+
| | ^
| sched_switch_in sched_switch_preempt;reset(clk)
| v |
| +--------------------------+
+------------------ | running |
+--------------------------+
^ sched_switch_in |
| sched_wakeup |
+----------------------+
The threshold can be configured as a parameter by either booting with the
``stall.threshold_jiffies=<new value>`` argument or writing a new value to
``/sys/module/stall/parameters/threshold_jiffies``.
Specification
-------------
Graphviz Dot file in tools/verification/models/stall.dot

View File

@@ -78,6 +78,7 @@ source "kernel/trace/rv/monitors/pagefault/Kconfig"
source "kernel/trace/rv/monitors/sleep/Kconfig"
# Add new rtapp monitors here
source "kernel/trace/rv/monitors/stall/Kconfig"
# Add new monitors here
config RV_REACTORS

View File

@@ -17,6 +17,7 @@ obj-$(CONFIG_RV_MON_STS) += monitors/sts/sts.o
obj-$(CONFIG_RV_MON_NRP) += monitors/nrp/nrp.o
obj-$(CONFIG_RV_MON_SSSW) += monitors/sssw/sssw.o
obj-$(CONFIG_RV_MON_OPID) += monitors/opid/opid.o
obj-$(CONFIG_RV_MON_STALL) += monitors/stall/stall.o
# Add new monitors here
obj-$(CONFIG_RV_REACTORS) += rv_reactors.o
obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o

View File

@@ -0,0 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
#
config RV_MON_STALL
depends on RV
select HA_MON_EVENTS_ID
bool "stall monitor"
help
Enable the stall sample monitor that illustrates the usage of hybrid
automata monitors. It can be used to identify tasks stalled for
longer than a threshold.
For further information, see:
Documentation/trace/rv/monitor_stall.rst

View File

@@ -0,0 +1,150 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/ftrace.h>
#include <linux/tracepoint.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/rv.h>
#include <rv/instrumentation.h>
#define MODULE_NAME "stall"
#include <trace/events/sched.h>
#include <rv_trace.h>
#define RV_MON_TYPE RV_MON_PER_TASK
#define HA_TIMER_TYPE HA_TIMER_WHEEL
#include "stall.h"
#include <rv/ha_monitor.h>
static u64 threshold_jiffies = 1000;
module_param(threshold_jiffies, ullong, 0644);
static u64 ha_get_env(struct ha_monitor *ha_mon, enum envs_stall env, u64 time_ns)
{
if (env == clk_stall)
return ha_get_clk_jiffy(ha_mon, env);
return ENV_INVALID_VALUE;
}
static void ha_reset_env(struct ha_monitor *ha_mon, enum envs_stall env, u64 time_ns)
{
if (env == clk_stall)
ha_reset_clk_jiffy(ha_mon, env);
}
static inline bool ha_verify_invariants(struct ha_monitor *ha_mon,
enum states curr_state, enum events event,
enum states next_state, u64 time_ns)
{
if (curr_state == enqueued_stall)
return ha_check_invariant_jiffy(ha_mon, clk_stall, time_ns);
return true;
}
static inline bool ha_verify_guards(struct ha_monitor *ha_mon,
enum states curr_state, enum events event,
enum states next_state, u64 time_ns)
{
bool res = true;
if (curr_state == dequeued_stall && event == sched_wakeup_stall)
ha_reset_env(ha_mon, clk_stall, time_ns);
else if (curr_state == running_stall && event == sched_switch_preempt_stall)
ha_reset_env(ha_mon, clk_stall, time_ns);
return res;
}
static inline void ha_setup_invariants(struct ha_monitor *ha_mon,
enum states curr_state, enum events event,
enum states next_state, u64 time_ns)
{
if (next_state == curr_state)
return;
if (next_state == enqueued_stall)
ha_start_timer_jiffy(ha_mon, clk_stall, threshold_jiffies, time_ns);
else if (curr_state == enqueued_stall)
ha_cancel_timer(ha_mon);
}
static bool ha_verify_constraint(struct ha_monitor *ha_mon,
enum states curr_state, enum events event,
enum states next_state, u64 time_ns)
{
if (!ha_verify_invariants(ha_mon, curr_state, event, next_state, time_ns))
return false;
if (!ha_verify_guards(ha_mon, curr_state, event, next_state, time_ns))
return false;
ha_setup_invariants(ha_mon, curr_state, event, next_state, time_ns);
return true;
}
static void handle_sched_switch(void *data, bool preempt,
struct task_struct *prev,
struct task_struct *next,
unsigned int prev_state)
{
if (!preempt && prev_state != TASK_RUNNING)
da_handle_start_event(prev, sched_switch_wait_stall);
else
da_handle_event(prev, sched_switch_preempt_stall);
da_handle_event(next, sched_switch_in_stall);
}
static void handle_sched_wakeup(void *data, struct task_struct *p)
{
da_handle_event(p, sched_wakeup_stall);
}
static int enable_stall(void)
{
int retval;
retval = da_monitor_init();
if (retval)
return retval;
rv_attach_trace_probe("stall", sched_switch, handle_sched_switch);
rv_attach_trace_probe("stall", sched_wakeup, handle_sched_wakeup);
return 0;
}
static void disable_stall(void)
{
rv_this.enabled = 0;
rv_detach_trace_probe("stall", sched_switch, handle_sched_switch);
rv_detach_trace_probe("stall", sched_wakeup, handle_sched_wakeup);
da_monitor_destroy();
}
static struct rv_monitor rv_this = {
.name = "stall",
.description = "identify tasks stalled for longer than a threshold.",
.enable = enable_stall,
.disable = disable_stall,
.reset = da_monitor_reset_all,
.enabled = 0,
};
static int __init register_stall(void)
{
return rv_register_monitor(&rv_this, NULL);
}
static void __exit unregister_stall(void)
{
rv_unregister_monitor(&rv_this);
}
module_init(register_stall);
module_exit(unregister_stall);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
MODULE_DESCRIPTION("stall: identify tasks stalled for longer than a threshold.");

View File

@@ -0,0 +1,81 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Automatically generated C representation of stall automaton
* For further information about this format, see kernel documentation:
* Documentation/trace/rv/deterministic_automata.rst
*/
#define MONITOR_NAME stall
enum states_stall {
dequeued_stall,
enqueued_stall,
running_stall,
state_max_stall,
};
#define INVALID_STATE state_max_stall
enum events_stall {
sched_switch_in_stall,
sched_switch_preempt_stall,
sched_switch_wait_stall,
sched_wakeup_stall,
event_max_stall,
};
enum envs_stall {
clk_stall,
env_max_stall,
env_max_stored_stall = env_max_stall,
};
_Static_assert(env_max_stored_stall <= MAX_HA_ENV_LEN, "Not enough slots");
struct automaton_stall {
char *state_names[state_max_stall];
char *event_names[event_max_stall];
char *env_names[env_max_stall];
unsigned char function[state_max_stall][event_max_stall];
unsigned char initial_state;
bool final_states[state_max_stall];
};
static const struct automaton_stall automaton_stall = {
.state_names = {
"dequeued",
"enqueued",
"running",
},
.event_names = {
"sched_switch_in",
"sched_switch_preempt",
"sched_switch_wait",
"sched_wakeup",
},
.env_names = {
"clk",
},
.function = {
{
INVALID_STATE,
INVALID_STATE,
INVALID_STATE,
enqueued_stall,
},
{
running_stall,
INVALID_STATE,
INVALID_STATE,
enqueued_stall,
},
{
running_stall,
enqueued_stall,
dequeued_stall,
running_stall,
},
},
.initial_state = dequeued_stall,
.final_states = { 1, 0, 0 },
};

View File

@@ -0,0 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Snippet to be included in rv_trace.h
*/
#ifdef CONFIG_RV_MON_STALL
DEFINE_EVENT(event_da_monitor_id, event_stall,
TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state),
TP_ARGS(id, state, event, next_state, final_state));
DEFINE_EVENT(error_da_monitor_id, error_stall,
TP_PROTO(int id, char *state, char *event),
TP_ARGS(id, state, event));
DEFINE_EVENT(error_env_da_monitor_id, error_env_stall,
TP_PROTO(int id, char *state, char *event, char *env),
TP_ARGS(id, state, event, env));
#endif /* CONFIG_RV_MON_STALL */

View File

@@ -187,6 +187,7 @@ DECLARE_EVENT_CLASS(error_env_da_monitor_id,
__get_str(env))
);
#include <monitors/stall/stall_trace.h>
// Add new monitors based on CONFIG_HA_MON_EVENTS_ID here
#endif

View File

@@ -0,0 +1,22 @@
digraph state_automaton {
center = true;
size = "7,11";
{node [shape = circle] "enqueued"};
{node [shape = plaintext, style=invis, label=""] "__init_dequeued"};
{node [shape = doublecircle] "dequeued"};
{node [shape = circle] "running"};
"__init_dequeued" -> "dequeued";
"enqueued" [label = "enqueued\nclk < threshold_jiffies"];
"running" [label = "running"];
"dequeued" [label = "dequeued", color = green3];
"running" -> "running" [ label = "sched_switch_in\nsched_wakeup" ];
"enqueued" -> "enqueued" [ label = "sched_wakeup" ];
"enqueued" -> "running" [ label = "sched_switch_in" ];
"running" -> "dequeued" [ label = "sched_switch_wait" ];
"dequeued" -> "enqueued" [ label = "sched_wakeup;reset(clk)" ];
"running" -> "enqueued" [ label = "sched_switch_preempt;reset(clk)" ];
{ rank = min ;
"__init_dequeued";
"dequeued";
}
}