tracing: Drain deferred trigger frees if kthread creation fails

Boot-time trigger registration can fail before the trigger-data cleanup
kthread exists. Deferring those frees until late init is fine, but the
post-boot fallback must still drain the deferred list if kthread
creation never succeeds.

Otherwise, boot-deferred nodes can accumulate on
trigger_data_free_list; later frees then fall back to synchronously
freeing only the current object, and the older queued entries are
leaked forever.

To trigger this, add the following to the kernel command line:

  trace_event=sched_switch trace_trigger=sched_switch.traceon,sched_switch.traceon

The second traceon trigger will fail and be freed. This triggers a NULL
pointer dereference and crashes the kernel.

Keep the deferred boot-time behavior, but when kthread creation fails,
drain the whole queued list synchronously. Do the same in the late-init
drain path so queued entries are not stranded there either.

Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260324221326.1395799-3-atwellwea@gmail.com
Fixes: 61d445af0a ("tracing: Add bulk garbage collection of freeing event_trigger_data")
Signed-off-by: Wesley Atwell <atwellwea@gmail.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
This commit is contained in:
Wesley Atwell
2026-03-24 16:13:26 -06:00
committed by Steven Rostedt (Google)
parent 1f98857322
commit 250ab25391

View File

@@ -22,6 +22,39 @@ static struct task_struct *trigger_kthread;
static struct llist_head trigger_data_free_list;
static DEFINE_MUTEX(trigger_data_kthread_mutex);
static int trigger_kthread_fn(void *ignore);
/*
 * Create the trigger-data cleanup kthread if it does not exist yet.
 * Caller must hold trigger_data_kthread_mutex. On kthread_create()
 * failure, trigger_kthread simply stays NULL.
 */
static void trigger_create_kthread_locked(void)
{
	struct task_struct *task;

	lockdep_assert_held(&trigger_data_kthread_mutex);

	if (trigger_kthread)
		return;

	task = kthread_create(trigger_kthread_fn, NULL, "trigger_data_free");
	if (!IS_ERR(task))
		WRITE_ONCE(trigger_kthread, task);
}
/*
 * Synchronously free every event_trigger_data queued on
 * trigger_data_free_list. Caller must hold trigger_data_kthread_mutex.
 */
static void trigger_data_free_queued_locked(void)
{
	struct event_trigger_data *iter, *next;
	struct llist_node *pending;

	lockdep_assert_held(&trigger_data_kthread_mutex);

	pending = llist_del_all(&trigger_data_free_list);
	if (!pending)
		return;

	/* Wait for in-flight tracepoint callbacks before freeing. */
	tracepoint_synchronize_unregister();

	llist_for_each_entry_safe(iter, next, pending, llist)
		kfree(iter);
}
/* Bulk garbage collection of event_trigger_data elements */
static int trigger_kthread_fn(void *ignore)
{
@@ -56,30 +89,50 @@ void trigger_data_free(struct event_trigger_data *data)
if (data->cmd_ops->set_filter)
data->cmd_ops->set_filter(NULL, data, NULL);
if (unlikely(!trigger_kthread)) {
guard(mutex)(&trigger_data_kthread_mutex);
/* Check again after taking mutex */
if (!trigger_kthread) {
struct task_struct *kthread;
kthread = kthread_create(trigger_kthread_fn, NULL,
"trigger_data_free");
if (!IS_ERR(kthread))
WRITE_ONCE(trigger_kthread, kthread);
}
/*
* Boot-time trigger registration can fail before kthread creation
* works. Keep the deferred-free semantics during boot and let late
* init start the kthread to drain the list.
*/
if (system_state == SYSTEM_BOOTING && !trigger_kthread) {
llist_add(&data->llist, &trigger_data_free_list);
return;
}
if (!trigger_kthread) {
/* Do it the slow way */
tracepoint_synchronize_unregister();
kfree(data);
return;
if (unlikely(!trigger_kthread)) {
guard(mutex)(&trigger_data_kthread_mutex);
trigger_create_kthread_locked();
/* Check again after taking mutex */
if (!trigger_kthread) {
llist_add(&data->llist, &trigger_data_free_list);
/* Drain the queued frees synchronously if creation failed. */
trigger_data_free_queued_locked();
return;
}
}
llist_add(&data->llist, &trigger_data_free_list);
wake_up_process(trigger_kthread);
}
/*
 * Late-init drain of frees deferred during boot: start the cleanup
 * kthread and wake it, or — if kthread creation fails — free the
 * queued entries synchronously so nothing is stranded on the list.
 */
static int __init trigger_data_free_init(void)
{
	guard(mutex)(&trigger_data_kthread_mutex);

	/* Nothing was deferred during boot. */
	if (llist_empty(&trigger_data_free_list))
		return 0;

	trigger_create_kthread_locked();

	if (!trigger_kthread) {
		/* Creation failed: drain the deferred list right here. */
		trigger_data_free_queued_locked();
		return 0;
	}

	wake_up_process(trigger_kthread);
	return 0;
}

late_initcall(trigger_data_free_init);
static inline void data_ops_trigger(struct event_trigger_data *data,
struct trace_buffer *buffer, void *rec,
struct ring_buffer_event *event)