mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-06-02 14:33:12 -04:00
nr_subbufs in the ring buffer metadata is always initialized to zero
because it is assigned from cpu_buffer->nr_pages before the page
initialization loop has run. While nr_subbufs is not currently read
by the kernel, it should reflect the actual buffer geometry in the
meta page for correctness.
Move the assignment after the page loop so that cpu_buffer->nr_pages
holds the final count.
Link: https://patch.msgid.link/20260512135420.99194-1-devnexen@gmail.com
Fixes: 34e5b958bd ("tracing: Introduce simple_ring_buffer")
Reviewed-by: Vincent Donnefort <vdonnefort@google.com>
Assisted-by: Claude:claude-opus-4-7
Signed-off-by: David Carlier <devnexen@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
518 lines
14 KiB
C
518 lines
14 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (C) 2025 - Google LLC
|
|
* Author: Vincent Donnefort <vdonnefort@google.com>
|
|
*/
|
|
|
|
#include <linux/atomic.h>
|
|
#include <linux/simple_ring_buffer.h>
|
|
|
|
#include <asm/barrier.h>
|
|
#include <asm/local.h>
|
|
|
|
/*
 * Link type, stored in the low bits of a page's link.next pointer.
 */
enum simple_rb_link_type {
	SIMPLE_RB_LINK_NORMAL		= 0,
	SIMPLE_RB_LINK_HEAD		= 1,
	SIMPLE_RB_LINK_HEAD_MOVING	= 2,
};

/* AND with this to strip the link-type bits and keep only the pointer. */
#define SIMPLE_RB_LINK_MASK	(~(SIMPLE_RB_LINK_HEAD | SIMPLE_RB_LINK_HEAD_MOVING))
|
|
|
|
static void simple_bpage_set_head_link(struct simple_buffer_page *bpage)
|
|
{
|
|
unsigned long link = (unsigned long)bpage->link.next;
|
|
|
|
link &= SIMPLE_RB_LINK_MASK;
|
|
link |= SIMPLE_RB_LINK_HEAD;
|
|
|
|
/*
|
|
* Paired with simple_rb_find_head() to order access between the head
|
|
* link and overrun. It ensures we always report an up-to-date value
|
|
* after swapping the reader page.
|
|
*/
|
|
smp_store_release(&bpage->link.next, (struct list_head *)link);
|
|
}
|
|
|
|
static bool simple_bpage_unset_head_link(struct simple_buffer_page *bpage,
|
|
struct simple_buffer_page *dst,
|
|
enum simple_rb_link_type new_type)
|
|
{
|
|
unsigned long *link = (unsigned long *)(&bpage->link.next);
|
|
unsigned long old = (*link & SIMPLE_RB_LINK_MASK) | SIMPLE_RB_LINK_HEAD;
|
|
unsigned long new = (unsigned long)(&dst->link) | new_type;
|
|
|
|
return try_cmpxchg(link, &old, new);
|
|
}
|
|
|
|
static void simple_bpage_set_normal_link(struct simple_buffer_page *bpage)
|
|
{
|
|
unsigned long link = (unsigned long)bpage->link.next;
|
|
|
|
WRITE_ONCE(bpage->link.next, (struct list_head *)(link & SIMPLE_RB_LINK_MASK));
|
|
}
|
|
|
|
static struct simple_buffer_page *simple_bpage_from_link(struct list_head *link)
|
|
{
|
|
unsigned long ptr = (unsigned long)link & SIMPLE_RB_LINK_MASK;
|
|
|
|
return container_of((struct list_head *)ptr, struct simple_buffer_page, link);
|
|
}
|
|
|
|
static struct simple_buffer_page *simple_bpage_next_page(struct simple_buffer_page *bpage)
|
|
{
|
|
return simple_bpage_from_link(bpage->link.next);
|
|
}
|
|
|
|
static void simple_bpage_reset(struct simple_buffer_page *bpage)
|
|
{
|
|
bpage->write = 0;
|
|
bpage->entries = 0;
|
|
|
|
local_set(&bpage->page->commit, 0);
|
|
}
|
|
|
|
static void simple_bpage_init(struct simple_buffer_page *bpage, void *page)
|
|
{
|
|
INIT_LIST_HEAD(&bpage->link);
|
|
bpage->page = (struct buffer_data_page *)page;
|
|
|
|
simple_bpage_reset(bpage);
|
|
}
|
|
|
|
/*
 * Add @__inc to the counter @__meta and publish the result with
 * WRITE_ONCE(). Only the store is marked; the current value is read with
 * a plain load. Both arguments are parenthesized so that any expression
 * can be passed safely.
 */
#define simple_rb_meta_inc(__meta, __inc)			\
	WRITE_ONCE((__meta), (__meta) + (__inc))
|
|
|
|
static bool simple_rb_loaded(struct simple_rb_per_cpu *cpu_buffer)
|
|
{
|
|
return !!cpu_buffer->bpages;
|
|
}
|
|
|
|
static int simple_rb_find_head(struct simple_rb_per_cpu *cpu_buffer)
|
|
{
|
|
int retry = cpu_buffer->nr_pages * 2;
|
|
struct simple_buffer_page *head;
|
|
|
|
head = cpu_buffer->head_page;
|
|
|
|
while (retry--) {
|
|
unsigned long link;
|
|
|
|
spin:
|
|
/* See smp_store_release in simple_bpage_set_head_link() */
|
|
link = (unsigned long)smp_load_acquire(&head->link.prev->next);
|
|
|
|
switch (link & ~SIMPLE_RB_LINK_MASK) {
|
|
/* Found the head */
|
|
case SIMPLE_RB_LINK_HEAD:
|
|
cpu_buffer->head_page = head;
|
|
return 0;
|
|
/* The writer caught the head, we can spin, that won't be long */
|
|
case SIMPLE_RB_LINK_HEAD_MOVING:
|
|
goto spin;
|
|
}
|
|
|
|
head = simple_bpage_next_page(head);
|
|
}
|
|
|
|
return -EBUSY;
|
|
}
|
|
|
|
/**
|
|
* simple_ring_buffer_swap_reader_page - Swap ring-buffer head with the reader
|
|
* @cpu_buffer: A simple_rb_per_cpu
|
|
*
|
|
* This function enables consuming reading. It ensures the current head page will not be overwritten
|
|
* and can be safely read.
|
|
*
|
|
* Returns 0 on success, -ENODEV if @cpu_buffer was unloaded or -EBUSY if we failed to catch the
|
|
* head page.
|
|
*/
|
|
int simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu *cpu_buffer)
|
|
{
|
|
struct simple_buffer_page *last, *head, *reader;
|
|
unsigned long overrun;
|
|
int retry = 8;
|
|
int ret;
|
|
|
|
if (!simple_rb_loaded(cpu_buffer))
|
|
return -ENODEV;
|
|
|
|
reader = cpu_buffer->reader_page;
|
|
|
|
do {
|
|
/* Run after the writer to find the head */
|
|
ret = simple_rb_find_head(cpu_buffer);
|
|
if (ret)
|
|
return ret;
|
|
|
|
head = cpu_buffer->head_page;
|
|
|
|
/* Connect the reader page around the header page */
|
|
reader->link.next = head->link.next;
|
|
reader->link.prev = head->link.prev;
|
|
|
|
/* The last page before the head */
|
|
last = simple_bpage_from_link(head->link.prev);
|
|
|
|
/* The reader page points to the new header page */
|
|
simple_bpage_set_head_link(reader);
|
|
|
|
overrun = cpu_buffer->meta->overrun;
|
|
} while (!simple_bpage_unset_head_link(last, reader, SIMPLE_RB_LINK_NORMAL) && retry--);
|
|
|
|
if (!retry)
|
|
return -EINVAL;
|
|
|
|
cpu_buffer->head_page = simple_bpage_from_link(reader->link.next);
|
|
cpu_buffer->head_page->link.prev = &reader->link;
|
|
cpu_buffer->reader_page = head;
|
|
cpu_buffer->meta->reader.lost_events = overrun - cpu_buffer->last_overrun;
|
|
cpu_buffer->meta->reader.id = cpu_buffer->reader_page->id;
|
|
cpu_buffer->last_overrun = overrun;
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(simple_ring_buffer_swap_reader_page);
|
|
|
|
static struct simple_buffer_page *simple_rb_move_tail(struct simple_rb_per_cpu *cpu_buffer)
|
|
{
|
|
struct simple_buffer_page *tail, *new_tail;
|
|
|
|
tail = cpu_buffer->tail_page;
|
|
new_tail = simple_bpage_next_page(tail);
|
|
|
|
if (simple_bpage_unset_head_link(tail, new_tail, SIMPLE_RB_LINK_HEAD_MOVING)) {
|
|
/*
|
|
* Oh no! we've caught the head. There is none anymore and
|
|
* swap_reader will spin until we set the new one. Overrun must
|
|
* be written first, to make sure we report the correct number
|
|
* of lost events.
|
|
*/
|
|
simple_rb_meta_inc(cpu_buffer->meta->overrun, new_tail->entries);
|
|
simple_rb_meta_inc(cpu_buffer->meta->pages_lost, 1);
|
|
|
|
simple_bpage_set_head_link(new_tail);
|
|
simple_bpage_set_normal_link(tail);
|
|
}
|
|
|
|
simple_bpage_reset(new_tail);
|
|
cpu_buffer->tail_page = new_tail;
|
|
|
|
simple_rb_meta_inc(cpu_buffer->meta->pages_touched, 1);
|
|
|
|
return new_tail;
|
|
}
|
|
|
|
static unsigned long rb_event_size(unsigned long length)
|
|
{
|
|
struct ring_buffer_event *event;
|
|
|
|
return length + RB_EVNT_HDR_SIZE + sizeof(event->array[0]);
|
|
}
|
|
|
|
static struct ring_buffer_event *
|
|
rb_event_add_ts_extend(struct ring_buffer_event *event, u64 delta)
|
|
{
|
|
event->type_len = RINGBUF_TYPE_TIME_EXTEND;
|
|
event->time_delta = delta & TS_MASK;
|
|
event->array[0] = delta >> TS_SHIFT;
|
|
|
|
return (struct ring_buffer_event *)((unsigned long)event + 8);
|
|
}
|
|
|
|
static struct ring_buffer_event *
|
|
simple_rb_reserve_next(struct simple_rb_per_cpu *cpu_buffer, unsigned long length, u64 timestamp)
|
|
{
|
|
unsigned long ts_ext_size = 0, event_size = rb_event_size(length);
|
|
struct simple_buffer_page *tail = cpu_buffer->tail_page;
|
|
struct ring_buffer_event *event;
|
|
u32 write, prev_write;
|
|
u64 time_delta;
|
|
|
|
time_delta = timestamp - cpu_buffer->write_stamp;
|
|
|
|
if (test_time_stamp(time_delta))
|
|
ts_ext_size = 8;
|
|
|
|
prev_write = tail->write;
|
|
write = prev_write + event_size + ts_ext_size;
|
|
|
|
if (unlikely(write > (PAGE_SIZE - BUF_PAGE_HDR_SIZE)))
|
|
tail = simple_rb_move_tail(cpu_buffer);
|
|
|
|
if (!tail->entries) {
|
|
tail->page->time_stamp = timestamp;
|
|
time_delta = 0;
|
|
ts_ext_size = 0;
|
|
write = event_size;
|
|
prev_write = 0;
|
|
}
|
|
|
|
tail->write = write;
|
|
tail->entries++;
|
|
|
|
cpu_buffer->write_stamp = timestamp;
|
|
|
|
event = (struct ring_buffer_event *)(tail->page->data + prev_write);
|
|
if (ts_ext_size) {
|
|
event = rb_event_add_ts_extend(event, time_delta);
|
|
time_delta = 0;
|
|
}
|
|
|
|
event->type_len = 0;
|
|
event->time_delta = time_delta;
|
|
event->array[0] = event_size - RB_EVNT_HDR_SIZE;
|
|
|
|
return event;
|
|
}
|
|
|
|
/**
|
|
* simple_ring_buffer_reserve - Reserve an entry in @cpu_buffer
|
|
* @cpu_buffer: A simple_rb_per_cpu
|
|
* @length: Size of the entry in bytes
|
|
* @timestamp: Timestamp of the entry
|
|
*
|
|
* Returns the address of the entry where to write data or NULL
|
|
*/
|
|
void *simple_ring_buffer_reserve(struct simple_rb_per_cpu *cpu_buffer, unsigned long length,
|
|
u64 timestamp)
|
|
{
|
|
struct ring_buffer_event *rb_event;
|
|
|
|
if (cmpxchg(&cpu_buffer->status, SIMPLE_RB_READY, SIMPLE_RB_WRITING) != SIMPLE_RB_READY)
|
|
return NULL;
|
|
|
|
rb_event = simple_rb_reserve_next(cpu_buffer, length, timestamp);
|
|
|
|
return &rb_event->array[1];
|
|
}
|
|
EXPORT_SYMBOL_GPL(simple_ring_buffer_reserve);
|
|
|
|
/**
|
|
* simple_ring_buffer_commit - Commit the entry reserved with simple_ring_buffer_reserve()
|
|
* @cpu_buffer: The simple_rb_per_cpu where the entry has been reserved
|
|
*/
|
|
void simple_ring_buffer_commit(struct simple_rb_per_cpu *cpu_buffer)
|
|
{
|
|
local_set(&cpu_buffer->tail_page->page->commit,
|
|
cpu_buffer->tail_page->write);
|
|
simple_rb_meta_inc(cpu_buffer->meta->entries, 1);
|
|
|
|
/*
|
|
* Paired with simple_rb_enable_tracing() to ensure data is
|
|
* written to the ring-buffer before teardown.
|
|
*/
|
|
smp_store_release(&cpu_buffer->status, SIMPLE_RB_READY);
|
|
}
|
|
EXPORT_SYMBOL_GPL(simple_ring_buffer_commit);
|
|
|
|
static u32 simple_rb_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable)
|
|
{
|
|
u32 prev_status;
|
|
|
|
if (enable)
|
|
return cmpxchg(&cpu_buffer->status, SIMPLE_RB_UNAVAILABLE, SIMPLE_RB_READY);
|
|
|
|
/* Wait for the buffer to be released */
|
|
do {
|
|
prev_status = cmpxchg_acquire(&cpu_buffer->status,
|
|
SIMPLE_RB_READY,
|
|
SIMPLE_RB_UNAVAILABLE);
|
|
} while (prev_status == SIMPLE_RB_WRITING);
|
|
|
|
return prev_status;
|
|
}
|
|
|
|
/**
|
|
* simple_ring_buffer_reset - Reset @cpu_buffer
|
|
* @cpu_buffer: A simple_rb_per_cpu
|
|
*
|
|
* This will not clear the content of the data, only reset counters and pointers
|
|
*
|
|
* Returns 0 on success or -ENODEV if @cpu_buffer was unloaded.
|
|
*/
|
|
int simple_ring_buffer_reset(struct simple_rb_per_cpu *cpu_buffer)
|
|
{
|
|
struct simple_buffer_page *bpage;
|
|
u32 prev_status;
|
|
int ret;
|
|
|
|
if (!simple_rb_loaded(cpu_buffer))
|
|
return -ENODEV;
|
|
|
|
prev_status = simple_rb_enable_tracing(cpu_buffer, false);
|
|
|
|
ret = simple_rb_find_head(cpu_buffer);
|
|
if (ret)
|
|
return ret;
|
|
|
|
bpage = cpu_buffer->tail_page = cpu_buffer->head_page;
|
|
do {
|
|
simple_bpage_reset(bpage);
|
|
bpage = simple_bpage_next_page(bpage);
|
|
} while (bpage != cpu_buffer->head_page);
|
|
|
|
simple_bpage_reset(cpu_buffer->reader_page);
|
|
|
|
cpu_buffer->last_overrun = 0;
|
|
cpu_buffer->write_stamp = 0;
|
|
|
|
cpu_buffer->meta->reader.read = 0;
|
|
cpu_buffer->meta->reader.lost_events = 0;
|
|
cpu_buffer->meta->entries = 0;
|
|
cpu_buffer->meta->overrun = 0;
|
|
cpu_buffer->meta->read = 0;
|
|
cpu_buffer->meta->pages_lost = 0;
|
|
cpu_buffer->meta->pages_touched = 0;
|
|
|
|
if (prev_status == SIMPLE_RB_READY)
|
|
simple_rb_enable_tracing(cpu_buffer, true);
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(simple_ring_buffer_reset);
|
|
|
|
int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer,
|
|
struct simple_buffer_page *bpages,
|
|
const struct ring_buffer_desc *desc,
|
|
void *(*load_page)(unsigned long va),
|
|
void (*unload_page)(void *va))
|
|
{
|
|
struct simple_buffer_page *bpage = bpages;
|
|
int ret = 0;
|
|
void *page;
|
|
int i;
|
|
|
|
/* At least 1 reader page and two pages in the ring-buffer */
|
|
if (desc->nr_page_va < 3)
|
|
return -EINVAL;
|
|
|
|
memset(cpu_buffer, 0, sizeof(*cpu_buffer));
|
|
|
|
cpu_buffer->meta = load_page(desc->meta_va);
|
|
if (!cpu_buffer->meta)
|
|
return -EINVAL;
|
|
|
|
memset(cpu_buffer->meta, 0, sizeof(*cpu_buffer->meta));
|
|
cpu_buffer->meta->meta_page_size = PAGE_SIZE;
|
|
|
|
/* The reader page is not part of the ring initially */
|
|
page = load_page(desc->page_va[0]);
|
|
if (!page) {
|
|
unload_page(cpu_buffer->meta);
|
|
return -EINVAL;
|
|
}
|
|
|
|
simple_bpage_init(bpage, page);
|
|
bpage->id = 0;
|
|
|
|
cpu_buffer->nr_pages = 1;
|
|
|
|
cpu_buffer->reader_page = bpage;
|
|
cpu_buffer->tail_page = bpage + 1;
|
|
cpu_buffer->head_page = bpage + 1;
|
|
|
|
for (i = 1; i < desc->nr_page_va; i++) {
|
|
page = load_page(desc->page_va[i]);
|
|
if (!page) {
|
|
ret = -EINVAL;
|
|
break;
|
|
}
|
|
|
|
simple_bpage_init(++bpage, page);
|
|
|
|
bpage->link.next = &(bpage + 1)->link;
|
|
bpage->link.prev = &(bpage - 1)->link;
|
|
bpage->id = i;
|
|
|
|
cpu_buffer->nr_pages = i + 1;
|
|
}
|
|
|
|
if (ret) {
|
|
for (i--; i >= 0; i--)
|
|
unload_page((void *)desc->page_va[i]);
|
|
unload_page(cpu_buffer->meta);
|
|
|
|
return ret;
|
|
}
|
|
|
|
cpu_buffer->meta->nr_subbufs = cpu_buffer->nr_pages;
|
|
/* Close the ring */
|
|
bpage->link.next = &cpu_buffer->tail_page->link;
|
|
cpu_buffer->tail_page->link.prev = &bpage->link;
|
|
|
|
/* The last init'ed page points to the head page */
|
|
simple_bpage_set_head_link(bpage);
|
|
|
|
cpu_buffer->bpages = bpages;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Default page loader: the address is used as is. */
static void *__load_page(unsigned long page)
{
	void *va = (void *)page;

	return va;
}
|
|
|
|
/* Default page unloader: nothing to undo for __load_page(). */
static void __unload_page(void *page)
{
}
|
|
|
|
/**
|
|
* simple_ring_buffer_init - Init @cpu_buffer based on @desc
|
|
* @cpu_buffer: A simple_rb_per_cpu buffer to init, allocated by the caller.
|
|
* @bpages: Array of simple_buffer_pages, with as many elements as @desc->nr_page_va
|
|
* @desc: A ring_buffer_desc
|
|
*
|
|
* Returns 0 on success or -EINVAL if the content of @desc is invalid
|
|
*/
|
|
int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_buffer_page *bpages,
|
|
const struct ring_buffer_desc *desc)
|
|
{
|
|
return simple_ring_buffer_init_mm(cpu_buffer, bpages, desc, __load_page, __unload_page);
|
|
}
|
|
EXPORT_SYMBOL_GPL(simple_ring_buffer_init);
|
|
|
|
void simple_ring_buffer_unload_mm(struct simple_rb_per_cpu *cpu_buffer,
|
|
void (*unload_page)(void *))
|
|
{
|
|
int p;
|
|
|
|
if (!simple_rb_loaded(cpu_buffer))
|
|
return;
|
|
|
|
simple_rb_enable_tracing(cpu_buffer, false);
|
|
|
|
unload_page(cpu_buffer->meta);
|
|
for (p = 0; p < cpu_buffer->nr_pages; p++)
|
|
unload_page(cpu_buffer->bpages[p].page);
|
|
|
|
cpu_buffer->bpages = NULL;
|
|
}
|
|
|
|
/**
|
|
* simple_ring_buffer_unload - Prepare @cpu_buffer for deletion
|
|
* @cpu_buffer: A simple_rb_per_cpu that will be deleted.
|
|
*/
|
|
void simple_ring_buffer_unload(struct simple_rb_per_cpu *cpu_buffer)
|
|
{
|
|
return simple_ring_buffer_unload_mm(cpu_buffer, __unload_page);
|
|
}
|
|
EXPORT_SYMBOL_GPL(simple_ring_buffer_unload);
|
|
|
|
/**
|
|
* simple_ring_buffer_enable_tracing - Enable or disable writing to @cpu_buffer
|
|
* @cpu_buffer: A simple_rb_per_cpu
|
|
* @enable: True to enable tracing, False to disable it
|
|
*
|
|
* Returns 0 on success or -ENODEV if @cpu_buffer was unloaded
|
|
*/
|
|
int simple_ring_buffer_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable)
|
|
{
|
|
if (!simple_rb_loaded(cpu_buffer))
|
|
return -ENODEV;
|
|
|
|
simple_rb_enable_tracing(cpu_buffer, enable);
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(simple_ring_buffer_enable_tracing);
|