perf record --off-cpu: Dump the remaining PERF_SAMPLE_ in sample_type from BPF's stack trace map

Dump the remaining PERF_SAMPLE_ data, as if it is dumping a direct
sample.

Put the stack trace, tid, off-cpu time and cgroup id into the raw_data
section, just like a direct off-cpu sample coming from BPF's
bpf_perf_event_output().

This ensures that evsel__parse_sample() correctly parses both direct
samples and accumulated samples.

Suggested-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Howard Chu <howardchu95@gmail.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Gautam Menghani <gautam@linux.ibm.com>
Tested-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20241108204137.2444151-10-howardchu95@gmail.com
Link: https://lore.kernel.org/r/20250501022809.449767-9-howardchu95@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Howard Chu
2025-04-30 19:28:06 -07:00
committed by Arnaldo Carvalho de Melo
parent 8ae7a5769b
commit 74069a0160

View File

@@ -37,6 +37,8 @@ union off_cpu_data {
u64 array[1024 / sizeof(u64)];
};
u64 off_cpu_raw[MAX_STACKS + 5];
static int off_cpu_config(struct evlist *evlist)
{
char off_cpu_event[64];
@@ -313,6 +315,7 @@ int off_cpu_write(struct perf_session *session)
{
int bytes = 0, size;
int fd, stack;
u32 raw_size;
u64 sample_type, val, sid = 0;
struct evsel *evsel;
struct perf_data_file *file = &session->data->file;
@@ -352,46 +355,54 @@ int off_cpu_write(struct perf_session *session)
while (!bpf_map_get_next_key(fd, &prev, &key)) {
int n = 1; /* start from perf_event_header */
int ip_pos = -1;
bpf_map_lookup_elem(fd, &key, &val);
/* zero-fill some of the fields, will be overwritten by raw_data when parsing */
if (sample_type & PERF_SAMPLE_IDENTIFIER)
data.array[n++] = sid;
if (sample_type & PERF_SAMPLE_IP) {
ip_pos = n;
if (sample_type & PERF_SAMPLE_IP)
data.array[n++] = 0; /* will be updated */
}
if (sample_type & PERF_SAMPLE_TID)
data.array[n++] = (u64)key.pid << 32 | key.tgid;
data.array[n++] = 0;
if (sample_type & PERF_SAMPLE_TIME)
data.array[n++] = tstamp;
if (sample_type & PERF_SAMPLE_ID)
data.array[n++] = sid;
if (sample_type & PERF_SAMPLE_CPU)
data.array[n++] = 0;
if (sample_type & PERF_SAMPLE_PERIOD)
data.array[n++] = val;
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
int len = 0;
data.array[n++] = 0;
if (sample_type & PERF_SAMPLE_RAW) {
/*
* [ size ][ data ]
* [ data ]
* [ data ]
* [ data ]
* [ data ][ empty]
*/
int len = 0, i = 0;
void *raw_data = (void *)data.array + n * sizeof(u64);
/* data.array[n] is callchain->nr (updated later) */
data.array[n + 1] = PERF_CONTEXT_USER;
data.array[n + 2] = 0;
off_cpu_raw[i++] = (u64)key.pid << 32 | key.tgid;
off_cpu_raw[i++] = val;
bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]);
while (data.array[n + 2 + len])
/* off_cpu_raw[i] is callchain->nr (updated later) */
off_cpu_raw[i + 1] = PERF_CONTEXT_USER;
off_cpu_raw[i + 2] = 0;
bpf_map_lookup_elem(stack, &key.stack_id, &off_cpu_raw[i + 2]);
while (off_cpu_raw[i + 2 + len])
len++;
/* update length of callchain */
data.array[n] = len + 1;
off_cpu_raw[i] = len + 1;
i += len + 2;
/* update sample ip with the first callchain entry */
if (ip_pos >= 0)
data.array[ip_pos] = data.array[n + 2];
off_cpu_raw[i++] = key.cgroup_id;
/* calculate sample callchain data array length */
n += len + 2;
raw_size = i * sizeof(u64) + sizeof(u32); /* 4 bytes for alignment */
memcpy(raw_data, &raw_size, sizeof(raw_size));
memcpy(raw_data + sizeof(u32), off_cpu_raw, i * sizeof(u64));
n += i + 1;
}
if (sample_type & PERF_SAMPLE_CGROUP)
data.array[n++] = key.cgroup_id;