Files
linux/tools/perf/util/tool.c
Namhyung Kim 9b4525fd08 perf tools: Merge deferred user callchains
Save samples with deferred callchains in a separate list and deliver
them after merging the user callchains.  If users don't want to merge
they can set tool->merge_deferred_callchains to false to prevent the
behavior.

With previous result, now perf script will show the merged callchains.

  $ perf script
  ...
  pwd    2312   121.163435:     249113 cpu/cycles/P:
          ffffffff845b78d8 __build_id_parse.isra.0+0x218 ([kernel.kallsyms])
          ffffffff83bb5bf6 perf_event_mmap+0x2e6 ([kernel.kallsyms])
          ffffffff83c31959 mprotect_fixup+0x1e9 ([kernel.kallsyms])
          ffffffff83c31dc5 do_mprotect_pkey+0x2b5 ([kernel.kallsyms])
          ffffffff83c3206f __x64_sys_mprotect+0x1f ([kernel.kallsyms])
          ffffffff845e6692 do_syscall_64+0x62 ([kernel.kallsyms])
          ffffffff8360012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kallsyms])
              7f18fe337fa7 mprotect+0x7 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
              7f18fe330e0f _dl_sysdep_start+0x7f (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
              7f18fe331448 _dl_start_user+0x0 (/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2)
  ...

The old output can be obtained using the --no-merge-callchain option.
perf report can also show the user callchain entries at the end.

  $ perf report --no-children --stdio -q -S __build_id_parse.isra.0
  # symbol: __build_id_parse.isra.0
       8.40%  pwd      [kernel.kallsyms]
              |
              ---__build_id_parse.isra.0
                 perf_event_mmap
                 mprotect_fixup
                 do_mprotect_pkey
                 __x64_sys_mprotect
                 do_syscall_64
                 entry_SYSCALL_64_after_hwframe
                 mprotect
                 _dl_sysdep_start
                 _dl_start_user

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-12-02 21:59:14 -08:00

500 lines
16 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include "data.h"
#include "debug.h"
#include "event.h"
#include "header.h"
#include "session.h"
#include "stat.h"
#include "tool.h"
#include "tsc.h"
#include <linux/compiler.h>
#include <errno.h>
#include <stddef.h>
#include <sys/mman.h>
#include <unistd.h>
#ifdef HAVE_ZSTD_SUPPORT
/*
 * Decompress one PERF_RECORD_COMPRESSED / PERF_RECORD_COMPRESSED2 record into
 * a newly mmap()ed 'struct decomp' and append it to the list kept in
 * session->active_decomp.
 *
 * Bytes of the previous buffer that lie beyond its ->head are copied to the
 * front of the new buffer, so the new buffer holds them followed by the
 * freshly decompressed data.
 *
 * @session:     session whose active_decomp state is used and extended
 * @event:       the compressed record
 * @file_offset: stored in decomp->file_pos
 * @file_path:   stored in decomp->file_path (pointer is kept, not copied)
 *
 * Returns 0 on success and -1 when the buffer cannot be mapped, the record
 * type is neither of the two compressed types, or zstd_decompress_stream()
 * returns 0.  The buffer is unmapped again on every failure path.
 */
static int perf_session__process_compressed_event(const struct perf_tool *tool __maybe_unused,
						  struct perf_session *session,
						  union perf_event *event, u64 file_offset,
						  const char *file_path)
{
	void *src;
	size_t decomp_size, src_size;
	u64 decomp_last_rem = 0;
	size_t mmap_len, decomp_len = perf_session__env(session)->comp_mmap_len;
	struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last;

	/* Make room for whatever is left over in the previous buffer. */
	if (decomp_last) {
		decomp_last_rem = decomp_last->size - decomp_last->head;
		decomp_len += decomp_last_rem;
	}

	mmap_len = sizeof(struct decomp) + decomp_len;
	decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE,
		      MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
	if (decomp == MAP_FAILED) {
		pr_err("Couldn't allocate memory for decompression\n");
		return -1;
	}

	decomp->file_pos = file_offset;
	decomp->file_path = file_path;
	decomp->mmap_len = mmap_len;
	decomp->head = 0;

	/* Carry the leftover bytes over to the start of the new buffer. */
	if (decomp_last_rem) {
		memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
		decomp->size = decomp_last_rem;
	}

	if (event->header.type == PERF_RECORD_COMPRESSED) {
		src = (void *)event + sizeof(struct perf_record_compressed);
		src_size = event->pack.header.size - sizeof(struct perf_record_compressed);
	} else if (event->header.type == PERF_RECORD_COMPRESSED2) {
		src = (void *)event + sizeof(struct perf_record_compressed2);
		src_size = event->pack2.data_size;
	} else {
		/* Unexpected record type: do not leak the mapping made above. */
		munmap(decomp, mmap_len);
		return -1;
	}

	decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size,
				&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
	if (!decomp_size) {
		munmap(decomp, mmap_len);
		pr_err("Couldn't decompress data\n");
		return -1;
	}

	decomp->size += decomp_size;

	/* Link the new buffer at the tail of the list (or start the list). */
	if (session->active_decomp->decomp == NULL)
		session->active_decomp->decomp = decomp;
	else
		session->active_decomp->decomp_last->next = decomp;

	session->active_decomp->decomp_last = decomp;

	pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size);

	return 0;
}
#endif
/*
 * Default ->tracing_data callback set by perf_tool__init(): reports the event
 * as unhandled through dump_printf() and returns 0.
 */
static int process_event_synth_tracing_data_stub(const struct perf_tool *tool __maybe_unused,
						 struct perf_session *session __maybe_unused,
						 union perf_event *event __maybe_unused)
{
	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * Default ->attr callback set by perf_tool__init(): reports the event as
 * unhandled through dump_printf() and returns 0.  @pevlist is not touched.
 */
static int process_event_synth_attr_stub(const struct perf_tool *tool __maybe_unused,
					 union perf_event *event __maybe_unused,
					 struct evlist **pevlist __maybe_unused)
{
	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * Default ->event_update callback set by perf_tool__init().
 *
 * When dump_trace is set the event is first printed to stdout with
 * perf_event__fprintf_event_update(); it is then reported as unhandled.
 * Always returns 0.
 */
static int process_event_synth_event_update_stub(const struct perf_tool *tool __maybe_unused,
						 union perf_event *event __maybe_unused,
						 struct evlist **pevlist __maybe_unused)
{
	if (dump_trace)
		perf_event__fprintf_event_update(event, stdout);

	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * No-op handler for callbacks that receive a sample together with its evsel.
 * perf_tool__init() uses it for ->sample, ->read and ->callchain_deferred.
 *
 * Reports the event as unhandled through dump_printf() and returns 0.
 */
int process_event_sample_stub(const struct perf_tool *tool __maybe_unused,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample __maybe_unused,
			      struct evsel *evsel __maybe_unused,
			      struct machine *machine __maybe_unused)
{
	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * No-op handler for (event, sample, machine) callbacks.  perf_tool__init()
 * uses it for ->mmap, ->mmap2, ->comm, ->namespaces, ->cgroup, ->fork, ->exit,
 * ->throttle and ->unthrottle.
 *
 * Reports the event as unhandled through dump_printf() and returns 0.
 */
static int process_event_stub(const struct perf_tool *tool __maybe_unused,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample __maybe_unused,
			      struct machine *machine __maybe_unused)
{
	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * ->finished_round callback chosen by perf_tool__init() when ordered events
 * are not requested: reports the event as unhandled and returns 0.
 */
static int process_finished_round_stub(const struct perf_tool *tool __maybe_unused,
				       union perf_event *event __maybe_unused,
				       struct ordered_events *oe __maybe_unused)
{
	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * Read and discard @n bytes from @fd, at most sizeof(buf) bytes per read().
 *
 * A read() interrupted by a signal (errno == EINTR) is retried so that the
 * skip is not abandoned part-way through.
 *
 * Returns 0 once @n bytes were consumed or read() reported end-of-file, and
 * -1 (errno left as set by read()) on any other read error.
 */
static int skipn(int fd, off_t n)
{
	char buf[4096];

	while (n > 0) {
		/* Never ask for more than the scratch buffer can hold. */
		size_t chunk = n < (off_t)sizeof(buf) ? (size_t)n : sizeof(buf);
		ssize_t ret = read(fd, buf, chunk);

		if (ret < 0) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (ret == 0)
			return 0;	/* EOF: nothing left to discard */

		n -= ret;
	}

	return 0;
}
/*
 * Default ->auxtrace callback set by perf_tool__init().
 *
 * Reports the event as unhandled.  When the session data is a pipe,
 * event->auxtrace.size bytes are read from the data fd and discarded via
 * skipn(); the result of skipn() is not checked.
 *
 * Returns event->auxtrace.size.
 */
static s64 process_event_auxtrace_stub(const struct perf_tool *tool __maybe_unused,
				       struct perf_session *session __maybe_unused,
				       union perf_event *event)
{
	dump_printf(": unhandled!\n");

	if (perf_data__is_pipe(session->data))
		skipn(perf_data__fd(session->data), event->auxtrace.size);

	return event->auxtrace.size;
}
/*
 * No-op handler for (session, event) callbacks.  perf_tool__init() uses it for
 * ->build_id, ->id_index, ->auxtrace_info, ->auxtrace_error, ->feature and
 * ->finished_init.
 *
 * Reports the event as unhandled through dump_printf() and returns 0.
 */
static int process_event_op2_stub(const struct perf_tool *tool __maybe_unused,
				  struct perf_session *session __maybe_unused,
				  union perf_event *event __maybe_unused)
{
	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * Default ->thread_map callback set by perf_tool__init().
 *
 * With dump_trace set, the event is printed to stdout by
 * perf_event__fprintf_thread_map() before being reported as unhandled.
 * Always returns 0.
 */
static
int process_event_thread_map_stub(const struct perf_tool *tool __maybe_unused,
				  struct perf_session *session __maybe_unused,
				  union perf_event *event __maybe_unused)
{
	if (dump_trace)
		perf_event__fprintf_thread_map(event, stdout);

	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * Default ->cpu_map callback set by perf_tool__init().
 *
 * With dump_trace set, the event is printed to stdout by
 * perf_event__fprintf_cpu_map() before being reported as unhandled.
 * Always returns 0.
 */
static
int process_event_cpu_map_stub(const struct perf_tool *tool __maybe_unused,
			       struct perf_session *session __maybe_unused,
			       union perf_event *event __maybe_unused)
{
	if (dump_trace)
		perf_event__fprintf_cpu_map(event, stdout);

	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * Default ->stat_config callback set by perf_tool__init().
 *
 * With dump_trace set, the event is printed to stdout by
 * perf_event__fprintf_stat_config() before being reported as unhandled.
 * Always returns 0.
 */
static
int process_event_stat_config_stub(const struct perf_tool *tool __maybe_unused,
				   struct perf_session *session __maybe_unused,
				   union perf_event *event __maybe_unused)
{
	if (dump_trace)
		perf_event__fprintf_stat_config(event, stdout);

	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * Default ->stat callback set by perf_tool__init().
 *
 * With dump_trace set, the event is printed to stdout by
 * perf_event__fprintf_stat() before being reported as unhandled.
 * Always returns 0.
 */
static int process_stat_stub(const struct perf_tool *tool __maybe_unused,
			     struct perf_session *perf_session __maybe_unused,
			     union perf_event *event)
{
	if (dump_trace)
		perf_event__fprintf_stat(event, stdout);

	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * Default ->stat_round callback set by perf_tool__init().
 *
 * With dump_trace set, the event is printed to stdout by
 * perf_event__fprintf_stat_round() before being reported as unhandled.
 * Always returns 0.
 */
static int process_stat_round_stub(const struct perf_tool *tool __maybe_unused,
				   struct perf_session *perf_session __maybe_unused,
				   union perf_event *event)
{
	if (dump_trace)
		perf_event__fprintf_stat_round(event, stdout);

	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * Default ->time_conv callback set by perf_tool__init().
 *
 * With dump_trace set, the event is printed to stdout by
 * perf_event__fprintf_time_conv() before being reported as unhandled.
 * Always returns 0.
 */
static int process_event_time_conv_stub(const struct perf_tool *tool __maybe_unused,
					struct perf_session *perf_session __maybe_unused,
					union perf_event *event)
{
	if (dump_trace)
		perf_event__fprintf_time_conv(event, stdout);

	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * ->compressed callback used by perf_tool__init() when HAVE_ZSTD_SUPPORT is
 * not defined: reports the event as unhandled and returns 0 without looking
 * at any of its arguments.
 *
 * perf_tool__compressed_is_stub() compares against this function's address.
 */
static int perf_session__process_compressed_event_stub(const struct perf_tool *tool __maybe_unused,
						       struct perf_session *session __maybe_unused,
						       union perf_event *event __maybe_unused,
						       u64 file_offset __maybe_unused,
						       const char *file_path __maybe_unused)
{
	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * Default ->bpf_metadata callback set by perf_tool__init().
 *
 * With dump_trace set, the event is printed to stdout by
 * perf_event__fprintf_bpf_metadata() before being reported as unhandled.
 * Always returns 0.
 */
static int perf_event__process_bpf_metadata_stub(const struct perf_tool *tool __maybe_unused,
						 struct perf_session *perf_session __maybe_unused,
						 union perf_event *event)
{
	if (dump_trace)
		perf_event__fprintf_bpf_metadata(event, stdout);

	dump_printf(": unhandled!\n");

	return 0;
}
/*
 * Fill @tool with default settings and a handler for every callback.
 *
 * @tool:           tool to initialise
 * @ordered_events: stored in tool->ordered_events; also selects whether
 *                  ->finished_round is perf_event__process_finished_round()
 *                  or the local no-op stub
 *
 * Callbacks get either one of the perf_event__process_*() functions or one of
 * the stubs defined in this file.  ->compressed is the real decompressor only
 * when HAVE_ZSTD_SUPPORT is defined.
 */
void perf_tool__init(struct perf_tool *tool, bool ordered_events)
{
	/* Behaviour flags. */
	tool->ordered_events = ordered_events;
	tool->ordering_requires_timestamps = false;
	tool->namespace_events = false;
	tool->cgroup_events = false;
	tool->no_warn = false;
	tool->show_feat_hdr = SHOW_FEAT_NO_HEADER;
	tool->merge_deferred_callchains = true;

	/* Callbacks taking a sample and an evsel. */
	tool->sample = process_event_sample_stub;
	tool->read = process_event_sample_stub;
	tool->callchain_deferred = process_event_sample_stub;

	/* (event, sample, machine) callbacks left as no-ops. */
	tool->mmap = process_event_stub;
	tool->mmap2 = process_event_stub;
	tool->comm = process_event_stub;
	tool->namespaces = process_event_stub;
	tool->cgroup = process_event_stub;
	tool->fork = process_event_stub;
	tool->exit = process_event_stub;
	tool->throttle = process_event_stub;
	tool->unthrottle = process_event_stub;

	/* (event, sample, machine) callbacks with a perf_event__process_*() default. */
	tool->lost = perf_event__process_lost;
	tool->lost_samples = perf_event__process_lost_samples;
	tool->aux = perf_event__process_aux;
	tool->itrace_start = perf_event__process_itrace_start;
	tool->context_switch = perf_event__process_switch;
	tool->ksymbol = perf_event__process_ksymbol;
	tool->bpf = perf_event__process_bpf;
	tool->text_poke = perf_event__process_text_poke;
	tool->aux_output_hw_id = perf_event__process_aux_output_hw_id;

	/* Callbacks that receive an evlist pointer. */
	tool->attr = process_event_synth_attr_stub;
	tool->event_update = process_event_synth_event_update_stub;

	/* Round handling depends on whether events are ordered. */
	tool->finished_round = ordered_events ? perf_event__process_finished_round
					      : process_finished_round_stub;

	/* (session, event) callbacks. */
	tool->tracing_data = process_event_synth_tracing_data_stub;
	tool->build_id = process_event_op2_stub;
	tool->id_index = process_event_op2_stub;
	tool->auxtrace_info = process_event_op2_stub;
	tool->auxtrace = process_event_auxtrace_stub;
	tool->auxtrace_error = process_event_op2_stub;
	tool->thread_map = process_event_thread_map_stub;
	tool->cpu_map = process_event_cpu_map_stub;
	tool->stat_config = process_event_stat_config_stub;
	tool->stat = process_stat_stub;
	tool->stat_round = process_stat_round_stub;
	tool->time_conv = process_event_time_conv_stub;
	tool->feature = process_event_op2_stub;
	tool->finished_init = process_event_op2_stub;
	tool->bpf_metadata = perf_event__process_bpf_metadata_stub;

#ifdef HAVE_ZSTD_SUPPORT
	tool->compressed = perf_session__process_compressed_event;
#else
	tool->compressed = perf_session__process_compressed_event_stub;
#endif
}
/*
 * Return true when @tool's ->compressed callback is the no-op stub defined in
 * this file, false for any other handler.
 */
bool perf_tool__compressed_is_stub(const struct perf_tool *tool)
{
	bool is_stub = (tool->compressed == perf_session__process_compressed_event_stub);

	return is_stub;
}
/*
 * Define delegate_<name>(): a (tool, event, sample, evsel, machine) callback
 * that recovers the enclosing struct delegate_tool from @tool and forwards
 * the call, unchanged, to the ->name callback of its ->delegate.
 */
#define CREATE_DELEGATE_SAMPLE(name) \
	static int delegate_ ## name(const struct perf_tool *tool, \
				     union perf_event *event, \
				     struct perf_sample *sample, \
				     struct evsel *evsel, \
				     struct machine *machine) \
	{ \
		struct delegate_tool *wrapper = container_of(tool, struct delegate_tool, tool); \
		struct perf_tool *target = wrapper->delegate; \
		\
		return target->name(target, event, sample, evsel, machine); \
	}
CREATE_DELEGATE_SAMPLE(read);
CREATE_DELEGATE_SAMPLE(sample);
CREATE_DELEGATE_SAMPLE(callchain_deferred);
/*
 * Define delegate_<name>(): a (tool, event, pevlist) callback that recovers
 * the enclosing struct delegate_tool from @tool and forwards the call to the
 * ->name callback of its ->delegate.
 */
#define CREATE_DELEGATE_ATTR(name) \
	static int delegate_ ## name(const struct perf_tool *tool, \
				     union perf_event *event, \
				     struct evlist **pevlist) \
	{ \
		struct delegate_tool *wrapper = container_of(tool, struct delegate_tool, tool); \
		struct perf_tool *target = wrapper->delegate; \
		\
		return target->name(target, event, pevlist); \
	}
CREATE_DELEGATE_ATTR(attr);
CREATE_DELEGATE_ATTR(event_update);
/*
 * Define delegate_<name>(): a (tool, event, ordered_events) callback that
 * recovers the enclosing struct delegate_tool from @tool and forwards the
 * call to the ->name callback of its ->delegate.
 */
#define CREATE_DELEGATE_OE(name) \
	static int delegate_ ## name(const struct perf_tool *tool, \
				     union perf_event *event, \
				     struct ordered_events *oe) \
	{ \
		struct delegate_tool *wrapper = container_of(tool, struct delegate_tool, tool); \
		struct perf_tool *target = wrapper->delegate; \
		\
		return target->name(target, event, oe); \
	}
CREATE_DELEGATE_OE(finished_round);
/*
 * Define delegate_<name>(): a (tool, event, sample, machine) callback that
 * recovers the enclosing struct delegate_tool from @tool and forwards the
 * call to the ->name callback of its ->delegate.
 */
#define CREATE_DELEGATE_OP(name) \
	static int delegate_ ## name(const struct perf_tool *tool, \
				     union perf_event *event, \
				     struct perf_sample *sample, \
				     struct machine *machine) \
	{ \
		struct delegate_tool *wrapper = container_of(tool, struct delegate_tool, tool); \
		struct perf_tool *target = wrapper->delegate; \
		\
		return target->name(target, event, sample, machine); \
	}
CREATE_DELEGATE_OP(aux);
CREATE_DELEGATE_OP(aux_output_hw_id);
CREATE_DELEGATE_OP(bpf);
CREATE_DELEGATE_OP(cgroup);
CREATE_DELEGATE_OP(comm);
CREATE_DELEGATE_OP(context_switch);
CREATE_DELEGATE_OP(exit);
CREATE_DELEGATE_OP(fork);
CREATE_DELEGATE_OP(itrace_start);
CREATE_DELEGATE_OP(ksymbol);
CREATE_DELEGATE_OP(lost);
CREATE_DELEGATE_OP(lost_samples);
CREATE_DELEGATE_OP(mmap);
CREATE_DELEGATE_OP(mmap2);
CREATE_DELEGATE_OP(namespaces);
CREATE_DELEGATE_OP(text_poke);
CREATE_DELEGATE_OP(throttle);
CREATE_DELEGATE_OP(unthrottle);
/*
 * Define delegate_<name>(): a (tool, session, event) callback returning int
 * that recovers the enclosing struct delegate_tool from @tool and forwards
 * the call to the ->name callback of its ->delegate.
 */
#define CREATE_DELEGATE_OP2(name) \
	static int delegate_ ## name(const struct perf_tool *tool, \
				     struct perf_session *session, \
				     union perf_event *event) \
	{ \
		struct delegate_tool *wrapper = container_of(tool, struct delegate_tool, tool); \
		struct perf_tool *target = wrapper->delegate; \
		\
		return target->name(target, session, event); \
	}
CREATE_DELEGATE_OP2(auxtrace_error);
CREATE_DELEGATE_OP2(auxtrace_info);
CREATE_DELEGATE_OP2(bpf_metadata);
CREATE_DELEGATE_OP2(build_id);
CREATE_DELEGATE_OP2(cpu_map);
CREATE_DELEGATE_OP2(feature);
CREATE_DELEGATE_OP2(finished_init);
CREATE_DELEGATE_OP2(id_index);
CREATE_DELEGATE_OP2(stat);
CREATE_DELEGATE_OP2(stat_config);
CREATE_DELEGATE_OP2(stat_round);
CREATE_DELEGATE_OP2(thread_map);
CREATE_DELEGATE_OP2(time_conv);
CREATE_DELEGATE_OP2(tracing_data);
/*
 * Same shape as CREATE_DELEGATE_OP2 but for callbacks returning s64:
 * delegate_<name>() forwards (session, event) to the ->name callback of the
 * wrapped ->delegate and returns its result.
 */
#define CREATE_DELEGATE_OP3(name) \
	static s64 delegate_ ## name(const struct perf_tool *tool, \
				     struct perf_session *session, \
				     union perf_event *event) \
	{ \
		struct delegate_tool *wrapper = container_of(tool, struct delegate_tool, tool); \
		struct perf_tool *target = wrapper->delegate; \
		\
		return target->name(target, session, event); \
	}
CREATE_DELEGATE_OP3(auxtrace);
/*
 * Define delegate_<name>(): a (tool, session, event, u64, string) callback
 * that recovers the enclosing struct delegate_tool from @tool and forwards
 * all arguments to the ->name callback of its ->delegate.
 */
#define CREATE_DELEGATE_OP4(name) \
	static int delegate_ ## name(const struct perf_tool *tool, \
				     struct perf_session *session, \
				     union perf_event *event, \
				     u64 data, \
				     const char *str) \
	{ \
		struct delegate_tool *wrapper = container_of(tool, struct delegate_tool, tool); \
		struct perf_tool *target = wrapper->delegate; \
		\
		return target->name(target, session, event, data, str); \
	}
CREATE_DELEGATE_OP4(compressed);
void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate)
{
tool->delegate = delegate;
tool->tool.ordered_events = delegate->ordered_events;
tool->tool.ordering_requires_timestamps = delegate->ordering_requires_timestamps;
tool->tool.namespace_events = delegate->namespace_events;
tool->tool.cgroup_events = delegate->cgroup_events;
tool->tool.no_warn = delegate->no_warn;
tool->tool.show_feat_hdr = delegate->show_feat_hdr;
tool->tool.merge_deferred_callchains = delegate->merge_deferred_callchains;
tool->tool.sample = delegate_sample;
tool->tool.read = delegate_read;
tool->tool.mmap = delegate_mmap;
tool->tool.mmap2 = delegate_mmap2;
tool->tool.comm = delegate_comm;
tool->tool.namespaces = delegate_namespaces;
tool->tool.cgroup = delegate_cgroup;
tool->tool.fork = delegate_fork;
tool->tool.exit = delegate_exit;
tool->tool.lost = delegate_lost;
tool->tool.lost_samples = delegate_lost_samples;
tool->tool.aux = delegate_aux;
tool->tool.itrace_start = delegate_itrace_start;
tool->tool.aux_output_hw_id = delegate_aux_output_hw_id;
tool->tool.context_switch = delegate_context_switch;
tool->tool.throttle = delegate_throttle;
tool->tool.unthrottle = delegate_unthrottle;
tool->tool.ksymbol = delegate_ksymbol;
tool->tool.bpf = delegate_bpf;
tool->tool.text_poke = delegate_text_poke;
tool->tool.callchain_deferred = delegate_callchain_deferred;
tool->tool.attr = delegate_attr;
tool->tool.event_update = delegate_event_update;
tool->tool.tracing_data = delegate_tracing_data;
tool->tool.finished_round = delegate_finished_round;
tool->tool.build_id = delegate_build_id;
tool->tool.id_index = delegate_id_index;
tool->tool.auxtrace_info = delegate_auxtrace_info;
tool->tool.auxtrace_error = delegate_auxtrace_error;
tool->tool.time_conv = delegate_time_conv;
tool->tool.thread_map = delegate_thread_map;
tool->tool.cpu_map = delegate_cpu_map;
tool->tool.stat_config = delegate_stat_config;
tool->tool.stat = delegate_stat;
tool->tool.stat_round = delegate_stat_round;
tool->tool.feature = delegate_feature;
tool->tool.finished_init = delegate_finished_init;
tool->tool.bpf_metadata = delegate_bpf_metadata;
tool->tool.compressed = delegate_compressed;
tool->tool.auxtrace = delegate_auxtrace;
}